From 5f8de423f190bbb79a62f804151bc24824fa32d8 Mon Sep 17 00:00:00 2001
From: "Matt A. Tobin" <mattatobin@localhost.localdomain>
Date: Fri, 2 Feb 2018 04:16:08 -0500
Subject: Add m-esr52 at 52.6.0

---
 security/nss/lib/freebl/Makefile                   |   764 +
 security/nss/lib/freebl/aeskeywrap.c               |   389 +
 security/nss/lib/freebl/alg2268.c                  |   509 +
 security/nss/lib/freebl/alghmac.c                  |   165 +
 security/nss/lib/freebl/alghmac.h                  |    64 +
 security/nss/lib/freebl/arcfive.c                  |    87 +
 security/nss/lib/freebl/arcfour-amd64-gas.s        |    88 +
 security/nss/lib/freebl/arcfour-amd64-masm.asm     |   107 +
 security/nss/lib/freebl/arcfour-amd64-sun.s        |    84 +
 security/nss/lib/freebl/arcfour.c                  |   594 +
 security/nss/lib/freebl/blapi.h                    |  1625 ++
 security/nss/lib/freebl/blapii.h                   |    61 +
 security/nss/lib/freebl/blapit.h                   |   414 +
 security/nss/lib/freebl/blname.c                   |   100 +
 security/nss/lib/freebl/camellia.c                 |  1896 +++
 security/nss/lib/freebl/camellia.h                 |    42 +
 security/nss/lib/freebl/chacha20.c                 |   119 +
 security/nss/lib/freebl/chacha20.h                 |    26 +
 security/nss/lib/freebl/chacha20_vec.c             |   327 +
 security/nss/lib/freebl/chacha20poly1305.c         |   198 +
 security/nss/lib/freebl/chacha20poly1305.h         |    15 +
 security/nss/lib/freebl/config.mk                  |    97 +
 security/nss/lib/freebl/ctr.c                      |   246 +
 security/nss/lib/freebl/ctr.h                      |    53 +
 security/nss/lib/freebl/cts.c                      |   307 +
 security/nss/lib/freebl/cts.h                      |    33 +
 security/nss/lib/freebl/des.c                      |   676 +
 security/nss/lib/freebl/des.h                      |    43 +
 security/nss/lib/freebl/desblapi.c                 |   256 +
 security/nss/lib/freebl/det_rng.c                  |    67 +
 security/nss/lib/freebl/det_rng.h                  |    12 +
 security/nss/lib/freebl/dh.c                       |   452 +
 security/nss/lib/freebl/drbg.c                     |   968 ++
 security/nss/lib/freebl/dsa.c                      |   647 +
 security/nss/lib/freebl/ec.c                       |  1159 ++
 security/nss/lib/freebl/ec.h                       |    21 +
 security/nss/lib/freebl/ecdecode.c                 |   311 +
 security/nss/lib/freebl/ecl/README                 |   267 +
 security/nss/lib/freebl/ecl/curve25519_32.c        |   390 +
 security/nss/lib/freebl/ecl/curve25519_64.c        |   514 +
 security/nss/lib/freebl/ecl/ec_naf.c               |    68 +
 security/nss/lib/freebl/ecl/ecl-curve.h            |   123 +
 security/nss/lib/freebl/ecl/ecl-exp.h              |   167 +
 security/nss/lib/freebl/ecl/ecl-priv.h             |   257 +
 security/nss/lib/freebl/ecl/ecl.c                  |   301 +
 security/nss/lib/freebl/ecl/ecl.h                  |    60 +
 security/nss/lib/freebl/ecl/ecl_curve.c            |    93 +
 security/nss/lib/freebl/ecl/ecl_gf.c               |   958 ++
 security/nss/lib/freebl/ecl/ecl_mult.c             |   305 +
 security/nss/lib/freebl/ecl/ecp.h                  |   106 +
 security/nss/lib/freebl/ecl/ecp_25519.c            |   120 +
 security/nss/lib/freebl/ecl/ecp_256.c              |   401 +
 security/nss/lib/freebl/ecl/ecp_256_32.c           |  1535 ++
 security/nss/lib/freebl/ecl/ecp_384.c              |   258 +
 security/nss/lib/freebl/ecl/ecp_521.c              |   137 +
 security/nss/lib/freebl/ecl/ecp_aff.c              |   308 +
 security/nss/lib/freebl/ecl/ecp_jac.c              |   513 +
 security/nss/lib/freebl/ecl/ecp_jm.c               |   283 +
 security/nss/lib/freebl/ecl/ecp_mont.c             |   154 +
 security/nss/lib/freebl/ecl/tests/ec_naft.c        |   121 +
 security/nss/lib/freebl/ecl/tests/ecp_test.c       |   409 +
 security/nss/lib/freebl/ecl/uint128.c              |    87 +
 security/nss/lib/freebl/ecl/uint128.h              |    35 +
 security/nss/lib/freebl/exports.gyp                |    48 +
 security/nss/lib/freebl/fipsfreebl.c               |  1715 ++
 security/nss/lib/freebl/freebl.def                 |    26 +
 security/nss/lib/freebl/freebl.gyp                 |   408 +
 security/nss/lib/freebl/freebl.rc                  |    68 +
 security/nss/lib/freebl/freebl_hash.def            |    39 +
 security/nss/lib/freebl/freebl_hash_vector.def     |    34 +
 security/nss/lib/freebl/freeblver.c                |    18 +
 security/nss/lib/freebl/gcm.c                      |   860 +
 security/nss/lib/freebl/gcm.h                      |    31 +
 security/nss/lib/freebl/genload.c                  |   167 +
 security/nss/lib/freebl/hmacct.c                   |   335 +
 security/nss/lib/freebl/hmacct.h                   |    38 +
 security/nss/lib/freebl/intel-aes-x64-masm.asm     |   971 ++
 security/nss/lib/freebl/intel-aes-x86-masm.asm     |   949 ++
 security/nss/lib/freebl/intel-aes.h                |   143 +
 security/nss/lib/freebl/intel-aes.s                |  2514 +++
 security/nss/lib/freebl/intel-gcm-wrap.c           |   254 +
 security/nss/lib/freebl/intel-gcm-x64-masm.asm     |  1295 ++
 security/nss/lib/freebl/intel-gcm-x86-masm.asm     |  1209 ++
 security/nss/lib/freebl/intel-gcm.h                |    83 +
 security/nss/lib/freebl/intel-gcm.s                |  1340 ++
 security/nss/lib/freebl/jpake.c                    |   495 +
 security/nss/lib/freebl/ldvector.c                 |   353 +
 security/nss/lib/freebl/loader.c                   |  2126 +++
 security/nss/lib/freebl/loader.h                   |   788 +
 security/nss/lib/freebl/lowhash_vector.c           |   217 +
 security/nss/lib/freebl/manifest.mn                |   195 +
 security/nss/lib/freebl/md2.c                      |   269 +
 security/nss/lib/freebl/md5.c                      |   598 +
 security/nss/lib/freebl/mknewpc2.c                 |   208 +
 security/nss/lib/freebl/mksp.c                     |   119 +
 security/nss/lib/freebl/mpi/Makefile               |   244 +
 security/nss/lib/freebl/mpi/Makefile.os2           |   243 +
 security/nss/lib/freebl/mpi/Makefile.win           |   254 +
 security/nss/lib/freebl/mpi/README                 |   749 +
 security/nss/lib/freebl/mpi/all-tests              |    83 +
 security/nss/lib/freebl/mpi/doc/LICENSE            |    11 +
 security/nss/lib/freebl/mpi/doc/LICENSE-MPL        |     3 +
 security/nss/lib/freebl/mpi/doc/basecvt.pod        |    65 +
 security/nss/lib/freebl/mpi/doc/build              |    30 +
 security/nss/lib/freebl/mpi/doc/div.txt            |    64 +
 security/nss/lib/freebl/mpi/doc/expt.txt           |    94 +
 security/nss/lib/freebl/mpi/doc/gcd.pod            |    28 +
 security/nss/lib/freebl/mpi/doc/invmod.pod         |    34 +
 security/nss/lib/freebl/mpi/doc/isprime.pod        |    63 +
 security/nss/lib/freebl/mpi/doc/lap.pod            |    36 +
 security/nss/lib/freebl/mpi/doc/mpi-test.pod       |    51 +
 security/nss/lib/freebl/mpi/doc/mul.txt            |    77 +
 security/nss/lib/freebl/mpi/doc/pi.txt             |    53 +
 security/nss/lib/freebl/mpi/doc/prime.txt          |  6542 ++++++++
 security/nss/lib/freebl/mpi/doc/prng.pod           |    38 +
 security/nss/lib/freebl/mpi/doc/redux.txt          |    86 +
 security/nss/lib/freebl/mpi/doc/sqrt.txt           |    50 +
 security/nss/lib/freebl/mpi/doc/square.txt         |    72 +
 security/nss/lib/freebl/mpi/doc/timing.txt         |   213 +
 security/nss/lib/freebl/mpi/hpma512.s              |   615 +
 security/nss/lib/freebl/mpi/hppa20.s               |   904 ++
 security/nss/lib/freebl/mpi/hppatch.adb            |    21 +
 security/nss/lib/freebl/mpi/logtab.h               |    28 +
 security/nss/lib/freebl/mpi/make-logtab            |    29 +
 security/nss/lib/freebl/mpi/make-test-arrays       |    98 +
 security/nss/lib/freebl/mpi/mdxptest.c             |   306 +
 security/nss/lib/freebl/mpi/montmulf.c             |   286 +
 security/nss/lib/freebl/mpi/montmulf.h             |    65 +
 security/nss/lib/freebl/mpi/montmulf.il            |   108 +
 security/nss/lib/freebl/mpi/montmulf.s             |  1938 +++
 security/nss/lib/freebl/mpi/montmulfv8.il          |   108 +
 security/nss/lib/freebl/mpi/montmulfv8.s           |  1818 +++
 security/nss/lib/freebl/mpi/montmulfv9.il          |    93 +
 security/nss/lib/freebl/mpi/montmulfv9.s           |  2346 +++
 security/nss/lib/freebl/mpi/mp_comba.c             |  3235 ++++
 .../nss/lib/freebl/mpi/mp_comba_amd64_masm.asm     | 13066 +++++++++++++++
 security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s   | 16097 +++++++++++++++++++
 security/nss/lib/freebl/mpi/mp_gf2m-priv.h         |    73 +
 security/nss/lib/freebl/mpi/mp_gf2m.c              |   678 +
 security/nss/lib/freebl/mpi/mp_gf2m.h              |    28 +
 security/nss/lib/freebl/mpi/mpcpucache.c           |   808 +
 security/nss/lib/freebl/mpi/mpcpucache_amd64.s     |   861 +
 security/nss/lib/freebl/mpi/mpcpucache_x86.s       |   902 ++
 security/nss/lib/freebl/mpi/mpi-config.h           |    68 +
 security/nss/lib/freebl/mpi/mpi-priv.h             |   243 +
 security/nss/lib/freebl/mpi/mpi.c                  |  4839 ++++++
 security/nss/lib/freebl/mpi/mpi.h                  |   313 +
 security/nss/lib/freebl/mpi/mpi_amd64.c            |    32 +
 security/nss/lib/freebl/mpi/mpi_amd64_gas.s        |   389 +
 security/nss/lib/freebl/mpi/mpi_amd64_masm.asm     |   388 +
 security/nss/lib/freebl/mpi/mpi_amd64_sun.s        |   385 +
 security/nss/lib/freebl/mpi/mpi_arm.c              |   175 +
 security/nss/lib/freebl/mpi/mpi_hp.c               |    81 +
 security/nss/lib/freebl/mpi/mpi_i86pc.s            |   313 +
 security/nss/lib/freebl/mpi/mpi_mips.s             |   472 +
 security/nss/lib/freebl/mpi/mpi_sparc.c            |   226 +
 security/nss/lib/freebl/mpi/mpi_sse2.s             |   294 +
 security/nss/lib/freebl/mpi/mpi_x86.s              |   541 +
 security/nss/lib/freebl/mpi/mpi_x86_asm.c          |   531 +
 security/nss/lib/freebl/mpi/mpi_x86_os2.s          |   538 +
 security/nss/lib/freebl/mpi/mplogic.c              |   443 +
 security/nss/lib/freebl/mpi/mplogic.h              |    52 +
 security/nss/lib/freebl/mpi/mpmontg.c              |  1141 ++
 security/nss/lib/freebl/mpi/mpprime.c              |   599 +
 security/nss/lib/freebl/mpi/mpprime.h              |    38 +
 security/nss/lib/freebl/mpi/mpv_sparc.c            |   221 +
 security/nss/lib/freebl/mpi/mpv_sparcv8.s          |  1607 ++
 security/nss/lib/freebl/mpi/mpv_sparcv9.s          |  1645 ++
 security/nss/lib/freebl/mpi/mpvalpha.c             |   183 +
 security/nss/lib/freebl/mpi/mulsqr.c               |    84 +
 security/nss/lib/freebl/mpi/multest                |    76 +
 security/nss/lib/freebl/mpi/primes.c               |   841 +
 security/nss/lib/freebl/mpi/stats                  |    39 +
 security/nss/lib/freebl/mpi/target.mk              |   233 +
 security/nss/lib/freebl/mpi/test-arrays.txt        |    55 +
 security/nss/lib/freebl/mpi/tests/LICENSE          |     6 +
 security/nss/lib/freebl/mpi/tests/LICENSE-MPL      |     3 +
 security/nss/lib/freebl/mpi/tests/mptest-1.c       |    43 +
 security/nss/lib/freebl/mpi/tests/mptest-2.c       |    62 +
 security/nss/lib/freebl/mpi/tests/mptest-3.c       |   105 +
 security/nss/lib/freebl/mpi/tests/mptest-3a.c      |   123 +
 security/nss/lib/freebl/mpi/tests/mptest-4.c       |   111 +
 security/nss/lib/freebl/mpi/tests/mptest-4a.c      |   109 +
 security/nss/lib/freebl/mpi/tests/mptest-4b.c      |   107 +
 security/nss/lib/freebl/mpi/tests/mptest-5.c       |    85 +
 security/nss/lib/freebl/mpi/tests/mptest-5a.c      |   147 +
 security/nss/lib/freebl/mpi/tests/mptest-6.c       |    78 +
 security/nss/lib/freebl/mpi/tests/mptest-7.c       |    85 +
 security/nss/lib/freebl/mpi/tests/mptest-8.c       |    68 +
 security/nss/lib/freebl/mpi/tests/mptest-9.c       |   109 +
 security/nss/lib/freebl/mpi/tests/mptest-b.c       |   230 +
 security/nss/lib/freebl/mpi/tests/pi1k.txt         |     1 +
 security/nss/lib/freebl/mpi/tests/pi2k.txt         |     1 +
 security/nss/lib/freebl/mpi/tests/pi5k.txt         |     1 +
 security/nss/lib/freebl/mpi/timetest               |    99 +
 security/nss/lib/freebl/mpi/types.pl               |   127 +
 security/nss/lib/freebl/mpi/utils/LICENSE          |     4 +
 security/nss/lib/freebl/mpi/utils/LICENSE-MPL      |     3 +
 security/nss/lib/freebl/mpi/utils/PRIMES           |    41 +
 security/nss/lib/freebl/mpi/utils/README           |   206 +
 security/nss/lib/freebl/mpi/utils/basecvt.c        |    68 +
 security/nss/lib/freebl/mpi/utils/bbs_rand.c       |    65 +
 security/nss/lib/freebl/mpi/utils/bbs_rand.h       |    24 +
 security/nss/lib/freebl/mpi/utils/bbsrand.c        |    35 +
 security/nss/lib/freebl/mpi/utils/dec2hex.c        |    40 +
 security/nss/lib/freebl/mpi/utils/exptmod.c        |    55 +
 security/nss/lib/freebl/mpi/utils/fact.c           |    84 +
 security/nss/lib/freebl/mpi/utils/gcd.c            |    95 +
 security/nss/lib/freebl/mpi/utils/hex2dec.c        |    40 +
 security/nss/lib/freebl/mpi/utils/identest.c       |    84 +
 security/nss/lib/freebl/mpi/utils/invmod.c         |    61 +
 security/nss/lib/freebl/mpi/utils/isprime.c        |    89 +
 security/nss/lib/freebl/mpi/utils/lap.c            |    90 +
 security/nss/lib/freebl/mpi/utils/makeprime.c      |   116 +
 security/nss/lib/freebl/mpi/utils/metime.c         |   102 +
 security/nss/lib/freebl/mpi/utils/pi.c             |   171 +
 security/nss/lib/freebl/mpi/utils/primegen.c       |   159 +
 security/nss/lib/freebl/mpi/utils/prng.c           |    57 +
 security/nss/lib/freebl/mpi/utils/ptab.pl          |    26 +
 security/nss/lib/freebl/mpi/utils/sieve.c          |   243 +
 security/nss/lib/freebl/mpi/vis_32.il              |  1291 ++
 security/nss/lib/freebl/mpi/vis_64.il              |   997 ++
 security/nss/lib/freebl/mpi/vis_proto.h            |   234 +
 security/nss/lib/freebl/nsslowhash.c               |   150 +
 security/nss/lib/freebl/nsslowhash.h               |    33 +
 security/nss/lib/freebl/os2_rand.c                 |   334 +
 .../poly1305-donna-x64-sse2-incremental-source.c   |   881 +
 security/nss/lib/freebl/poly1305.c                 |   314 +
 security/nss/lib/freebl/poly1305.h                 |    28 +
 security/nss/lib/freebl/pqg.c                      |  1878 +++
 security/nss/lib/freebl/pqg.h                      |    25 +
 security/nss/lib/freebl/rawhash.c                  |   154 +
 security/nss/lib/freebl/ret_cr16.s                 |    27 +
 security/nss/lib/freebl/rijndael.c                 |  1375 ++
 security/nss/lib/freebl/rijndael.h                 |    67 +
 security/nss/lib/freebl/rijndael32.tab             |  1219 ++
 security/nss/lib/freebl/rijndael_tables.c          |   215 +
 security/nss/lib/freebl/rsa.c                      |  1625 ++
 security/nss/lib/freebl/rsapkcs.c                  |  1385 ++
 security/nss/lib/freebl/secmpi.h                   |    54 +
 security/nss/lib/freebl/secrng.h                   |    65 +
 security/nss/lib/freebl/seed.c                     |   641 +
 security/nss/lib/freebl/seed.h                     |   125 +
 security/nss/lib/freebl/sha-fast-amd64-sun.s       |  2151 +++
 security/nss/lib/freebl/sha256.h                   |    19 +
 security/nss/lib/freebl/sha512.c                   |  1655 ++
 security/nss/lib/freebl/sha_fast.c                 |   545 +
 security/nss/lib/freebl/sha_fast.h                 |   176 +
 security/nss/lib/freebl/shsign.h                   |    14 +
 security/nss/lib/freebl/shvfy.c                    |   534 +
 security/nss/lib/freebl/stubs.c                    |   711 +
 security/nss/lib/freebl/stubs.h                    |    66 +
 security/nss/lib/freebl/sysrand.c                  |    49 +
 security/nss/lib/freebl/tlsprfalg.c                |   134 +
 security/nss/lib/freebl/unix_rand.c                |  1176 ++
 security/nss/lib/freebl/win_rand.c                 |   161 +
 256 files changed, 133850 insertions(+)
 create mode 100644 security/nss/lib/freebl/Makefile
 create mode 100644 security/nss/lib/freebl/aeskeywrap.c
 create mode 100644 security/nss/lib/freebl/alg2268.c
 create mode 100644 security/nss/lib/freebl/alghmac.c
 create mode 100644 security/nss/lib/freebl/alghmac.h
 create mode 100644 security/nss/lib/freebl/arcfive.c
 create mode 100644 security/nss/lib/freebl/arcfour-amd64-gas.s
 create mode 100644 security/nss/lib/freebl/arcfour-amd64-masm.asm
 create mode 100644 security/nss/lib/freebl/arcfour-amd64-sun.s
 create mode 100644 security/nss/lib/freebl/arcfour.c
 create mode 100644 security/nss/lib/freebl/blapi.h
 create mode 100644 security/nss/lib/freebl/blapii.h
 create mode 100644 security/nss/lib/freebl/blapit.h
 create mode 100644 security/nss/lib/freebl/blname.c
 create mode 100644 security/nss/lib/freebl/camellia.c
 create mode 100644 security/nss/lib/freebl/camellia.h
 create mode 100644 security/nss/lib/freebl/chacha20.c
 create mode 100644 security/nss/lib/freebl/chacha20.h
 create mode 100644 security/nss/lib/freebl/chacha20_vec.c
 create mode 100644 security/nss/lib/freebl/chacha20poly1305.c
 create mode 100644 security/nss/lib/freebl/chacha20poly1305.h
 create mode 100644 security/nss/lib/freebl/config.mk
 create mode 100644 security/nss/lib/freebl/ctr.c
 create mode 100644 security/nss/lib/freebl/ctr.h
 create mode 100644 security/nss/lib/freebl/cts.c
 create mode 100644 security/nss/lib/freebl/cts.h
 create mode 100644 security/nss/lib/freebl/des.c
 create mode 100644 security/nss/lib/freebl/des.h
 create mode 100644 security/nss/lib/freebl/desblapi.c
 create mode 100644 security/nss/lib/freebl/det_rng.c
 create mode 100644 security/nss/lib/freebl/det_rng.h
 create mode 100644 security/nss/lib/freebl/dh.c
 create mode 100644 security/nss/lib/freebl/drbg.c
 create mode 100644 security/nss/lib/freebl/dsa.c
 create mode 100644 security/nss/lib/freebl/ec.c
 create mode 100644 security/nss/lib/freebl/ec.h
 create mode 100644 security/nss/lib/freebl/ecdecode.c
 create mode 100644 security/nss/lib/freebl/ecl/README
 create mode 100644 security/nss/lib/freebl/ecl/curve25519_32.c
 create mode 100644 security/nss/lib/freebl/ecl/curve25519_64.c
 create mode 100644 security/nss/lib/freebl/ecl/ec_naf.c
 create mode 100644 security/nss/lib/freebl/ecl/ecl-curve.h
 create mode 100644 security/nss/lib/freebl/ecl/ecl-exp.h
 create mode 100644 security/nss/lib/freebl/ecl/ecl-priv.h
 create mode 100644 security/nss/lib/freebl/ecl/ecl.c
 create mode 100644 security/nss/lib/freebl/ecl/ecl.h
 create mode 100644 security/nss/lib/freebl/ecl/ecl_curve.c
 create mode 100644 security/nss/lib/freebl/ecl/ecl_gf.c
 create mode 100644 security/nss/lib/freebl/ecl/ecl_mult.c
 create mode 100644 security/nss/lib/freebl/ecl/ecp.h
 create mode 100644 security/nss/lib/freebl/ecl/ecp_25519.c
 create mode 100644 security/nss/lib/freebl/ecl/ecp_256.c
 create mode 100644 security/nss/lib/freebl/ecl/ecp_256_32.c
 create mode 100644 security/nss/lib/freebl/ecl/ecp_384.c
 create mode 100644 security/nss/lib/freebl/ecl/ecp_521.c
 create mode 100644 security/nss/lib/freebl/ecl/ecp_aff.c
 create mode 100644 security/nss/lib/freebl/ecl/ecp_jac.c
 create mode 100644 security/nss/lib/freebl/ecl/ecp_jm.c
 create mode 100644 security/nss/lib/freebl/ecl/ecp_mont.c
 create mode 100644 security/nss/lib/freebl/ecl/tests/ec_naft.c
 create mode 100644 security/nss/lib/freebl/ecl/tests/ecp_test.c
 create mode 100644 security/nss/lib/freebl/ecl/uint128.c
 create mode 100644 security/nss/lib/freebl/ecl/uint128.h
 create mode 100644 security/nss/lib/freebl/exports.gyp
 create mode 100644 security/nss/lib/freebl/fipsfreebl.c
 create mode 100644 security/nss/lib/freebl/freebl.def
 create mode 100644 security/nss/lib/freebl/freebl.gyp
 create mode 100644 security/nss/lib/freebl/freebl.rc
 create mode 100644 security/nss/lib/freebl/freebl_hash.def
 create mode 100644 security/nss/lib/freebl/freebl_hash_vector.def
 create mode 100644 security/nss/lib/freebl/freeblver.c
 create mode 100644 security/nss/lib/freebl/gcm.c
 create mode 100644 security/nss/lib/freebl/gcm.h
 create mode 100644 security/nss/lib/freebl/genload.c
 create mode 100644 security/nss/lib/freebl/hmacct.c
 create mode 100644 security/nss/lib/freebl/hmacct.h
 create mode 100644 security/nss/lib/freebl/intel-aes-x64-masm.asm
 create mode 100644 security/nss/lib/freebl/intel-aes-x86-masm.asm
 create mode 100644 security/nss/lib/freebl/intel-aes.h
 create mode 100644 security/nss/lib/freebl/intel-aes.s
 create mode 100644 security/nss/lib/freebl/intel-gcm-wrap.c
 create mode 100644 security/nss/lib/freebl/intel-gcm-x64-masm.asm
 create mode 100644 security/nss/lib/freebl/intel-gcm-x86-masm.asm
 create mode 100644 security/nss/lib/freebl/intel-gcm.h
 create mode 100644 security/nss/lib/freebl/intel-gcm.s
 create mode 100644 security/nss/lib/freebl/jpake.c
 create mode 100644 security/nss/lib/freebl/ldvector.c
 create mode 100644 security/nss/lib/freebl/loader.c
 create mode 100644 security/nss/lib/freebl/loader.h
 create mode 100644 security/nss/lib/freebl/lowhash_vector.c
 create mode 100644 security/nss/lib/freebl/manifest.mn
 create mode 100644 security/nss/lib/freebl/md2.c
 create mode 100644 security/nss/lib/freebl/md5.c
 create mode 100644 security/nss/lib/freebl/mknewpc2.c
 create mode 100644 security/nss/lib/freebl/mksp.c
 create mode 100644 security/nss/lib/freebl/mpi/Makefile
 create mode 100644 security/nss/lib/freebl/mpi/Makefile.os2
 create mode 100644 security/nss/lib/freebl/mpi/Makefile.win
 create mode 100644 security/nss/lib/freebl/mpi/README
 create mode 100755 security/nss/lib/freebl/mpi/all-tests
 create mode 100644 security/nss/lib/freebl/mpi/doc/LICENSE
 create mode 100644 security/nss/lib/freebl/mpi/doc/LICENSE-MPL
 create mode 100644 security/nss/lib/freebl/mpi/doc/basecvt.pod
 create mode 100755 security/nss/lib/freebl/mpi/doc/build
 create mode 100644 security/nss/lib/freebl/mpi/doc/div.txt
 create mode 100644 security/nss/lib/freebl/mpi/doc/expt.txt
 create mode 100644 security/nss/lib/freebl/mpi/doc/gcd.pod
 create mode 100644 security/nss/lib/freebl/mpi/doc/invmod.pod
 create mode 100644 security/nss/lib/freebl/mpi/doc/isprime.pod
 create mode 100644 security/nss/lib/freebl/mpi/doc/lap.pod
 create mode 100644 security/nss/lib/freebl/mpi/doc/mpi-test.pod
 create mode 100644 security/nss/lib/freebl/mpi/doc/mul.txt
 create mode 100644 security/nss/lib/freebl/mpi/doc/pi.txt
 create mode 100644 security/nss/lib/freebl/mpi/doc/prime.txt
 create mode 100644 security/nss/lib/freebl/mpi/doc/prng.pod
 create mode 100644 security/nss/lib/freebl/mpi/doc/redux.txt
 create mode 100644 security/nss/lib/freebl/mpi/doc/sqrt.txt
 create mode 100644 security/nss/lib/freebl/mpi/doc/square.txt
 create mode 100644 security/nss/lib/freebl/mpi/doc/timing.txt
 create mode 100644 security/nss/lib/freebl/mpi/hpma512.s
 create mode 100644 security/nss/lib/freebl/mpi/hppa20.s
 create mode 100644 security/nss/lib/freebl/mpi/hppatch.adb
 create mode 100644 security/nss/lib/freebl/mpi/logtab.h
 create mode 100755 security/nss/lib/freebl/mpi/make-logtab
 create mode 100755 security/nss/lib/freebl/mpi/make-test-arrays
 create mode 100644 security/nss/lib/freebl/mpi/mdxptest.c
 create mode 100644 security/nss/lib/freebl/mpi/montmulf.c
 create mode 100644 security/nss/lib/freebl/mpi/montmulf.h
 create mode 100644 security/nss/lib/freebl/mpi/montmulf.il
 create mode 100644 security/nss/lib/freebl/mpi/montmulf.s
 create mode 100644 security/nss/lib/freebl/mpi/montmulfv8.il
 create mode 100644 security/nss/lib/freebl/mpi/montmulfv8.s
 create mode 100644 security/nss/lib/freebl/mpi/montmulfv9.il
 create mode 100644 security/nss/lib/freebl/mpi/montmulfv9.s
 create mode 100644 security/nss/lib/freebl/mpi/mp_comba.c
 create mode 100644 security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm
 create mode 100644 security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s
 create mode 100644 security/nss/lib/freebl/mpi/mp_gf2m-priv.h
 create mode 100644 security/nss/lib/freebl/mpi/mp_gf2m.c
 create mode 100644 security/nss/lib/freebl/mpi/mp_gf2m.h
 create mode 100644 security/nss/lib/freebl/mpi/mpcpucache.c
 create mode 100644 security/nss/lib/freebl/mpi/mpcpucache_amd64.s
 create mode 100644 security/nss/lib/freebl/mpi/mpcpucache_x86.s
 create mode 100644 security/nss/lib/freebl/mpi/mpi-config.h
 create mode 100644 security/nss/lib/freebl/mpi/mpi-priv.h
 create mode 100644 security/nss/lib/freebl/mpi/mpi.c
 create mode 100644 security/nss/lib/freebl/mpi/mpi.h
 create mode 100644 security/nss/lib/freebl/mpi/mpi_amd64.c
 create mode 100644 security/nss/lib/freebl/mpi/mpi_amd64_gas.s
 create mode 100644 security/nss/lib/freebl/mpi/mpi_amd64_masm.asm
 create mode 100644 security/nss/lib/freebl/mpi/mpi_amd64_sun.s
 create mode 100644 security/nss/lib/freebl/mpi/mpi_arm.c
 create mode 100644 security/nss/lib/freebl/mpi/mpi_hp.c
 create mode 100644 security/nss/lib/freebl/mpi/mpi_i86pc.s
 create mode 100644 security/nss/lib/freebl/mpi/mpi_mips.s
 create mode 100644 security/nss/lib/freebl/mpi/mpi_sparc.c
 create mode 100644 security/nss/lib/freebl/mpi/mpi_sse2.s
 create mode 100644 security/nss/lib/freebl/mpi/mpi_x86.s
 create mode 100644 security/nss/lib/freebl/mpi/mpi_x86_asm.c
 create mode 100644 security/nss/lib/freebl/mpi/mpi_x86_os2.s
 create mode 100644 security/nss/lib/freebl/mpi/mplogic.c
 create mode 100644 security/nss/lib/freebl/mpi/mplogic.h
 create mode 100644 security/nss/lib/freebl/mpi/mpmontg.c
 create mode 100644 security/nss/lib/freebl/mpi/mpprime.c
 create mode 100644 security/nss/lib/freebl/mpi/mpprime.h
 create mode 100644 security/nss/lib/freebl/mpi/mpv_sparc.c
 create mode 100644 security/nss/lib/freebl/mpi/mpv_sparcv8.s
 create mode 100644 security/nss/lib/freebl/mpi/mpv_sparcv9.s
 create mode 100644 security/nss/lib/freebl/mpi/mpvalpha.c
 create mode 100644 security/nss/lib/freebl/mpi/mulsqr.c
 create mode 100755 security/nss/lib/freebl/mpi/multest
 create mode 100644 security/nss/lib/freebl/mpi/primes.c
 create mode 100755 security/nss/lib/freebl/mpi/stats
 create mode 100644 security/nss/lib/freebl/mpi/target.mk
 create mode 100644 security/nss/lib/freebl/mpi/test-arrays.txt
 create mode 100644 security/nss/lib/freebl/mpi/tests/LICENSE
 create mode 100644 security/nss/lib/freebl/mpi/tests/LICENSE-MPL
 create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-1.c
 create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-2.c
 create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-3.c
 create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-3a.c
 create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-4.c
 create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-4a.c
 create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-4b.c
 create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-5.c
 create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-5a.c
 create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-6.c
 create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-7.c
 create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-8.c
 create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-9.c
 create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-b.c
 create mode 100644 security/nss/lib/freebl/mpi/tests/pi1k.txt
 create mode 100644 security/nss/lib/freebl/mpi/tests/pi2k.txt
 create mode 100644 security/nss/lib/freebl/mpi/tests/pi5k.txt
 create mode 100755 security/nss/lib/freebl/mpi/timetest
 create mode 100755 security/nss/lib/freebl/mpi/types.pl
 create mode 100644 security/nss/lib/freebl/mpi/utils/LICENSE
 create mode 100644 security/nss/lib/freebl/mpi/utils/LICENSE-MPL
 create mode 100644 security/nss/lib/freebl/mpi/utils/PRIMES
 create mode 100644 security/nss/lib/freebl/mpi/utils/README
 create mode 100644 security/nss/lib/freebl/mpi/utils/basecvt.c
 create mode 100644 security/nss/lib/freebl/mpi/utils/bbs_rand.c
 create mode 100644 security/nss/lib/freebl/mpi/utils/bbs_rand.h
 create mode 100644 security/nss/lib/freebl/mpi/utils/bbsrand.c
 create mode 100644 security/nss/lib/freebl/mpi/utils/dec2hex.c
 create mode 100644 security/nss/lib/freebl/mpi/utils/exptmod.c
 create mode 100644 security/nss/lib/freebl/mpi/utils/fact.c
 create mode 100644 security/nss/lib/freebl/mpi/utils/gcd.c
 create mode 100644 security/nss/lib/freebl/mpi/utils/hex2dec.c
 create mode 100644 security/nss/lib/freebl/mpi/utils/identest.c
 create mode 100644 security/nss/lib/freebl/mpi/utils/invmod.c
 create mode 100644 security/nss/lib/freebl/mpi/utils/isprime.c
 create mode 100644 security/nss/lib/freebl/mpi/utils/lap.c
 create mode 100644 security/nss/lib/freebl/mpi/utils/makeprime.c
 create mode 100644 security/nss/lib/freebl/mpi/utils/metime.c
 create mode 100644 security/nss/lib/freebl/mpi/utils/pi.c
 create mode 100644 security/nss/lib/freebl/mpi/utils/primegen.c
 create mode 100644 security/nss/lib/freebl/mpi/utils/prng.c
 create mode 100755 security/nss/lib/freebl/mpi/utils/ptab.pl
 create mode 100644 security/nss/lib/freebl/mpi/utils/sieve.c
 create mode 100644 security/nss/lib/freebl/mpi/vis_32.il
 create mode 100644 security/nss/lib/freebl/mpi/vis_64.il
 create mode 100644 security/nss/lib/freebl/mpi/vis_proto.h
 create mode 100644 security/nss/lib/freebl/nsslowhash.c
 create mode 100644 security/nss/lib/freebl/nsslowhash.h
 create mode 100644 security/nss/lib/freebl/os2_rand.c
 create mode 100644 security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c
 create mode 100644 security/nss/lib/freebl/poly1305.c
 create mode 100644 security/nss/lib/freebl/poly1305.h
 create mode 100644 security/nss/lib/freebl/pqg.c
 create mode 100644 security/nss/lib/freebl/pqg.h
 create mode 100644 security/nss/lib/freebl/rawhash.c
 create mode 100644 security/nss/lib/freebl/ret_cr16.s
 create mode 100644 security/nss/lib/freebl/rijndael.c
 create mode 100644 security/nss/lib/freebl/rijndael.h
 create mode 100644 security/nss/lib/freebl/rijndael32.tab
 create mode 100644 security/nss/lib/freebl/rijndael_tables.c
 create mode 100644 security/nss/lib/freebl/rsa.c
 create mode 100644 security/nss/lib/freebl/rsapkcs.c
 create mode 100644 security/nss/lib/freebl/secmpi.h
 create mode 100644 security/nss/lib/freebl/secrng.h
 create mode 100644 security/nss/lib/freebl/seed.c
 create mode 100644 security/nss/lib/freebl/seed.h
 create mode 100644 security/nss/lib/freebl/sha-fast-amd64-sun.s
 create mode 100644 security/nss/lib/freebl/sha256.h
 create mode 100644 security/nss/lib/freebl/sha512.c
 create mode 100644 security/nss/lib/freebl/sha_fast.c
 create mode 100644 security/nss/lib/freebl/sha_fast.h
 create mode 100644 security/nss/lib/freebl/shsign.h
 create mode 100644 security/nss/lib/freebl/shvfy.c
 create mode 100644 security/nss/lib/freebl/stubs.c
 create mode 100644 security/nss/lib/freebl/stubs.h
 create mode 100644 security/nss/lib/freebl/sysrand.c
 create mode 100644 security/nss/lib/freebl/tlsprfalg.c
 create mode 100644 security/nss/lib/freebl/unix_rand.c
 create mode 100644 security/nss/lib/freebl/win_rand.c

(limited to 'security/nss/lib/freebl')

diff --git a/security/nss/lib/freebl/Makefile b/security/nss/lib/freebl/Makefile
new file mode 100644
index 000000000..0ce1425f1
--- /dev/null
+++ b/security/nss/lib/freebl/Makefile
@@ -0,0 +1,764 @@
+#! gmake
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#######################################################################
+# (1) Include initial platform-independent assignments (MANDATORY).   #
+#######################################################################
+
+include manifest.mn
+
+#######################################################################
+# (2) Include "global" configuration information. (OPTIONAL)          #
+#######################################################################
+
+include $(CORE_DEPTH)/coreconf/config.mk
+
+#######################################################################
+# (3) Include "component" configuration information. (OPTIONAL)       #
+#######################################################################
+
+
+
+#######################################################################
+# (4) Include "local" platform-dependent assignments (OPTIONAL).      #
+#######################################################################
+
+include config.mk
+
+# default for all platforms
+# unset this on those that have multiple freebl libraries
+FREEBL_BUILD_SINGLE_SHLIB = 1
+
+ifdef USE_64
+	DEFINES += -DNSS_USE_64
+endif
+
+ifdef USE_ABI32_FPU
+	DEFINES += -DNSS_USE_ABI32_FPU
+endif
+
+ifeq ($(FREEBL_NO_DEPEND),1)
+	DEFINES += -DFREEBL_NO_DEPEND
+	STUBS_SRCS = stubs.c
+endif
+
+ifeq ($(FREEBL_LOWHASH),1)
+	DEFINES += -DFREEBL_LOWHASH
+	LOWHASH_SRCS = nsslowhash.c
+	LOWHASH_EXPORTS = nsslowhash.h
+	MAPFILE_SOURCE = freebl_hash_vector.def
+	NEED_STUB_BUILD = 1
+else
+	MAPFILE_SOURCE = freebl.def
+endif
+
+ifdef USE_STUB_BUILD
+	CSRCS           = lowhash_vector.c
+	SIMPLE_OBJS     = $(CSRCS:.c=$(OBJ_SUFFIX))
+	OBJS            = $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(SIMPLE_OBJS))
+	ALL_TRASH :=    $(TARGETS) $(OBJS) $(OBJDIR) LOGS TAGS $(GARBAGE) \
+                $(NOSUCHFILE) so_locations
+	MAPFILE_SOURCE = freebl_hash.def
+endif
+
+# FREEBL_USE_PRELINK
+#
+# Most modern versions of Linux support a speed optimization scheme where an
+# application called prelink modifies programs and shared libraries so that
+# they load quickly if they fit into an already designed address space. In
+# short, prelink scans the list of programs and libraries on your system,
+# assigns them a predefined space in the address space, then provides the
+# fixups to the library.
+#
+# The modification of the shared library is correctly detected by the freebl
+# FIPS checksum scheme where we check a signed hash of the library against the
+# library itself.
+#
+# The prelink command itself can reverse the process of modification and output
+# the pristine shared library as it was before prelink made its changes.
+# This option tells Freebl that it can use prelink to output the original copy
+# of the shared library before prelink modified it.
+#
+# FREEBL_PRELINK_COMMAND
+#
+# This is an optional environment variable which can override the default
+# prelink command. It could be used on systems that did something similar to
+# prelink but used a different command and syntax. The only requirement is the
+# program must take the library as the last argument, the program must output
+# the original library to standard out, and the program does not need to take
+# any quoted or embedded spaces in its arguments (except the path to the
+# library itself, which can have embedded spaces or special characters).
+#
+ifdef FREEBL_USE_PRELINK
+	DEFINES += -DFREEBL_USE_PRELINK
+ifdef LINUX
+	DEFINES += -D__GNU_SOURCE=1
+endif
+endif
+ifdef NSS_NO_INIT_SUPPORT
+    DEFINES += -DNSS_NO_INIT_SUPPORT
+endif
+
+ifdef FREEBL_PRELINK_COMMAND
+	DEFINES +=-DFREEBL_PRELINK_COMMAND=\"$(FREEBL_PRELINK_COMMAND)\"
+endif
+# NSS_X86 means the target is a 32-bit x86 CPU architecture
+# NSS_X64 means the target is a 64-bit x64 CPU architecture
+# NSS_X86_OR_X64 means the target is either x86 or x64
+ifeq (,$(filter-out i386 x386 x86 x86_64,$(CPU_ARCH)))
+        DEFINES += -DNSS_X86_OR_X64
+ifneq (,$(USE_64)$(USE_X32))
+        DEFINES += -DNSS_X64
+else
+        DEFINES += -DNSS_X86
+endif
+endif
+
+ifeq ($(OS_TARGET),OSF1)
+    DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_NO_MP_WORD
+    MPI_SRCS += mpvalpha.c
+endif
+
+ifeq (OS2,$(OS_TARGET))
+    ASFILES  = mpi_x86_os2.s
+    DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE 
+    DEFINES += -DMP_ASSEMBLY_DIV_2DX1D
+    DEFINES += -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD
+    DEFINES += -DMP_IS_LITTLE_ENDIAN
+endif
+
+ifeq (,$(filter-out WINNT WIN95,$(OS_TARGET)))
+ifndef USE_64
+# 32-bit Windows
+ifdef NS_USE_GCC
+# Ideally, we want to use assembler
+#     ASFILES  = mpi_x86.s
+#     DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE \
+#                -DMP_ASSEMBLY_DIV_2DX1D
+# but we haven't figured out how to make it work, so we are not
+# using assembler right now.
+    ASFILES  =
+    DEFINES += -DMP_NO_MP_WORD -DMP_USE_UINT_DIGIT
+else
+# MSVC
+    MPI_SRCS += mpi_x86_asm.c
+    DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE 
+    DEFINES += -DMP_ASSEMBLY_DIV_2DX1D -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD
+    ifdef BUILD_OPT
+	OPTIMIZER += -Ox  # maximum optimization for freebl
+    endif
+    # The Intel AES assembly code requires Visual C++ 2010.
+    # if $(_MSC_VER) >= 1600 (Visual C++ 2010)
+    ifeq ($(firstword $(sort $(_MSC_VER) 1600)),1600)
+	DEFINES += -DUSE_HW_AES -DINTEL_GCM
+	ASFILES += intel-aes-x86-masm.asm intel-gcm-x86-masm.asm
+	EXTRA_SRCS += intel-gcm-wrap.c
+	ifeq ($(CLANG_CL),1)
+	    INTEL_GCM_CLANG_CL = 1
+	endif
+    endif
+endif
+else
+    # -DMP_NO_MP_WORD
+    DEFINES += -DMP_IS_LITTLE_ENDIAN
+ifdef NS_USE_GCC
+# Ideally, we should use amd64 assembly code, but it's not yet mingw-w64
+# compatible.
+else
+# MSVC
+    ifdef BUILD_OPT
+	OPTIMIZER += -Ox  # maximum optimization for freebl
+    endif
+    ASFILES  = arcfour-amd64-masm.asm mpi_amd64_masm.asm mp_comba_amd64_masm.asm
+    DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY
+    DEFINES += -DNSS_USE_COMBA
+    # The Intel AES assembly code requires Visual C++ 2010 (10.0). The _xgetbv
+    # compiler intrinsic function requires Visual C++ 2010 (10.0) SP1.
+    ifeq ($(_MSC_VER_GE_10SP1),1)
+	DEFINES += -DUSE_HW_AES -DINTEL_GCM
+	ASFILES += intel-aes-x64-masm.asm intel-gcm-x64-masm.asm
+	EXTRA_SRCS += intel-gcm-wrap.c
+	ifeq ($(CLANG_CL),1)
+	    INTEL_GCM_CLANG_CL = 1
+	endif
+    endif
+    MPI_SRCS += mpi_amd64.c
+endif
+endif
+endif
+
+ifeq ($(OS_TARGET),IRIX)
+ifeq ($(USE_N32),1)
+    ASFILES  = mpi_mips.s
+    ifeq ($(NS_USE_GCC),1)
+	ASFLAGS = -Wp,-P -Wp,-traditional -O -mips3
+    else
+	ASFLAGS = -O -OPT:Olimit=4000 -dollar -fullwarn -xansi -n32 -mips3 
+    endif
+    DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+    DEFINES += -DMP_USE_UINT_DIGIT
+endif
+endif
+
+ifeq ($(OS_TARGET),Darwin)
+ifeq ($(CPU_ARCH),x86)
+    ASFILES  = mpi_sse2.s
+    DEFINES += -DMP_USE_UINT_DIGIT
+    DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+    DEFINES += -DMP_ASSEMBLY_DIV_2DX1D
+endif
+endif # Darwin
+
+ifeq ($(OS_TARGET),Linux)
+ifeq ($(CPU_ARCH),x86_64)
+    ASFILES  = arcfour-amd64-gas.s mpi_amd64_gas.s
+    ASFLAGS += -fPIC -Wa,--noexecstack
+    DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY
+    DEFINES += -DNSS_USE_COMBA
+    DEFINES += -DMP_IS_LITTLE_ENDIAN
+#   DEFINES += -DMPI_AMD64_ADD
+    # comment the next four lines to turn off Intel HW acceleration.
+    DEFINES += -DUSE_HW_AES -DINTEL_GCM
+    ASFILES += intel-aes.s intel-gcm.s
+    EXTRA_SRCS += intel-gcm-wrap.c
+    INTEL_GCM = 1
+    MPI_SRCS += mpi_amd64.c mp_comba.c
+endif
+ifeq ($(CPU_ARCH),x86)
+    ASFILES  = mpi_x86.s
+    DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE 
+    DEFINES += -DMP_ASSEMBLY_DIV_2DX1D -DMP_USE_UINT_DIGIT
+    DEFINES += -DMP_IS_LITTLE_ENDIAN
+    # The floating point ECC code doesn't work on Linux x86 (bug 311432).
+    #ECL_USE_FP = 1
+endif
+ifeq ($(CPU_ARCH),arm)
+    DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE 
+    DEFINES += -DMP_USE_UINT_DIGIT
+    DEFINES += -DSHA_NO_LONG_LONG # avoid 64-bit arithmetic in SHA512
+    MPI_SRCS += mpi_arm.c
+endif
+ifeq ($(CPU_ARCH),ppc)
+ifdef USE_64
+    DEFINES += -DNSS_NO_INIT_SUPPORT
+endif # USE_64
+endif # ppc
+endif # Linux
+
+ifeq ($(OS_TARGET),AIX)
+    DEFINES += -DMP_USE_UINT_DIGIT
+    ifndef USE_64
+	DEFINES += -DMP_NO_DIV_WORD -DMP_NO_ADD_WORD -DMP_NO_SUB_WORD
+    endif
+endif # AIX
+
+ifeq ($(OS_TARGET), HP-UX)
+ifneq ($(OS_TEST), ia64)
+# PA-RISC
+ASFILES += ret_cr16.s
+ifndef USE_64
+    FREEBL_BUILD_SINGLE_SHLIB = 
+    HAVE_ABI32_INT32 = 1
+    HAVE_ABI32_FPU = 1
+endif
+ifdef FREEBL_CHILD_BUILD
+ifdef USE_ABI32_INT32
+# build for DA1.1 (HP PA 1.1) 32-bit ABI build with 32-bit arithmetic
+    DEFINES  += -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD
+    DEFINES += -DSHA_NO_LONG_LONG # avoid 64-bit arithmetic in SHA512
+else
+ifdef USE_64
+# this builds for DA2.0W (HP PA 2.0 Wide), the LP64 ABI, using 64-bit digits 
+    MPI_SRCS += mpi_hp.c 
+    ASFILES  += hpma512.s hppa20.s 
+    DEFINES  += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+else
+# this builds for DA2.0 (HP PA 2.0 Narrow) ABI32_FPU model 
+# (the 32-bit ABI with 64-bit registers) using 64-bit digits
+    MPI_SRCS += mpi_hp.c 
+    ASFILES  += hpma512.s hppa20.s 
+    DEFINES  += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+ifndef NS_USE_GCC
+    ARCHFLAG = -Aa +e +DA2.0 +DS2.0
+endif
+endif
+endif
+endif
+endif
+endif
+
+# The blapi functions are defined not only in the freebl shared
+# libraries but also in the shared libraries linked with loader.c
+# (libsoftokn3.so and libssl3.so).  We need to use GNU ld's
+# -Bsymbolic option or the equivalent option for other linkers
+# to bind the blapi function references in FREEBLVector vector
+# (ldvector.c) to the blapi functions defined in the freebl
+# shared libraries.
+ifeq (,$(filter-out BSD_OS FreeBSD Linux NetBSD OpenBSD, $(OS_TARGET)))
+    MKSHLIB += -Wl,-Bsymbolic
+endif
+
+ifeq ($(OS_TARGET),SunOS)
+
+ifdef NS_USE_GCC
+    ifdef GCC_USE_GNU_LD
+	MKSHLIB += -Wl,-Bsymbolic,-z,now,-z,text
+    else
+	MKSHLIB += -Wl,-B,symbolic,-z,now,-z,text
+    endif # GCC_USE_GNU_LD
+else
+    MKSHLIB += -B symbolic -z now -z text
+endif # NS_USE_GCC
+
+# Sun's WorkShop defines v8, v8plus and v9 architectures.
+# gcc on Solaris defines v8 and v9 "cpus".  
+# gcc's v9 is equivalent to Workshop's v8plus.
+# gcc's -m64 is equivalent to Workshop's v9
+# We always use Sun's assembler, which uses Sun's naming convention.
+ifeq ($(CPU_ARCH),sparc)
+    FREEBL_BUILD_SINGLE_SHLIB=
+    ifdef USE_64
+        HAVE_ABI64_INT = 1
+        HAVE_ABI64_FPU = 1
+    else
+        HAVE_ABI32_FPU = 1
+        HAVE_ABI32_INT64 = 1
+    endif
+    SYSV_SPARC = 1
+    SOLARIS_AS = /usr/ccs/bin/as
+    #### set arch, asm, c flags
+    ifdef NS_USE_GCC
+	ifdef USE_ABI32_INT64
+	    ARCHFLAG=-mcpu=v9 -Wa,-xarch=v8plus
+	    SOLARIS_AS_FLAGS = -xarch=v8plus -K PIC
+	endif
+	ifdef USE_ABI32_FPU
+	    ARCHFLAG=-mcpu=v9 -Wa,-xarch=v8plusa
+	    SOLARIS_AS_FLAGS = -xarch=v8plusa -K PIC
+	endif # USE_ABI32_FPU
+	ifdef USE_ABI64_INT
+	    # this builds for Sparc v9a pure 64-bit architecture
+	    ARCHFLAG += -mcpu=v9 -Wa,-xarch=v9
+	    SOLARIS_AS_FLAGS = -xarch=v9 -K PIC
+	endif
+	ifdef USE_ABI64_FPU
+	    # this builds for Sparc v9a pure 64-bit architecture
+	    # It uses floating point, and 32-bit word size
+	    ARCHFLAG += -mcpu=v9 -Wa,-xarch=v9a
+	    SOLARIS_AS_FLAGS = -xarch=v9a -K PIC
+	endif
+    else # NS_USE_GCC
+	# FPU_TARGET_OPTIMIZER specifies the target processor and cache
+	# properties of the ABI32_FPU and ABI64_FPU architectures for use
+	# by the optimizer.
+	ifeq (,$(findstring Sun WorkShop 6,$(shell $(CC) -V 2>&1)))
+	    # if the compiler is not Forte 6
+	    FPU_TARGET_OPTIMIZER = -xcache=64/32/4:1024/64/4 -xchip=ultra3
+	else
+	    # Forte 6 C compiler generates incorrect code for rijndael.c
+	    # if -xchip=ultra3 is used (Bugzilla bug 333925).  So we revert
+	    # to what we used in NSS 3.10.
+	    FPU_TARGET_OPTIMIZER = -xchip=ultra2
+	endif
+	ifdef USE_ABI32_INT64
+	    # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers, 
+	    # 32-bit ABI, it uses 64-bit words, integer arithmetic,
+	    # no FPU (non-VIS cpus).
+	    # These flags were suggested by the compiler group for building
+	    # with SunStudio 10.
+	    ifdef BUILD_OPT
+                SOL_CFLAGS += -xO4
+	    endif
+ 	    SOL_CFLAGS += -xtarget=generic
+	    ARCHFLAG = -xarch=v8plus
+	    SOLARIS_AS_FLAGS = -xarch=v8plus -K PIC
+	endif
+	ifdef USE_ABI32_FPU
+	    # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers, 
+	    # 32-bit ABI, it uses FPU code, and 32-bit word size.
+	    # these flags were determined by running cc -### -fast and copying
+	    # the generated flag settings
+	    SOL_CFLAGS += -fsingle -xmemalign=8s
+	    ifdef BUILD_OPT
+                SOL_CFLAGS += -D__MATHERR_ERRNO_DONTCARE -fsimple=1
+                SOL_CFLAGS += -xalias_level=basic -xbuiltin=%all
+                SOL_CFLAGS += $(FPU_TARGET_OPTIMIZER) -xdepend
+                SOL_CFLAGS += -xlibmil -xO5
+	    endif
+	    ARCHFLAG = -xarch=v8plusa
+	    SOLARIS_AS_FLAGS = -xarch=v8plusa -K PIC
+	endif
+	ifdef USE_ABI64_INT
+	    # this builds for Sparc v9a pure 64-bit architecture,
+	    # no FPU (non-VIS cpus). For building with SunStudio 10.
+	    ifdef BUILD_OPT
+                SOL_CFLAGS += -xO4
+	    endif
+ 	    SOL_CFLAGS += -xtarget=generic
+	    ARCHFLAG = -xarch=v9
+	    SOLARIS_AS_FLAGS = -xarch=v9 -K PIC
+	endif
+	ifdef USE_ABI64_FPU
+	    # this builds for Sparc v9a pure 64-bit architecture
+	    # It uses floating point, and 32-bit word size.
+	    # See comment for USE_ABI32_FPU.
+	    SOL_CFLAGS += -fsingle -xmemalign=8s
+	    ifdef BUILD_OPT
+                SOL_CFLAGS += -D__MATHERR_ERRNO_DONTCARE -fsimple=1
+                SOL_CFLAGS += -xalias_level=basic -xbuiltin=%all
+                SOL_CFLAGS += $(FPU_TARGET_OPTIMIZER) -xdepend
+                SOL_CFLAGS += -xlibmil -xO5
+	    endif
+	    ARCHFLAG = -xarch=v9a
+	    SOLARIS_AS_FLAGS = -xarch=v9a -K PIC
+	endif
+    endif # NS_USE_GCC
+
+    ### set flags for both GCC and Sun cc
+    ifdef USE_ABI32_INT64
+	# this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers, 
+	# 32-bit ABI, it uses 64-bit words, integer arithmetic, no FPU
+	# best times are with no MP_ flags specified
+    endif
+    ifdef USE_ABI32_FPU
+	# this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers, 
+	# 32-bit ABI, it uses FPU code, and 32-bit word size
+	MPI_SRCS += mpi_sparc.c
+	ASFILES  = mpv_sparcv8.s montmulfv8.s
+	DEFINES  += -DMP_NO_MP_WORD -DMP_USE_UINT_DIGIT -DMP_ASSEMBLY_MULTIPLY
+	DEFINES  += -DMP_USING_MONT_MULF -DMP_MONT_USE_MP_MUL
+	ECL_USE_FP = 1
+    endif
+    ifdef USE_ABI64_INT
+	# this builds for Sparc v9a pure 64-bit architecture
+	# best times are with no MP_ flags specified
+    endif
+    ifdef USE_ABI64_FPU
+	# this builds for Sparc v9a pure 64-bit architecture
+	# It uses floating point, and 32-bit word size
+	MPI_SRCS += mpi_sparc.c
+	ASFILES   = mpv_sparcv9.s montmulfv9.s
+	DEFINES  += -DMP_NO_MP_WORD -DMP_USE_UINT_DIGIT -DMP_ASSEMBLY_MULTIPLY
+	DEFINES  += -DMP_USING_MONT_MULF -DMP_MONT_USE_MP_MUL
+	ECL_USE_FP = 1
+    endif
+
+else
+    # Solaris for non-sparc family CPUs
+    ifdef NS_USE_GCC
+	LD = gcc
+	AS = gcc
+	ASFLAGS = -x assembler-with-cpp
+    endif
+    ifeq ($(USE_64),1)
+	# Solaris for AMD64
+	ifdef NS_USE_GCC
+	    ASFILES  = arcfour-amd64-gas.s mpi_amd64_gas.s
+	    ASFLAGS += -march=opteron -m64 -fPIC
+	    MPI_SRCS += mp_comba.c
+	    # comment the next four lines to turn off Intel HW acceleration
+	    ASFILES += intel-gcm.s
+	    EXTRA_SRCS += intel-gcm-wrap.c
+	    INTEL_GCM = 1
+	    DEFINES += -DINTEL_GCM
+	else
+	    ASFILES  = arcfour-amd64-sun.s mpi_amd64_sun.s sha-fast-amd64-sun.s
+ 	    ASFILES += mp_comba_amd64_sun.s mpcpucache_amd64.s
+	    ASFLAGS += -xarch=generic64 -K PIC
+            SOL_CFLAGS += -xprefetch=no
+	    SHA_SRCS =
+ 	    MPCPU_SRCS =
+	    # Intel acceleration for GCM does not build currently with Studio
+	endif
+	DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY
+	DEFINES += -DNSS_USE_COMBA -DMP_IS_LITTLE_ENDIAN
+	# comment the next two lines to turn off Intel HW acceleration
+	DEFINES += -DUSE_HW_AES
+	ASFILES += intel-aes.s
+	MPI_SRCS += mpi_amd64.c
+    else
+	# Solaris x86
+	DEFINES += -DMP_USE_UINT_DIGIT
+	DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE 
+	DEFINES += -DMP_ASSEMBLY_DIV_2DX1D
+	ASFILES  = mpi_i86pc.s
+ 	ifndef NS_USE_GCC
+ 	   MPCPU_SRCS =
+ 	   ASFILES += mpcpucache_x86.s
+ 	endif
+    endif
+endif # Solaris for non-sparc family CPUs
+endif # target == SunOS
+
+ifndef NSS_DISABLE_ECC
+    ifdef ECL_USE_FP
+	#enable floating point ECC code	
+	DEFINES  += -DECL_USE_FP
+	ECL_SRCS += ecp_fp160.c ecp_fp192.c ecp_fp224.c ecp_fp.c
+	ECL_HDRS += ecp_fp.h
+    endif
+endif
+
+# poly1305-donna-x64-sse2-incremental-source.c requires __int128 support,
+# which GCC provides as of version 4.6.0.
+ifdef USE_64
+    ifdef CC_IS_CLANG
+            HAVE_INT128_SUPPORT = 1
+            DEFINES += -DHAVE_INT128_SUPPORT
+    else ifeq (1,$(CC_IS_GCC))
+        ifneq (,$(filter 4.6 4.7 4.8 4.9,$(word 1,$(GCC_VERSION)).$(word 2,$(GCC_VERSION))))
+            HAVE_INT128_SUPPORT = 1
+            DEFINES += -DHAVE_INT128_SUPPORT
+        endif
+        ifeq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION))))
+            HAVE_INT128_SUPPORT = 1
+            DEFINES += -DHAVE_INT128_SUPPORT
+        endif
+    endif
+endif
+
+ifndef NSS_DISABLE_CHACHAPOLY
+    ifeq ($(CPU_ARCH),x86_64)
+        ifdef HAVE_INT128_SUPPORT
+            EXTRA_SRCS += poly1305-donna-x64-sse2-incremental-source.c
+        else
+            EXTRA_SRCS += poly1305.c
+        endif
+
+        ifneq (1,$(CC_IS_GCC))
+            EXTRA_SRCS += chacha20.c
+        else
+            EXTRA_SRCS += chacha20_vec.c
+        endif
+    else
+        EXTRA_SRCS += poly1305.c
+        EXTRA_SRCS += chacha20.c
+    endif # x86_64
+endif # NSS_DISABLE_CHACHAPOLY
+
+ifeq (,$(filter-out i386 x386 x86 x86_64,$(CPU_ARCH)))
+    # All intel architectures get the 64 bit version
+    # With custom uint128 if necessary (faster than generic 32 bit version).
+    ECL_SRCS += curve25519_64.c
+else
+    # All non intel architectures get the generic 32 bit implementation (slow!)
+    ECL_SRCS += curve25519_32.c
+endif
+
+ifndef HAVE_INT128_SUPPORT
+    ECL_SRCS += uint128.c
+endif
+
+#######################################################################
+# (5) Execute "global" rules. (OPTIONAL)                              #
+#######################################################################
+
+include $(CORE_DEPTH)/coreconf/rules.mk
+
+#######################################################################
+# (6) Execute "component" rules. (OPTIONAL)                           #
+#######################################################################
+
+
+
+#######################################################################
+# (7) Execute "local" rules. (OPTIONAL).                              #
+#######################################################################
+
+export:: private_export
+
+rijndael_tables:
+	$(CC) -o $(OBJDIR)/make_rijndael_tab rijndael_tables.c \
+	         $(DEFINES) $(INCLUDES) $(OBJDIR)/libfreebl.a
+	$(OBJDIR)/make_rijndael_tab
+
+vpath %.h mpi ecl
+vpath %.c mpi ecl
+vpath %.S mpi ecl
+vpath %.s mpi ecl
+vpath %.asm mpi ecl
+INCLUDES += -Impi -Iecl
+
+
+DEFINES += -DMP_API_COMPATIBLE
+
+MPI_USERS = dh.c pqg.c dsa.c rsa.c ec.c
+
+MPI_OBJS = $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(MPI_SRCS:.c=$(OBJ_SUFFIX)))
+MPI_OBJS += $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(MPI_USERS:.c=$(OBJ_SUFFIX)))
+
+$(MPI_OBJS): $(MPI_HDRS)
+
+ECL_USERS = ec.c
+
+ECL_OBJS = $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(ECL_SRCS:.c=$(OBJ_SUFFIX)) $(ECL_ASM_SRCS:$(ASM_SUFFIX)=$(OBJ_SUFFIX)))
+ECL_OBJS += $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(ECL_USERS:.c=$(OBJ_SUFFIX)))
+
+$(ECL_OBJS): $(ECL_HDRS)
+
+
+
+$(OBJDIR)/sysrand$(OBJ_SUFFIX): sysrand.c unix_rand.c win_rand.c os2_rand.c
+
+$(OBJDIR)/$(PROG_PREFIX)mpprime$(OBJ_SUFFIX): primes.c
+
+$(OBJDIR)/ldvector$(OBJ_SUFFIX) $(OBJDIR)/loader$(OBJ_SUFFIX) : loader.h
+
+ifeq ($(SYSV_SPARC),1)
+
+$(OBJDIR)/mpv_sparcv8.o $(OBJDIR)/mpv_sparcv8x.o $(OBJDIR)/montmulfv8.o : $(OBJDIR)/%.o : %.s
+	@$(MAKE_OBJDIR)
+	$(SOLARIS_AS) -o $@ $(SOLARIS_AS_FLAGS) $<
+
+$(OBJDIR)/mpv_sparcv9.o $(OBJDIR)/montmulfv9.o : $(OBJDIR)/%.o : %.s
+	@$(MAKE_OBJDIR)
+	$(SOLARIS_AS) -o $@ $(SOLARIS_AS_FLAGS) $<
+
+$(OBJDIR)/mpmontg.o: mpmontg.c montmulf.h
+
+endif
+
+ifndef FREEBL_CHILD_BUILD
+
+# Parent build. This is where we decide which shared libraries to build
+
+ifdef FREEBL_BUILD_SINGLE_SHLIB
+
+################### Single shared lib stuff #########################
+SINGLE_SHLIB_DIR = $(OBJDIR)/$(OS_TARGET)_SINGLE_SHLIB
+ALL_TRASH += $(SINGLE_SHLIB_DIR) 
+
+$(SINGLE_SHLIB_DIR):
+	-mkdir -p $(SINGLE_SHLIB_DIR)
+
+release_md libs:: $(SINGLE_SHLIB_DIR)
+	$(MAKE) FREEBL_CHILD_BUILD=1 \
+ OBJDIR=$(SINGLE_SHLIB_DIR) $@
+######################## common stuff #########################
+
+endif
+
+ifdef NEED_STUB_BUILD
+SINGLE_SHLIB_DIR = $(OBJDIR)/$(OS_TARGET)_SINGLE_SHLIB
+ALL_TRASH += $(SINGLE_SHLIB_DIR) 
+$(SINGLE_SHLIB_DIR):
+	-mkdir $(SINGLE_SHLIB_DIR)
+
+release_md libs:: $(SINGLE_SHLIB_DIR)
+	$(MAKE) FREEBL_CHILD_BUILD=1 USE_STUB_BUILD=1 \
+ OBJDIR=$(SINGLE_SHLIB_DIR) $@
+endif
+
+# multiple shared libraries
+
+######################## ABI32_FPU stuff #########################
+ifdef HAVE_ABI32_FPU
+ABI32_FPU_DIR = $(OBJDIR)/$(OS_TARGET)_ABI32_FPU
+ALL_TRASH += $(ABI32_FPU_DIR) 
+
+$(ABI32_FPU_DIR):
+	-mkdir $(ABI32_FPU_DIR)
+
+release_md libs:: $(ABI32_FPU_DIR)
+	$(MAKE) FREEBL_CHILD_BUILD=1 USE_ABI32_FPU=1 \
+ OBJDIR=$(ABI32_FPU_DIR) $@
+endif
+
+######################## ABI32_INT32 stuff #########################
+ifdef HAVE_ABI32_INT32
+ABI32_INT32_DIR = $(OBJDIR)/$(OS_TARGET)_ABI32_INT32
+ALL_TRASH += $(ABI32_INT32_DIR) 
+
+$(ABI32_INT32_DIR):
+	-mkdir $(ABI32_INT32_DIR)
+
+release_md libs:: $(ABI32_INT32_DIR)
+	$(MAKE) FREEBL_CHILD_BUILD=1 USE_ABI32_INT32=1 \
+ OBJDIR=$(ABI32_INT32_DIR) $@
+endif
+
+######################## ABI32_INT64 stuff #########################
+ifdef HAVE_ABI32_INT64
+ABI32_INT64_DIR = $(OBJDIR)/$(OS_TARGET)_ABI32_INT64
+ALL_TRASH += $(ABI32_INT64_DIR) 
+
+$(ABI32_INT64_DIR):
+	-mkdir $(ABI32_INT64_DIR)
+
+release_md libs:: $(ABI32_INT64_DIR)
+	$(MAKE) FREEBL_CHILD_BUILD=1 USE_ABI32_INT64=1\
+ OBJDIR=$(ABI32_INT64_DIR) $@
+endif
+
+######################## END of 32-bit stuff #########################
+
+# above is 32-bit builds, below is 64-bit builds
+
+######################## ABI64_FPU stuff #########################
+ifdef HAVE_ABI64_FPU
+ABI64_FPU_DIR = $(OBJDIR)/$(OS_TARGET)_ABI64_FPU
+ALL_TRASH += $(ABI64_FPU_DIR) 
+
+$(ABI64_FPU_DIR):
+	-mkdir $(ABI64_FPU_DIR)
+
+release_md libs:: $(ABI64_FPU_DIR)
+	$(MAKE) FREEBL_CHILD_BUILD=1 USE_ABI64_FPU=1 \
+ OBJDIR=$(ABI64_FPU_DIR) $@
+endif
+
+######################## ABI64_INT stuff #########################
+ifdef HAVE_ABI64_INT
+ABI64_INT_DIR = $(OBJDIR)/$(OS_TARGET)_ABI64_INT
+ALL_TRASH += $(ABI64_INT_DIR) 
+
+$(ABI64_INT_DIR):
+	-mkdir $(ABI64_INT_DIR)
+
+release_md libs:: $(ABI64_INT_DIR)
+	$(MAKE) FREEBL_CHILD_BUILD=1 USE_ABI64_INT=1 \
+ OBJDIR=$(ABI64_INT_DIR) $@
+endif
+
+endif  # FREEBL_CHILD_BUILD
+
+
+# Bugzilla Bug 333917: the non-x86 code in desblapi.c seems to violate
+# ANSI C's strict aliasing rules.
+ifeq ($(OS_TARGET),Linux)
+ifneq ($(CPU_ARCH),x86)
+$(OBJDIR)/$(PROG_PREFIX)desblapi$(OBJ_SUFFIX): desblapi.c
+	@$(MAKE_OBJDIR)
+ifdef NEED_ABSOLUTE_PATH
+	$(CC) -o $@ -c $(CFLAGS) -fno-strict-aliasing $(call core_abspath,$<)
+else
+	$(CC) -o $@ -c $(CFLAGS) -fno-strict-aliasing $<
+endif
+endif
+endif
+
+ifdef INTEL_GCM
+#
+# GCM binary needs -mssse3
+#
+$(OBJDIR)/$(PROG_PREFIX)intel-gcm-wrap$(OBJ_SUFFIX): CFLAGS += -mssse3
+
+# The integrated assembler in Clang 3.2 does not support % in the
+# expression of a .set directive. intel-gcm.s uses .set to give
+# symbolic names to registers, for example,
+#     .set  Htbl, %rdi
+# So we can't use Clang's integrated assembler with intel-gcm.s.
+ifdef CC_IS_CLANG
+$(OBJDIR)/$(PROG_PREFIX)intel-gcm$(OBJ_SUFFIX): CFLAGS += -no-integrated-as
+endif
+endif
+
+ifdef INTEL_GCM_CLANG_CL
+#
+# clang-cl needs -mssse3
+#
+$(OBJDIR)/$(PROG_PREFIX)intel-gcm-wrap$(OBJ_SUFFIX): CFLAGS += -mssse3
+endif
diff --git a/security/nss/lib/freebl/aeskeywrap.c b/security/nss/lib/freebl/aeskeywrap.c
new file mode 100644
index 000000000..79ff8a852
--- /dev/null
+++ b/security/nss/lib/freebl/aeskeywrap.c
@@ -0,0 +1,389 @@
+/*
+ * aeskeywrap.c - implement AES Key Wrap algorithm from RFC 3394
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prcpucfg.h"
+#if defined(IS_LITTLE_ENDIAN) || defined(SHA_NO_LONG_LONG)
+#define BIG_ENDIAN_WITH_64_BIT_REGISTERS 0
+#else
+#define BIG_ENDIAN_WITH_64_BIT_REGISTERS 1
+#endif
+#include "prtypes.h" /* for PRUintXX */
+#include "secport.h" /* for PORT_XXX */
+#include "secerr.h"
+#include "blapi.h" /* for AES_ functions */
+#include "rijndael.h"
+
+struct AESKeyWrapContextStr {
+    unsigned char iv[AES_KEY_WRAP_IV_BYTES]; /* copied into A when wrapping */
+    AESContext aescx; /* AES state that the wrap routine encrypts with */
+};
+
+/******************************************/
+/*
+** AES key wrap algorithm, RFC 3394
+*/
+
+AESKeyWrapContext *
+AESKeyWrap_AllocateContext(void)
+{
+    /* Allocation only; AESKeyWrap_InitContext fills the context in. */
+    return PORT_New(AESKeyWrapContext);
+}
+
+/* Initialize "cx": keep a copy of "iv" (all 0xA6 bytes if "iv" is NULL) and
+** set up the embedded AES context with "key".  "x1" and "x2" are unused. */
+SECStatus
+AESKeyWrap_InitContext(AESKeyWrapContext *cx,
+                       const unsigned char *key,
+                       unsigned int keylen,
+                       const unsigned char *iv,
+                       int x1,
+                       unsigned int encrypt,
+                       unsigned int x2)
+{
+    if (cx == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if (iv == NULL) {
+        memset(cx->iv, 0xA6, sizeof cx->iv);
+    } else {
+        memcpy(cx->iv, iv, sizeof cx->iv);
+    }
+    return AES_InitContext(&cx->aescx, key, keylen, NULL, NSS_AES, encrypt,
+                           AES_BLOCK_SIZE);
+}
+
+/*
+** Allocate an AES Key Wrap context and initialize it in a single call.
+**  "key" raw key data; "keylen" its length in bytes (16, 24, or 32)
+**  Returns NULL when either the allocation or the initialization fails.
+*/
+extern AESKeyWrapContext *
+AESKeyWrap_CreateContext(const unsigned char *key, const unsigned char *iv,
+                         int encrypt, unsigned int keylen)
+{
+    AESKeyWrapContext *wrapCx = AESKeyWrap_AllocateContext();
+    if (wrapCx == NULL) {
+        return NULL; /* error is already set */
+    }
+    if (AESKeyWrap_InitContext(wrapCx, key, keylen, iv, 0, encrypt, 0) !=
+        SECSuccess) {
+        PORT_Free(wrapCx); /* error should already be set */
+        return NULL;
+    }
+    return wrapCx;
+}
+
+/*
+** Tear down an AES Key Wrap context; a NULL "cx" is ignored.
+**  "cx" the context
+**  "freeit" if true, the context object itself is freed as well
+*/
+extern void
+AESKeyWrap_DestroyContext(AESKeyWrapContext *cx, PRBool freeit)
+{
+    if (cx == NULL) {
+        return;
+    }
+    AES_DestroyContext(&cx->aescx, PR_FALSE); /* embedded member of *cx */
+    if (freeit) {
+        PORT_Free(cx);
+    }
+}
+
+#if !BIG_ENDIAN_WITH_64_BIT_REGISTERS
+
+/* The AES Key Wrap algorithm has 64-bit values that are ALWAYS big-endian
+** (Most significant byte first) in memory.  The only ALU operations done
+** on them are increment, decrement, and XOR.  So, on little-endian CPUs,
+** and on CPUs that lack 64-bit registers, these big-endian 64-bit operations
+** are simulated in the following code.  This is thought to be faster and
+** simpler than trying to convert the data to little-endian and back.
+*/
+
+/* A and T point to two 64-bit values stored most significant byte first
+** (big endian).  This function increments the 64-bit value T, and then
+** XORs the new T into A; both values are modified in place.
+*/
+static void
+increment_and_xor(unsigned char *A, unsigned char *T)
+{
+    int pos;
+
+    /* Add one, starting at the least significant byte (index 7).  The
+    ** carry moves on to the next more significant byte only when the byte
+    ** just incremented wrapped around to zero.
+    */
+    for (pos = 7; pos >= 0; --pos) {
+        if (++T[pos] != 0) {
+            break;
+        }
+    }
+
+    /* Fold the incremented counter into A.  XOR has no carries, so the
+    ** order in which the eight bytes are visited makes no difference. */
+    for (pos = 0; pos < 8; ++pos) {
+        A[pos] ^= T[pos];
+    }
+}
+
+/* A and T point to two 64-bit values stored most significant byte first
+** (big endian).  This function XORs T into A, giving a new A, then
+** decrements the 64-bit value T.
+**
+** XOR acts on each bit independently, so XORing the two words as a whole
+** produces the same bytes in memory on any host byte order; no per-byte
+** masking is needed, and every bit of T takes part in the result.
+** The decrement, in contrast, has to follow the big-endian byte layout.
+*/
+static void
+xor_and_decrement(PRUint64 *A, PRUint64 *T)
+{
+    unsigned char *TP = (unsigned char *)T;
+    int pos;
+
+    *A ^= *T;
+
+    /* Subtract one from the big-endian counter: start at the least
+    ** significant byte (index 7) and borrow from the next more significant
+    ** byte only while the byte just decremented was zero beforehand.
+    */
+    for (pos = 7; pos >= 0; --pos) {
+        if (TP[pos]-- != 0) {
+            break;
+        }
+    }
+}
+
+/* Given an unsigned long t (in host byte order), store this value as a
+** 64-bit big-endian value in *pt: pt[0] receives the most significant
+** byte and pt[7] the least significant one.
+*/
+static void
+set_t(unsigned char *pt, unsigned long t)
+{
+    unsigned char *dst = pt + 7;
+
+    /* Write the low byte of t at the end of the buffer, then keep moving
+    ** one byte towards the front, shifting t right by 8 bits before each
+    ** further store.  Every shift is by 8 bits only, which is less than
+    ** the width of unsigned long, so high bytes that t cannot hold simply
+    ** come out as zero.
+    */
+    *dst = (unsigned char)t;
+    while (dst != pt) {
+        t >>= 8;
+        --dst;
+        *dst = (unsigned char)t;
+    }
+}
+
+#endif
+
+/*
+** Perform AES key wrap.
+**  "cx" the context
+**  "output" the output buffer to store the encrypted data.
+**  "pOutputLen" if not NULL, receives the number of bytes stored in
+**     "output"; it is set to 0 when the AES encryption step fails.
+**  "maxOutputLen" the maximum amount of data that can ever be
+**     stored in "output"
+**  "input" the input data
+**  "inputLen" the amount of input data; must be a nonzero multiple of
+**     AES_KEY_WRAP_BLOCK_SIZE
+** Returns SECSuccess, or SECFailure if an argument check or AES fails.
+*/
+extern SECStatus
+AESKeyWrap_Encrypt(AESKeyWrapContext *cx, unsigned char *output,
+                   unsigned int *pOutputLen, unsigned int maxOutputLen,
+                   const unsigned char *input, unsigned int inputLen)
+{
+    PRUint64 *R = NULL;
+    unsigned int nBlocks;
+    unsigned int i, j;
+    unsigned int aesLen = AES_BLOCK_SIZE;
+    unsigned int outLen = inputLen + AES_KEY_WRAP_BLOCK_SIZE;
+    SECStatus s = SECFailure;
+    /* These PRUint64s are ALWAYS big endian, regardless of CPU orientation. */
+    PRUint64 t;
+    PRUint64 B[2];
+
+#define A B[0]
+
+    /* Check args */
+    if (!inputLen || 0 != inputLen % AES_KEY_WRAP_BLOCK_SIZE ||
+        outLen < inputLen /* computing outLen wrapped around */) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return s;
+    }
+#ifdef maybe
+    if (!output && pOutputLen) { /* caller is asking for output size */
+        *pOutputLen = outLen;
+        return SECSuccess;
+    }
+#endif
+    if (maxOutputLen < outLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return s;
+    }
+    if (cx == NULL || output == NULL || input == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return s;
+    }
+    nBlocks = inputLen / AES_KEY_WRAP_BLOCK_SIZE;
+    R = PORT_NewArray(PRUint64, nBlocks + 1);
+    if (!R)
+        return s; /* error is already set. */
+    /*
+    ** 1) Initialize variables.
+    */
+    memcpy(&A, cx->iv, AES_KEY_WRAP_IV_BYTES);
+    memcpy(&R[1], input, inputLen);
+    t = 0; /* all bytes zero, so the byte order is irrelevant here */
+    /*
+    ** 2) Calculate intermediate values; a failed AES call ends both loops.
+    */
+    s = SECSuccess;
+    for (j = 0; j < 6 && s == SECSuccess; ++j) {
+        for (i = 1; i <= nBlocks; ++i) {
+            B[1] = R[i];
+            s = AES_Encrypt(&cx->aescx, (unsigned char *)B, &aesLen,
+                            sizeof B, (unsigned char *)B, sizeof B);
+            if (s != SECSuccess)
+                break;
+            R[i] = B[1];
+/* here, increment t and XOR A with t (in big endian order); */
+#if BIG_ENDIAN_WITH_64_BIT_REGISTERS
+            A ^= ++t;
+#else
+            increment_and_xor((unsigned char *)&A, (unsigned char *)&t);
+#endif
+        }
+    }
+    /*
+    ** 3) Output the results.
+    */
+    if (s == SECSuccess) {
+        R[0] = A;
+        memcpy(output, &R[0], outLen);
+        if (pOutputLen)
+            *pOutputLen = outLen;
+    } else if (pOutputLen) {
+        *pOutputLen = 0;
+    }
+    PORT_ZFree(R, outLen);
+    return s;
+}
+#undef A
+
+/*
+** Perform AES key unwrap.
+**  "cx" the context
+**  "output" the output buffer to store the decrypted data.
+**  "pOutputLen" how much data is stored in "output". Set by the routine
+**     after some data is stored in output (set to 0 if an AES call or the
+**     final IV comparison fails).
+**  "maxOutputLen" the maximum amount of data that can ever be
+**     stored in "output"
+**  "input" the input data
+**  "inputLen" the amount of input data; must be a multiple of
+**     AES_KEY_WRAP_BLOCK_SIZE and at least three such blocks
+** Returns SECSuccess, or SECFailure as soon as any step fails; the error is
+** SEC_ERROR_BAD_DATA when the recovered IV does not match the context's IV,
+** or SEC_ERROR_INPUT_LEN / SEC_ERROR_OUTPUT_LEN / SEC_ERROR_INVALID_ARGS.
+*/
+extern SECStatus
+AESKeyWrap_Decrypt(AESKeyWrapContext *cx, unsigned char *output,
+                   unsigned int *pOutputLen, unsigned int maxOutputLen,
+                   const unsigned char *input, unsigned int inputLen)
+{
+    PRUint64 *R = NULL;
+    unsigned int nBlocks;
+    unsigned int i, j;
+    unsigned int aesLen = AES_BLOCK_SIZE;
+    unsigned int outLen;
+    SECStatus s = SECFailure;
+    /* These PRUint64s are ALWAYS big endian, regardless of CPU orientation. */
+    PRUint64 t;
+    PRUint64 B[2];
+
+    /* Check args */
+    if (inputLen < 3 * AES_KEY_WRAP_BLOCK_SIZE ||
+        0 != inputLen % AES_KEY_WRAP_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return s;
+    }
+    outLen = inputLen - AES_KEY_WRAP_BLOCK_SIZE;
+#ifdef maybe
+    if (!output && pOutputLen) { /* caller is asking for output size */
+        *pOutputLen = outLen;
+        return SECSuccess;
+    }
+#endif
+    if (maxOutputLen < outLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return s;
+    }
+    if (cx == NULL || output == NULL || input == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return s;
+    }
+    nBlocks = inputLen / AES_KEY_WRAP_BLOCK_SIZE;
+    R = PORT_NewArray(PRUint64, nBlocks);
+    if (!R)
+        return s; /* error is already set. */
+    nBlocks--;
+    /* 1) Initialize variables. */
+    memcpy(&R[0], input, inputLen);
+    B[0] = R[0];
+#if BIG_ENDIAN_WITH_64_BIT_REGISTERS
+    t = 6UL * nBlocks;
+#else
+    set_t((unsigned char *)&t, 6UL * nBlocks);
+#endif
+    /* 2) Calculate intermediate values; stop all rounds on the first error. */
+    s = SECSuccess;
+    for (j = 0; j < 6 && s == SECSuccess; ++j) {
+        for (i = nBlocks; i; --i) {
+/* here, XOR A with t (in big endian order) and decrement t; */
+#if BIG_ENDIAN_WITH_64_BIT_REGISTERS
+            B[0] ^= t--;
+#else
+            xor_and_decrement(&B[0], &t);
+#endif
+            B[1] = R[i];
+            s = AES_Decrypt(&cx->aescx, (unsigned char *)B, &aesLen,
+                            sizeof B, (unsigned char *)B, sizeof B);
+            if (s != SECSuccess)
+                break;
+            R[i] = B[1];
+        }
+    }
+    /* 3) Output the results. */
+    if (s == SECSuccess) {
+        int bad = memcmp(&B[0], cx->iv, AES_KEY_WRAP_IV_BYTES);
+        if (!bad) {
+            memcpy(output, &R[1], outLen);
+            if (pOutputLen)
+                *pOutputLen = outLen;
+        } else {
+            s = SECFailure;
+            PORT_SetError(SEC_ERROR_BAD_DATA);
+            if (pOutputLen)
+                *pOutputLen = 0;
+        }
+    } else if (pOutputLen) {
+        *pOutputLen = 0;
+    }
+    PORT_ZFree(R, inputLen);
+    return s;
+}
+#undef A
diff --git a/security/nss/lib/freebl/alg2268.c b/security/nss/lib/freebl/alg2268.c
new file mode 100644
index 000000000..54c6f4dff
--- /dev/null
+++ b/security/nss/lib/freebl/alg2268.c
@@ -0,0 +1,509 @@
+/*
+ * alg2268.c - implementation of the algorithm in RFC 2268
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "blapii.h"
+#include "secerr.h"
+#ifdef XP_UNIX_XXX
+#include <stddef.h> /* for ptrdiff_t */
+#endif
+
+/*
+** RC2 symmetric block cypher
+*/
+
+typedef SECStatus(rc2Func)(RC2Context *cx, unsigned char *output,
+                           const unsigned char *input, unsigned int inputLen);
+
+/* forward declarations */
+static rc2Func rc2_EncryptECB;
+static rc2Func rc2_DecryptECB;
+static rc2Func rc2_EncryptCBC;
+static rc2Func rc2_DecryptCBC;
+
+typedef union {
+    PRUint32 l[2];
+    PRUint16 s[4];
+    PRUint8 b[8];
+} RC2Block;
+
+struct RC2ContextStr {
+    union {
+        PRUint8 Kb[128];
+        PRUint16 Kw[64];
+    } u;
+    RC2Block iv;
+    rc2Func *enc;
+    rc2Func *dec;
+};
+
+#define B u.Kb
+#define K u.Kw
+#define BYTESWAP(x) ((x) << 8 | (x) >> 8)
+#define SWAPK(i) cx->K[i] = (tmpS = cx->K[i], BYTESWAP(tmpS))
+#define RC2_BLOCK_SIZE 8
+
+#define LOAD_HARD(R)                           \
+    R[0] = (PRUint16)input[1] << 8 | input[0]; \
+    R[1] = (PRUint16)input[3] << 8 | input[2]; \
+    R[2] = (PRUint16)input[5] << 8 | input[4]; \
+    R[3] = (PRUint16)input[7] << 8 | input[6];
+#define LOAD_EASY(R)               \
+    R[0] = ((PRUint16 *)input)[0]; \
+    R[1] = ((PRUint16 *)input)[1]; \
+    R[2] = ((PRUint16 *)input)[2]; \
+    R[3] = ((PRUint16 *)input)[3];
+#define STORE_HARD(R)                 \
+    output[0] = (PRUint8)(R[0]);      \
+    output[1] = (PRUint8)(R[0] >> 8); \
+    output[2] = (PRUint8)(R[1]);      \
+    output[3] = (PRUint8)(R[1] >> 8); \
+    output[4] = (PRUint8)(R[2]);      \
+    output[5] = (PRUint8)(R[2] >> 8); \
+    output[6] = (PRUint8)(R[3]);      \
+    output[7] = (PRUint8)(R[3] >> 8);
+#define STORE_EASY(R)               \
+    ((PRUint16 *)output)[0] = R[0]; \
+    ((PRUint16 *)output)[1] = R[1]; \
+    ((PRUint16 *)output)[2] = R[2]; \
+    ((PRUint16 *)output)[3] = R[3];
+
+#if defined(NSS_X86_OR_X64)
+#define LOAD(R) LOAD_EASY(R)
+#define STORE(R) STORE_EASY(R)
+#elif !defined(IS_LITTLE_ENDIAN)
+#define LOAD(R) LOAD_HARD(R)
+#define STORE(R) STORE_HARD(R)
+#else /* little endian: use the 16-bit access only if the pointer used is even */
+#define LOAD(R)                 \
+    if ((ptrdiff_t)input & 1) { \
+        LOAD_HARD(R)            \
+    } else {                    \
+        LOAD_EASY(R)            \
+    }
+#define STORE(R)                 \
+    if ((ptrdiff_t)output & 1) { \
+        STORE_HARD(R)            \
+    } else {                     \
+        STORE_EASY(R)            \
+    }
+#endif
+
+static const PRUint8 S[256] = {
+    0331, 0170, 0371, 0304, 0031, 0335, 0265, 0355, 0050, 0351, 0375, 0171, 0112, 0240, 0330, 0235,
+    0306, 0176, 0067, 0203, 0053, 0166, 0123, 0216, 0142, 0114, 0144, 0210, 0104, 0213, 0373, 0242,
+    0027, 0232, 0131, 0365, 0207, 0263, 0117, 0023, 0141, 0105, 0155, 0215, 0011, 0201, 0175, 0062,
+    0275, 0217, 0100, 0353, 0206, 0267, 0173, 0013, 0360, 0225, 0041, 0042, 0134, 0153, 0116, 0202,
+    0124, 0326, 0145, 0223, 0316, 0140, 0262, 0034, 0163, 0126, 0300, 0024, 0247, 0214, 0361, 0334,
+    0022, 0165, 0312, 0037, 0073, 0276, 0344, 0321, 0102, 0075, 0324, 0060, 0243, 0074, 0266, 0046,
+    0157, 0277, 0016, 0332, 0106, 0151, 0007, 0127, 0047, 0362, 0035, 0233, 0274, 0224, 0103, 0003,
+    0370, 0021, 0307, 0366, 0220, 0357, 0076, 0347, 0006, 0303, 0325, 0057, 0310, 0146, 0036, 0327,
+    0010, 0350, 0352, 0336, 0200, 0122, 0356, 0367, 0204, 0252, 0162, 0254, 0065, 0115, 0152, 0052,
+    0226, 0032, 0322, 0161, 0132, 0025, 0111, 0164, 0113, 0237, 0320, 0136, 0004, 0030, 0244, 0354,
+    0302, 0340, 0101, 0156, 0017, 0121, 0313, 0314, 0044, 0221, 0257, 0120, 0241, 0364, 0160, 0071,
+    0231, 0174, 0072, 0205, 0043, 0270, 0264, 0172, 0374, 0002, 0066, 0133, 0045, 0125, 0227, 0061,
+    0055, 0135, 0372, 0230, 0343, 0212, 0222, 0256, 0005, 0337, 0051, 0020, 0147, 0154, 0272, 0311,
+    0323, 0000, 0346, 0317, 0341, 0236, 0250, 0054, 0143, 0026, 0001, 0077, 0130, 0342, 0211, 0251,
+    0015, 0070, 0064, 0033, 0253, 0063, 0377, 0260, 0273, 0110, 0014, 0137, 0271, 0261, 0315, 0056,
+    0305, 0363, 0333, 0107, 0345, 0245, 0234, 0167, 0012, 0246, 0040, 0150, 0376, 0177, 0301, 0255
+};
+
+RC2Context *
+RC2_AllocateContext(void)
+{
+    return PORT_ZNew(RC2Context);
+}
+/*
+** Initialize a caller-provided RC2 context: choose the ECB or CBC routines
+** and expand "key" ("len" bytes) into the 128-byte key table.
+**  "input" the CBC IV (8 bytes are read), required when mode is NSS_RC2_CBC
+**  "efLen8" effective key length in bytes; must be 1..128
+*/
+SECStatus
+RC2_InitContext(RC2Context *cx, const unsigned char *key, unsigned int len,
+                const unsigned char *input, int mode, unsigned int efLen8,
+                unsigned int unused)
+{
+    PRUint8 *L, *L2;
+    int i;
+#if !defined(IS_LITTLE_ENDIAN)
+    PRUint16 tmpS;
+#endif
+    PRUint8 tmpB;
+
+    if (!key || !cx || !len || len > (sizeof cx->B) ||
+        !efLen8 || efLen8 > (sizeof cx->B)) { /* 0 would index past cx->B */
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if ((mode != NSS_RC2 && mode != NSS_RC2_CBC) ||
+        (mode == NSS_RC2_CBC && !input)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (mode == NSS_RC2_CBC) {
+        cx->enc = &rc2_EncryptCBC;
+        cx->dec = &rc2_DecryptCBC;
+        LOAD(cx->iv.s);
+    } else {
+        cx->enc = &rc2_EncryptECB;
+        cx->dec = &rc2_DecryptECB;
+    }
+
+    /* Step 0. Copy key into table. */
+    memcpy(cx->B, key, len);
+
+    /* Step 1. Compute all values to the right of the key. */
+    L2 = cx->B;
+    L = L2 + len;
+    tmpB = L[-1];
+    for (i = (sizeof cx->B) - len; i > 0; --i) {
+        *L++ = tmpB = S[(PRUint8)(tmpB + *L2++)];
+    }
+
+    /* step 2. Adjust left most byte of effective key. */
+    i = (sizeof cx->B) - efLen8;
+    L = cx->B + i;
+    *L = tmpB = S[*L]; /* mask is always 0xff */
+
+    /* step 3. Recompute all values to the left of effective key. */
+    L2 = --L + efLen8;
+    while (L >= cx->B) {
+        *L-- = tmpB = S[tmpB ^ *L2--];
+    }
+
+#if !defined(IS_LITTLE_ENDIAN)
+    for (i = 63; i >= 0; --i) {
+        SWAPK(i); /* candidate for unrolling */
+    }
+#endif
+    return SECSuccess;
+}
+
+/*
+** Create a new RC2 context suitable for RC2 encryption/decryption.
+**  "key" raw key data
+**  "len" the number of bytes of key data
+**  "iv" is the CBC initialization vector (if mode is NSS_RC2_CBC)
+**  "mode" one of NSS_RC2 or NSS_RC2_CBC
+**  "efLen8" the effective key length, in bytes, not bits.
+**
+** When mode is set to NSS_RC2_CBC the RC2 cipher is run in "cipher block
+** chaining" mode.
+*/
+RC2Context *
+RC2_CreateContext(const unsigned char *key, unsigned int len,
+                  const unsigned char *iv, int mode, unsigned efLen8)
+{
+    RC2Context *cx = PORT_ZNew(RC2Context);
+    if (cx) {
+        SECStatus rv = RC2_InitContext(cx, key, len, iv, mode, efLen8, 0);
+        if (rv != SECSuccess) {
+            RC2_DestroyContext(cx, PR_TRUE);
+            cx = NULL;
+        }
+    }
+    return cx;
+}
+
+/*
+** Destroy an RC2 encryption/decryption context.
+**  "cx" the context
+**  "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+void
+RC2_DestroyContext(RC2Context *cx, PRBool freeit)
+{
+    if (cx) {
+        memset(cx, 0, sizeof *cx);
+        if (freeit) {
+            PORT_Free(cx);
+        }
+    }
+}
+
+#define ROL(x, k) (x << k | x >> (16 - k))
+#define MIX(j)                                           \
+    R0 = R0 + cx->K[4 * j + 0] + (R3 & R2) + (~R3 & R1); \
+    R0 = ROL(R0, 1);                                     \
+    R1 = R1 + cx->K[4 * j + 1] + (R0 & R3) + (~R0 & R2); \
+    R1 = ROL(R1, 2);                                     \
+    R2 = R2 + cx->K[4 * j + 2] + (R1 & R0) + (~R1 & R3); \
+    R2 = ROL(R2, 3);                                     \
+    R3 = R3 + cx->K[4 * j + 3] + (R2 & R1) + (~R2 & R0); \
+    R3 = ROL(R3, 5)
+#define MASH                  \
+    R0 = R0 + cx->K[R3 & 63]; \
+    R1 = R1 + cx->K[R0 & 63]; \
+    R2 = R2 + cx->K[R1 & 63]; \
+    R3 = R3 + cx->K[R2 & 63]
+
+/* Encrypt one block */
+static void
+rc2_Encrypt1Block(RC2Context *cx, RC2Block *output, RC2Block *input)
+{
+    register PRUint16 R0, R1, R2, R3;
+
+    /* step 1. Initialize input. */
+    R0 = input->s[0];
+    R1 = input->s[1];
+    R2 = input->s[2];
+    R3 = input->s[3];
+
+    /* step 2.  Expand Key (already done, in context) */
+    /* step 3.  j = 0 */
+    /* step 4.  Perform 5 mixing rounds. */
+
+    MIX(0);
+    MIX(1);
+    MIX(2);
+    MIX(3);
+    MIX(4);
+
+    /* step 5. Perform 1 mashing round. */
+    MASH;
+
+    /* step 6. Perform 6 mixing rounds. */
+
+    MIX(5);
+    MIX(6);
+    MIX(7);
+    MIX(8);
+    MIX(9);
+    MIX(10);
+
+    /* step 7. Perform 1 mashing round. */
+    MASH;
+
+    /* step 8. Perform 5 mixing rounds. */
+
+    MIX(11);
+    MIX(12);
+    MIX(13);
+    MIX(14);
+    MIX(15);
+
+    /* output results */
+    output->s[0] = R0;
+    output->s[1] = R1;
+    output->s[2] = R2;
+    output->s[3] = R3;
+}
+
+#define ROR(x, k) (x >> k | x << (16 - k))
+#define R_MIX(j)                                         \
+    R3 = ROR(R3, 5);                                     \
+    R3 = R3 - cx->K[4 * j + 3] - (R2 & R1) - (~R2 & R0); \
+    R2 = ROR(R2, 3);                                     \
+    R2 = R2 - cx->K[4 * j + 2] - (R1 & R0) - (~R1 & R3); \
+    R1 = ROR(R1, 2);                                     \
+    R1 = R1 - cx->K[4 * j + 1] - (R0 & R3) - (~R0 & R2); \
+    R0 = ROR(R0, 1);                                     \
+    R0 = R0 - cx->K[4 * j + 0] - (R3 & R2) - (~R3 & R1)
+#define R_MASH                \
+    R3 = R3 - cx->K[R2 & 63]; \
+    R2 = R2 - cx->K[R1 & 63]; \
+    R1 = R1 - cx->K[R0 & 63]; \
+    R0 = R0 - cx->K[R3 & 63]
+
+/* Decrypt one block */
+static void
+rc2_Decrypt1Block(RC2Context *cx, RC2Block *output, RC2Block *input)
+{
+    register PRUint16 R0, R1, R2, R3;
+
+    /* step 1. Initialize input. */
+    R0 = input->s[0];
+    R1 = input->s[1];
+    R2 = input->s[2];
+    R3 = input->s[3];
+
+    /* step 2.  Expand Key (already done, in context) */
+    /* step 3.  j = 63 */
+    /* step 4.  Perform 5 r_mixing rounds. */
+    R_MIX(15);
+    R_MIX(14);
+    R_MIX(13);
+    R_MIX(12);
+    R_MIX(11);
+
+    /* step 5.  Perform 1 r_mashing round. */
+    R_MASH;
+
+    /* step 6.  Perform 6 r_mixing rounds. */
+    R_MIX(10);
+    R_MIX(9);
+    R_MIX(8);
+    R_MIX(7);
+    R_MIX(6);
+    R_MIX(5);
+
+    /* step 7.  Perform 1 r_mashing round. */
+    R_MASH;
+
+    /* step 8.  Perform 5 r_mixing rounds. */
+    R_MIX(4);
+    R_MIX(3);
+    R_MIX(2);
+    R_MIX(1);
+    R_MIX(0);
+
+    /* output results */
+    output->s[0] = R0;
+    output->s[1] = R1;
+    output->s[2] = R2;
+    output->s[3] = R3;
+}
+
+static SECStatus NO_SANITIZE_ALIGNMENT
+rc2_EncryptECB(RC2Context *cx, unsigned char *output,
+               const unsigned char *input, unsigned int inputLen)
+{
+    RC2Block blk;
+
+    /* ECB: every 8-byte block is encrypted on its own, in place in blk. */
+    for (; inputLen > 0; inputLen -= RC2_BLOCK_SIZE) {
+        LOAD(blk.s)
+        rc2_Encrypt1Block(cx, &blk, &blk);
+        STORE(blk.s)
+        input += RC2_BLOCK_SIZE;
+        output += RC2_BLOCK_SIZE;
+    }
+    return SECSuccess;
+}
+
+static SECStatus NO_SANITIZE_ALIGNMENT
+rc2_DecryptECB(RC2Context *cx, unsigned char *output,
+               const unsigned char *input, unsigned int inputLen)
+{
+    RC2Block blk;
+
+    /* ECB: every 8-byte block is decrypted on its own, in place in blk. */
+    for (; inputLen > 0; inputLen -= RC2_BLOCK_SIZE) {
+        LOAD(blk.s)
+        rc2_Decrypt1Block(cx, &blk, &blk);
+        STORE(blk.s)
+        input += RC2_BLOCK_SIZE;
+        output += RC2_BLOCK_SIZE;
+    }
+    return SECSuccess;
+}
+
+static SECStatus NO_SANITIZE_ALIGNMENT
+rc2_EncryptCBC(RC2Context *cx, unsigned char *output,
+               const unsigned char *input, unsigned int inputLen)
+{
+    RC2Block blk;
+
+    /* CBC: XOR each plaintext block with the previous ciphertext block (the
+     * IV held in cx for the first one) before encrypting it. */
+    for (; inputLen > 0; inputLen -= RC2_BLOCK_SIZE) {
+        LOAD(blk.s)
+        blk.l[0] ^= cx->iv.l[0];
+        blk.l[1] ^= cx->iv.l[1];
+        rc2_Encrypt1Block(cx, &blk, &blk);
+        cx->iv = blk; /* chaining value for the next block */
+        STORE(blk.s)
+        input += RC2_BLOCK_SIZE;
+        output += RC2_BLOCK_SIZE;
+    }
+    return SECSuccess;
+}
+
+static SECStatus NO_SANITIZE_ALIGNMENT
+rc2_DecryptCBC(RC2Context *cx, unsigned char *output,
+               const unsigned char *input, unsigned int inputLen)
+{
+    RC2Block cipher, plain;
+
+    /* CBC: decrypt, then XOR with the previous ciphertext block (or the IV). */
+    for (; inputLen > 0; inputLen -= RC2_BLOCK_SIZE) {
+        LOAD(cipher.s)
+        rc2_Decrypt1Block(cx, &plain, &cipher);
+        plain.l[0] ^= cx->iv.l[0];
+        plain.l[1] ^= cx->iv.l[1];
+        /* the ciphertext just consumed is the next block's chaining value */
+        cx->iv = cipher;
+        STORE(plain.s)
+        input += RC2_BLOCK_SIZE;
+        output += RC2_BLOCK_SIZE;
+    }
+    return SECSuccess;
+}
+
+/*
+** RC2-encrypt "inputLen" bytes from "input" into "output".
+**  "cx" the context; it holds the encrypt routine chosen at init time
+**  "output" the output buffer to store the encrypted data.
+**  "outputLen" set to "inputLen" on success (output is the same size)
+**  "maxOutputLen" capacity of "output"; must be at least "inputLen"
+**  "input" the input data
+**  "inputLen" the amount of input data; must be a multiple of RC2_BLOCK_SIZE
+** Fails with SEC_ERROR_INPUT_LEN or SEC_ERROR_OUTPUT_LEN on a bad length.
+*/
+SECStatus
+RC2_Encrypt(RC2Context *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    SECStatus status;
+    if (inputLen == 0) {
+        *outputLen = 0; /* nothing to do */
+        return SECSuccess;
+    }
+    if (inputLen % RC2_BLOCK_SIZE != 0) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (maxOutputLen < inputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    status = (*cx->enc)(cx, output, input, inputLen);
+    if (status == SECSuccess)
+        *outputLen = inputLen;
+    return status;
+}
+
+/*
+** RC2-decrypt "inputLen" bytes from "input" into "output".
+**  "cx" the context; it holds the decrypt routine chosen at init time
+**  "output" the output buffer to store the decrypted data.
+**  "outputLen" set to "inputLen" on success (output is the same size)
+**  "maxOutputLen" capacity of "output"; must be at least "inputLen"
+**  "input" the input data
+**  "inputLen" the amount of input data; must be a multiple of RC2_BLOCK_SIZE
+** Fails with SEC_ERROR_INPUT_LEN or SEC_ERROR_OUTPUT_LEN on a bad length.
+*/
+SECStatus
+RC2_Decrypt(RC2Context *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    SECStatus status;
+    if (inputLen == 0) {
+        *outputLen = 0; /* nothing to do */
+        return SECSuccess;
+    }
+    if (inputLen % RC2_BLOCK_SIZE != 0) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (maxOutputLen < inputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    status = (*cx->dec)(cx, output, input, inputLen);
+    if (status == SECSuccess)
+        *outputLen = inputLen;
+    return status;
+}
diff --git a/security/nss/lib/freebl/alghmac.c b/security/nss/lib/freebl/alghmac.c
new file mode 100644
index 000000000..dd8b73c5f
--- /dev/null
+++ b/security/nss/lib/freebl/alghmac.c
@@ -0,0 +1,165 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "secport.h"
+#include "hasht.h"
+#include "blapit.h"
+#include "alghmac.h"
+#include "secerr.h"
+
+#define HMAC_PAD_SIZE HASH_BLOCK_LENGTH_MAX
+
+struct HMACContextStr {
+    void *hash;
+    const SECHashObject *hashobj;
+    PRBool wasAllocated;
+    unsigned char ipad[HMAC_PAD_SIZE];
+    unsigned char opad[HMAC_PAD_SIZE];
+};
+
+void
+HMAC_Destroy(HMACContext *cx, PRBool freeit)
+{
+    if (cx == NULL)
+        return;
+
+    PORT_Assert(!freeit == !cx->wasAllocated);
+    if (cx->hash != NULL) {
+        cx->hashobj->destroy(cx->hash, PR_TRUE);
+        PORT_Memset(cx, 0, sizeof *cx);
+    }
+    if (freeit)
+        PORT_Free(cx);
+}
+
+SECStatus
+HMAC_Init(HMACContext *cx, const SECHashObject *hash_obj,
+          const unsigned char *secret, unsigned int secret_len, PRBool isFIPS)
+{
+    unsigned int i;
+    unsigned char hashed_secret[HASH_LENGTH_MAX];
+
+    /* required by FIPS 198 Section 3: key of at least half the hash length */
+    if (cx == NULL || hash_obj == NULL ||
+        (isFIPS && secret_len < hash_obj->length / 2)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    cx->wasAllocated = PR_FALSE;
+    cx->hashobj = hash_obj;
+    cx->hash = cx->hashobj->create();
+    if (cx->hash == NULL)
+        goto loser;
+
+    /* a secret longer than one hash block is replaced by its digest */
+    if (secret_len > cx->hashobj->blocklength) {
+        cx->hashobj->begin(cx->hash);
+        cx->hashobj->update(cx->hash, secret, secret_len);
+        PORT_Assert(cx->hashobj->length <= sizeof hashed_secret);
+        cx->hashobj->end(cx->hash, hashed_secret, &secret_len,
+                         sizeof hashed_secret);
+        if (secret_len != cx->hashobj->length) {
+            PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+            goto loser;
+        }
+        secret = (const unsigned char *)&hashed_secret[0];
+    }
+
+    PORT_Memset(cx->ipad, 0x36, cx->hashobj->blocklength);
+    PORT_Memset(cx->opad, 0x5c, cx->hashobj->blocklength);
+
+    /* fold secret into padding */
+    for (i = 0; i < secret_len; i++) {
+        cx->ipad[i] ^= secret[i];
+        cx->opad[i] ^= secret[i];
+    }
+    PORT_Memset(hashed_secret, 0, sizeof hashed_secret);
+    return SECSuccess;
+
+loser:
+    PORT_Memset(hashed_secret, 0, sizeof hashed_secret);
+    if (cx->hash != NULL) {
+        cx->hashobj->destroy(cx->hash, PR_TRUE);
+        cx->hash = NULL; /* so a later HMAC_Destroy() cannot destroy it twice */
+    }
+    return SECFailure;
+}
+
+HMACContext *
+HMAC_Create(const SECHashObject *hash_obj, const unsigned char *secret,
+            unsigned int secret_len, PRBool isFIPS)
+{
+    SECStatus rv;
+    HMACContext *cx = PORT_ZNew(HMACContext);
+    if (cx == NULL)
+        return NULL;
+    rv = HMAC_Init(cx, hash_obj, secret, secret_len, isFIPS);
+    cx->wasAllocated = PR_TRUE;
+    if (rv != SECSuccess) {
+        PORT_Free(cx); /* contains no secret info */
+        cx = NULL;
+    }
+    return cx;
+}
+
+void
+HMAC_Begin(HMACContext *cx)
+{
+    /* start inner hash */
+    cx->hashobj->begin(cx->hash);
+    cx->hashobj->update(cx->hash, cx->ipad, cx->hashobj->blocklength);
+}
+
+void
+HMAC_Update(HMACContext *cx, const unsigned char *data, unsigned int data_len)
+{
+    cx->hashobj->update(cx->hash, data, data_len);
+}
+
+SECStatus
+HMAC_Finish(HMACContext *cx, unsigned char *result, unsigned int *result_len,
+            unsigned int max_result_len)
+{
+    if (max_result_len < cx->hashobj->length) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    cx->hashobj->end(cx->hash, result, result_len, max_result_len);
+    if (*result_len != cx->hashobj->length)
+        return SECFailure;
+
+    cx->hashobj->begin(cx->hash);
+    cx->hashobj->update(cx->hash, cx->opad, cx->hashobj->blocklength);
+    cx->hashobj->update(cx->hash, result, *result_len);
+    cx->hashobj->end(cx->hash, result, result_len, max_result_len);
+    return SECSuccess;
+}
+
+HMACContext *
+HMAC_Clone(HMACContext *cx)
+{
+    HMACContext *newcx;
+
+    newcx = (HMACContext *)PORT_ZAlloc(sizeof(HMACContext));
+    if (newcx == NULL)
+        goto loser;
+
+    newcx->wasAllocated = PR_TRUE;
+    newcx->hashobj = cx->hashobj;
+    newcx->hash = cx->hashobj->clone(cx->hash);
+    if (newcx->hash == NULL)
+        goto loser;
+    PORT_Memcpy(newcx->ipad, cx->ipad, cx->hashobj->blocklength);
+    PORT_Memcpy(newcx->opad, cx->opad, cx->hashobj->blocklength);
+    return newcx;
+
+loser:
+    HMAC_Destroy(newcx, PR_TRUE);
+    return NULL;
+}
diff --git a/security/nss/lib/freebl/alghmac.h b/security/nss/lib/freebl/alghmac.h
new file mode 100644
index 000000000..462526ac4
--- /dev/null
+++ b/security/nss/lib/freebl/alghmac.h
@@ -0,0 +1,64 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _ALGHMAC_H_
+#define _ALGHMAC_H_
+
+typedef struct HMACContextStr HMACContext;
+
+SEC_BEGIN_PROTOS
+
+/* destroy HMAC context */
+extern void
+HMAC_Destroy(HMACContext *cx, PRBool freeit);
+
+/* create HMAC context
+ *  hash_obj    hash object from SECRawHashObjects[]
+ *  secret      the secret with which the HMAC is performed.
+ *  secret_len  the length of the secret.
+ *  isFIPS      true if conforming to FIPS 198.
+ *
+ * NULL is returned if an error occurs.
+ */
+extern HMACContext *
+HMAC_Create(const SECHashObject *hash_obj, const unsigned char *secret,
+            unsigned int secret_len, PRBool isFIPS);
+
+/* like HMAC_Create, except caller allocates HMACContext. */
+SECStatus
+HMAC_Init(HMACContext *cx, const SECHashObject *hash_obj,
+          const unsigned char *secret, unsigned int secret_len, PRBool isFIPS);
+
+/* reset HMAC for a fresh round */
+extern void
+HMAC_Begin(HMACContext *cx);
+
+/* update HMAC
+ *  cx          HMAC Context
+ *  data        the data to perform HMAC on
+ *  data_len    the length of the data to process
+ */
+extern void
+HMAC_Update(HMACContext *cx, const unsigned char *data, unsigned int data_len);
+
+/* Finish HMAC -- place the results within result
+ *  cx          HMAC context
+ *  result      buffer for resulting hmac'd data
+ *  result_len  where the resultant hmac length is stored
+ *  max_result_len  maximum possible length that can be stored in result
+ */
+extern SECStatus
+HMAC_Finish(HMACContext *cx, unsigned char *result, unsigned int *result_len,
+            unsigned int max_result_len);
+
+/* clone a copy of the HMAC state.  this is useful when you would
+ * need to keep a running hmac but also need to extract portions
+ * partway through the process.
+ */
+extern HMACContext *
+HMAC_Clone(HMACContext *cx);
+
+SEC_END_PROTOS
+
+#endif
diff --git a/security/nss/lib/freebl/arcfive.c b/security/nss/lib/freebl/arcfive.c
new file mode 100644
index 000000000..dda77710f
--- /dev/null
+++ b/security/nss/lib/freebl/arcfive.c
@@ -0,0 +1,87 @@
+/*
+ * arcfive.c - stubs for RC5 - NOT a working implementation!
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "prerror.h"
+
+/******************************************/
+/*
+** RC5 symmetric block cypher -- 64-bit block size
+*/
+
+/*
+** Create a new RC5 context suitable for RC5 encryption/decryption.
+**      "key" raw key data, passed as a SECItem
+**      "rounds" the number of rounds
+**      "wordSize" the word size
+**      "iv" is the CBC initialization vector (if mode is NSS_RC5_CBC)
+**      "mode" one of NSS_RC5 or NSS_RC5_CBC; NSS_RC5_CBC runs the cipher in
+**         "cipher block chaining" mode
+** This stub only sets PR_NOT_IMPLEMENTED_ERROR and returns NULL.
+*/
+RC5Context *
+RC5_CreateContext(const SECItem *key, unsigned int rounds,
+                  unsigned int wordSize, const unsigned char *iv, int mode)
+{
+    PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+    return NULL;
+}
+
+/*
+** Destroy an RC5 encryption/decryption context.
+**      "cx" the context
+**      "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+void
+RC5_DestroyContext(RC5Context *cx, PRBool freeit)
+{
+    PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+}
+
+/*
+** Perform RC5 encryption.
+**      "cx" the context
+**      "output" the output buffer to store the encrypted data.
+**      "outputLen" how much data is stored in "output". Set by the routine
+**         after some data is stored in output.
+**      "maxOutputLen" the maximum amount of data that can ever be
+**         stored in "output"
+**      "input" the input data
+**      "inputLen" the amount of input data
+*/
+SECStatus
+RC5_Encrypt(RC5Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+    return SECFailure;
+}
+
+/*
+** Perform RC5 decryption.
+**      "cx" the context
+**      "output" the output buffer to store the decrypted data.
+**      "outputLen" how much data is stored in "output". Set by the routine
+**         after some data is stored in output.
+**      "maxOutputLen" the maximum amount of data that can ever be
+**         stored in "output"
+**      "input" the input data
+**      "inputLen" the amount of input data
+*/
+SECStatus
+RC5_Decrypt(RC5Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+    return SECFailure;
+}
diff --git a/security/nss/lib/freebl/arcfour-amd64-gas.s b/security/nss/lib/freebl/arcfour-amd64-gas.s
new file mode 100644
index 000000000..7c4f5358f
--- /dev/null
+++ b/security/nss/lib/freebl/arcfour-amd64-gas.s
@@ -0,0 +1,88 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# ** ARCFOUR implementation optimized for AMD64.
+# **
+# ** The throughput achieved by this code is about 320 MBytes/sec, on
+# ** a 1.8 GHz AMD Opteron (rev C0) processor.
+
+.text
+.align 16
+.globl ARCFOUR
+.type ARCFOUR,@function
+ARCFOUR:
+	pushq	%rbp
+	pushq	%rbx
+	movq	%rdi,		%rbp	# key = ARG(key)
+	movq	%rsi,		%rbx	# rbx = ARG(len)
+	movq	%rdx,		%rsi	# in = ARG(in)
+	movq	%rcx,		%rdi	# out = ARG(out)
+	movq	(%rbp),		%rcx	# x = key->x
+	movq	8(%rbp),	%rdx	# y = key->y
+	addq	$16,		%rbp	# d = key->data
+	incq	%rcx			# x++
+	andq	$255,		%rcx	# x &= 0xff
+	leaq	-8(%rbx,%rsi),	%rbx	# rbx = in+len-8
+	movq	%rbx,		%r9	# tmp = in+len-8
+	movq	0(%rbp,%rcx,8),	%rax	# tx = d[x]
+	cmpq	%rsi,		%rbx	# cmp in with in+len-8
+	jl	.Lend			# jump if (in+len-8 < in)
+
+.Lstart:
+	addq	$8,		%rsi		# increment in
+	addq	$8,		%rdi		# increment out
+
+	# generate the next 8 bytes of the rc4 stream into %r8
+	movq	$8,		%r11		# byte counter
+1:	addb	%al,		%dl		# y += tx
+	movl	0(%rbp,%rdx,8),	%ebx		# ty = d[y]
+	movl	%ebx,		0(%rbp,%rcx,8)	# d[x] = ty
+	addb	%al,		%bl		# val = ty + tx
+	movl	%eax,		0(%rbp,%rdx,8)	# d[y] = tx
+	incb	%cl				# x++		(NEXT ROUND)
+	movl	0(%rbp,%rcx,8),	%eax		# tx = d[x]	(NEXT ROUND)
+	movb	0(%rbp,%rbx,8),	%r8b		# val = d[val]
+	decb	%r11b
+	rorq	$8,		%r8		# (ror does not change ZF)
+	jnz 	1b
+
+	# xor 8 bytes
+	xorq	-8(%rsi),	%r8
+	cmpq	%r9,		%rsi		# cmp in+len-8 with in
+	movq	%r8,		-8(%rdi)
+	jle	.Lstart				# jump if (in <= in+len-8)
+
+.Lend:
+	addq	$8,		%r9		# tmp = in+len
+
+	# handle the last bytes, one by one
+1:	cmpq	%rsi,		%r9		# cmp in with in+len
+	jle	.Lfinished			# jump if (in+len <= in)
+	addb	%al,		%dl		# y += tx
+	movl	0(%rbp,%rdx,8),	%ebx		# ty = d[y]
+	movl	%ebx,		0(%rbp,%rcx,8)	# d[x] = ty
+	addb	%al,		%bl		# val = ty + tx
+	movl	%eax,		0(%rbp,%rdx,8)	# d[y] = tx
+	incb	%cl				# x++		(NEXT ROUND)
+	movl	0(%rbp,%rcx,8),	%eax		# tx = d[x]	(NEXT ROUND)
+	movb	0(%rbp,%rbx,8),	%r8b		# val = d[val]
+	xorb	(%rsi),		%r8b		# xor 1 byte
+	movb	%r8b,		(%rdi)
+	incq	%rsi				# in++
+	incq	%rdi				# out++
+	jmp 1b
+
+.Lfinished:
+	decq	%rcx				# x--
+	movb	%dl,		-8(%rbp)	# key->y = y
+	movb	%cl,		-16(%rbp)	# key->x = x
+	popq	%rbx
+	popq	%rbp
+	ret
+.L_ARCFOUR_end:
+.size ARCFOUR,.L_ARCFOUR_end-ARCFOUR
+
+# Magic indicating no need for an executable stack
+.section .note.GNU-stack,"",@progbits
+.previous
diff --git a/security/nss/lib/freebl/arcfour-amd64-masm.asm b/security/nss/lib/freebl/arcfour-amd64-masm.asm
new file mode 100644
index 000000000..1601c4f89
--- /dev/null
+++ b/security/nss/lib/freebl/arcfour-amd64-masm.asm
@@ -0,0 +1,107 @@
+; This Source Code Form is subject to the terms of the Mozilla Public
+; License, v. 2.0. If a copy of the MPL was not distributed with this
+; file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+; ** ARCFOUR implementation optimized for AMD64.
+; **
+; ** The throughput achieved by this code is about 320 MBytes/sec, on
+; ** a 1.8 GHz AMD Opteron (rev C0) processor.
+
+.CODE
+
+; extern void ARCFOUR(RC4Context *cx, unsigned long long inputLen, 
+;                     const unsigned char *input, unsigned char *output);
+
+
+ARCFOUR PROC
+
+        push    rbp                             ; save the registers overwritten below;
+        push    rbx                             ; all four are popped again, in reverse
+        push    rsi                             ; order, just before ret
+        push    rdi
+
+        mov     rbp, rcx                        ; key = ARG(key)
+        mov     rbx, rdx                        ; rbx = ARG(len)
+        mov     rsi, r8                         ; in = ARG(in)
+        mov     rdi, r9                         ; out = ARG(out)
+        mov     rcx, [rbp]                      ; x = key->x
+        mov     rdx, [rbp+8]                    ; y = key->y
+        add     rbp, 16                         ; d = key->data
+        inc     rcx                             ; x++
+        and     rcx, 0ffh                       ; x &= 0xff
+        lea     rbx, [rbx+rsi-8]                ; rbx = in+len-8
+        mov     r9, rbx                         ; tmp = in+len-8
+        mov     rax, [rbp+rcx*8]                ; tx = d[x]
+        cmp     rbx, rsi                        ; cmp in with in+len-8
+        jl      Lend                            ; jump if (in+len-8 < in)
+
+Lstart:
+        add     rsi, 8                          ; increment in
+        add     rdi, 8                          ; increment out
+
+        ;
+        ; generate the next 8 bytes of the rc4 stream into r8
+        ;
+
+        mov     r11, 8                          ; byte counter
+
+@@:
+        add     dl, al                          ; y += tx
+        mov     ebx, [rbp+rdx*8]                ; ty = d[y]
+        mov     [rbp+rcx*8], ebx                ; d[x] = ty
+        add     bl, al                          ; val = ty + tx
+        mov     [rbp+rdx*8], eax                ; d[y] = tx
+        inc     cl                              ; x++ (NEXT ROUND)
+        mov     eax, [rbp+rcx*8]                ; tx = d[x] (NEXT ROUND)
+        mov     r8b, [rbp+rbx*8]                ; val = d[val]
+        dec     r11b                            ; ZF is set once 8 bytes are done
+        ror     r8, 8                           ; (ror does not change ZF)
+        jnz     @b
+
+        ;
+        ; xor 8 bytes
+        ;
+
+        xor     r8, [rsi-8]                     ; r8 = keystream ^ 8 input bytes
+        cmp     rsi, r9                         ; cmp in+len-8 with in
+        mov     [rdi-8], r8                     ; store 8 output bytes (flags untouched)
+        jle     Lstart                          ; jump if (in <= in+len-8)
+
+Lend:
+        add     r9, 8                           ; tmp = in+len
+
+        ;
+        ; handle the last bytes, one by one
+        ;
+
+@@:
+        cmp     r9, rsi                         ; cmp in with in+len
+        jle     Lfinished                       ; jump if (in+len <= in)
+        add     dl, al                          ; y += tx
+        mov     ebx, [rbp+rdx*8]                ; ty = d[y]
+        mov     [rbp+rcx*8], ebx                ; d[x] = ty
+        add     bl, al                          ; val = ty + tx
+        mov     [rbp+rdx*8], eax                ; d[y] = tx
+        inc     cl                              ; x++ (NEXT ROUND)
+        mov     eax, [rbp+rcx*8]                ; tx = d[x] (NEXT ROUND)
+        mov     r8b, [rbp+rbx*8]                ; val = d[val]
+        xor     r8b, [rsi]                      ; xor 1 byte
+        mov     [rdi], r8b
+        inc     rsi                             ; in++
+        inc     rdi                             ; out++
+        jmp     @b
+
+Lfinished:
+        dec     rcx                             ; x-- (undo the look-ahead x++)
+        mov     [rbp-8], dl                     ; key->y = y
+        mov     [rbp-16], cl                    ; key->x = x
+
+        pop     rdi                             ; restore the saved registers
+        pop     rsi
+        pop     rbx
+        pop     rbp
+        ret
+
+ARCFOUR ENDP
+
+END
diff --git a/security/nss/lib/freebl/arcfour-amd64-sun.s b/security/nss/lib/freebl/arcfour-amd64-sun.s
new file mode 100644
index 000000000..8b649f901
--- /dev/null
+++ b/security/nss/lib/freebl/arcfour-amd64-sun.s
@@ -0,0 +1,84 @@
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/ ** ARCFOUR implementation optimized for AMD64.
+/ **
+/ ** The throughput achieved by this code is about 320 MBytes/sec, on
+/ ** a 1.8 GHz AMD Opteron (rev C0) processor.
+
+.text
+.align 16
+.globl ARCFOUR
+.type ARCFOUR,@function
+ARCFOUR:
+	pushq	%rbp			/ saved here, restored before ret
+	pushq	%rbx			/ saved here, restored before ret
+	movq	%rdi,		%rbp	/ key = ARG(key)
+	movq	%rsi,		%rbx	/ rbx = ARG(len)
+	movq	%rdx,		%rsi	/ in = ARG(in)
+	movq	%rcx,		%rdi	/ out = ARG(out)
+	movq	(%rbp),		%rcx	/ x = key->x
+	movq	8(%rbp),	%rdx	/ y = key->y
+	addq	$16,		%rbp	/ d = key->data
+	incq	%rcx			/ x++
+	andq	$255,		%rcx	/ x &= 0xff
+	leaq	-8(%rbx,%rsi),	%rbx	/ rbx = in+len-8
+	movq	%rbx,		%r9	/ tmp = in+len-8
+	movq	0(%rbp,%rcx,8),	%rax	/ tx = d[x]
+	cmpq	%rsi,		%rbx	/ cmp in with in+len-8
+	jl	.Lend			/ jump if (in+len-8 < in)
+
+.Lstart:
+	addq	$8,		%rsi		/ increment in
+	addq	$8,		%rdi		/ increment out
+
+	/ generate the next 8 bytes of the rc4 stream into %r8
+	movq	$8,		%r11		/ byte counter
+1:	addb	%al,		%dl		/ y += tx
+	movl	0(%rbp,%rdx,8),	%ebx		/ ty = d[y]
+	movl	%ebx,		0(%rbp,%rcx,8)	/ d[x] = ty
+	addb	%al,		%bl		/ val = ty + tx
+	movl	%eax,		0(%rbp,%rdx,8)	/ d[y] = tx
+	incb	%cl				/ x++		(NEXT ROUND)
+	movl	0(%rbp,%rcx,8),	%eax		/ tx = d[x]	(NEXT ROUND)
+	movb	0(%rbp,%rbx,8),	%r8b		/ val = d[val]
+	decb	%r11b				/ ZF is set once 8 bytes are done
+	rorq	$8,		%r8		/ (ror does not change ZF)
+	jnz 	1b
+
+	/ xor 8 bytes
+	xorq	-8(%rsi),	%r8		/ r8 = keystream ^ 8 input bytes
+	cmpq	%r9,		%rsi		/ cmp in+len-8 with in
+	movq	%r8,		-8(%rdi)	/ store 8 output bytes (flags untouched)
+	jle	.Lstart				/ jump if (in <= in+len-8)
+
+.Lend:
+	addq	$8,		%r9		/ tmp = in+len
+
+	/ handle the last bytes, one by one
+1:	cmpq	%rsi,		%r9		/ cmp in with in+len
+	jle	.Lfinished			/ jump if (in+len <= in)
+	addb	%al,		%dl		/ y += tx
+	movl	0(%rbp,%rdx,8),	%ebx		/ ty = d[y]
+	movl	%ebx,		0(%rbp,%rcx,8)	/ d[x] = ty
+	addb	%al,		%bl		/ val = ty + tx
+	movl	%eax,		0(%rbp,%rdx,8)	/ d[y] = tx
+	incb	%cl				/ x++		(NEXT ROUND)
+	movl	0(%rbp,%rcx,8),	%eax		/ tx = d[x]	(NEXT ROUND)
+	movb	0(%rbp,%rbx,8),	%r8b		/ val = d[val]
+	xorb	(%rsi),		%r8b		/ xor 1 byte
+	movb	%r8b,		(%rdi)
+	incq	%rsi				/ in++
+	incq	%rdi				/ out++
+	jmp 1b
+
+.Lfinished:
+	decq	%rcx				/ x-- (undo the look-ahead x++)
+	movb	%dl,		-8(%rbp)	/ key->y = y
+	movb	%cl,		-16(%rbp)	/ key->x = x
+	popq	%rbx				/ restore the saved registers
+	popq	%rbp
+	ret
+.L_ARCFOUR_end:
+.size ARCFOUR,.L_ARCFOUR_end-ARCFOUR
diff --git a/security/nss/lib/freebl/arcfour.c b/security/nss/lib/freebl/arcfour.c
new file mode 100644
index 000000000..e37b45843
--- /dev/null
+++ b/security/nss/lib/freebl/arcfour.c
@@ -0,0 +1,594 @@
+/* arcfour.c - the arc four algorithm.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "blapi.h"
+
+/* Architecture-dependent defines */
+
+#if defined(SOLARIS) || defined(HPUX) || defined(NSS_X86) || \
+    defined(_WIN64)
+/* Convert the byte-stream to a word-stream (selects rc4_wordconv below) */
+#define CONVERT_TO_WORDS
+#endif
+
+#if defined(AIX) || defined(OSF1) || defined(NSS_BEVAND_ARCFOUR)
+/* Treat array variables as words, not bytes, on CPUs that take
+ * much longer to write bytes than to write words, or when using
+ * assembler code that requires it.
+ */
+#define USE_WORD
+#endif
+
+#if defined(IS_64) || defined(NSS_BEVAND_ARCFOUR)
+typedef PRUint64 WORD;
+#else
+typedef PRUint32 WORD;
+#endif
+#define WORDSIZE sizeof(WORD) /* bytes per WORD */
+
+#if defined(USE_WORD)
+typedef WORD Stype; /* each state entry occupies a full WORD */
+#else
+typedef PRUint8 Stype; /* each state entry is a single byte */
+#endif
+
+#define ARCFOUR_STATE_SIZE 256 /* number of entries in the S array */
+
+#define MASK1BYTE (WORD)(0xff)
+
+#define SWAP(a, b) \
+    tmp = a;       \
+    a = b;         \
+    b = tmp; /* expands to bare statements and needs a local named tmp */
+
+/*
+ * State information for stream cipher: the S array plus the indices i and j.
+ */
+struct RC4ContextStr {
+#if defined(NSS_ARCFOUR_IJ_B4_S) || defined(NSS_BEVAND_ARCFOUR)
+    Stype i; /* i, j ahead of S; the ARCFOUR asm reads x, y, then data */
+    Stype j;
+    Stype S[ARCFOUR_STATE_SIZE];
+#else
+    Stype S[ARCFOUR_STATE_SIZE]; /* S comes first in this layout */
+    Stype i; /* stream index: pre-incremented for each keystream byte */
+    Stype j; /* running sum of the S[i] values visited */
+#endif
+};
+
+/*
+ * array indices [0..255]; RC4_InitContext memcpy's them into cx->S (no loop).
+ */
+static const Stype Kinit[256] = {
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+    0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+    0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+    0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
+    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+    0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+    0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+    0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
+    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
+};
+
+RC4Context *
+RC4_AllocateContext(void)
+{
+    return PORT_ZNew(RC4Context); /* RC4_CreateContext treats NULL as failure */
+}
+
+SECStatus
+RC4_InitContext(RC4Context *cx, const unsigned char *key, unsigned int len,
+                const unsigned char *unused1, int unused2,
+                unsigned int unused3, unsigned int unused4) /* unused1..4 are ignored */
+{
+    unsigned int i;
+    PRUint8 j, tmp; /* j wraps modulo 256; tmp is the scratch SWAP needs */
+    PRUint8 K[256]; /* key bytes repeated to fill all 256 slots */
+    PRUint8 *L;     /* write cursor into K */
+
+    /* verify the key length: only 1..255 bytes are accepted. */
+    PORT_Assert(len > 0 && len < ARCFOUR_STATE_SIZE);
+    if (len == 0 || len >= ARCFOUR_STATE_SIZE) {
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    }
+    if (cx == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* Initialize the state using array indices. */
+    memcpy(cx->S, Kinit, sizeof cx->S);
+    /* Fill in K repeatedly with values from key. */
+    L = K;
+    for (i = sizeof K; i > len; i -= len) {
+        memcpy(L, key, len);
+        L += len;
+    }
+    memcpy(L, key, i); /* final copy: the 1..len bytes of K still unfilled */
+    /* Stir the state of the generator.  At this point it is assumed
+     * that the key is the size of the state buffer.  If this is not
+     * the case, the key bytes are repeated to fill the buffer.
+     */
+    j = 0;
+#define ARCFOUR_STATE_STIR(ii) \
+    j = j + cx->S[ii] + K[ii]; \
+    SWAP(cx->S[ii], cx->S[j]);
+    for (i = 0; i < ARCFOUR_STATE_SIZE; i++) {
+        ARCFOUR_STATE_STIR(i);
+    }
+    cx->i = 0; /* the keystream indices start from zero */
+    cx->j = 0;
+    return SECSuccess;
+}
+
+/*
+ * Allocate and key a new generator; returns NULL if either step fails.
+ */
+RC4Context *
+RC4_CreateContext(const unsigned char *key, int len)
+{
+    RC4Context *cx = RC4_AllocateContext();
+    if (cx) {
+        SECStatus rv = RC4_InitContext(cx, key, len, NULL, 0, 0, 0);
+        if (rv != SECSuccess) { /* e.g. bad length; a negative len arrives as a huge unsigned */
+            PORT_ZFree(cx, sizeof(*cx));
+            cx = NULL;
+        }
+    }
+    return cx;
+}
+
+void
+RC4_DestroyContext(RC4Context *cx, PRBool freeit)
+{
+    if (freeit) /* with freeit false this function does nothing */
+        PORT_ZFree(cx, sizeof(*cx));
+}
+
+#if defined(NSS_BEVAND_ARCFOUR)
+extern void ARCFOUR(RC4Context *cx, WORD inputLen,
+                    const unsigned char *input, unsigned char *output);
+#else
+/*
+ * Generate the next byte in the stream; afterwards cx->S[t] is that byte.
+ */
+#define ARCFOUR_NEXT_BYTE() \
+    tmpSi = cx->S[++tmpi];  \
+    tmpj += tmpSi;          \
+    tmpSj = cx->S[tmpj];    \
+    cx->S[tmpi] = tmpSj;    \
+    cx->S[tmpj] = tmpSi;    \
+    t = tmpSi + tmpSj; /* caller declares cx, t, tmpi, tmpj, tmpSi, tmpSj */
+
+#ifdef CONVERT_TO_WORDS
+/*
+ * Straight ARCFOUR op.  No optimization: one keystream byte per loop pass.
+ */
+static SECStatus
+rc4_no_opt(RC4Context *cx, unsigned char *output,
+           unsigned int *outputLen, unsigned int maxOutputLen,
+           const unsigned char *input, unsigned int inputLen)
+{
+    PRUint8 t; /* index of the keystream byte, set by ARCFOUR_NEXT_BYTE */
+    Stype tmpSi, tmpSj;
+    register PRUint8 tmpi = cx->i; /* work on local copies of the indices */
+    register PRUint8 tmpj = cx->j;
+    unsigned int index;
+    PORT_Assert(maxOutputLen >= inputLen);
+    if (maxOutputLen < inputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    for (index = 0; index < inputLen; index++) {
+        /* Generate next byte from stream. */
+        ARCFOUR_NEXT_BYTE();
+        /* output = next stream byte XOR next input byte */
+        output[index] = cx->S[t] ^ input[index];
+    }
+    *outputLen = inputLen;
+    cx->i = tmpi; /* write the advanced indices back to the context */
+    cx->j = tmpj;
+    return SECSuccess;
+}
+
+#else
+/* !CONVERT_TO_WORDS */
+
+/*
+ * Byte-at-a-time ARCFOUR, unrolling the loop into 8 pieces.
+ */
+static SECStatus
+rc4_unrolled(RC4Context *cx, unsigned char *output,
+             unsigned int *outputLen, unsigned int maxOutputLen,
+             const unsigned char *input, unsigned int inputLen)
+{
+    PRUint8 t; /* index of the keystream byte, set by ARCFOUR_NEXT_BYTE */
+    Stype tmpSi, tmpSj;
+    register PRUint8 tmpi = cx->i; /* work on local copies of the indices */
+    register PRUint8 tmpj = cx->j;
+    int index;
+    PORT_Assert(maxOutputLen >= inputLen);
+    if (maxOutputLen < inputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    for (index = inputLen / 8; index-- > 0; input += 8, output += 8) { /* whole groups of 8 */
+        ARCFOUR_NEXT_BYTE();
+        output[0] = cx->S[t] ^ input[0];
+        ARCFOUR_NEXT_BYTE();
+        output[1] = cx->S[t] ^ input[1];
+        ARCFOUR_NEXT_BYTE();
+        output[2] = cx->S[t] ^ input[2];
+        ARCFOUR_NEXT_BYTE();
+        output[3] = cx->S[t] ^ input[3];
+        ARCFOUR_NEXT_BYTE();
+        output[4] = cx->S[t] ^ input[4];
+        ARCFOUR_NEXT_BYTE();
+        output[5] = cx->S[t] ^ input[5];
+        ARCFOUR_NEXT_BYTE();
+        output[6] = cx->S[t] ^ input[6];
+        ARCFOUR_NEXT_BYTE();
+        output[7] = cx->S[t] ^ input[7];
+    }
+    index = inputLen % 8; /* 0..7 leftover bytes */
+    if (index) {
+        input += index; /* step past the tail so the cases below index backwards */
+        output += index;
+        switch (index) {
+            case 7:
+                ARCFOUR_NEXT_BYTE();
+                output[-7] = cx->S[t] ^ input[-7]; /* FALLTHRU */
+            case 6:
+                ARCFOUR_NEXT_BYTE();
+                output[-6] = cx->S[t] ^ input[-6]; /* FALLTHRU */
+            case 5:
+                ARCFOUR_NEXT_BYTE();
+                output[-5] = cx->S[t] ^ input[-5]; /* FALLTHRU */
+            case 4:
+                ARCFOUR_NEXT_BYTE();
+                output[-4] = cx->S[t] ^ input[-4]; /* FALLTHRU */
+            case 3:
+                ARCFOUR_NEXT_BYTE();
+                output[-3] = cx->S[t] ^ input[-3]; /* FALLTHRU */
+            case 2:
+                ARCFOUR_NEXT_BYTE();
+                output[-2] = cx->S[t] ^ input[-2]; /* FALLTHRU */
+            case 1:
+                ARCFOUR_NEXT_BYTE();
+                output[-1] = cx->S[t] ^ input[-1]; /* FALLTHRU */
+            default:
+                /* FALLTHRU */
+                ; /* hp-ux build breaks without this */
+        }
+    }
+    cx->i = tmpi; /* write the advanced indices back to the context */
+    cx->j = tmpj;
+    *outputLen = inputLen;
+    return SECSuccess;
+}
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+#define ARCFOUR_NEXT4BYTES_L(n)               \
+    ARCFOUR_NEXT_BYTE();                      \
+    streamWord |= (WORD)cx->S[t] << (n);      \
+    ARCFOUR_NEXT_BYTE();                      \
+    streamWord |= (WORD)cx->S[t] << (n + 8);  \
+    ARCFOUR_NEXT_BYTE();                      \
+    streamWord |= (WORD)cx->S[t] << (n + 16); \
+    ARCFOUR_NEXT_BYTE();                      \
+    streamWord |= (WORD)cx->S[t] << (n + 24);
+#else
+#define ARCFOUR_NEXT4BYTES_B(n)               \
+    ARCFOUR_NEXT_BYTE();                      \
+    streamWord |= (WORD)cx->S[t] << (n + 24); \
+    ARCFOUR_NEXT_BYTE();                      \
+    streamWord |= (WORD)cx->S[t] << (n + 16); \
+    ARCFOUR_NEXT_BYTE();                      \
+    streamWord |= (WORD)cx->S[t] << (n + 8);  \
+    ARCFOUR_NEXT_BYTE();                      \
+    streamWord |= (WORD)cx->S[t] << (n);
+#endif
+
+#if (defined(IS_64) && !defined(__sparc)) || defined(NSS_USE_64)
+/* 64-bit wordsize: one stream word is two 4-byte groups */
+#ifdef IS_LITTLE_ENDIAN
+#define ARCFOUR_NEXT_WORD()       \
+    {                             \
+        streamWord = 0;           \
+        ARCFOUR_NEXT4BYTES_L(0);  \
+        ARCFOUR_NEXT4BYTES_L(32); \
+    }
+#else
+#define ARCFOUR_NEXT_WORD()       \
+    {                             \
+        streamWord = 0;           \
+        ARCFOUR_NEXT4BYTES_B(32); \
+        ARCFOUR_NEXT4BYTES_B(0);  \
+    }
+#endif
+#else
+/* 32-bit wordsize: one stream word is a single 4-byte group */
+#ifdef IS_LITTLE_ENDIAN
+#define ARCFOUR_NEXT_WORD()      \
+    {                            \
+        streamWord = 0;          \
+        ARCFOUR_NEXT4BYTES_L(0); \
+    }
+#else
+#define ARCFOUR_NEXT_WORD()      \
+    {                            \
+        streamWord = 0;          \
+        ARCFOUR_NEXT4BYTES_B(0); \
+    }
+#endif
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+#define RSH << /* RSH/LSH name the direction in memory (byte-address) order, */
+#define LSH >> /* so on little-endian they map to the opposite C operators  */
+#else
+#define RSH >>
+#define LSH <<
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+#define LEFTMOST_BYTE_SHIFT 0 /* bit position of the lowest-address byte in a WORD */
+#define NEXT_BYTE_SHIFT(shift) shift + 8 /* NOTE(review): expansion is not parenthesized */
+#else
+#define LEFTMOST_BYTE_SHIFT 8 * (WORDSIZE - 1)
+#define NEXT_BYTE_SHIFT(shift) shift - 8
+#endif
+
+#ifdef CONVERT_TO_WORDS
+static SECStatus
+rc4_wordconv(RC4Context *cx, unsigned char *output,
+             unsigned int *outputLen, unsigned int maxOutputLen,
+             const unsigned char *input, unsigned int inputLen)
+{
+    PR_STATIC_ASSERT(sizeof(PRUword) == sizeof(ptrdiff_t));
+    unsigned int inOffset = (PRUword)input % WORDSIZE;   /* bytes past a WORD boundary */
+    unsigned int outOffset = (PRUword)output % WORDSIZE; /* bytes past a WORD boundary */
+    register WORD streamWord;
+    register const WORD *pInWord;
+    register WORD *pOutWord;
+    register WORD inWord, nextInWord;
+    PRUint8 t;
+    register Stype tmpSi, tmpSj;
+    register PRUint8 tmpi = cx->i;
+    register PRUint8 tmpj = cx->j;
+    unsigned int bufShift, invBufShift;
+    unsigned int i;
+    const unsigned char *finalIn;
+    unsigned char *finalOut;
+
+    PORT_Assert(maxOutputLen >= inputLen);
+    if (maxOutputLen < inputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    if (inputLen < 2 * WORDSIZE) {
+        /* Ignore word conversion, do byte-at-a-time */
+        return rc4_no_opt(cx, output, outputLen, maxOutputLen, input, inputLen);
+    }
+    *outputLen = inputLen; /* recorded now: inputLen is decremented below */
+    pInWord = (const WORD *)(input - inOffset); /* aligned WORD containing input[0] */
+    pOutWord = (WORD *)(output - outOffset);    /* aligned WORD containing output[0] */
+    if (inOffset <= outOffset) {
+        bufShift = 8 * (outOffset - inOffset);
+        invBufShift = 8 * WORDSIZE - bufShift;
+    } else {
+        invBufShift = 8 * (inOffset - outOffset);
+        bufShift = 8 * WORDSIZE - invBufShift;
+    }
+    /*****************************************************************/
+    /* Step 1:                                                       */
+    /* If the first output word is partial, consume the bytes in the */
+    /* first partial output word by loading one or two words of      */
+    /* input and shifting them accordingly.  Otherwise, just load    */
+    /* in the first word of input.  At the end of this block, at     */
+    /* least one partial word of input should ALWAYS be loaded.      */
+    /*****************************************************************/
+    if (outOffset) {
+        unsigned int byteCount = WORDSIZE - outOffset;
+        for (i = 0; i < byteCount; i++) {
+            ARCFOUR_NEXT_BYTE();
+            output[i] = cx->S[t] ^ input[i];
+        }
+        /* Consumed byteCount bytes of input */
+        inputLen -= byteCount;
+        pInWord++;
+
+        /* move to next word of output */
+        pOutWord++;
+
+        /* If buffers are relatively misaligned, shift the bytes in inWord
+         * to be aligned to the output buffer.
+         */
+        if (inOffset < outOffset) {
+            /* The first input word (which may be partial) has more bytes
+             * than needed.  Copy the remainder to inWord.
+             */
+            unsigned int shift = LEFTMOST_BYTE_SHIFT;
+            inWord = 0;
+            for (i = 0; i < outOffset - inOffset; i++) {
+                inWord |= (WORD)input[byteCount + i] << shift;
+                shift = NEXT_BYTE_SHIFT(shift);
+            }
+        } else if (inOffset > outOffset) {
+            /* Consumed some bytes in the second input word.  Copy the
+             * remainder to inWord.
+             */
+            inWord = *pInWord++;
+            inWord = inWord LSH invBufShift;
+        } else {
+            inWord = 0;
+        }
+    } else {
+        /* output is word-aligned */
+        if (inOffset) {
+            /* Input is not word-aligned.  The first word load of input
+             * will not produce a full word of input bytes, so one word
+             * must be pre-loaded.  The main loop below will load in the
+             * next input word and shift some of its bytes into inWord
+             * in order to create a full input word.  Note that the main
+             * loop must execute at least once because the input must
+             * be at least two words.
+             */
+            unsigned int shift = LEFTMOST_BYTE_SHIFT;
+            inWord = 0;
+            for (i = 0; i < WORDSIZE - inOffset; i++) {
+                inWord |= (WORD)input[i] << shift;
+                shift = NEXT_BYTE_SHIFT(shift);
+            }
+            pInWord++;
+        } else {
+            /* Input is word-aligned.  The first word load of input
+             * will produce a full word of input bytes, so nothing
+             * needs to be loaded here.
+             */
+            inWord = 0;
+        }
+    }
+    /*****************************************************************/
+    /* Step 2: main loop                                             */
+    /* At this point the output buffer is word-aligned.  Any unused  */
+    /* bytes from above will be in inWord (shifted correctly).  If   */
+    /* the input buffer is unaligned relative to the output buffer,  */
+    /* shifting has to be done.                                      */
+    /*****************************************************************/
+    if (bufShift) {
+        /* preloadedByteCount is the number of input bytes pre-loaded
+         * in inWord.
+         */
+        unsigned int preloadedByteCount = bufShift / 8;
+        for (; inputLen >= preloadedByteCount + WORDSIZE;
+             inputLen -= WORDSIZE) {
+            nextInWord = *pInWord++;
+            inWord |= nextInWord RSH bufShift;
+            nextInWord = nextInWord LSH invBufShift;
+            ARCFOUR_NEXT_WORD();
+            *pOutWord++ = inWord ^ streamWord;
+            inWord = nextInWord;
+        }
+        if (inputLen == 0) {
+            /* Nothing left to do. */
+            cx->i = tmpi;
+            cx->j = tmpj;
+            return SECSuccess;
+        }
+        finalIn = (const unsigned char *)pInWord - preloadedByteCount; /* back up over the preloaded bytes */
+    } else {
+        for (; inputLen >= WORDSIZE; inputLen -= WORDSIZE) {
+            inWord = *pInWord++;
+            ARCFOUR_NEXT_WORD();
+            *pOutWord++ = inWord ^ streamWord;
+        }
+        if (inputLen == 0) {
+            /* Nothing left to do. */
+            cx->i = tmpi;
+            cx->j = tmpj;
+            return SECSuccess;
+        }
+        finalIn = (const unsigned char *)pInWord;
+    }
+    /*****************************************************************/
+    /* Step 3:                                                       */
+    /* Do the remaining partial word of input one byte at a time.    */
+    /*****************************************************************/
+    finalOut = (unsigned char *)pOutWord;
+    for (i = 0; i < inputLen; i++) {
+        ARCFOUR_NEXT_BYTE();
+        finalOut[i] = cx->S[t] ^ finalIn[i];
+    }
+    cx->i = tmpi; /* write the advanced indices back to the context */
+    cx->j = tmpj;
+    return SECSuccess;
+}
+#endif
+#endif /* NSS_BEVAND_ARCFOUR */
+
+SECStatus
+RC4_Encrypt(RC4Context *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    PORT_Assert(maxOutputLen >= inputLen);
+    if (maxOutputLen < inputLen) { /* output buffer too small for the input */
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+#if defined(NSS_BEVAND_ARCFOUR)
+    ARCFOUR(cx, inputLen, input, output); /* extern void routine: no status to check */
+    *outputLen = inputLen;
+    return SECSuccess;
+#elif defined(CONVERT_TO_WORDS)
+    /* Convert the byte-stream to a word-stream */
+    return rc4_wordconv(cx, output, outputLen, maxOutputLen, input, inputLen);
+#else
+    /* Operate on bytes, but unroll the main loop */
+    return rc4_unrolled(cx, output, outputLen, maxOutputLen, input, inputLen);
+#endif
+}
+
+SECStatus
+RC4_Decrypt(RC4Context *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    PORT_Assert(maxOutputLen >= inputLen);
+    if (maxOutputLen < inputLen) { /* output buffer too small for the input */
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+/* decrypt and encrypt are same operation. */
+#if defined(NSS_BEVAND_ARCFOUR)
+    ARCFOUR(cx, inputLen, input, output); /* extern void routine: no status to check */
+    *outputLen = inputLen;
+    return SECSuccess;
+#elif defined(CONVERT_TO_WORDS)
+    /* Convert the byte-stream to a word-stream */
+    return rc4_wordconv(cx, output, outputLen, maxOutputLen, input, inputLen);
+#else
+    /* Operate on bytes, but unroll the main loop */
+    return rc4_unrolled(cx, output, outputLen, maxOutputLen, input, inputLen);
+#endif
+}
+
+#undef CONVERT_TO_WORDS
+#undef USE_WORD
diff --git a/security/nss/lib/freebl/blapi.h b/security/nss/lib/freebl/blapi.h
new file mode 100644
index 000000000..e5a6cf30e
--- /dev/null
+++ b/security/nss/lib/freebl/blapi.h
@@ -0,0 +1,1625 @@
+/*
+ * blapi.h - public prototypes for the freebl library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _BLAPI_H_
+#define _BLAPI_H_
+
+#include "blapit.h"
+#include "hasht.h"
+#include "alghmac.h"
+
+SEC_BEGIN_PROTOS
+
+/*
+** RSA encryption/decryption. When encrypting/decrypting the output
+** buffer must be at least the size of the public key modulus.
+*/
+
+extern SECStatus BL_Init(void);
+
+/*
+** Generate and return a new RSA public and private key.
+**  Both keys are encoded in a single RSAPrivateKey structure.
+**  No random number generator context is passed in; the inputs are:
+**  "keySizeInBits" is the size of the key to be generated, in bits.
+**     512, 1024, etc.
+**  "publicExponent" when not NULL is a pointer to some data that
+**     represents the public exponent to use. The data is a byte
+**     encoded integer, in "big endian" order.
+*/
+extern RSAPrivateKey *RSA_NewKey(int keySizeInBits,
+                                 SECItem *publicExponent);
+
+/*
+** Perform a raw public-key operation
+**  Length of input and output buffers are equal to key's modulus len.
+*/
+extern SECStatus RSA_PublicKeyOp(RSAPublicKey *key,
+                                 unsigned char *output,
+                                 const unsigned char *input);
+
+/*
+** Perform a raw private-key operation
+**  Length of input and output buffers are equal to key's modulus len.
+*/
+extern SECStatus RSA_PrivateKeyOp(RSAPrivateKey *key,
+                                  unsigned char *output,
+                                  const unsigned char *input);
+
+/*
+** Perform a raw private-key operation, and check the parameters used in
+** the operation for validity by performing a test operation first.
+**  Length of input and output buffers are equal to key's modulus len.
+*/
+extern SECStatus RSA_PrivateKeyOpDoubleChecked(RSAPrivateKey *key,
+                                               unsigned char *output,
+                                               const unsigned char *input);
+
+/*
+** Perform a check of private key parameters for consistency.
+*/
+extern SECStatus RSA_PrivateKeyCheck(const RSAPrivateKey *key);
+
+/*
+** Given only minimal private key parameters, fill in the rest of the
+** parameters.
+**
+**
+** All the entries, including those supplied by the caller, will be
+** overwritten with data allocated out of the arena.
+**
+** If no arena is supplied, one will be created.
+**
+** The following fields must be supplied in order for this function
+** to succeed:
+**   one of either publicExponent or privateExponent
+**   two more of the following 5 parameters (not counting the above).
+**      modulus (n)
+**      prime1  (p)
+**      prime2  (q)
+**      publicExponent (e)
+**      privateExponent (d)
+**
+** NOTE: if only the publicExponent, privateExponent, and one prime is given,
+** then there may be more than one RSA key that matches that combination. If
+** we find 2 possible valid keys that meet this criteria, we return an error.
+** If we return the wrong key, and the original modulus is compared to the
+** new modulus, both can be factored by calculating gcd(n_old,n_new) to get
+** the common prime.
+**
+** NOTE: in some cases the publicExponent must be less than 2^23 for this
+** function to work correctly. (The case where we have only one of: modulus,
+** prime1 and prime2).
+**
+** All parameters will be replaced in the key structure with new parameters
+** allocated out of the arena. There is no attempt to free the old structures.
+** prime1 will always be greater than prime2 (even if the caller supplies the
+** smaller prime as prime1 or the larger prime as prime2). The parameters are
+** not overwritten on failure.
+**
+** While the remaining Chinese remainder theorem parameters (dp, dq, and qinv)
+** can also be used in reconstructing the private key, they are currently
+** ignored in this implementation.
+*/
+extern SECStatus RSA_PopulatePrivateKey(RSAPrivateKey *key);
+
+/********************************************************************
+** RSA algorithm
+*/
+
+/********************************************************************
+** Raw signing/encryption/decryption operations.
+**
+** No padding or formatting will be applied.
+** inputLen MUST be equivalent to the modulus size (in bytes).
+*/
+extern SECStatus
+RSA_SignRaw(RSAPrivateKey *key,
+            unsigned char *output,
+            unsigned int *outputLen,
+            unsigned int maxOutputLen,
+            const unsigned char *input,
+            unsigned int inputLen);
+
+extern SECStatus
+RSA_CheckSignRaw(RSAPublicKey *key,
+                 const unsigned char *sig,
+                 unsigned int sigLen,
+                 const unsigned char *hash,
+                 unsigned int hashLen);
+
+extern SECStatus
+RSA_CheckSignRecoverRaw(RSAPublicKey *key,
+                        unsigned char *data,
+                        unsigned int *dataLen,
+                        unsigned int maxDataLen,
+                        const unsigned char *sig,
+                        unsigned int sigLen);
+
+extern SECStatus
+RSA_EncryptRaw(RSAPublicKey *key,
+               unsigned char *output,
+               unsigned int *outputLen,
+               unsigned int maxOutputLen,
+               const unsigned char *input,
+               unsigned int inputLen);
+
+extern SECStatus
+RSA_DecryptRaw(RSAPrivateKey *key,
+               unsigned char *output,
+               unsigned int *outputLen,
+               unsigned int maxOutputLen,
+               const unsigned char *input,
+               unsigned int inputLen);
+
+/********************************************************************
+** RSAES-OAEP encryption/decryption, as defined in RFC 3447, Section 7.1.
+**
+** Note: Only MGF1 is supported as the mask generation function. It will be
+** used with maskHashAlg as the inner hash function.
+**
+** Unless performing Known Answer Tests, "seed" should be NULL, indicating that
+** freebl should generate a random value. Otherwise, it should be an octet
+** string of seedLen bytes, which should be the same size as the output of
+** hashAlg.
+*/
+extern SECStatus
+RSA_EncryptOAEP(RSAPublicKey *key,
+                HASH_HashType hashAlg,
+                HASH_HashType maskHashAlg,
+                const unsigned char *label,
+                unsigned int labelLen,
+                const unsigned char *seed,
+                unsigned int seedLen,
+                unsigned char *output,
+                unsigned int *outputLen,
+                unsigned int maxOutputLen,
+                const unsigned char *input,
+                unsigned int inputLen);
+
+extern SECStatus
+RSA_DecryptOAEP(RSAPrivateKey *key,
+                HASH_HashType hashAlg,
+                HASH_HashType maskHashAlg,
+                const unsigned char *label,
+                unsigned int labelLen,
+                unsigned char *output,
+                unsigned int *outputLen,
+                unsigned int maxOutputLen,
+                const unsigned char *input,
+                unsigned int inputLen);
+
+/********************************************************************
+** RSAES-PKCS1-v1_5 encryption/decryption, as defined in RFC 3447, Section 7.2.
+*/
+extern SECStatus
+RSA_EncryptBlock(RSAPublicKey *key,
+                 unsigned char *output,
+                 unsigned int *outputLen,
+                 unsigned int maxOutputLen,
+                 const unsigned char *input,
+                 unsigned int inputLen);
+
+extern SECStatus
+RSA_DecryptBlock(RSAPrivateKey *key,
+                 unsigned char *output,
+                 unsigned int *outputLen,
+                 unsigned int maxOutputLen,
+                 const unsigned char *input,
+                 unsigned int inputLen);
+
+/********************************************************************
+** RSASSA-PSS signing/verifying, as defined in RFC 3447, Section 8.1.
+**
+** Note: Only MGF1 is supported as the mask generation function. It will be
+** used with maskHashAlg as the inner hash function.
+**
+** Unless performing Known Answer Tests, "salt" should be NULL, indicating that
+** freebl should generate a random value.
+*/
+extern SECStatus
+RSA_SignPSS(RSAPrivateKey *key,
+            HASH_HashType hashAlg,
+            HASH_HashType maskHashAlg,
+            const unsigned char *salt,
+            unsigned int saltLen,
+            unsigned char *output,
+            unsigned int *outputLen,
+            unsigned int maxOutputLen,
+            const unsigned char *input,
+            unsigned int inputLen);
+
+extern SECStatus
+RSA_CheckSignPSS(RSAPublicKey *key,
+                 HASH_HashType hashAlg,
+                 HASH_HashType maskHashAlg,
+                 unsigned int saltLen,
+                 const unsigned char *sig,
+                 unsigned int sigLen,
+                 const unsigned char *hash,
+                 unsigned int hashLen);
+
+/********************************************************************
+** RSASSA-PKCS1-v1_5 signing/verifying, as defined in RFC 3447, Section 8.2.
+**
+** These functions expect as input to be the raw value to be signed. For most
+** cases using PKCS1-v1_5, this should be the value of T, the DER-encoded
+** DigestInfo structure defined in Section 9.2, Step 2.
+** Note: This can also be used for signatures that use PKCS1-v1_5 padding, such
+** as the signatures used in SSL/TLS, which sign a raw hash.
+*/
+extern SECStatus
+RSA_Sign(RSAPrivateKey *key,
+         unsigned char *output,
+         unsigned int *outputLen,
+         unsigned int maxOutputLen,
+         const unsigned char *data,
+         unsigned int dataLen);
+
+extern SECStatus
+RSA_CheckSign(RSAPublicKey *key,
+              const unsigned char *sig,
+              unsigned int sigLen,
+              const unsigned char *data,
+              unsigned int dataLen);
+
+extern SECStatus
+RSA_CheckSignRecover(RSAPublicKey *key,
+                     unsigned char *output,
+                     unsigned int *outputLen,
+                     unsigned int maxOutputLen,
+                     const unsigned char *sig,
+                     unsigned int sigLen);
+
+/********************************************************************
+** DSA signing algorithm
+*/
+
+/* Generate a new random value within the interval [2, q-1].
+*/
+extern SECStatus DSA_NewRandom(PLArenaPool *arena, const SECItem *q,
+                               SECItem *random);
+
+/*
+** Generate and return a new DSA public and private key pair,
+**  both of which are encoded into a single DSAPrivateKey struct.
+**  "params" is a pointer to the PQG parameters for the domain
+**  Uses a random seed.
+*/
+extern SECStatus DSA_NewKey(const PQGParams *params,
+                            DSAPrivateKey **privKey);
+
+/* signature is caller-supplied buffer of at least 20 bytes.
+** On input,  signature->len == size of buffer to hold signature.
+**            digest->len    == size of digest.
+** On output, signature->len == size of signature in buffer.
+** Uses a random seed.
+*/
+extern SECStatus DSA_SignDigest(DSAPrivateKey *key,
+                                SECItem *signature,
+                                const SECItem *digest);
+
+/* signature is caller-supplied buffer of at least 20 bytes.
+** On input,  signature->len == size of buffer to hold signature.
+**            digest->len    == size of digest.
+*/
+extern SECStatus DSA_VerifyDigest(DSAPublicKey *key,
+                                  const SECItem *signature,
+                                  const SECItem *digest);
+
+/* For FIPS compliance testing. Seed must be exactly 20 bytes long */
+extern SECStatus DSA_NewKeyFromSeed(const PQGParams *params,
+                                    const unsigned char *seed,
+                                    DSAPrivateKey **privKey);
+
+/* For FIPS compliance testing. Seed must be exactly 20 bytes. */
+extern SECStatus DSA_SignDigestWithSeed(DSAPrivateKey *key,
+                                        SECItem *signature,
+                                        const SECItem *digest,
+                                        const unsigned char *seed);
+
+/******************************************************
+** Diffie-Hellman key exchange algorithm
+*/
+
+/* Generates parameters for Diffie-Hellman key generation.
+**  primeLen is the length in bytes of prime P to be generated.
+*/
+extern SECStatus DH_GenParam(int primeLen, DHParams **params);
+
+/* Generates a public and private key, both of which are encoded in a single
+**  DHPrivateKey struct. Params is input, privKey is output.
+**  This is Phase 1 of Diffie-Hellman.
+*/
+extern SECStatus DH_NewKey(DHParams *params,
+                           DHPrivateKey **privKey);
+
+/*
+** DH_Derive does the Diffie-Hellman phase 2 calculation, using the
+** other party's publicValue, and the prime and our privateValue.
+** outBytes is the requested length of the generated secret in bytes.
+** A zero value means produce a value of any length up to the size of
+** the prime.   If successful, derivedSecret->data is set
+** to the address of the newly allocated buffer containing the derived
+** secret, and derivedSecret->len is the size of the secret produced.
+** The size of the secret produced will depend on the value of outBytes.
+** If outBytes is 0, the key length will be all the significant bytes of
+** the derived secret (leading zeros are dropped). This length could be less
+** than the length of the prime. If outBytes is nonzero, the length of the
+** produced key will be outBytes long. If the key is truncated, the most
+** significant bytes are truncated. If it is expanded, zero bytes are added
+** at the beginning.
+** It is the caller's responsibility to free the allocated buffer
+** containing the derived secret.
+*/
+extern SECStatus DH_Derive(SECItem *publicValue,
+                           SECItem *prime,
+                           SECItem *privateValue,
+                           SECItem *derivedSecret,
+                           unsigned int outBytes);
+
+/*
+** KEA_Derive returns an octet string with the private key for a dual
+** Diffie-Hellman key generation as specified for government key exchange.
+*/
+extern SECStatus KEA_Derive(SECItem *prime,
+                            SECItem *public1,
+                            SECItem *public2,
+                            SECItem *private1,
+                            SECItem *private2,
+                            SECItem *derivedSecret);
+
+/*
+ * verify that a KEA or DSA public key is a valid key for this prime and
+ * subprime domain.
+ */
+extern PRBool KEA_Verify(SECItem *Y, SECItem *prime, SECItem *subPrime);
+
+/****************************************
+ * J-PAKE key transport
+ */
+
+/* Given gx == g^x, create a Schnorr zero-knowledge proof for the value x
+ * using the specified hash algorithm and signer ID. The signature is
+ * returned in the values gv and r. testRandom must be NULL for a PRNG
+ * generated random commitment to be used in the signature. When testRandom
+ * is non-NULL, that value must contain a value in the subgroup q; that
+ * value will be used instead of a PRNG-generated commitment in order to
+ * facilitate known-answer tests.
+ *
+ * If gxIn is non-NULL then it must contain a pre-computed value of g^x that
+ * will be used by the function; in this case, the gxOut parameter must be NULL.
+ * If the gxIn parameter is NULL then gxOut must be non-NULL; in this case
+ * gxOut will contain the value g^x on output.
+ *
+ * gx (if not supplied by the caller), gv, and r will be allocated in the arena.
+ * The arena is *not* optional so do not pass NULL for the arena parameter.
+ * The arena should be zeroed when it is freed.
+ */
+SECStatus
+JPAKE_Sign(PLArenaPool *arena, const PQGParams *pqg, HASH_HashType hashType,
+           const SECItem *signerID, const SECItem *x,
+           const SECItem *testRandom, const SECItem *gxIn, SECItem *gxOut,
+           SECItem *gv, SECItem *r);
+
+/* Given gx == g^x, verify the Schnorr zero-knowledge proof (gv, r) for the
+ * value x using the specified hash algorithm and signer ID.
+ *
+ * The arena is *not* optional so do not pass NULL for the arena parameter.
+ */
+SECStatus
+JPAKE_Verify(PLArenaPool *arena, const PQGParams *pqg,
+             HASH_HashType hashType, const SECItem *signerID,
+             const SECItem *peerID, const SECItem *gx,
+             const SECItem *gv, const SECItem *r);
+
+/* Call before round 2 with x2, s, and x2s all non-NULL. This will calculate
+ * base = g^(x1+x3+x4) (mod p) and x2s = x2*s (mod q). The values to send in
+ * round 2 (A and the proof of knowledge of x2s) can then be calculated with
+ * JPAKE_Sign using pqg->base = base and x = x2s.
+ *
+ * Call after round 2 with x2, s, and x2s all NULL, and passing (gx1, gx2, gx3)
+ * instead of (gx1, gx3, gx4). This will calculate base = g^(x1+x2+x3). Then call
+ * JPAKE_Verify with pqg->base = base and then JPAKE_Final.
+ *
+ * base and x2s will be allocated in the arena. The arena is *not* optional so
+ * do not pass NULL for the arena parameter. The arena should be zeroed when it
+ * is freed.
+*/
+SECStatus
+JPAKE_Round2(PLArenaPool *arena, const SECItem *p, const SECItem *q,
+             const SECItem *gx1, const SECItem *gx3, const SECItem *gx4,
+             SECItem *base, const SECItem *x2, const SECItem *s, SECItem *x2s);
+
+/* K = (B/g^(x2*x4*s))^x2 (mod p)
+ *
+ * K will be allocated in the arena. The arena is *not* optional so do not pass
+ * NULL for the arena parameter. The arena should be zeroed when it is freed.
+ */
+SECStatus
+JPAKE_Final(PLArenaPool *arena, const SECItem *p, const SECItem *q,
+            const SECItem *x2, const SECItem *gx4, const SECItem *x2s,
+            const SECItem *B, SECItem *K);
+
+/******************************************************
+** Elliptic Curve algorithms
+*/
+
+/* Generates a public and private key, both of which are encoded
+** in a single ECPrivateKey struct. Params is input, privKey is
+** output.
+*/
+extern SECStatus EC_NewKey(ECParams *params,
+                           ECPrivateKey **privKey);
+
+extern SECStatus EC_NewKeyFromSeed(ECParams *params,
+                                   ECPrivateKey **privKey,
+                                   const unsigned char *seed,
+                                   int seedlen);
+
+/* Validates an EC public key as described in Section 5.2.2 of
+ * X9.62. Such validation protects against small subgroup attacks
+ * when the ECDH primitive is used with the cofactor.
+ */
+extern SECStatus EC_ValidatePublicKey(ECParams *params,
+                                      SECItem *publicValue);
+
+/*
+** ECDH_Derive performs a scalar point multiplication of a point
+** representing a (peer's) public key and a large integer representing
+** a private key (its own). Both keys must use the same elliptic curve
+** parameters. If the withCofactor parameter is true, the
+** multiplication also uses the cofactor associated with the curve
+** parameters.  The output of this scheme is the x-coordinate of the
+** resulting point. If successful, derivedSecret->data is set to the
+** address of the newly allocated buffer containing the derived
+** secret, and derivedSecret->len is the size of the secret
+** produced. It is the caller's responsibility to free the allocated
+** buffer containing the derived secret.
+*/
+extern SECStatus ECDH_Derive(SECItem *publicValue,
+                             ECParams *params,
+                             SECItem *privateValue,
+                             PRBool withCofactor,
+                             SECItem *derivedSecret);
+
+/* On input,  signature->len == size of buffer to hold signature.
+**            digest->len    == size of digest.
+** On output, signature->len == size of signature in buffer.
+** Uses a random seed.
+*/
+extern SECStatus ECDSA_SignDigest(ECPrivateKey *key,
+                                  SECItem *signature,
+                                  const SECItem *digest);
+
+/* On input,  signature->len == size of buffer to hold signature.
+**            digest->len    == size of digest.
+*/
+extern SECStatus ECDSA_VerifyDigest(ECPublicKey *key,
+                                    const SECItem *signature,
+                                    const SECItem *digest);
+
+/* Uses the provided seed. */
+extern SECStatus ECDSA_SignDigestWithSeed(ECPrivateKey *key,
+                                          SECItem *signature,
+                                          const SECItem *digest,
+                                          const unsigned char *seed,
+                                          const int seedlen);
+
+/******************************************/
+/*
+** RC4 symmetric stream cypher
+*/
+
+/*
+** Create a new RC4 context suitable for RC4 encryption/decryption.
+**  "key" raw key data
+**  "len" the number of bytes of key data
+*/
+extern RC4Context *RC4_CreateContext(const unsigned char *key, int len);
+
+extern RC4Context *RC4_AllocateContext(void);
+extern SECStatus RC4_InitContext(RC4Context *cx,
+                                 const unsigned char *key,
+                                 unsigned int keylen,
+                                 const unsigned char *,
+                                 int,
+                                 unsigned int,
+                                 unsigned int);
+
+/*
+** Destroy an RC4 encryption/decryption context.
+**  "cx" the context
+**  "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void RC4_DestroyContext(RC4Context *cx, PRBool freeit);
+
+/*
+** Perform RC4 encryption.
+**  "cx" the context
+**  "output" the output buffer to store the encrypted data.
+**  "outputLen" how much data is stored in "output". Set by the routine
+**     after some data is stored in output.
+**  "maxOutputLen" the maximum amount of data that can ever be
+**     stored in "output"
+**  "input" the input data
+**  "inputLen" the amount of input data
+*/
+extern SECStatus RC4_Encrypt(RC4Context *cx, unsigned char *output,
+                             unsigned int *outputLen, unsigned int maxOutputLen,
+                             const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform RC4 decryption.
+**  "cx" the context
+**  "output" the output buffer to store the decrypted data.
+**  "outputLen" how much data is stored in "output". Set by the routine
+**     after some data is stored in output.
+**  "maxOutputLen" the maximum amount of data that can ever be
+**     stored in "output"
+**  "input" the input data
+**  "inputLen" the amount of input data
+*/
+extern SECStatus RC4_Decrypt(RC4Context *cx, unsigned char *output,
+                             unsigned int *outputLen, unsigned int maxOutputLen,
+                             const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** RC2 symmetric block cypher
+*/
+
+/*
+** Create a new RC2 context suitable for RC2 encryption/decryption.
+**  "key" raw key data
+**  "len" the number of bytes of key data
+**  "iv" is the CBC initialization vector (if mode is NSS_RC2_CBC)
+**  "mode" one of NSS_RC2 or NSS_RC2_CBC
+**  "effectiveKeyLen" is the effective key length (as specified in
+**      RFC 2268) in bytes (not bits).
+**
+** When mode is set to NSS_RC2_CBC the RC2 cipher is run in "cipher block
+** chaining" mode.
+*/
+extern RC2Context *RC2_CreateContext(const unsigned char *key, unsigned int len,
+                                     const unsigned char *iv, int mode,
+                                     unsigned effectiveKeyLen);
+extern RC2Context *RC2_AllocateContext(void);
+extern SECStatus RC2_InitContext(RC2Context *cx,
+                                 const unsigned char *key,
+                                 unsigned int keylen,
+                                 const unsigned char *iv,
+                                 int mode,
+                                 unsigned int effectiveKeyLen,
+                                 unsigned int);
+
+/*
+** Destroy an RC2 encryption/decryption context.
+**  "cx" the context
+**  "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void RC2_DestroyContext(RC2Context *cx, PRBool freeit);
+
+/*
+** Perform RC2 encryption.
+**  "cx" the context
+**  "output" the output buffer to store the encrypted data.
+**  "outputLen" how much data is stored in "output". Set by the routine
+**     after some data is stored in output.
+**  "maxOutputLen" the maximum amount of data that can ever be
+**     stored in "output"
+**  "input" the input data
+**  "inputLen" the amount of input data
+*/
+extern SECStatus RC2_Encrypt(RC2Context *cx, unsigned char *output,
+                             unsigned int *outputLen, unsigned int maxOutputLen,
+                             const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform RC2 decryption.
+**  "cx" the context
+**  "output" the output buffer to store the decrypted data.
+**  "outputLen" how much data is stored in "output". Set by the routine
+**     after some data is stored in output.
+**  "maxOutputLen" the maximum amount of data that can ever be
+**     stored in "output"
+**  "input" the input data
+**  "inputLen" the amount of input data
+*/
+extern SECStatus RC2_Decrypt(RC2Context *cx, unsigned char *output,
+                             unsigned int *outputLen, unsigned int maxOutputLen,
+                             const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** RC5 symmetric block cypher -- 64-bit block size
+*/
+
+/*
+** Create a new RC5 context suitable for RC5 encryption/decryption.
+**      "key" raw key data
+**      "len" the number of bytes of key data
+**      "iv" is the CBC initialization vector (if mode is NSS_RC5_CBC)
+**      "mode" one of NSS_RC5 or NSS_RC5_CBC
+**
+** When mode is set to NSS_RC5_CBC the RC5 cipher is run in "cipher block
+** chaining" mode.
+*/
+extern RC5Context *RC5_CreateContext(const SECItem *key, unsigned int rounds,
+                                     unsigned int wordSize, const unsigned char *iv, int mode);
+extern RC5Context *RC5_AllocateContext(void);
+extern SECStatus RC5_InitContext(RC5Context *cx,
+                                 const unsigned char *key,
+                                 unsigned int keylen,
+                                 const unsigned char *iv,
+                                 int mode,
+                                 unsigned int rounds,
+                                 unsigned int wordSize);
+
+/*
+** Destroy an RC5 encryption/decryption context.
+**      "cx" the context
+**      "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void RC5_DestroyContext(RC5Context *cx, PRBool freeit);
+
+/*
+** Perform RC5 encryption.
+**      "cx" the context
+**      "output" the output buffer to store the encrypted data.
+**      "outputLen" how much data is stored in "output". Set by the routine
+**         after some data is stored in output.
+**      "maxOutputLen" the maximum amount of data that can ever be
+**         stored in "output"
+**      "input" the input data
+**      "inputLen" the amount of input data
+*/
+extern SECStatus RC5_Encrypt(RC5Context *cx, unsigned char *output,
+                             unsigned int *outputLen, unsigned int maxOutputLen,
+                             const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform RC5 decryption.
+**      "cx" the context
+**      "output" the output buffer to store the decrypted data.
+**      "outputLen" how much data is stored in "output". Set by the routine
+**         after some data is stored in output.
+**      "maxOutputLen" the maximum amount of data that can ever be
+**         stored in "output"
+**      "input" the input data
+**      "inputLen" the amount of input data
+*/
+
+extern SECStatus RC5_Decrypt(RC5Context *cx, unsigned char *output,
+                             unsigned int *outputLen, unsigned int maxOutputLen,
+                             const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** DES symmetric block cypher
+*/
+
+/*
+** Create a new DES context suitable for DES encryption/decryption.
+**  "key" raw key data
+**  "len" the number of bytes of key data
+**  "iv" is the CBC initialization vector (if mode is NSS_DES_CBC or
+**     mode is NSS_DES_EDE3_CBC)
+**  "mode" one of NSS_DES, NSS_DES_CBC, NSS_DES_EDE3 or NSS_DES_EDE3_CBC
+**  "encrypt" is PR_TRUE if the context will be used for encryption
+**
+** When mode is set to NSS_DES_CBC or NSS_DES_EDE3_CBC then the DES
+** cipher is run in "cipher block chaining" mode.
+*/
+extern DESContext *DES_CreateContext(const unsigned char *key,
+                                     const unsigned char *iv,
+                                     int mode, PRBool encrypt);
+extern DESContext *DES_AllocateContext(void);
+extern SECStatus DES_InitContext(DESContext *cx,
+                                 const unsigned char *key,
+                                 unsigned int keylen,
+                                 const unsigned char *iv,
+                                 int mode,
+                                 unsigned int encrypt,
+                                 unsigned int);
+
+/*
+** Destroy a DES encryption/decryption context.
+**  "cx" the context
+**  "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void DES_DestroyContext(DESContext *cx, PRBool freeit);
+
+/*
+** Perform DES encryption.
+**  "cx" the context
+**  "output" the output buffer to store the encrypted data.
+**  "outputLen" how much data is stored in "output". Set by the routine
+**     after some data is stored in output.
+**  "maxOutputLen" the maximum amount of data that can ever be
+**     stored in "output"
+**  "input" the input data
+**  "inputLen" the amount of input data
+**
+** NOTE: the inputLen must be a multiple of DES_KEY_LENGTH
+*/
+extern SECStatus DES_Encrypt(DESContext *cx, unsigned char *output,
+                             unsigned int *outputLen, unsigned int maxOutputLen,
+                             const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform DES decryption.
+**  "cx" the context
+**  "output" the output buffer to store the decrypted data.
+**  "outputLen" how much data is stored in "output". Set by the routine
+**     after some data is stored in output.
+**  "maxOutputLen" the maximum amount of data that can ever be
+**     stored in "output"
+**  "input" the input data
+**  "inputLen" the amount of input data
+**
+** NOTE: the inputLen must be a multiple of DES_KEY_LENGTH
+*/
+extern SECStatus DES_Decrypt(DESContext *cx, unsigned char *output,
+                             unsigned int *outputLen, unsigned int maxOutputLen,
+                             const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** SEED symmetric block cypher
+*/
+extern SEEDContext *
+SEED_CreateContext(const unsigned char *key, const unsigned char *iv,
+                   int mode, PRBool encrypt);
+extern SEEDContext *SEED_AllocateContext(void);
+extern SECStatus SEED_InitContext(SEEDContext *cx,
+                                  const unsigned char *key,
+                                  unsigned int keylen,
+                                  const unsigned char *iv,
+                                  int mode, unsigned int encrypt,
+                                  unsigned int);
+extern void SEED_DestroyContext(SEEDContext *cx, PRBool freeit);
+extern SECStatus
+SEED_Encrypt(SEEDContext *cx, unsigned char *output,
+             unsigned int *outputLen, unsigned int maxOutputLen,
+             const unsigned char *input, unsigned int inputLen);
+extern SECStatus
+SEED_Decrypt(SEEDContext *cx, unsigned char *output,
+             unsigned int *outputLen, unsigned int maxOutputLen,
+             const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** AES symmetric block cypher (Rijndael)
+*/
+
+/*
+** Create a new AES context suitable for AES encryption/decryption.
+**  "key" raw key data
+**  "keylen" the number of bytes of key data (16, 24, or 32)
+**      "blocklen" is the blocksize to use (16, 24, or 32)
+**                        XXX currently only blocksize==16 has been tested!
+*/
+extern AESContext *
+AES_CreateContext(const unsigned char *key, const unsigned char *iv,
+                  int mode, int encrypt,
+                  unsigned int keylen, unsigned int blocklen);
+extern AESContext *AES_AllocateContext(void);
+extern SECStatus AES_InitContext(AESContext *cx,
+                                 const unsigned char *key,
+                                 unsigned int keylen,
+                                 const unsigned char *iv,
+                                 int mode,
+                                 unsigned int encrypt,
+                                 unsigned int blocklen);
+
+/*
+** Destroy an AES encryption/decryption context.
+**  "cx" the context
+**  "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void
+AES_DestroyContext(AESContext *cx, PRBool freeit);
+
+/*
+** Perform AES encryption.
+**  "cx" the context
+**  "output" the output buffer to store the encrypted data.
+**  "outputLen" how much data is stored in "output". Set by the routine
+**     after some data is stored in output.
+**  "maxOutputLen" the maximum amount of data that can ever be
+**     stored in "output"
+**  "input" the input data
+**  "inputLen" the amount of input data
+*/
+extern SECStatus
+AES_Encrypt(AESContext *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform AES decryption.
+**  "cx" the context
+**  "output" the output buffer to store the decrypted data.
+**  "outputLen" how much data is stored in "output". Set by the routine
+**     after some data is stored in output.
+**  "maxOutputLen" the maximum amount of data that can ever be
+**     stored in "output"
+**  "input" the input data
+**  "inputLen" the amount of input data
+*/
+extern SECStatus
+AES_Decrypt(AESContext *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** AES key wrap algorithm, RFC 3394
+*/
+
+/*
+** Create a new AES key wrap context suitable for key wrapping/unwrapping.
+**  "key" raw key data
+**      "iv"  The 8 byte "initial value"
+**      "encrypt", a boolean, true for key wrapping, false for unwrapping.
+**  "keylen" the number of bytes of key data (16, 24, or 32)
+*/
+extern AESKeyWrapContext *
+AESKeyWrap_CreateContext(const unsigned char *key, const unsigned char *iv,
+                         int encrypt, unsigned int keylen);
+extern AESKeyWrapContext *AESKeyWrap_AllocateContext(void);
+extern SECStatus
+AESKeyWrap_InitContext(AESKeyWrapContext *cx,
+                       const unsigned char *key,
+                       unsigned int keylen,
+                       const unsigned char *iv,
+                       int,
+                       unsigned int encrypt,
+                       unsigned int);
+
+/*
+** Destroy an AES KeyWrap context.
+**  "cx" the context
+**  "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void
+AESKeyWrap_DestroyContext(AESKeyWrapContext *cx, PRBool freeit);
+
+/*
+** Perform AES key wrap.
+**  "cx" the context
+**  "output" the output buffer to store the encrypted data.
+**  "outputLen" how much data is stored in "output". Set by the routine
+**     after some data is stored in output.
+**  "maxOutputLen" the maximum amount of data that can ever be
+**     stored in "output"
+**  "input" the input data
+**  "inputLen" the amount of input data
+*/
+extern SECStatus
+AESKeyWrap_Encrypt(AESKeyWrapContext *cx, unsigned char *output,
+                   unsigned int *outputLen, unsigned int maxOutputLen,
+                   const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform AES key unwrap.
+**  "cx" the context
+**  "output" the output buffer to store the decrypted data.
+**  "outputLen" how much data is stored in "output". Set by the routine
+**     after some data is stored in output.
+**  "maxOutputLen" the maximum amount of data that can ever be
+**     stored in "output"
+**  "input" the input data
+**  "inputLen" the amount of input data
+*/
+extern SECStatus
+AESKeyWrap_Decrypt(AESKeyWrapContext *cx, unsigned char *output,
+                   unsigned int *outputLen, unsigned int maxOutputLen,
+                   const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** Camellia symmetric block cypher
+*/
+
+/*
+** Create a new Camellia context suitable for Camellia encryption/decryption.
+**  "key" raw key data
+**  "keylen" the number of bytes of key data (16, 24, or 32)
+*/
+extern CamelliaContext *
+Camellia_CreateContext(const unsigned char *key, const unsigned char *iv,
+                       int mode, int encrypt, unsigned int keylen);
+
+extern CamelliaContext *Camellia_AllocateContext(void);
+extern SECStatus Camellia_InitContext(CamelliaContext *cx,
+                                      const unsigned char *key,
+                                      unsigned int keylen,
+                                      const unsigned char *iv,
+                                      int mode,
+                                      unsigned int encrypt,
+                                      unsigned int unused);
+/*
+** Destroy a Camellia encryption/decryption context.
+**  "cx" the context
+**  "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void
+Camellia_DestroyContext(CamelliaContext *cx, PRBool freeit);
+
+/*
+** Perform Camellia encryption.
+**  "cx" the context
+**  "output" the output buffer to store the encrypted data.
+**  "outputLen" how much data is stored in "output". Set by the routine
+**     after some data is stored in output.
+**  "maxOutputLen" the maximum amount of data that can ever be
+**     stored in "output"
+**  "input" the input data
+**  "inputLen" the amount of input data
+*/
+extern SECStatus
+Camellia_Encrypt(CamelliaContext *cx, unsigned char *output,
+                 unsigned int *outputLen, unsigned int maxOutputLen,
+                 const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform Camellia decryption.
+**  "cx" the context
+**  "output" the output buffer to store the decrypted data.
+**  "outputLen" how much data is stored in "output". Set by the routine
+**     after some data is stored in output.
+**  "maxOutputLen" the maximum amount of data that can ever be
+**     stored in "output"
+**  "input" the input data
+**  "inputLen" the amount of input data
+*/
+extern SECStatus
+Camellia_Decrypt(CamelliaContext *cx, unsigned char *output,
+                 unsigned int *outputLen, unsigned int maxOutputLen,
+                 const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** ChaCha20+Poly1305 AEAD
+*/
+
+extern SECStatus ChaCha20Poly1305_InitContext(ChaCha20Poly1305Context *ctx,
+                                              const unsigned char *key,
+                                              unsigned int keyLen,
+                                              unsigned int tagLen);
+
+extern ChaCha20Poly1305Context *ChaCha20Poly1305_CreateContext(
+    const unsigned char *key, unsigned int keyLen, unsigned int tagLen);
+
+extern void ChaCha20Poly1305_DestroyContext(ChaCha20Poly1305Context *ctx,
+                                            PRBool freeit);
+
+extern SECStatus ChaCha20Poly1305_Seal(
+    const ChaCha20Poly1305Context *ctx, unsigned char *output,
+    unsigned int *outputLen, unsigned int maxOutputLen,
+    const unsigned char *input, unsigned int inputLen,
+    const unsigned char *nonce, unsigned int nonceLen,
+    const unsigned char *ad, unsigned int adLen);
+
+extern SECStatus ChaCha20Poly1305_Open(
+    const ChaCha20Poly1305Context *ctx, unsigned char *output,
+    unsigned int *outputLen, unsigned int maxOutputLen,
+    const unsigned char *input, unsigned int inputLen,
+    const unsigned char *nonce, unsigned int nonceLen,
+    const unsigned char *ad, unsigned int adLen);
+
+/******************************************/
+/*
+** MD5 secure hash function
+*/
+
+/*
+** Hash a null terminated string "src" into "dest" using MD5
+*/
+extern SECStatus MD5_Hash(unsigned char *dest, const char *src);
+
+/*
+** Hash a non-null terminated string "src" into "dest" using MD5
+*/
+extern SECStatus MD5_HashBuf(unsigned char *dest, const unsigned char *src,
+                             PRUint32 src_length);
+
+/*
+** Create a new MD5 context
+*/
+extern MD5Context *MD5_NewContext(void);
+
+/*
+** Destroy an MD5 secure hash context.
+**  "cx" the context
+**  "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void MD5_DestroyContext(MD5Context *cx, PRBool freeit);
+
+/*
+** Reset an MD5 context, preparing it for a fresh round of hashing
+*/
+extern void MD5_Begin(MD5Context *cx);
+
+/*
+** Update the MD5 hash function with more data.
+**  "cx" the context
+**  "input" the data to hash
+**  "inputLen" the amount of data to hash
+*/
+extern void MD5_Update(MD5Context *cx,
+                       const unsigned char *input, unsigned int inputLen);
+
+/*
+** Finish the MD5 hash function. Produce the digested results in "digest"
+**  "cx" the context
+**  "digest" where the 16 bytes of digest data are stored
+**  "digestLen" where the digest length (16) is stored
+**  "maxDigestLen" the maximum amount of data that can ever be
+**     stored in "digest"
+*/
+extern void MD5_End(MD5Context *cx, unsigned char *digest,
+                    unsigned int *digestLen, unsigned int maxDigestLen);
+
+/*
+** Export the current state of the MD5 hash without appending the standard
+** padding and length bytes. Produce the digested results in "digest"
+**  "cx" the context
+**  "digest" where the 16 bytes of digest data are stored
+**  "digestLen" where the digest length (16) is stored (optional)
+**  "maxDigestLen" the maximum amount of data that can ever be
+**     stored in "digest"
+*/
+extern void MD5_EndRaw(MD5Context *cx, unsigned char *digest,
+                       unsigned int *digestLen, unsigned int maxDigestLen);
+
+/*
+ * Return the size of a buffer needed to flatten the MD5 Context into
+ *    "cx" the context
+ *  returns size;
+ */
+extern unsigned int MD5_FlattenSize(MD5Context *cx);
+
+/*
+ * Flatten the MD5 Context into a buffer:
+ *    "cx" the context
+ *    "space" the buffer to flatten to
+ *  returns status;
+ */
+extern SECStatus MD5_Flatten(MD5Context *cx, unsigned char *space);
+
+/*
+ * Resurrect a flattened context into a MD5 Context
+ *    "space" the buffer holding the flattened context
+ *    "arg" ptr to void used by cryptographic resurrect
+ *  returns resurrected context;
+ */
+extern MD5Context *MD5_Resurrect(unsigned char *space, void *arg);
+extern void MD5_Clone(MD5Context *dest, MD5Context *src);
+
+/*
+** trace the intermediate state info of the MD5 hash.
+*/
+extern void MD5_TraceState(MD5Context *cx);
+
+/******************************************/
+/*
+** MD2 secure hash function
+*/
+
+/*
+** Hash a null terminated string "src" into "dest" using MD2
+*/
+extern SECStatus MD2_Hash(unsigned char *dest, const char *src);
+
+/*
+** Create a new MD2 context
+*/
+extern MD2Context *MD2_NewContext(void);
+
+/*
+** Destroy an MD2 secure hash context.
+**  "cx" the context
+**  "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void MD2_DestroyContext(MD2Context *cx, PRBool freeit);
+
+/*
+** Reset an MD2 context, preparing it for a fresh round of hashing
+*/
+extern void MD2_Begin(MD2Context *cx);
+
+/*
+** Update the MD2 hash function with more data.
+**  "cx" the context
+**  "input" the data to hash
+**  "inputLen" the amount of data to hash
+*/
+extern void MD2_Update(MD2Context *cx,
+                       const unsigned char *input, unsigned int inputLen);
+
+/*
+** Finish the MD2 hash function. Produce the digested results in "digest"
+**  "cx" the context
+**  "digest" where the 16 bytes of digest data are stored
+**  "digestLen" where the digest length (16) is stored
+**  "maxDigestLen" the maximum amount of data that can ever be
+**     stored in "digest"
+*/
+extern void MD2_End(MD2Context *cx, unsigned char *digest,
+                    unsigned int *digestLen, unsigned int maxDigestLen);
+
+/*
+ * Return the size of a buffer needed to flatten the MD2 Context into
+ *    "cx" the context
+ *  returns size;
+ */
+extern unsigned int MD2_FlattenSize(MD2Context *cx);
+
+/*
+ * Flatten the MD2 Context into a buffer:
+ *    "cx" the context
+ *    "space" the buffer to flatten to
+ *  returns status;
+ */
+extern SECStatus MD2_Flatten(MD2Context *cx, unsigned char *space);
+
+/*
+ * Resurrect a flattened context into a MD2 Context
+ *    "space" the buffer holding the flattened context
+ *    "arg" ptr to void used by cryptographic resurrect
+ *  returns resurrected context;
+ */
+extern MD2Context *MD2_Resurrect(unsigned char *space, void *arg);
+extern void MD2_Clone(MD2Context *dest, MD2Context *src);
+
+/******************************************/
+/*
+** SHA-1 secure hash function
+*/
+
+/*
+** Hash a null terminated string "src" into "dest" using SHA-1
+*/
+extern SECStatus SHA1_Hash(unsigned char *dest, const char *src);
+
+/*
+** Hash a non-null terminated string "src" into "dest" using SHA-1
+*/
+extern SECStatus SHA1_HashBuf(unsigned char *dest, const unsigned char *src,
+                              PRUint32 src_length);
+
+/*
+** Create a new SHA-1 context
+*/
+extern SHA1Context *SHA1_NewContext(void);
+
+/*
+** Destroy a SHA-1 secure hash context.
+**  "cx" the context
+**  "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void SHA1_DestroyContext(SHA1Context *cx, PRBool freeit);
+
+/*
+** Reset a SHA-1 context, preparing it for a fresh round of hashing
+*/
+extern void SHA1_Begin(SHA1Context *cx);
+
+/*
+** Update the SHA-1 hash function with more data.
+**  "cx" the context
+**  "input" the data to hash
+**  "inputLen" the amount of data to hash
+*/
+extern void SHA1_Update(SHA1Context *cx, const unsigned char *input,
+                        unsigned int inputLen);
+
+/*
+** Finish the SHA-1 hash function. Produce the digested results in "digest"
+**  "cx" the context
+**  "digest" where the 20 bytes of digest data are stored
+**  "digestLen" where the digest length (20) is stored
+**  "maxDigestLen" the maximum amount of data that can ever be
+**     stored in "digest"
+*/
+extern void SHA1_End(SHA1Context *cx, unsigned char *digest,
+                     unsigned int *digestLen, unsigned int maxDigestLen);
+
+/*
+** Export the current state of the SHA-1 hash without appending the standard
+** padding and length bytes. Produce the digested results in "digest"
+**  "cx" the context
+**  "digest" where the 20 bytes of digest data are stored
+**  "digestLen" where the digest length (20) is stored (optional)
+**  "maxDigestLen" the maximum amount of data that can ever be
+**     stored in "digest"
+*/
+extern void SHA1_EndRaw(SHA1Context *cx, unsigned char *digest,
+                        unsigned int *digestLen, unsigned int maxDigestLen);
+
+/*
+** trace the intermediate state info of the SHA1 hash.
+*/
+extern void SHA1_TraceState(SHA1Context *cx);
+
+/*
+ * Return the size of a buffer needed to flatten the SHA-1 Context into
+ *    "cx" the context
+ *  returns size;
+ */
+extern unsigned int SHA1_FlattenSize(SHA1Context *cx);
+
+/*
+ * Flatten the SHA-1 Context into a buffer:
+ *    "cx" the context
+ *    "space" the buffer to flatten to
+ *  returns status;
+ */
+extern SECStatus SHA1_Flatten(SHA1Context *cx, unsigned char *space);
+
+/*
+ * Resurrect a flattened context into a SHA-1 Context
+ *    "space" the buffer holding the flattened context
+ *    "arg" ptr to void used by cryptographic resurrect
+ *  returns resurrected context;
+ */
+extern SHA1Context *SHA1_Resurrect(unsigned char *space, void *arg);
+extern void SHA1_Clone(SHA1Context *dest, SHA1Context *src);
+
+/******************************************/
+
+extern SHA224Context *SHA224_NewContext(void);
+extern void SHA224_DestroyContext(SHA224Context *cx, PRBool freeit);
+extern void SHA224_Begin(SHA224Context *cx);
+extern void SHA224_Update(SHA224Context *cx, const unsigned char *input,
+                          unsigned int inputLen);
+extern void SHA224_End(SHA224Context *cx, unsigned char *digest,
+                       unsigned int *digestLen, unsigned int maxDigestLen);
+/*
+** Export the current state of the SHA-224 hash without appending the standard
+** padding and length bytes. Produce the digested results in "digest"
+**  "cx" the context
+**  "digest" where the 28 bytes of digest data are stored
+**  "digestLen" where the digest length (28) is stored (optional)
+**  "maxDigestLen" the maximum amount of data that can ever be
+**     stored in "digest"
+*/
+extern void SHA224_EndRaw(SHA224Context *cx, unsigned char *digest,
+                          unsigned int *digestLen, unsigned int maxDigestLen);
+extern SECStatus SHA224_HashBuf(unsigned char *dest, const unsigned char *src,
+                                PRUint32 src_length);
+extern SECStatus SHA224_Hash(unsigned char *dest, const char *src);
+extern void SHA224_TraceState(SHA224Context *cx);
+extern unsigned int SHA224_FlattenSize(SHA224Context *cx);
+extern SECStatus SHA224_Flatten(SHA224Context *cx, unsigned char *space);
+extern SHA224Context *SHA224_Resurrect(unsigned char *space, void *arg);
+extern void SHA224_Clone(SHA224Context *dest, SHA224Context *src);
+
+/******************************************/
+
+extern SHA256Context *SHA256_NewContext(void);
+extern void SHA256_DestroyContext(SHA256Context *cx, PRBool freeit);
+extern void SHA256_Begin(SHA256Context *cx);
+extern void SHA256_Update(SHA256Context *cx, const unsigned char *input,
+                          unsigned int inputLen);
+extern void SHA256_End(SHA256Context *cx, unsigned char *digest,
+                       unsigned int *digestLen, unsigned int maxDigestLen);
+/*
+** Export the current state of the SHA-256 hash without appending the standard
+** padding and length bytes. Produce the digested results in "digest"
+**  "cx" the context
+**  "digest" where the 32 bytes of digest data are stored
+**  "digestLen" where the digest length (32) is stored (optional)
+**  "maxDigestLen" the maximum amount of data that can ever be
+**     stored in "digest"
+*/
+extern void SHA256_EndRaw(SHA256Context *cx, unsigned char *digest,
+                          unsigned int *digestLen, unsigned int maxDigestLen);
+extern SECStatus SHA256_HashBuf(unsigned char *dest, const unsigned char *src,
+                                PRUint32 src_length);
+extern SECStatus SHA256_Hash(unsigned char *dest, const char *src);
+extern void SHA256_TraceState(SHA256Context *cx);
+extern unsigned int SHA256_FlattenSize(SHA256Context *cx);
+extern SECStatus SHA256_Flatten(SHA256Context *cx, unsigned char *space);
+extern SHA256Context *SHA256_Resurrect(unsigned char *space, void *arg);
+extern void SHA256_Clone(SHA256Context *dest, SHA256Context *src);
+
+/******************************************/
+
+extern SHA512Context *SHA512_NewContext(void);
+extern void SHA512_DestroyContext(SHA512Context *cx, PRBool freeit);
+extern void SHA512_Begin(SHA512Context *cx);
+extern void SHA512_Update(SHA512Context *cx, const unsigned char *input,
+                          unsigned int inputLen);
+/*
+** Export the current state of the SHA-512 hash without appending the standard
+** padding and length bytes. Produce the digested results in "digest"
+**  "cx" the context
+**  "digest" where the 64 bytes of digest data are stored
+**  "digestLen" where the digest length (64) is stored (optional)
+**  "maxDigestLen" the maximum amount of data that can ever be
+**     stored in "digest"
+*/
+extern void SHA512_EndRaw(SHA512Context *cx, unsigned char *digest,
+                          unsigned int *digestLen, unsigned int maxDigestLen);
+extern void SHA512_End(SHA512Context *cx, unsigned char *digest,
+                       unsigned int *digestLen, unsigned int maxDigestLen);
+extern SECStatus SHA512_HashBuf(unsigned char *dest, const unsigned char *src,
+                                PRUint32 src_length);
+extern SECStatus SHA512_Hash(unsigned char *dest, const char *src);
+extern void SHA512_TraceState(SHA512Context *cx);
+extern unsigned int SHA512_FlattenSize(SHA512Context *cx);
+extern SECStatus SHA512_Flatten(SHA512Context *cx, unsigned char *space);
+extern SHA512Context *SHA512_Resurrect(unsigned char *space, void *arg);
+extern void SHA512_Clone(SHA512Context *dest, SHA512Context *src);
+
+/******************************************/
+
+extern SHA384Context *SHA384_NewContext(void);
+extern void SHA384_DestroyContext(SHA384Context *cx, PRBool freeit);
+extern void SHA384_Begin(SHA384Context *cx);
+extern void SHA384_Update(SHA384Context *cx, const unsigned char *input,
+                          unsigned int inputLen);
+extern void SHA384_End(SHA384Context *cx, unsigned char *digest,
+                       unsigned int *digestLen, unsigned int maxDigestLen);
+/*
+** Export the current state of the SHA-384 hash without appending the standard
+** padding and length bytes. Produce the digested results in "digest"
+**  "cx" the context
+**  "digest" where the 48 bytes of digest data are stored
+**  "digestLen" where the digest length (48) is stored (optional)
+**  "maxDigestLen" the maximum amount of data that can ever be
+**     stored in "digest"
+*/
+extern void SHA384_EndRaw(SHA384Context *cx, unsigned char *digest,
+                          unsigned int *digestLen, unsigned int maxDigestLen);
+extern SECStatus SHA384_HashBuf(unsigned char *dest, const unsigned char *src,
+                                PRUint32 src_length);
+extern SECStatus SHA384_Hash(unsigned char *dest, const char *src);
+extern void SHA384_TraceState(SHA384Context *cx);
+extern unsigned int SHA384_FlattenSize(SHA384Context *cx);
+extern SECStatus SHA384_Flatten(SHA384Context *cx, unsigned char *space);
+extern SHA384Context *SHA384_Resurrect(unsigned char *space, void *arg);
+extern void SHA384_Clone(SHA384Context *dest, SHA384Context *src);
+
+/****************************************
+ * implement TLS 1.0 Pseudo Random Function (PRF) and TLS P_hash function
+ */
+
+extern SECStatus
+TLS_PRF(const SECItem *secret, const char *label, SECItem *seed,
+        SECItem *result, PRBool isFIPS);
+
+extern SECStatus
+TLS_P_hash(HASH_HashType hashAlg, const SECItem *secret, const char *label,
+           SECItem *seed, SECItem *result, PRBool isFIPS);
+
+/******************************************/
+/*
+** Pseudo Random Number Generation.  FIPS compliance desirable.
+*/
+
+/*
+** Initialize the global RNG context and give it some seed input taken
+** from the system.  This function is thread-safe and will only allow
+** the global context to be initialized once.  The seed input is likely
+** small, so it is imperative that RNG_RandomUpdate() be called with
+** additional seed data before the generator is used.  A good way to
+** provide the generator with additional entropy is to call
+** RNG_SystemInfoForRNG().  Note that NSS_Init() does exactly that.
+*/
+extern SECStatus RNG_RNGInit(void);
+
+/*
+** Update the global random number generator with more seeding
+** material
+*/
+extern SECStatus RNG_RandomUpdate(const void *data, size_t bytes);
+
+/*
+** Generate some random bytes, using the global random number generator
+** object.
+*/
+extern SECStatus RNG_GenerateGlobalRandomBytes(void *dest, size_t len);
+
+extern SECStatus RNG_ResetForFuzzing(void);
+
+/* Destroy the global RNG context.  After a call to RNG_RNGShutdown()
+** a call to RNG_RNGInit() is required in order to use the generator again,
+** along with seed data (see the comment above RNG_RNGInit()).
+*/
+extern void RNG_RNGShutdown(void);
+
+extern void RNG_SystemInfoForRNG(void);
+
+/*
+ * FIPS 186-2 Change Notice 1 RNG Algorithm 1, used both to
+ * generate the DSA X parameter and as a generic purpose RNG.
+ *
+ * The following two FIPS186Change functions are needed for
+ * NIST RNG Validation System.
+ */
+
+/*
+ * FIPS186Change_GenerateX is now deprecated. It will return SECFailure with
+ * the error set to PR_NOT_IMPLEMENTED_ERROR.
+ */
+extern SECStatus
+FIPS186Change_GenerateX(unsigned char *XKEY,
+                        const unsigned char *XSEEDj,
+                        unsigned char *x_j);
+
+/*
+ * When generating the DSA X parameter, we generate 2*GSIZE bytes
+ * of random output and reduce it mod q.
+ *
+ * Input: w, 2*GSIZE bytes
+ *        q, DSA_SUBPRIME_LEN bytes
+ * Output: xj, DSA_SUBPRIME_LEN bytes
+ */
+extern SECStatus
+FIPS186Change_ReduceModQForDSA(const unsigned char *w,
+                               const unsigned char *q,
+                               unsigned char *xj);
+
+/* To allow NIST KAT tests */
+extern SECStatus
+PRNGTEST_Instantiate_Kat(const PRUint8 *entropy, unsigned int entropy_len,
+                         const PRUint8 *nonce, unsigned int nonce_len,
+                         const PRUint8 *personal_string, unsigned int ps_len);
+
+/*
+ * The following functions are for FIPS poweron self test and FIPS algorithm
+ * testing.
+ */
+extern SECStatus
+PRNGTEST_Instantiate(const PRUint8 *entropy, unsigned int entropy_len,
+                     const PRUint8 *nonce, unsigned int nonce_len,
+                     const PRUint8 *personal_string, unsigned int ps_len);
+
+extern SECStatus
+PRNGTEST_Reseed(const PRUint8 *entropy, unsigned int entropy_len,
+                const PRUint8 *additional, unsigned int additional_len);
+
+extern SECStatus
+PRNGTEST_Generate(PRUint8 *bytes, unsigned int bytes_len,
+                  const PRUint8 *additional, unsigned int additional_len);
+
+extern SECStatus
+PRNGTEST_Uninstantiate(void);
+
+extern SECStatus
+PRNGTEST_RunHealthTests(void);
+
+/* Generate PQGParams and PQGVerify structs.
+ * Length of seed and length of h both equal length of P.
+ * All lengths are specified by "j", according to the table above.
+ *
+ * The verify parameters will conform to FIPS186-1.
+ */
+extern SECStatus
+PQG_ParamGen(unsigned int j,      /* input : determines length of P. */
+             PQGParams **pParams, /* output: P Q and G returned here */
+             PQGVerify **pVfy);   /* output: counter and seed. */
+
+/* Generate PQGParams and PQGVerify structs.
+ * Length of P specified by j.  Length of h will match length of P.
+ * Length of SEED in bytes specified in seedBytes.
+ * seedBytes must be in the range [20..255] or an error will result.
+ *
+ * The verify parameters will conform to FIPS186-1.
+ */
+extern SECStatus
+PQG_ParamGenSeedLen(
+    unsigned int j,         /* input : determines length of P. */
+    unsigned int seedBytes, /* input : length of seed in bytes.*/
+    PQGParams **pParams,    /* output: P Q and G returned here */
+    PQGVerify **pVfy);      /* output: counter and seed. */
+
+/* Generate PQGParams and PQGVerify structs.
+ * Length of P specified by L in bits.
+ * Length of Q specified by N in bits.
+ * Length of SEED in bytes specified in seedBytes.
+ * seedBytes must be in the range [N..L*2] or an error will result.
+ *
+ * Note that j uses the above table, while L is the exact length. L and N must
+ * match the table below or an error will result:
+ *
+ *  L            N
+ * 1024         160
+ * 2048         224
+ * 2048         256
+ * 3072         256
+ *
+ * If N or seedBytes are set to zero, then PQG_ParamGenV2 will
+ * pick a default value (typically the smallest secure value for these
+ * variables).
+ *
+ * The verify parameters will conform to FIPS186-3 using the smallest
+ * permissible hash for the key strength.
+ */
+extern SECStatus
+PQG_ParamGenV2(
+    unsigned int L,         /* input : determines length of P. */
+    unsigned int N,         /* input : determines length of Q. */
+    unsigned int seedBytes, /* input : length of seed in bytes.*/
+    PQGParams **pParams,    /* output: P Q and G returned here */
+    PQGVerify **pVfy);      /* output: counter and seed. */
+
+/*  Test PQGParams for validity as DSS PQG values.
+ *  If vfy is non-NULL, test PQGParams to make sure they were generated
+ *       using the specified seed, counter, and h values.
+ *
+ *  Return value indicates whether Verification operation ran successfully
+ *  to completion, but does not indicate if PQGParams are valid or not.
+ *  If return value is SECSuccess, then *pResult has these meanings:
+ *       SECSuccess: PQGParams are valid.
+ *       SECFailure: PQGParams are invalid.
+ *
+ * Verify the PQG against the counter, SEED and h.
+ * These tests are specified in FIPS 186-3 Appendix A.1.1.1, A.1.1.3, and A.2.2
+ * PQG_VerifyParams will automatically choose the appropriate test.
+ */
+
+extern SECStatus PQG_VerifyParams(const PQGParams *params,
+                                  const PQGVerify *vfy, SECStatus *result);
+
+extern void PQG_DestroyParams(PQGParams *params);
+
+extern void PQG_DestroyVerify(PQGVerify *vfy);
+
+/*
+ * clean-up any global tables freebl may have allocated after it starts up.
+ * This function is not thread safe and should be called only after the
+ * library has been quiesced.
+ */
+extern void BL_Cleanup(void);
+
+/* unload freebl shared library from memory */
+extern void BL_Unload(void);
+
+/**************************************************************************
+ *  Verify a given Shared library signature                               *
+ **************************************************************************/
+PRBool BLAPI_SHVerify(const char *name, PRFuncPtr addr);
+
+/**************************************************************************
+ *  Verify a given filename's signature                               *
+ **************************************************************************/
+PRBool BLAPI_SHVerifyFile(const char *shName);
+
+/**************************************************************************
+ *  Verify Our Own Shared library signature                               *
+ **************************************************************************/
+PRBool BLAPI_VerifySelf(const char *name);
+
+/*********************************************************************/
+extern const SECHashObject *HASH_GetRawHashObject(HASH_HashType hashType);
+
+extern void BL_SetForkState(PRBool forked);
+
+/*
+** prepare an ECParams structure from DER-encoded params
+ */
+extern SECStatus EC_FillParams(PLArenaPool *arena,
+                               const SECItem *encodedParams, ECParams *params);
+extern SECStatus EC_DecodeParams(const SECItem *encodedParams,
+                                 ECParams **ecparams);
+extern SECStatus EC_CopyParams(PLArenaPool *arena, ECParams *dstParams,
+                               const ECParams *srcParams);
+
+/*
+ * use the internal table to get the size in bytes of a single EC point
+ */
+extern int EC_GetPointSize(const ECParams *params);
+
+SEC_END_PROTOS
+
+#endif /* _BLAPI_H_ */
diff --git a/security/nss/lib/freebl/blapii.h b/security/nss/lib/freebl/blapii.h
new file mode 100644
index 000000000..6ad2e2892
--- /dev/null
+++ b/security/nss/lib/freebl/blapii.h
@@ -0,0 +1,61 @@
+/*
+ * blapii.h - private data structures and prototypes for the freebl library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _BLAPII_H_
+#define _BLAPII_H_
+
+#include "blapit.h"
+
+/* max block size of supported block ciphers */
+#define MAX_BLOCK_SIZE 16
+
+typedef SECStatus (*freeblCipherFunc)(void *cx, unsigned char *output,
+                                      unsigned int *outputLen, unsigned int maxOutputLen,
+                                      const unsigned char *input, unsigned int inputLen,
+                                      unsigned int blocksize);
+typedef void (*freeblDestroyFunc)(void *cx, PRBool freeit);
+
+SEC_BEGIN_PROTOS
+
+SECStatus BL_FIPSEntryOK(PRBool freeblOnly);
+PRBool BL_POSTRan(PRBool freeblOnly);
+
+#if defined(XP_UNIX) && !defined(NO_FORK_CHECK)
+
+extern PRBool bl_parentForkedAfterC_Initialize;
+
+#define SKIP_AFTER_FORK(x)                 \
+    if (!bl_parentForkedAfterC_Initialize) \
+    x
+
+#else
+
+#define SKIP_AFTER_FORK(x) x
+
+#endif
+
+SEC_END_PROTOS
+
+#if defined(NSS_X86_OR_X64)
+#define HAVE_UNALIGNED_ACCESS 1
+#endif
+
+#if defined(__clang__)
+#define HAVE_NO_SANITIZE_ATTR __has_attribute(no_sanitize)
+#else /* not clang: treat the no_sanitize attribute as unavailable */
+#define HAVE_NO_SANITIZE_ATTR 0
+#endif /* __clang__ */
+/* NO_SANITIZE_ALIGNMENT is empty unless both conditions below hold. */
+#if defined(HAVE_UNALIGNED_ACCESS) && HAVE_NO_SANITIZE_ATTR
+#define NO_SANITIZE_ALIGNMENT __attribute__((no_sanitize("alignment")))
+#else
+#define NO_SANITIZE_ALIGNMENT
+#endif
+/* HAVE_NO_SANITIZE_ATTR was only a helper for the test above. */
+#undef HAVE_NO_SANITIZE_ATTR
+
+#endif /* _BLAPII_H_ */
diff --git a/security/nss/lib/freebl/blapit.h b/security/nss/lib/freebl/blapit.h
new file mode 100644
index 000000000..2a17b5f46
--- /dev/null
+++ b/security/nss/lib/freebl/blapit.h
@@ -0,0 +1,414 @@
+/*
+ * blapit.h - public data structures for the freebl library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _BLAPIT_H_
+#define _BLAPIT_H_
+
+#include "seccomon.h"
+#include "prlink.h"
+#include "plarena.h"
+#include "ecl-exp.h"
+
+/* RC2 operation modes */
+#define NSS_RC2 0
+#define NSS_RC2_CBC 1
+
+/* RC5 operation modes */
+#define NSS_RC5 0
+#define NSS_RC5_CBC 1
+
+/* DES operation modes */
+#define NSS_DES 0
+#define NSS_DES_CBC 1
+#define NSS_DES_EDE3 2
+#define NSS_DES_EDE3_CBC 3
+
+#define DES_KEY_LENGTH 8 /* Bytes */
+
+/* AES operation modes */
+#define NSS_AES 0
+#define NSS_AES_CBC 1
+#define NSS_AES_CTS 2
+#define NSS_AES_CTR 3
+#define NSS_AES_GCM 4
+
+/* Camellia operation modes */
+#define NSS_CAMELLIA 0
+#define NSS_CAMELLIA_CBC 1
+
+/* SEED operation modes */
+#define NSS_SEED 0
+#define NSS_SEED_CBC 1
+
+#define DSA1_SUBPRIME_LEN 20                             /* Bytes */
+#define DSA1_SIGNATURE_LEN (DSA1_SUBPRIME_LEN * 2)       /* Bytes */
+#define DSA_MAX_SUBPRIME_LEN 32                          /* Bytes */
+#define DSA_MAX_SIGNATURE_LEN (DSA_MAX_SUBPRIME_LEN * 2) /* Bytes */
+
+/*
+ * Mark the old defines as deprecated. This will warn code that expected
+ * DSA1 only that they need to change if they are to support DSA2.
+ */
+#if defined(__GNUC__) && (__GNUC__ > 3)
+/* GCC: casting through a deprecated typedef makes each use of these warn */
+typedef int __BLAPI_DEPRECATED __attribute__((deprecated));
+#define DSA_SUBPRIME_LEN ((__BLAPI_DEPRECATED)DSA1_SUBPRIME_LEN)
+#define DSA_SIGNATURE_LEN ((__BLAPI_DEPRECATED)DSA1_SIGNATURE_LEN)
+#define DSA_Q_BITS ((__BLAPI_DEPRECATED)(DSA1_SUBPRIME_LEN * 8))
+#else
+#ifdef _WIN32
+/* This magic gets the windows compiler to give us a deprecation
+ * warning; the names listed must match the #defines below exactly. */
+#pragma deprecated(DSA_SUBPRIME_LEN, DSA_SIGNATURE_LEN, DSA_Q_BITS)
+#endif
+#define DSA_SUBPRIME_LEN DSA1_SUBPRIME_LEN
+#define DSA_SIGNATURE_LEN DSA1_SIGNATURE_LEN
+#define DSA_Q_BITS (DSA1_SUBPRIME_LEN * 8)
+#endif
+
+/* XXX We shouldn't have to hard code this limit. For
+ * now, this is the quickest way to support ECDSA signature
+ * processing (ECDSA signature lengths depend on curve
+ * size). This limit is sufficient for curves up to
+ * 576 bits.
+ */
+#define MAX_ECKEY_LEN 72 /* Bytes */
+
+#define EC_MAX_KEY_BITS 521 /* in bits */
+#define EC_MIN_KEY_BITS 256 /* in bits */
+
+/* EC point compression format */
+#define EC_POINT_FORM_COMPRESSED_Y0 0x02
+#define EC_POINT_FORM_COMPRESSED_Y1 0x03
+#define EC_POINT_FORM_UNCOMPRESSED 0x04
+#define EC_POINT_FORM_HYBRID_Y0 0x06
+#define EC_POINT_FORM_HYBRID_Y1 0x07
+
+/*
+ * Number of bytes each hash algorithm produces
+ */
+#define MD2_LENGTH 16    /* Bytes */
+#define MD5_LENGTH 16    /* Bytes */
+#define SHA1_LENGTH 20   /* Bytes */
+#define SHA256_LENGTH 32 /* bytes */
+#define SHA384_LENGTH 48 /* bytes */
+#define SHA512_LENGTH 64 /* bytes */
+#define HASH_LENGTH_MAX SHA512_LENGTH
+
+/*
+ * Input block size for each hash algorithm.
+ */
+
+#define MD2_BLOCK_LENGTH 64     /* bytes */
+#define MD5_BLOCK_LENGTH 64     /* bytes */
+#define SHA1_BLOCK_LENGTH 64    /* bytes */
+#define SHA224_BLOCK_LENGTH 64  /* bytes */
+#define SHA256_BLOCK_LENGTH 64  /* bytes */
+#define SHA384_BLOCK_LENGTH 128 /* bytes */
+#define SHA512_BLOCK_LENGTH 128 /* bytes */
+#define HASH_BLOCK_LENGTH_MAX SHA512_BLOCK_LENGTH
+
+#define AES_KEY_WRAP_IV_BYTES 8
+#define AES_KEY_WRAP_BLOCK_SIZE 8 /* bytes */
+#define AES_BLOCK_SIZE 16         /* bytes */
+
+#define AES_128_KEY_LENGTH 16 /* bytes */
+#define AES_192_KEY_LENGTH 24 /* bytes */
+#define AES_256_KEY_LENGTH 32 /* bytes */
+
+#define CAMELLIA_BLOCK_SIZE 16 /* bytes */
+
+#define SEED_BLOCK_SIZE 16 /* bytes */
+#define SEED_KEY_LENGTH 16 /* bytes */
+
+#define NSS_FREEBL_DEFAULT_CHUNKSIZE 2048
+
+/*
+ * These values come from the initial key size limits from the PKCS #11
+ * module. They may be arbitrarily adjusted to any value freebl supports.
+ */
+#define RSA_MIN_MODULUS_BITS 128
+#define RSA_MAX_MODULUS_BITS 16384
+#define RSA_MAX_EXPONENT_BITS 64
+#define DH_MIN_P_BITS 128
+#define DH_MAX_P_BITS 16384
+
+/*
+ * The FIPS 186-1 algorithm for generating primes P and Q allows only 9
+ * distinct values for the length of P, and only one value for the
+ * length of Q.
+ * The algorithm uses a variable j to indicate which of the 9 lengths
+ * of P is to be used.
+ * The following table relates j to the lengths of P and Q in bits.
+ *
+ *  j   bits in P   bits in Q
+ *  _   _________   _________
+ *  0    512        160
+ *  1    576        160
+ *  2    640        160
+ *  3    704        160
+ *  4    768        160
+ *  5    832        160
+ *  6    896        160
+ *  7    960        160
+ *  8   1024        160
+ *
+ * The FIPS-186-1 compliant PQG generator takes j as an input parameter.
+ *
+ * FIPS 186-3 algorithm specifies 4 distinct P and Q sizes:
+ *
+ *     bits in P       bits in Q
+ *     _________       _________
+ *      1024           160
+ *      2048           224
+ *      2048           256
+ *      3072           256
+ *
+ * The FIPS-186-3 compliant PQG generator (PQG V2) takes arbitrary p and q
+ * lengths as input and returns an error if they aren't in this list.
+ */
+
+#define DSA1_Q_BITS 160
+#define DSA_MAX_P_BITS 3072
+#define DSA_MIN_P_BITS 512
+#define DSA_MAX_Q_BITS 256
+#define DSA_MIN_Q_BITS 160
+
+#if DSA_MAX_Q_BITS != DSA_MAX_SUBPRIME_LEN * 8
+#error "Inconsistent declaration of DSA SUBPRIME/Q parameters in blapit.h"
+#endif
+
+/*
+ * Macro: maps the desired number of bits in P (a multiple of 64 in 512..1024)
+ * to its index (0..8); yields -1 otherwise. Evaluates bits more than once.
+ */
+#define PQG_PBITS_TO_INDEX(bits) \
+    (((bits) < 512 || (bits) > 1024 || (bits) % 64) ? -1 : (int)((bits)-512) / 64)
+
+/*
+ * Macro: maps an index (0..8) to the number of bits in P for that index;
+ * yields -1 otherwise (a negative j is rejected too, via the unsigned cast).
+ */
+#define PQG_INDEX_TO_PBITS(j) (((unsigned)(j) > 8) ? -1 : (512 + 64 * (j)))
+
+/***************************************************************************
+** Opaque objects
+*/
+
+struct DESContextStr;
+struct RC2ContextStr;
+struct RC4ContextStr;
+struct RC5ContextStr;
+struct AESContextStr;
+struct CamelliaContextStr;
+struct MD2ContextStr;
+struct MD5ContextStr;
+struct SHA1ContextStr;
+struct SHA256ContextStr;
+struct SHA512ContextStr;
+struct AESKeyWrapContextStr;
+struct SEEDContextStr;
+struct ChaCha20Poly1305ContextStr;
+
+typedef struct DESContextStr DESContext;
+typedef struct RC2ContextStr RC2Context;
+typedef struct RC4ContextStr RC4Context;
+typedef struct RC5ContextStr RC5Context;
+typedef struct AESContextStr AESContext;
+typedef struct CamelliaContextStr CamelliaContext;
+typedef struct MD2ContextStr MD2Context;
+typedef struct MD5ContextStr MD5Context;
+typedef struct SHA1ContextStr SHA1Context;
+typedef struct SHA256ContextStr SHA256Context;
+/* SHA224Context is really a SHA256ContextStr.  This is not a mistake. */
+typedef struct SHA256ContextStr SHA224Context;
+typedef struct SHA512ContextStr SHA512Context;
+/* SHA384Context is really a SHA512ContextStr.  This is not a mistake. */
+typedef struct SHA512ContextStr SHA384Context;
+typedef struct AESKeyWrapContextStr AESKeyWrapContext;
+typedef struct SEEDContextStr SEEDContext;
+typedef struct ChaCha20Poly1305ContextStr ChaCha20Poly1305Context;
+
+/***************************************************************************
+** RSA Public and Private Key structures
+*/
+
+/* member names from PKCS#1, section 7.1 */
+struct RSAPublicKeyStr {
+    PLArenaPool *arena;
+    SECItem modulus;
+    SECItem publicExponent;
+};
+typedef struct RSAPublicKeyStr RSAPublicKey;
+
+/* member names from PKCS#1, section 7.2 */
+struct RSAPrivateKeyStr {
+    PLArenaPool *arena;
+    SECItem version;
+    SECItem modulus;         /* PKCS#1 n */
+    SECItem publicExponent;  /* PKCS#1 e */
+    SECItem privateExponent; /* PKCS#1 d */
+    SECItem prime1;          /* PKCS#1 p */
+    SECItem prime2;          /* PKCS#1 q */
+    SECItem exponent1;       /* PKCS#1 d mod (p-1) */
+    SECItem exponent2;       /* PKCS#1 d mod (q-1) */
+    SECItem coefficient;     /* PKCS#1 (inverse of q) mod p */
+};
+typedef struct RSAPrivateKeyStr RSAPrivateKey;
+
+/***************************************************************************
+** DSA Public and Private Key and related structures
+*/
+
+struct PQGParamsStr {
+    PLArenaPool *arena;
+    SECItem prime;    /* p */
+    SECItem subPrime; /* q */
+    SECItem base;     /* g */
+    /* XXX chrisk: this needs to be expanded to hold j and validationParms (RFC2459 7.3.2) */
+};
+typedef struct PQGParamsStr PQGParams;
+
+struct PQGVerifyStr {
+    PLArenaPool *arena; /* includes this struct, seed, & h. */
+    unsigned int counter;
+    SECItem seed;
+    SECItem h;
+};
+typedef struct PQGVerifyStr PQGVerify;
+
+struct DSAPublicKeyStr {
+    PQGParams params;
+    SECItem publicValue;
+};
+typedef struct DSAPublicKeyStr DSAPublicKey;
+
+struct DSAPrivateKeyStr {
+    PQGParams params;
+    SECItem publicValue;
+    SECItem privateValue;
+};
+typedef struct DSAPrivateKeyStr DSAPrivateKey;
+
+/***************************************************************************
+** Diffie-Hellman Public and Private Key and related structures
+** Structure member names suggested by PKCS#3.
+*/
+
+struct DHParamsStr {
+    PLArenaPool *arena;
+    SECItem prime; /* p */
+    SECItem base;  /* g */
+};
+typedef struct DHParamsStr DHParams;
+
+struct DHPublicKeyStr {
+    PLArenaPool *arena;
+    SECItem prime;
+    SECItem base;
+    SECItem publicValue;
+};
+typedef struct DHPublicKeyStr DHPublicKey;
+
+struct DHPrivateKeyStr {
+    PLArenaPool *arena;
+    SECItem prime;
+    SECItem base;
+    SECItem publicValue;
+    SECItem privateValue;
+};
+typedef struct DHPrivateKeyStr DHPrivateKey;
+
+/***************************************************************************
+** Data structures used for elliptic curve parameters and
+** public and private keys.
+*/
+
+/*
+** The ECParams data structures can encode elliptic curve
+** parameters for both GFp and GF2m curves.
+*/
+
+typedef enum { ec_params_explicit,
+               ec_params_named
+} ECParamsType;
+
+typedef enum { ec_field_GFp = 1,
+               ec_field_GF2m,
+               ec_field_plain
+} ECFieldType;
+
+struct ECFieldIDStr {
+    int size; /* field size in bits */
+    ECFieldType type; /* presumably selects the meaningful member of u */
+    union {
+        SECItem prime; /* prime p for (GFp) */
+        SECItem poly;  /* irreducible binary polynomial for (GF2m) */
+    } u;
+    int k1; /* first coefficient of pentanomial or
+             * the only coefficient of trinomial
+             */
+    int k2; /* two remaining coefficients of pentanomial */
+    int k3; /* (second of the two remaining coefficients, see k2) */
+};
+typedef struct ECFieldIDStr ECFieldID;
+
+struct ECCurveStr {
+    SECItem a; /* contains octet stream encoding of
+                         * field element (X9.62 section 4.3.3)
+             */
+    SECItem b;
+    SECItem seed;
+};
+typedef struct ECCurveStr ECCurve;
+
+struct ECParamsStr {
+    PLArenaPool *arena;
+    ECParamsType type;
+    ECFieldID fieldID;
+    ECCurve curve;
+    SECItem base;
+    SECItem order;
+    int cofactor;
+    SECItem DEREncoding;
+    ECCurveName name;
+    SECItem curveOID;
+};
+typedef struct ECParamsStr ECParams;
+
+struct ECPublicKeyStr {
+    ECParams ecParams;
+    SECItem publicValue; /* elliptic curve point encoded as
+                * octet stream.
+                */
+};
+typedef struct ECPublicKeyStr ECPublicKey;
+
+struct ECPrivateKeyStr {
+    ECParams ecParams;
+    SECItem publicValue;  /* encoded ec point */
+    SECItem privateValue; /* private big integer */
+    SECItem version;      /* As per SEC 1, Appendix C, Section C.4 */
+};
+typedef struct ECPrivateKeyStr ECPrivateKey;
+
+typedef void *(*BLapiAllocateFunc)(void);
+typedef void (*BLapiDestroyContextFunc)(void *cx, PRBool freeit);
+typedef SECStatus (*BLapiInitContextFunc)(void *cx,
+                                          const unsigned char *key,
+                                          unsigned int keylen,
+                                          const unsigned char *,
+                                          int,
+                                          unsigned int,
+                                          unsigned int);
+typedef SECStatus (*BLapiEncrypt)(void *cx, unsigned char *output,
+                                  unsigned int *outputLen,
+                                  unsigned int maxOutputLen,
+                                  const unsigned char *input,
+                                  unsigned int inputLen);
+
+#endif /* _BLAPIT_H_ */
diff --git a/security/nss/lib/freebl/blname.c b/security/nss/lib/freebl/blname.c
new file mode 100644
index 000000000..4bad74ada
--- /dev/null
+++ b/security/nss/lib/freebl/blname.c
@@ -0,0 +1,100 @@
+/*
+ *  blname.c - determine the freebl library name.
+ *  This Source Code Form is subject to the terms of the Mozilla Public
+ *  License, v. 2.0. If a copy of the MPL was not distributed with this
+ *  file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#if defined(FREEBL_LOWHASH)
+static const char* default_name =
+    SHLIB_PREFIX "freeblpriv" SHLIB_VERSION "." SHLIB_SUFFIX;
+#else /* !FREEBL_LOWHASH */
+static const char* default_name =
+    SHLIB_PREFIX "freebl" SHLIB_VERSION "." SHLIB_SUFFIX;
+#endif /* FREEBL_LOWHASH */
+
+/* getLibName() returns the name of the library to load. */
+
+#if defined(SOLARIS) && defined(__sparc)
+#include <stddef.h>
+#include <strings.h>
+#include <sys/systeminfo.h>
+
+#if defined(NSS_USE_64)
+/* Library names and ISA list entries used when NSS_USE_64 is defined. */
+static const char fpu_hybrid_shared_lib[] = "libfreebl_64fpu_3.so";
+static const char int_hybrid_shared_lib[] = "libfreebl_64int_3.so";
+static const char non_hybrid_shared_lib[] = "libfreebl_64fpu_3.so";
+
+static const char int_hybrid_isa[] = "sparcv9";
+static const char fpu_hybrid_isa[] = "sparcv9+vis";
+
+#else /* !NSS_USE_64 */
+/* Library names and ISA list entries used when NSS_USE_64 is not defined. */
+static const char fpu_hybrid_shared_lib[] = "libfreebl_32fpu_3.so";
+static const char int_hybrid_shared_lib[] = "libfreebl_32int64_3.so";
+/* This was for SPARC V8, now obsolete. */
+static const char* const non_hybrid_shared_lib = NULL;
+
+static const char int_hybrid_isa[] = "sparcv8plus";
+static const char fpu_hybrid_isa[] = "sparcv8plus+vis";
+
+#endif /* NSS_USE_64 */
+
+static const char*
+getLibName(void)
+{
+    char isa_list[256];
+    const char* int_isa_pos;
+    const char* fpu_isa_pos;
+    long isa_len;
+
+    isa_len = sysinfo(SI_ISALIST, isa_list, sizeof isa_list);
+    if (isa_len <= 0) {
+        return NULL;
+    }
+    /* sysinfo should NUL terminate its output, but enforce it anyway. */
+    if (isa_len >= sizeof isa_list) {
+        isa_len = (sizeof isa_list) - 1;
+    }
+    isa_list[isa_len] = '\0';
+    /* The ISA list holds space separated names of ISAs and ISA
+     * extensions, fastest first. Two of them can accelerate NSS's
+     * crypto code: prefer whichever is listed first, use the only one
+     * present, or fall back to the base unaccelerated code when
+     * neither appears. The int ISA name is a prefix of the fpu ISA
+     * name, so equal positions mean the fpu entry was the one found.
+     */
+    int_isa_pos = strstr(isa_list, int_hybrid_isa);
+    fpu_isa_pos = strstr(isa_list, fpu_hybrid_isa);
+    if (fpu_isa_pos != NULL &&
+        (int_isa_pos == NULL || int_isa_pos >= fpu_isa_pos)) {
+        return fpu_hybrid_shared_lib;
+    }
+    if (int_isa_pos != NULL) {
+        return int_hybrid_shared_lib;
+    }
+    return non_hybrid_shared_lib;
+}
+
+#elif defined(HPUX) && !defined(NSS_USE_64) && !defined(__ia64)
+#include <unistd.h>
+
+/* This code tests to see if we're running on a PA2.x CPU and returns the
+** name of the "fpu" freebl library if so, or of the "int" library otherwise.
+*/
+static const char*
+getLibName(void)
+{
+    /* A PA-RISC 2.0 CPU gets the "fpu" build; anything else gets "int". */
+    if (sysconf(_SC_CPU_VERSION) == CPU_PA_RISC2_0)
+        return "libfreebl_32fpu_3.sl";
+    return "libfreebl_32int_3.sl";
+}
+#else
+/* default case: platforms/ABIs with only one freebl shared lib use default_name. */
+static const char*
+getLibName(void)
+{
+    return default_name;
+}
+#endif
diff --git a/security/nss/lib/freebl/camellia.c b/security/nss/lib/freebl/camellia.c
new file mode 100644
index 000000000..8a7bcb0fe
--- /dev/null
+++ b/security/nss/lib/freebl/camellia.c
@@ -0,0 +1,1896 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prinit.h"
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "blapi.h"
+#include "camellia.h"
+#include "sha_fast.h" /* for SHA_HTONL and related configuration macros */
+
+/* key constants */
+
+#define CAMELLIA_SIGMA1L (0xA09E667FL)
+#define CAMELLIA_SIGMA1R (0x3BCC908BL)
+#define CAMELLIA_SIGMA2L (0xB67AE858L)
+#define CAMELLIA_SIGMA2R (0x4CAA73B2L)
+#define CAMELLIA_SIGMA3L (0xC6EF372FL)
+#define CAMELLIA_SIGMA3R (0xE94F82BEL)
+#define CAMELLIA_SIGMA4L (0x54FF53A5L)
+#define CAMELLIA_SIGMA4R (0xF1D36F1CL)
+#define CAMELLIA_SIGMA5L (0x10E527FAL)
+#define CAMELLIA_SIGMA5R (0xDE682D1DL)
+#define CAMELLIA_SIGMA6L (0xB05688C2L)
+#define CAMELLIA_SIGMA6R (0xB3E6C1FDL)
+
+/*
+ *  macros
+ */
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+
+/* require a CPU that allows unaligned access */
+
+#if defined(SHA_NEED_TMP_VARIABLE)
+#define CAMELLIA_NEED_TMP_VARIABLE 1
+#endif
+
+#define GETU32(p) SHA_HTONL(*((PRUint32 *)(p)))
+#define PUTU32(ct, st)                       \
+    {                                        \
+        *((PRUint32 *)(ct)) = SHA_HTONL(st); \
+    }
+
+#else /* no unaligned access */
+
+#define GETU32(pt) \
+    (((PRUint32)(pt)[0] << 24) ^ ((PRUint32)(pt)[1] << 16) ^ ((PRUint32)(pt)[2] << 8) ^ ((PRUint32)(pt)[3]))
+
+#define PUTU32(ct, st)                   \
+    {                                    \
+        (ct)[0] = (PRUint8)((st) >> 24); \
+        (ct)[1] = (PRUint8)((st) >> 16); \
+        (ct)[2] = (PRUint8)((st) >> 8);  \
+        (ct)[3] = (PRUint8)(st);         \
+    }
+
+#endif
+
+#define CamelliaSubkeyL(INDEX) (subkey[(INDEX)*2])
+#define CamelliaSubkeyR(INDEX) (subkey[(INDEX)*2 + 1])
+
+/* rotate right by 1 byte (8 bits); a true rotation only for 32-bit unsigned x */
+#define CAMELLIA_RR8(x) (((x) >> 8) + ((x) << 24))
+/* rotate left by 1 bit; same 32-bit unsigned assumption */
+#define CAMELLIA_RL1(x) (((x) << 1) + ((x) >> 31))
+/* rotate left by 1 byte (8 bits); same 32-bit unsigned assumption */
+#define CAMELLIA_RL8(x) (((x) << 8) + ((x) >> 24))
+
+#define CAMELLIA_ROLDQ(ll, lr, rl, rr, w0, w1, bits) \
+    do {                                             \
+        w0 = ll;                                     \
+        ll = (ll << bits) + (lr >> (32 - bits));     \
+        lr = (lr << bits) + (rl >> (32 - bits));     \
+        rl = (rl << bits) + (rr >> (32 - bits));     \
+        rr = (rr << bits) + (w0 >> (32 - bits));     \
+    } while (0)
+
+#define CAMELLIA_ROLDQo32(ll, lr, rl, rr, w0, w1, bits) \
+    do {                                                \
+        w0 = ll;                                        \
+        w1 = lr;                                        \
+        ll = (lr << (bits - 32)) + (rl >> (64 - bits)); \
+        lr = (rl << (bits - 32)) + (rr >> (64 - bits)); \
+        rl = (rr << (bits - 32)) + (w0 >> (64 - bits)); \
+        rr = (w0 << (bits - 32)) + (w1 >> (64 - bits)); \
+    } while (0)
+
+#define CAMELLIA_SP1110(INDEX) (camellia_sp1110[(INDEX)])
+#define CAMELLIA_SP0222(INDEX) (camellia_sp0222[(INDEX)])
+#define CAMELLIA_SP3033(INDEX) (camellia_sp3033[(INDEX)])
+#define CAMELLIA_SP4404(INDEX) (camellia_sp4404[(INDEX)])
+
+#define CAMELLIA_F(xl, xr, kl, kr, yl, yr, il, ir, t0, t1) \
+    do {                                                   \
+        il = xl ^ kl;                                      \
+        ir = xr ^ kr;                                      \
+        t0 = il >> 16;                                     \
+        t1 = ir >> 16;                                     \
+        yl = CAMELLIA_SP1110(ir & 0xff) ^                  \
+             CAMELLIA_SP0222((t1 >> 8) & 0xff) ^           \
+             CAMELLIA_SP3033(t1 & 0xff) ^                  \
+             CAMELLIA_SP4404((ir >> 8) & 0xff);            \
+        yr = CAMELLIA_SP1110((t0 >> 8) & 0xff) ^           \
+             CAMELLIA_SP0222(t0 & 0xff) ^                  \
+             CAMELLIA_SP3033((il >> 8) & 0xff) ^           \
+             CAMELLIA_SP4404(il & 0xff);                   \
+        yl ^= yr;                                          \
+        yr = CAMELLIA_RR8(yr);                             \
+        yr ^= yl;                                          \
+    } while (0)
+
+/*
+ * for speed up
+ *
+ */
+#define CAMELLIA_FLS(ll, lr, rl, rr, kll, klr, krl, krr, t0, t1, t2, t3) \
+    do {                                                                 \
+        t0 = kll;                                                        \
+        t0 &= ll;                                                        \
+        lr ^= CAMELLIA_RL1(t0);                                          \
+        t1 = klr;                                                        \
+        t1 |= lr;                                                        \
+        ll ^= t1;                                                        \
+                                                                         \
+        t2 = krr;                                                        \
+        t2 |= rr;                                                        \
+        rl ^= t2;                                                        \
+        t3 = krl;                                                        \
+        t3 &= rl;                                                        \
+        rr ^= CAMELLIA_RL1(t3);                                          \
+    } while (0)
+
+#define CAMELLIA_ROUNDSM(xl, xr, kl, kr, yl, yr, il, ir, t0, t1) \
+    do {                                                         \
+        ir = CAMELLIA_SP1110(xr & 0xff) ^                        \
+             CAMELLIA_SP0222((xr >> 24) & 0xff) ^                \
+             CAMELLIA_SP3033((xr >> 16) & 0xff) ^                \
+             CAMELLIA_SP4404((xr >> 8) & 0xff);                  \
+        il = CAMELLIA_SP1110((xl >> 24) & 0xff) ^                \
+             CAMELLIA_SP0222((xl >> 16) & 0xff) ^                \
+             CAMELLIA_SP3033((xl >> 8) & 0xff) ^                 \
+             CAMELLIA_SP4404(xl & 0xff);                         \
+        il ^= kl;                                                \
+        ir ^= kr;                                                \
+        ir ^= il;                                                \
+        il = CAMELLIA_RR8(il);                                   \
+        il ^= ir;                                                \
+        yl ^= ir;                                                \
+        yr ^= il;                                                \
+    } while (0)
+
+static const PRUint32 camellia_sp1110[256] = {
+    0x70707000, 0x82828200, 0x2c2c2c00, 0xececec00,
+    0xb3b3b300, 0x27272700, 0xc0c0c000, 0xe5e5e500,
+    0xe4e4e400, 0x85858500, 0x57575700, 0x35353500,
+    0xeaeaea00, 0x0c0c0c00, 0xaeaeae00, 0x41414100,
+    0x23232300, 0xefefef00, 0x6b6b6b00, 0x93939300,
+    0x45454500, 0x19191900, 0xa5a5a500, 0x21212100,
+    0xededed00, 0x0e0e0e00, 0x4f4f4f00, 0x4e4e4e00,
+    0x1d1d1d00, 0x65656500, 0x92929200, 0xbdbdbd00,
+    0x86868600, 0xb8b8b800, 0xafafaf00, 0x8f8f8f00,
+    0x7c7c7c00, 0xebebeb00, 0x1f1f1f00, 0xcecece00,
+    0x3e3e3e00, 0x30303000, 0xdcdcdc00, 0x5f5f5f00,
+    0x5e5e5e00, 0xc5c5c500, 0x0b0b0b00, 0x1a1a1a00,
+    0xa6a6a600, 0xe1e1e100, 0x39393900, 0xcacaca00,
+    0xd5d5d500, 0x47474700, 0x5d5d5d00, 0x3d3d3d00,
+    0xd9d9d900, 0x01010100, 0x5a5a5a00, 0xd6d6d600,
+    0x51515100, 0x56565600, 0x6c6c6c00, 0x4d4d4d00,
+    0x8b8b8b00, 0x0d0d0d00, 0x9a9a9a00, 0x66666600,
+    0xfbfbfb00, 0xcccccc00, 0xb0b0b000, 0x2d2d2d00,
+    0x74747400, 0x12121200, 0x2b2b2b00, 0x20202000,
+    0xf0f0f000, 0xb1b1b100, 0x84848400, 0x99999900,
+    0xdfdfdf00, 0x4c4c4c00, 0xcbcbcb00, 0xc2c2c200,
+    0x34343400, 0x7e7e7e00, 0x76767600, 0x05050500,
+    0x6d6d6d00, 0xb7b7b700, 0xa9a9a900, 0x31313100,
+    0xd1d1d100, 0x17171700, 0x04040400, 0xd7d7d700,
+    0x14141400, 0x58585800, 0x3a3a3a00, 0x61616100,
+    0xdedede00, 0x1b1b1b00, 0x11111100, 0x1c1c1c00,
+    0x32323200, 0x0f0f0f00, 0x9c9c9c00, 0x16161600,
+    0x53535300, 0x18181800, 0xf2f2f200, 0x22222200,
+    0xfefefe00, 0x44444400, 0xcfcfcf00, 0xb2b2b200,
+    0xc3c3c300, 0xb5b5b500, 0x7a7a7a00, 0x91919100,
+    0x24242400, 0x08080800, 0xe8e8e800, 0xa8a8a800,
+    0x60606000, 0xfcfcfc00, 0x69696900, 0x50505000,
+    0xaaaaaa00, 0xd0d0d000, 0xa0a0a000, 0x7d7d7d00,
+    0xa1a1a100, 0x89898900, 0x62626200, 0x97979700,
+    0x54545400, 0x5b5b5b00, 0x1e1e1e00, 0x95959500,
+    0xe0e0e000, 0xffffff00, 0x64646400, 0xd2d2d200,
+    0x10101000, 0xc4c4c400, 0x00000000, 0x48484800,
+    0xa3a3a300, 0xf7f7f700, 0x75757500, 0xdbdbdb00,
+    0x8a8a8a00, 0x03030300, 0xe6e6e600, 0xdadada00,
+    0x09090900, 0x3f3f3f00, 0xdddddd00, 0x94949400,
+    0x87878700, 0x5c5c5c00, 0x83838300, 0x02020200,
+    0xcdcdcd00, 0x4a4a4a00, 0x90909000, 0x33333300,
+    0x73737300, 0x67676700, 0xf6f6f600, 0xf3f3f300,
+    0x9d9d9d00, 0x7f7f7f00, 0xbfbfbf00, 0xe2e2e200,
+    0x52525200, 0x9b9b9b00, 0xd8d8d800, 0x26262600,
+    0xc8c8c800, 0x37373700, 0xc6c6c600, 0x3b3b3b00,
+    0x81818100, 0x96969600, 0x6f6f6f00, 0x4b4b4b00,
+    0x13131300, 0xbebebe00, 0x63636300, 0x2e2e2e00,
+    0xe9e9e900, 0x79797900, 0xa7a7a700, 0x8c8c8c00,
+    0x9f9f9f00, 0x6e6e6e00, 0xbcbcbc00, 0x8e8e8e00,
+    0x29292900, 0xf5f5f500, 0xf9f9f900, 0xb6b6b600,
+    0x2f2f2f00, 0xfdfdfd00, 0xb4b4b400, 0x59595900,
+    0x78787800, 0x98989800, 0x06060600, 0x6a6a6a00,
+    0xe7e7e700, 0x46464600, 0x71717100, 0xbababa00,
+    0xd4d4d400, 0x25252500, 0xababab00, 0x42424200,
+    0x88888800, 0xa2a2a200, 0x8d8d8d00, 0xfafafa00,
+    0x72727200, 0x07070700, 0xb9b9b900, 0x55555500,
+    0xf8f8f800, 0xeeeeee00, 0xacacac00, 0x0a0a0a00,
+    0x36363600, 0x49494900, 0x2a2a2a00, 0x68686800,
+    0x3c3c3c00, 0x38383800, 0xf1f1f100, 0xa4a4a400,
+    0x40404000, 0x28282800, 0xd3d3d300, 0x7b7b7b00,
+    0xbbbbbb00, 0xc9c9c900, 0x43434300, 0xc1c1c100,
+    0x15151500, 0xe3e3e300, 0xadadad00, 0xf4f4f400,
+    0x77777700, 0xc7c7c700, 0x80808000, 0x9e9e9e00,
+};
+
+static const PRUint32 camellia_sp0222[256] = {
+    0x00e0e0e0, 0x00050505, 0x00585858, 0x00d9d9d9,
+    0x00676767, 0x004e4e4e, 0x00818181, 0x00cbcbcb,
+    0x00c9c9c9, 0x000b0b0b, 0x00aeaeae, 0x006a6a6a,
+    0x00d5d5d5, 0x00181818, 0x005d5d5d, 0x00828282,
+    0x00464646, 0x00dfdfdf, 0x00d6d6d6, 0x00272727,
+    0x008a8a8a, 0x00323232, 0x004b4b4b, 0x00424242,
+    0x00dbdbdb, 0x001c1c1c, 0x009e9e9e, 0x009c9c9c,
+    0x003a3a3a, 0x00cacaca, 0x00252525, 0x007b7b7b,
+    0x000d0d0d, 0x00717171, 0x005f5f5f, 0x001f1f1f,
+    0x00f8f8f8, 0x00d7d7d7, 0x003e3e3e, 0x009d9d9d,
+    0x007c7c7c, 0x00606060, 0x00b9b9b9, 0x00bebebe,
+    0x00bcbcbc, 0x008b8b8b, 0x00161616, 0x00343434,
+    0x004d4d4d, 0x00c3c3c3, 0x00727272, 0x00959595,
+    0x00ababab, 0x008e8e8e, 0x00bababa, 0x007a7a7a,
+    0x00b3b3b3, 0x00020202, 0x00b4b4b4, 0x00adadad,
+    0x00a2a2a2, 0x00acacac, 0x00d8d8d8, 0x009a9a9a,
+    0x00171717, 0x001a1a1a, 0x00353535, 0x00cccccc,
+    0x00f7f7f7, 0x00999999, 0x00616161, 0x005a5a5a,
+    0x00e8e8e8, 0x00242424, 0x00565656, 0x00404040,
+    0x00e1e1e1, 0x00636363, 0x00090909, 0x00333333,
+    0x00bfbfbf, 0x00989898, 0x00979797, 0x00858585,
+    0x00686868, 0x00fcfcfc, 0x00ececec, 0x000a0a0a,
+    0x00dadada, 0x006f6f6f, 0x00535353, 0x00626262,
+    0x00a3a3a3, 0x002e2e2e, 0x00080808, 0x00afafaf,
+    0x00282828, 0x00b0b0b0, 0x00747474, 0x00c2c2c2,
+    0x00bdbdbd, 0x00363636, 0x00222222, 0x00383838,
+    0x00646464, 0x001e1e1e, 0x00393939, 0x002c2c2c,
+    0x00a6a6a6, 0x00303030, 0x00e5e5e5, 0x00444444,
+    0x00fdfdfd, 0x00888888, 0x009f9f9f, 0x00656565,
+    0x00878787, 0x006b6b6b, 0x00f4f4f4, 0x00232323,
+    0x00484848, 0x00101010, 0x00d1d1d1, 0x00515151,
+    0x00c0c0c0, 0x00f9f9f9, 0x00d2d2d2, 0x00a0a0a0,
+    0x00555555, 0x00a1a1a1, 0x00414141, 0x00fafafa,
+    0x00434343, 0x00131313, 0x00c4c4c4, 0x002f2f2f,
+    0x00a8a8a8, 0x00b6b6b6, 0x003c3c3c, 0x002b2b2b,
+    0x00c1c1c1, 0x00ffffff, 0x00c8c8c8, 0x00a5a5a5,
+    0x00202020, 0x00898989, 0x00000000, 0x00909090,
+    0x00474747, 0x00efefef, 0x00eaeaea, 0x00b7b7b7,
+    0x00151515, 0x00060606, 0x00cdcdcd, 0x00b5b5b5,
+    0x00121212, 0x007e7e7e, 0x00bbbbbb, 0x00292929,
+    0x000f0f0f, 0x00b8b8b8, 0x00070707, 0x00040404,
+    0x009b9b9b, 0x00949494, 0x00212121, 0x00666666,
+    0x00e6e6e6, 0x00cecece, 0x00ededed, 0x00e7e7e7,
+    0x003b3b3b, 0x00fefefe, 0x007f7f7f, 0x00c5c5c5,
+    0x00a4a4a4, 0x00373737, 0x00b1b1b1, 0x004c4c4c,
+    0x00919191, 0x006e6e6e, 0x008d8d8d, 0x00767676,
+    0x00030303, 0x002d2d2d, 0x00dedede, 0x00969696,
+    0x00262626, 0x007d7d7d, 0x00c6c6c6, 0x005c5c5c,
+    0x00d3d3d3, 0x00f2f2f2, 0x004f4f4f, 0x00191919,
+    0x003f3f3f, 0x00dcdcdc, 0x00797979, 0x001d1d1d,
+    0x00525252, 0x00ebebeb, 0x00f3f3f3, 0x006d6d6d,
+    0x005e5e5e, 0x00fbfbfb, 0x00696969, 0x00b2b2b2,
+    0x00f0f0f0, 0x00313131, 0x000c0c0c, 0x00d4d4d4,
+    0x00cfcfcf, 0x008c8c8c, 0x00e2e2e2, 0x00757575,
+    0x00a9a9a9, 0x004a4a4a, 0x00575757, 0x00848484,
+    0x00111111, 0x00454545, 0x001b1b1b, 0x00f5f5f5,
+    0x00e4e4e4, 0x000e0e0e, 0x00737373, 0x00aaaaaa,
+    0x00f1f1f1, 0x00dddddd, 0x00595959, 0x00141414,
+    0x006c6c6c, 0x00929292, 0x00545454, 0x00d0d0d0,
+    0x00787878, 0x00707070, 0x00e3e3e3, 0x00494949,
+    0x00808080, 0x00505050, 0x00a7a7a7, 0x00f6f6f6,
+    0x00777777, 0x00939393, 0x00868686, 0x00838383,
+    0x002a2a2a, 0x00c7c7c7, 0x005b5b5b, 0x00e9e9e9,
+    0x00eeeeee, 0x008f8f8f, 0x00010101, 0x003d3d3d,
+};
+
+/*
+ * 256-entry lookup table. Every entry replicates a single byte value v
+ * into bytes 3, 1 and 0 of the 32-bit word and leaves byte 2 zero
+ * (0xvv00vvvv).
+ * NOTE(review): the name suggests Camellia s-box 3 pre-combined with the
+ * P-function byte placement; the round macros that index this table are
+ * outside this part of the file -- confirm there.
+ */
+static const PRUint32 camellia_sp3033[256] = {
+    0x38003838, 0x41004141, 0x16001616, 0x76007676,
+    0xd900d9d9, 0x93009393, 0x60006060, 0xf200f2f2,
+    0x72007272, 0xc200c2c2, 0xab00abab, 0x9a009a9a,
+    0x75007575, 0x06000606, 0x57005757, 0xa000a0a0,
+    0x91009191, 0xf700f7f7, 0xb500b5b5, 0xc900c9c9,
+    0xa200a2a2, 0x8c008c8c, 0xd200d2d2, 0x90009090,
+    0xf600f6f6, 0x07000707, 0xa700a7a7, 0x27002727,
+    0x8e008e8e, 0xb200b2b2, 0x49004949, 0xde00dede,
+    0x43004343, 0x5c005c5c, 0xd700d7d7, 0xc700c7c7,
+    0x3e003e3e, 0xf500f5f5, 0x8f008f8f, 0x67006767,
+    0x1f001f1f, 0x18001818, 0x6e006e6e, 0xaf00afaf,
+    0x2f002f2f, 0xe200e2e2, 0x85008585, 0x0d000d0d,
+    0x53005353, 0xf000f0f0, 0x9c009c9c, 0x65006565,
+    0xea00eaea, 0xa300a3a3, 0xae00aeae, 0x9e009e9e,
+    0xec00ecec, 0x80008080, 0x2d002d2d, 0x6b006b6b,
+    0xa800a8a8, 0x2b002b2b, 0x36003636, 0xa600a6a6,
+    0xc500c5c5, 0x86008686, 0x4d004d4d, 0x33003333,
+    0xfd00fdfd, 0x66006666, 0x58005858, 0x96009696,
+    0x3a003a3a, 0x09000909, 0x95009595, 0x10001010,
+    0x78007878, 0xd800d8d8, 0x42004242, 0xcc00cccc,
+    0xef00efef, 0x26002626, 0xe500e5e5, 0x61006161,
+    0x1a001a1a, 0x3f003f3f, 0x3b003b3b, 0x82008282,
+    0xb600b6b6, 0xdb00dbdb, 0xd400d4d4, 0x98009898,
+    0xe800e8e8, 0x8b008b8b, 0x02000202, 0xeb00ebeb,
+    0x0a000a0a, 0x2c002c2c, 0x1d001d1d, 0xb000b0b0,
+    0x6f006f6f, 0x8d008d8d, 0x88008888, 0x0e000e0e,
+    0x19001919, 0x87008787, 0x4e004e4e, 0x0b000b0b,
+    0xa900a9a9, 0x0c000c0c, 0x79007979, 0x11001111,
+    0x7f007f7f, 0x22002222, 0xe700e7e7, 0x59005959,
+    0xe100e1e1, 0xda00dada, 0x3d003d3d, 0xc800c8c8,
+    0x12001212, 0x04000404, 0x74007474, 0x54005454,
+    0x30003030, 0x7e007e7e, 0xb400b4b4, 0x28002828,
+    0x55005555, 0x68006868, 0x50005050, 0xbe00bebe,
+    0xd000d0d0, 0xc400c4c4, 0x31003131, 0xcb00cbcb,
+    0x2a002a2a, 0xad00adad, 0x0f000f0f, 0xca00caca,
+    0x70007070, 0xff00ffff, 0x32003232, 0x69006969,
+    0x08000808, 0x62006262, 0x00000000, 0x24002424,
+    0xd100d1d1, 0xfb00fbfb, 0xba00baba, 0xed00eded,
+    0x45004545, 0x81008181, 0x73007373, 0x6d006d6d,
+    0x84008484, 0x9f009f9f, 0xee00eeee, 0x4a004a4a,
+    0xc300c3c3, 0x2e002e2e, 0xc100c1c1, 0x01000101,
+    0xe600e6e6, 0x25002525, 0x48004848, 0x99009999,
+    0xb900b9b9, 0xb300b3b3, 0x7b007b7b, 0xf900f9f9,
+    0xce00cece, 0xbf00bfbf, 0xdf00dfdf, 0x71007171,
+    0x29002929, 0xcd00cdcd, 0x6c006c6c, 0x13001313,
+    0x64006464, 0x9b009b9b, 0x63006363, 0x9d009d9d,
+    0xc000c0c0, 0x4b004b4b, 0xb700b7b7, 0xa500a5a5,
+    0x89008989, 0x5f005f5f, 0xb100b1b1, 0x17001717,
+    0xf400f4f4, 0xbc00bcbc, 0xd300d3d3, 0x46004646,
+    0xcf00cfcf, 0x37003737, 0x5e005e5e, 0x47004747,
+    0x94009494, 0xfa00fafa, 0xfc00fcfc, 0x5b005b5b,
+    0x97009797, 0xfe00fefe, 0x5a005a5a, 0xac00acac,
+    0x3c003c3c, 0x4c004c4c, 0x03000303, 0x35003535,
+    0xf300f3f3, 0x23002323, 0xb800b8b8, 0x5d005d5d,
+    0x6a006a6a, 0x92009292, 0xd500d5d5, 0x21002121,
+    0x44004444, 0x51005151, 0xc600c6c6, 0x7d007d7d,
+    0x39003939, 0x83008383, 0xdc00dcdc, 0xaa00aaaa,
+    0x7c007c7c, 0x77007777, 0x56005656, 0x05000505,
+    0x1b001b1b, 0xa400a4a4, 0x15001515, 0x34003434,
+    0x1e001e1e, 0x1c001c1c, 0xf800f8f8, 0x52005252,
+    0x20002020, 0x14001414, 0xe900e9e9, 0xbd00bdbd,
+    0xdd00dddd, 0xe400e4e4, 0xa100a1a1, 0xe000e0e0,
+    0x8a008a8a, 0xf100f1f1, 0xd600d6d6, 0x7a007a7a,
+    0xbb00bbbb, 0xe300e3e3, 0x40004040, 0x4f004f4f,
+};
+
+/*
+ * 256-entry lookup table. Every entry replicates a single byte value v
+ * into bytes 3, 2 and 0 of the 32-bit word and leaves byte 1 zero
+ * (0xvvvv00vv).
+ * NOTE(review): the name suggests Camellia s-box 4 pre-combined with the
+ * P-function byte placement; the round macros that index this table are
+ * outside this part of the file -- confirm there.
+ */
+static const PRUint32 camellia_sp4404[256] = {
+    0x70700070, 0x2c2c002c, 0xb3b300b3, 0xc0c000c0,
+    0xe4e400e4, 0x57570057, 0xeaea00ea, 0xaeae00ae,
+    0x23230023, 0x6b6b006b, 0x45450045, 0xa5a500a5,
+    0xeded00ed, 0x4f4f004f, 0x1d1d001d, 0x92920092,
+    0x86860086, 0xafaf00af, 0x7c7c007c, 0x1f1f001f,
+    0x3e3e003e, 0xdcdc00dc, 0x5e5e005e, 0x0b0b000b,
+    0xa6a600a6, 0x39390039, 0xd5d500d5, 0x5d5d005d,
+    0xd9d900d9, 0x5a5a005a, 0x51510051, 0x6c6c006c,
+    0x8b8b008b, 0x9a9a009a, 0xfbfb00fb, 0xb0b000b0,
+    0x74740074, 0x2b2b002b, 0xf0f000f0, 0x84840084,
+    0xdfdf00df, 0xcbcb00cb, 0x34340034, 0x76760076,
+    0x6d6d006d, 0xa9a900a9, 0xd1d100d1, 0x04040004,
+    0x14140014, 0x3a3a003a, 0xdede00de, 0x11110011,
+    0x32320032, 0x9c9c009c, 0x53530053, 0xf2f200f2,
+    0xfefe00fe, 0xcfcf00cf, 0xc3c300c3, 0x7a7a007a,
+    0x24240024, 0xe8e800e8, 0x60600060, 0x69690069,
+    0xaaaa00aa, 0xa0a000a0, 0xa1a100a1, 0x62620062,
+    0x54540054, 0x1e1e001e, 0xe0e000e0, 0x64640064,
+    0x10100010, 0x00000000, 0xa3a300a3, 0x75750075,
+    0x8a8a008a, 0xe6e600e6, 0x09090009, 0xdddd00dd,
+    0x87870087, 0x83830083, 0xcdcd00cd, 0x90900090,
+    0x73730073, 0xf6f600f6, 0x9d9d009d, 0xbfbf00bf,
+    0x52520052, 0xd8d800d8, 0xc8c800c8, 0xc6c600c6,
+    0x81810081, 0x6f6f006f, 0x13130013, 0x63630063,
+    0xe9e900e9, 0xa7a700a7, 0x9f9f009f, 0xbcbc00bc,
+    0x29290029, 0xf9f900f9, 0x2f2f002f, 0xb4b400b4,
+    0x78780078, 0x06060006, 0xe7e700e7, 0x71710071,
+    0xd4d400d4, 0xabab00ab, 0x88880088, 0x8d8d008d,
+    0x72720072, 0xb9b900b9, 0xf8f800f8, 0xacac00ac,
+    0x36360036, 0x2a2a002a, 0x3c3c003c, 0xf1f100f1,
+    0x40400040, 0xd3d300d3, 0xbbbb00bb, 0x43430043,
+    0x15150015, 0xadad00ad, 0x77770077, 0x80800080,
+    0x82820082, 0xecec00ec, 0x27270027, 0xe5e500e5,
+    0x85850085, 0x35350035, 0x0c0c000c, 0x41410041,
+    0xefef00ef, 0x93930093, 0x19190019, 0x21210021,
+    0x0e0e000e, 0x4e4e004e, 0x65650065, 0xbdbd00bd,
+    0xb8b800b8, 0x8f8f008f, 0xebeb00eb, 0xcece00ce,
+    0x30300030, 0x5f5f005f, 0xc5c500c5, 0x1a1a001a,
+    0xe1e100e1, 0xcaca00ca, 0x47470047, 0x3d3d003d,
+    0x01010001, 0xd6d600d6, 0x56560056, 0x4d4d004d,
+    0x0d0d000d, 0x66660066, 0xcccc00cc, 0x2d2d002d,
+    0x12120012, 0x20200020, 0xb1b100b1, 0x99990099,
+    0x4c4c004c, 0xc2c200c2, 0x7e7e007e, 0x05050005,
+    0xb7b700b7, 0x31310031, 0x17170017, 0xd7d700d7,
+    0x58580058, 0x61610061, 0x1b1b001b, 0x1c1c001c,
+    0x0f0f000f, 0x16160016, 0x18180018, 0x22220022,
+    0x44440044, 0xb2b200b2, 0xb5b500b5, 0x91910091,
+    0x08080008, 0xa8a800a8, 0xfcfc00fc, 0x50500050,
+    0xd0d000d0, 0x7d7d007d, 0x89890089, 0x97970097,
+    0x5b5b005b, 0x95950095, 0xffff00ff, 0xd2d200d2,
+    0xc4c400c4, 0x48480048, 0xf7f700f7, 0xdbdb00db,
+    0x03030003, 0xdada00da, 0x3f3f003f, 0x94940094,
+    0x5c5c005c, 0x02020002, 0x4a4a004a, 0x33330033,
+    0x67670067, 0xf3f300f3, 0x7f7f007f, 0xe2e200e2,
+    0x9b9b009b, 0x26260026, 0x37370037, 0x3b3b003b,
+    0x96960096, 0x4b4b004b, 0xbebe00be, 0x2e2e002e,
+    0x79790079, 0x8c8c008c, 0x6e6e006e, 0x8e8e008e,
+    0xf5f500f5, 0xb6b600b6, 0xfdfd00fd, 0x59590059,
+    0x98980098, 0x6a6a006a, 0x46460046, 0xbaba00ba,
+    0x25250025, 0x42420042, 0xa2a200a2, 0xfafa00fa,
+    0x07070007, 0x55550055, 0xeeee00ee, 0x0a0a000a,
+    0x49490049, 0x68680068, 0x38380038, 0xa4a400a4,
+    0x28280028, 0x7b7b007b, 0xc9c900c9, 0xc1c100c1,
+    0xe3e300e3, 0xf4f400f4, 0xc7c700c7, 0x9e9e009e,
+};
+
+/**
+ * Stuff related to the Camellia key schedule
+ *
+ * subl(x) / subr(x) are shorthand for element x of arrays named subL and
+ * subR. The macros do not declare those arrays: each function that uses
+ * them must have its own subL[] / subR[] in scope.
+ */
+#define subl(x) subL[(x)]
+#define subr(x) subR[(x)]
+
+/**
+ * Build the Camellia subkey schedule for a 128-bit key.
+ *
+ * key    - 16 key bytes, read as four 32-bit words with GETU32.
+ * subkey - destination schedule. It is not named directly in this body,
+ *          so it is presumably what the CamelliaSubkeyL()/CamelliaSubkeyR()
+ *          macros (defined outside this function) index -- TODO confirm.
+ *
+ * The routine collects rotated copies of KL (the key itself) and of KA
+ * (derived from KL through four CAMELLIA_F calls) in the local
+ * subL[]/subR[] arrays, folds the whitening keys kw2 and kw4 into the
+ * other entries, and then writes schedule entries 0 and 2..24. Entry 1
+ * is never written here.
+ *
+ * NOTE(review): the key words and subL[]/subR[] are not cleared before
+ * returning.
+ */
+void
+camellia_setup128(const unsigned char *key, PRUint32 *subkey)
+{
+    PRUint32 kll, klr, krl, krr;
+    PRUint32 il, ir, t0, t1, w0, w1;
+    PRUint32 kw4l, kw4r, dw, tl, tr;
+    PRUint32 subL[26];
+    PRUint32 subR[26];
+    /* tmp is never named below; presumably the macros use it -- confirm */
+#if defined(CAMELLIA_NEED_TMP_VARIABLE)
+    PRUint32 tmp;
+#endif
+
+    /**
+     *  k == kll || klr || krl || krr (|| is concatenation)
+     */
+    kll = GETU32(key);
+    klr = GETU32(key + 4);
+    krl = GETU32(key + 8);
+    krr = GETU32(key + 12);
+    /**
+     * generate KL dependent subkeys
+     *
+     * The bit counts in the comments below are running totals and assume
+     * CAMELLIA_ROLDQ / CAMELLIA_ROLDQo32 rotate the 128-bit value
+     * (kll, klr, krl, krr) left in place by their last argument -- the
+     * macros are defined outside this function, confirm there.
+     */
+    subl(0) = kll;
+    subr(0) = klr;
+    subl(1) = krl;
+    subr(1) = krr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); /* KL: 15 bits */
+    subl(4) = kll;
+    subr(4) = klr;
+    subl(5) = krl;
+    subr(5) = krr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 30); /* KL: 45 bits */
+    subl(10) = kll;
+    subr(10) = klr;
+    subl(11) = krl;
+    subr(11) = krr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); /* KL: 60 bits */
+    /* only the right half of this rotation is kept */
+    subl(13) = krl;
+    subr(13) = krr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); /* KL: 77 bits */
+    subl(16) = kll;
+    subr(16) = klr;
+    subl(17) = krl;
+    subr(17) = krr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); /* KL: 94 bits */
+    subl(18) = kll;
+    subr(18) = klr;
+    subl(19) = krl;
+    subr(19) = krr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); /* KL: 111 bits */
+    subl(22) = kll;
+    subr(22) = klr;
+    subl(23) = krl;
+    subr(23) = krr;
+
+    /* generate KA */
+    /* start again from the unrotated key saved in entries 0 and 1 */
+    kll = subl(0);
+    klr = subr(0);
+    krl = subl(1);
+    krr = subr(1);
+    CAMELLIA_F(kll, klr,
+               CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R,
+               w0, w1, il, ir, t0, t1);
+    krl ^= w0;
+    krr ^= w1;
+    CAMELLIA_F(krl, krr,
+               CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R,
+               kll, klr, il, ir, t0, t1);
+    CAMELLIA_F(kll, klr,
+               CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R,
+               krl, krr, il, ir, t0, t1);
+    /*
+     * w0/w1 are not passed as outputs to the two calls above, so they
+     * presumably still hold the result of the SIGMA1 call -- confirm
+     * against the CAMELLIA_F definition.
+     */
+    krl ^= w0;
+    krr ^= w1;
+    CAMELLIA_F(krl, krr,
+               CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R,
+               w0, w1, il, ir, t0, t1);
+    kll ^= w0;
+    klr ^= w1;
+
+    /* generate KA dependent subkeys */
+    subl(2) = kll;
+    subr(2) = klr;
+    subl(3) = krl;
+    subr(3) = krr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); /* KA: 15 bits */
+    subl(6) = kll;
+    subr(6) = klr;
+    subl(7) = krl;
+    subr(7) = krr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); /* KA: 30 bits */
+    subl(8) = kll;
+    subr(8) = klr;
+    subl(9) = krl;
+    subr(9) = krr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); /* KA: 45 bits */
+    /* only the left half of this rotation is kept */
+    subl(12) = kll;
+    subr(12) = klr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); /* KA: 60 bits */
+    subl(14) = kll;
+    subr(14) = klr;
+    subl(15) = krl;
+    subr(15) = krr;
+    CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 34); /* KA: 94 bits */
+    subl(20) = kll;
+    subr(20) = klr;
+    subl(21) = krl;
+    subr(21) = krr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); /* KA: 111 bits */
+    subl(24) = kll;
+    subr(24) = klr;
+    subl(25) = krl;
+    subr(25) = krr;
+
+    /* absorb kw2 to other subkeys */
+    /*
+     * Entry 1 (kw2) is XORed into the odd-numbered entries in ascending
+     * order. Before moving past entries 9 and 17 the running value itself
+     * is transformed with those entries; dw is scratch for that step.
+     */
+    subl(3) ^= subl(1);
+    subr(3) ^= subr(1);
+    subl(5) ^= subl(1);
+    subr(5) ^= subr(1);
+    subl(7) ^= subl(1);
+    subr(7) ^= subr(1);
+    subl(1) ^= subr(1) & ~subr(9);
+    dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw);
+    subl(11) ^= subl(1);
+    subr(11) ^= subr(1);
+    subl(13) ^= subl(1);
+    subr(13) ^= subr(1);
+    subl(15) ^= subl(1);
+    subr(15) ^= subr(1);
+    subl(1) ^= subr(1) & ~subr(17);
+    dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw);
+    subl(19) ^= subl(1);
+    subr(19) ^= subr(1);
+    subl(21) ^= subl(1);
+    subr(21) ^= subr(1);
+    subl(23) ^= subl(1);
+    subr(23) ^= subr(1);
+    subl(24) ^= subl(1);
+    subr(24) ^= subr(1);
+
+    /* absorb kw4 to other subkeys */
+    /*
+     * Mirror image of the block above: entry 25 (kw4) is XORed into the
+     * even-numbered entries in descending order, and is transformed with
+     * entries 16 and 8 on the way down.
+     */
+    kw4l = subl(25);
+    kw4r = subr(25);
+    subl(22) ^= kw4l;
+    subr(22) ^= kw4r;
+    subl(20) ^= kw4l;
+    subr(20) ^= kw4r;
+    subl(18) ^= kw4l;
+    subr(18) ^= kw4r;
+    kw4l ^= kw4r & ~subr(16);
+    dw = kw4l & subl(16), kw4r ^= CAMELLIA_RL1(dw);
+    subl(14) ^= kw4l;
+    subr(14) ^= kw4r;
+    subl(12) ^= kw4l;
+    subr(12) ^= kw4r;
+    subl(10) ^= kw4l;
+    subr(10) ^= kw4r;
+    kw4l ^= kw4r & ~subr(8);
+    dw = kw4l & subl(8), kw4r ^= CAMELLIA_RL1(dw);
+    subl(6) ^= kw4l;
+    subr(6) ^= kw4r;
+    subl(4) ^= kw4l;
+    subr(4) ^= kw4r;
+    subl(2) ^= kw4l;
+    subr(2) ^= kw4r;
+    subl(0) ^= kw4l;
+    subr(0) ^= kw4r;
+
+    /* key XOR is end of F-function */
+    /*
+     * Entries 8, 9, 16 and 17 are copied through unchanged; they are the
+     * ones camellia_encrypt128 passes to CAMELLIA_FLS. tl/tr hold the
+     * neighbouring entry after it has been transformed with those values.
+     */
+    CamelliaSubkeyL(0) = subl(0) ^ subl(2);
+    CamelliaSubkeyR(0) = subr(0) ^ subr(2);
+    CamelliaSubkeyL(2) = subl(3);
+    CamelliaSubkeyR(2) = subr(3);
+    CamelliaSubkeyL(3) = subl(2) ^ subl(4);
+    CamelliaSubkeyR(3) = subr(2) ^ subr(4);
+    CamelliaSubkeyL(4) = subl(3) ^ subl(5);
+    CamelliaSubkeyR(4) = subr(3) ^ subr(5);
+    CamelliaSubkeyL(5) = subl(4) ^ subl(6);
+    CamelliaSubkeyR(5) = subr(4) ^ subr(6);
+    CamelliaSubkeyL(6) = subl(5) ^ subl(7);
+    CamelliaSubkeyR(6) = subr(5) ^ subr(7);
+    tl = subl(10) ^ (subr(10) & ~subr(8));
+    dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw);
+    CamelliaSubkeyL(7) = subl(6) ^ tl;
+    CamelliaSubkeyR(7) = subr(6) ^ tr;
+    CamelliaSubkeyL(8) = subl(8);
+    CamelliaSubkeyR(8) = subr(8);
+    CamelliaSubkeyL(9) = subl(9);
+    CamelliaSubkeyR(9) = subr(9);
+    tl = subl(7) ^ (subr(7) & ~subr(9));
+    dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw);
+    CamelliaSubkeyL(10) = tl ^ subl(11);
+    CamelliaSubkeyR(10) = tr ^ subr(11);
+    CamelliaSubkeyL(11) = subl(10) ^ subl(12);
+    CamelliaSubkeyR(11) = subr(10) ^ subr(12);
+    CamelliaSubkeyL(12) = subl(11) ^ subl(13);
+    CamelliaSubkeyR(12) = subr(11) ^ subr(13);
+    CamelliaSubkeyL(13) = subl(12) ^ subl(14);
+    CamelliaSubkeyR(13) = subr(12) ^ subr(14);
+    CamelliaSubkeyL(14) = subl(13) ^ subl(15);
+    CamelliaSubkeyR(14) = subr(13) ^ subr(15);
+    tl = subl(18) ^ (subr(18) & ~subr(16));
+    dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw);
+    CamelliaSubkeyL(15) = subl(14) ^ tl;
+    CamelliaSubkeyR(15) = subr(14) ^ tr;
+    CamelliaSubkeyL(16) = subl(16);
+    CamelliaSubkeyR(16) = subr(16);
+    CamelliaSubkeyL(17) = subl(17);
+    CamelliaSubkeyR(17) = subr(17);
+    tl = subl(15) ^ (subr(15) & ~subr(17));
+    dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw);
+    CamelliaSubkeyL(18) = tl ^ subl(19);
+    CamelliaSubkeyR(18) = tr ^ subr(19);
+    CamelliaSubkeyL(19) = subl(18) ^ subl(20);
+    CamelliaSubkeyR(19) = subr(18) ^ subr(20);
+    CamelliaSubkeyL(20) = subl(19) ^ subl(21);
+    CamelliaSubkeyR(20) = subr(19) ^ subr(21);
+    CamelliaSubkeyL(21) = subl(20) ^ subl(22);
+    CamelliaSubkeyR(21) = subr(20) ^ subr(22);
+    CamelliaSubkeyL(22) = subl(21) ^ subl(23);
+    CamelliaSubkeyR(22) = subr(21) ^ subr(23);
+    CamelliaSubkeyL(23) = subl(22);
+    CamelliaSubkeyR(23) = subr(22);
+    CamelliaSubkeyL(24) = subl(24) ^ subl(23);
+    CamelliaSubkeyR(24) = subr(24) ^ subr(23);
+
+    /* apply the inverse of the last half of P-function */
+    /*
+     * Applied to entries 2-7, 10-15 and 18-23 only; entries 0, 8, 9, 16,
+     * 17 and 24 are left as written above.
+     */
+    dw = CamelliaSubkeyL(2) ^ CamelliaSubkeyR(2), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(2) = CamelliaSubkeyL(2) ^ dw, CamelliaSubkeyL(2) = dw;
+    dw = CamelliaSubkeyL(3) ^ CamelliaSubkeyR(3), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(3) = CamelliaSubkeyL(3) ^ dw, CamelliaSubkeyL(3) = dw;
+    dw = CamelliaSubkeyL(4) ^ CamelliaSubkeyR(4), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(4) = CamelliaSubkeyL(4) ^ dw, CamelliaSubkeyL(4) = dw;
+    dw = CamelliaSubkeyL(5) ^ CamelliaSubkeyR(5), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(5) = CamelliaSubkeyL(5) ^ dw, CamelliaSubkeyL(5) = dw;
+    dw = CamelliaSubkeyL(6) ^ CamelliaSubkeyR(6), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(6) = CamelliaSubkeyL(6) ^ dw, CamelliaSubkeyL(6) = dw;
+    dw = CamelliaSubkeyL(7) ^ CamelliaSubkeyR(7), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(7) = CamelliaSubkeyL(7) ^ dw, CamelliaSubkeyL(7) = dw;
+    dw = CamelliaSubkeyL(10) ^ CamelliaSubkeyR(10), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(10) = CamelliaSubkeyL(10) ^ dw, CamelliaSubkeyL(10) = dw;
+    dw = CamelliaSubkeyL(11) ^ CamelliaSubkeyR(11), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(11) = CamelliaSubkeyL(11) ^ dw, CamelliaSubkeyL(11) = dw;
+    dw = CamelliaSubkeyL(12) ^ CamelliaSubkeyR(12), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(12) = CamelliaSubkeyL(12) ^ dw, CamelliaSubkeyL(12) = dw;
+    dw = CamelliaSubkeyL(13) ^ CamelliaSubkeyR(13), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(13) = CamelliaSubkeyL(13) ^ dw, CamelliaSubkeyL(13) = dw;
+    dw = CamelliaSubkeyL(14) ^ CamelliaSubkeyR(14), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(14) = CamelliaSubkeyL(14) ^ dw, CamelliaSubkeyL(14) = dw;
+    dw = CamelliaSubkeyL(15) ^ CamelliaSubkeyR(15), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(15) = CamelliaSubkeyL(15) ^ dw, CamelliaSubkeyL(15) = dw;
+    dw = CamelliaSubkeyL(18) ^ CamelliaSubkeyR(18), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(18) = CamelliaSubkeyL(18) ^ dw, CamelliaSubkeyL(18) = dw;
+    dw = CamelliaSubkeyL(19) ^ CamelliaSubkeyR(19), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(19) = CamelliaSubkeyL(19) ^ dw, CamelliaSubkeyL(19) = dw;
+    dw = CamelliaSubkeyL(20) ^ CamelliaSubkeyR(20), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(20) = CamelliaSubkeyL(20) ^ dw, CamelliaSubkeyL(20) = dw;
+    dw = CamelliaSubkeyL(21) ^ CamelliaSubkeyR(21), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(21) = CamelliaSubkeyL(21) ^ dw, CamelliaSubkeyL(21) = dw;
+    dw = CamelliaSubkeyL(22) ^ CamelliaSubkeyR(22), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(22) = CamelliaSubkeyL(22) ^ dw, CamelliaSubkeyL(22) = dw;
+    dw = CamelliaSubkeyL(23) ^ CamelliaSubkeyR(23), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(23) = CamelliaSubkeyL(23) ^ dw, CamelliaSubkeyL(23) = dw;
+
+    return;
+}
+
+/**
+ * Build the Camellia subkey schedule for a 256-bit key.
+ *
+ * key    - 32 key bytes, read as eight 32-bit words with GETU32. The
+ *          first four words form KL, the last four form KR.
+ * subkey - destination schedule. It is not named directly in this body,
+ *          so it is presumably what the CamelliaSubkeyL()/CamelliaSubkeyR()
+ *          macros (defined outside this function) index -- TODO confirm.
+ *
+ * The routine collects rotated copies of KL, KR, KA and KB in the local
+ * subL[]/subR[] arrays (KA and KB are derived with six CAMELLIA_F calls),
+ * folds the whitening keys kw2 and kw4 into the other entries, and then
+ * writes schedule entries 0 and 2..32. Entry 1 is never written here.
+ *
+ * NOTE(review): the key words and subL[]/subR[] are not cleared before
+ * returning.
+ */
+void
+camellia_setup256(const unsigned char *key, PRUint32 *subkey)
+{
+    PRUint32 kll, klr, krl, krr;     /* left half of key */
+    PRUint32 krll, krlr, krrl, krrr; /* right half of key */
+    PRUint32 il, ir, t0, t1, w0, w1; /* temporary variables */
+    PRUint32 kw4l, kw4r, dw, tl, tr;
+    PRUint32 subL[34];
+    PRUint32 subR[34];
+    /* tmp is never named below; presumably the macros use it -- confirm */
+#if defined(CAMELLIA_NEED_TMP_VARIABLE)
+    PRUint32 tmp;
+#endif
+
+    /**
+     *  key = (kll || klr || krl || krr || krll || krlr || krrl || krrr)
+     *  (|| is concatenation)
+     */
+
+    kll = GETU32(key);
+    klr = GETU32(key + 4);
+    krl = GETU32(key + 8);
+    krr = GETU32(key + 12);
+    krll = GETU32(key + 16);
+    krlr = GETU32(key + 20);
+    krrl = GETU32(key + 24);
+    krrr = GETU32(key + 28);
+
+    /* generate KL dependent subkeys */
+    /*
+     * The bit counts in the comments below are running totals and assume
+     * CAMELLIA_ROLDQ / CAMELLIA_ROLDQo32 rotate their 128-bit operand left
+     * in place by the last argument -- the macros are defined outside this
+     * function, confirm there.
+     */
+    subl(0) = kll;
+    subr(0) = klr;
+    subl(1) = krl;
+    subr(1) = krr;
+    CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 45); /* KL: 45 bits */
+    subl(12) = kll;
+    subr(12) = klr;
+    subl(13) = krl;
+    subr(13) = krr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); /* KL: 60 bits */
+    subl(16) = kll;
+    subr(16) = klr;
+    subl(17) = krl;
+    subr(17) = krr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); /* KL: 77 bits */
+    subl(22) = kll;
+    subr(22) = klr;
+    subl(23) = krl;
+    subr(23) = krr;
+    CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 34); /* KL: 111 bits */
+    subl(30) = kll;
+    subr(30) = klr;
+    subl(31) = krl;
+    subr(31) = krr;
+
+    /* generate KR dependent subkeys */
+    CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 15); /* KR: 15 bits */
+    subl(4) = krll;
+    subr(4) = krlr;
+    subl(5) = krrl;
+    subr(5) = krrr;
+    CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 15); /* KR: 30 bits */
+    subl(8) = krll;
+    subr(8) = krlr;
+    subl(9) = krrl;
+    subr(9) = krrr;
+    CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30); /* KR: 60 bits */
+    subl(18) = krll;
+    subr(18) = krlr;
+    subl(19) = krrl;
+    subr(19) = krrr;
+    CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 34); /* KR: 94 bits */
+    subl(26) = krll;
+    subr(26) = krlr;
+    subl(27) = krrl;
+    subr(27) = krrr;
+    /*
+     * Nothing is stored after this one: 15+15+30+34+34 = 128, so under the
+     * rotate-in-place assumption KR is back to its unrotated value for the
+     * KA/KB derivation below.
+     */
+    CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 34);
+
+    /* generate KA */
+    /* input is the unrotated KL (saved in entries 0 and 1) XOR KR */
+    kll = subl(0) ^ krll;
+    klr = subr(0) ^ krlr;
+    krl = subl(1) ^ krrl;
+    krr = subr(1) ^ krrr;
+    CAMELLIA_F(kll, klr,
+               CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R,
+               w0, w1, il, ir, t0, t1);
+    krl ^= w0;
+    krr ^= w1;
+    CAMELLIA_F(krl, krr,
+               CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R,
+               kll, klr, il, ir, t0, t1);
+    kll ^= krll;
+    klr ^= krlr;
+    CAMELLIA_F(kll, klr,
+               CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R,
+               krl, krr, il, ir, t0, t1);
+    /*
+     * w0/w1 are not passed as outputs to the two calls above, so they
+     * presumably still hold the result of the SIGMA1 call -- confirm
+     * against the CAMELLIA_F definition.
+     */
+    krl ^= w0 ^ krrl;
+    krr ^= w1 ^ krrr;
+    CAMELLIA_F(krl, krr,
+               CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R,
+               w0, w1, il, ir, t0, t1);
+    kll ^= w0;
+    klr ^= w1;
+
+    /* generate KB */
+    /* input is KA (now in kll..krr) XOR KR; the result replaces KR */
+    krll ^= kll;
+    krlr ^= klr;
+    krrl ^= krl;
+    krrr ^= krr;
+    CAMELLIA_F(krll, krlr,
+               CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R,
+               w0, w1, il, ir, t0, t1);
+    krrl ^= w0;
+    krrr ^= w1;
+    CAMELLIA_F(krrl, krrr,
+               CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R,
+               w0, w1, il, ir, t0, t1);
+    krll ^= w0;
+    krlr ^= w1;
+
+    /* generate KA dependent subkeys */
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); /* KA: 15 bits */
+    subl(6) = kll;
+    subr(6) = klr;
+    subl(7) = krl;
+    subr(7) = krr;
+    CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 30); /* KA: 45 bits */
+    subl(14) = kll;
+    subr(14) = klr;
+    subl(15) = krl;
+    subr(15) = krr;
+    /*
+     * Same four words shifted by one position (klr, krl, krr, kll): a
+     * further 32-bit rotation (77 bits in total) done by word selection.
+     */
+    subl(24) = klr;
+    subr(24) = krl;
+    subl(25) = krr;
+    subr(25) = kll;
+    CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 49); /* KA: 94 bits */
+    subl(28) = kll;
+    subr(28) = klr;
+    subl(29) = krl;
+    subr(29) = krr;
+
+    /* generate KB dependent subkeys */
+    subl(2) = krll;
+    subr(2) = krlr;
+    subl(3) = krrl;
+    subr(3) = krrr;
+    CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30); /* KB: 30 bits */
+    subl(10) = krll;
+    subr(10) = krlr;
+    subl(11) = krrl;
+    subr(11) = krrr;
+    CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30); /* KB: 60 bits */
+    subl(20) = krll;
+    subr(20) = krlr;
+    subl(21) = krrl;
+    subr(21) = krrr;
+    CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 51); /* KB: 111 bits */
+    subl(32) = krll;
+    subr(32) = krlr;
+    subl(33) = krrl;
+    subr(33) = krrr;
+
+    /* absorb kw2 to other subkeys */
+    /*
+     * Entry 1 (kw2) is XORed into the odd-numbered entries in ascending
+     * order. Before moving past entries 9, 17 and 25 the running value
+     * itself is transformed with those entries; dw is scratch for that
+     * step.
+     */
+    subl(3) ^= subl(1);
+    subr(3) ^= subr(1);
+    subl(5) ^= subl(1);
+    subr(5) ^= subr(1);
+    subl(7) ^= subl(1);
+    subr(7) ^= subr(1);
+    subl(1) ^= subr(1) & ~subr(9);
+    dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw);
+    subl(11) ^= subl(1);
+    subr(11) ^= subr(1);
+    subl(13) ^= subl(1);
+    subr(13) ^= subr(1);
+    subl(15) ^= subl(1);
+    subr(15) ^= subr(1);
+    subl(1) ^= subr(1) & ~subr(17);
+    dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw);
+    subl(19) ^= subl(1);
+    subr(19) ^= subr(1);
+    subl(21) ^= subl(1);
+    subr(21) ^= subr(1);
+    subl(23) ^= subl(1);
+    subr(23) ^= subr(1);
+    subl(1) ^= subr(1) & ~subr(25);
+    dw = subl(1) & subl(25), subr(1) ^= CAMELLIA_RL1(dw);
+    subl(27) ^= subl(1);
+    subr(27) ^= subr(1);
+    subl(29) ^= subl(1);
+    subr(29) ^= subr(1);
+    subl(31) ^= subl(1);
+    subr(31) ^= subr(1);
+    subl(32) ^= subl(1);
+    subr(32) ^= subr(1);
+
+    /* absorb kw4 to other subkeys */
+    /*
+     * Mirror image of the block above: entry 33 (kw4) is XORed into the
+     * even-numbered entries in descending order, and is transformed with
+     * entries 24, 16 and 8 on the way down.
+     */
+    kw4l = subl(33);
+    kw4r = subr(33);
+    subl(30) ^= kw4l;
+    subr(30) ^= kw4r;
+    subl(28) ^= kw4l;
+    subr(28) ^= kw4r;
+    subl(26) ^= kw4l;
+    subr(26) ^= kw4r;
+    kw4l ^= kw4r & ~subr(24);
+    dw = kw4l & subl(24), kw4r ^= CAMELLIA_RL1(dw);
+    subl(22) ^= kw4l;
+    subr(22) ^= kw4r;
+    subl(20) ^= kw4l;
+    subr(20) ^= kw4r;
+    subl(18) ^= kw4l;
+    subr(18) ^= kw4r;
+    kw4l ^= kw4r & ~subr(16);
+    dw = kw4l & subl(16), kw4r ^= CAMELLIA_RL1(dw);
+    subl(14) ^= kw4l;
+    subr(14) ^= kw4r;
+    subl(12) ^= kw4l;
+    subr(12) ^= kw4r;
+    subl(10) ^= kw4l;
+    subr(10) ^= kw4r;
+    kw4l ^= kw4r & ~subr(8);
+    dw = kw4l & subl(8), kw4r ^= CAMELLIA_RL1(dw);
+    subl(6) ^= kw4l;
+    subr(6) ^= kw4r;
+    subl(4) ^= kw4l;
+    subr(4) ^= kw4r;
+    subl(2) ^= kw4l;
+    subr(2) ^= kw4r;
+    subl(0) ^= kw4l;
+    subr(0) ^= kw4r;
+
+    /* key XOR is end of F-function */
+    /*
+     * Entries 8, 9, 16, 17, 24 and 25 are copied through unchanged. tl/tr
+     * hold the neighbouring entry after it has been transformed with
+     * those values.
+     */
+    CamelliaSubkeyL(0) = subl(0) ^ subl(2);
+    CamelliaSubkeyR(0) = subr(0) ^ subr(2);
+    CamelliaSubkeyL(2) = subl(3);
+    CamelliaSubkeyR(2) = subr(3);
+    CamelliaSubkeyL(3) = subl(2) ^ subl(4);
+    CamelliaSubkeyR(3) = subr(2) ^ subr(4);
+    CamelliaSubkeyL(4) = subl(3) ^ subl(5);
+    CamelliaSubkeyR(4) = subr(3) ^ subr(5);
+    CamelliaSubkeyL(5) = subl(4) ^ subl(6);
+    CamelliaSubkeyR(5) = subr(4) ^ subr(6);
+    CamelliaSubkeyL(6) = subl(5) ^ subl(7);
+    CamelliaSubkeyR(6) = subr(5) ^ subr(7);
+    tl = subl(10) ^ (subr(10) & ~subr(8));
+    dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw);
+    CamelliaSubkeyL(7) = subl(6) ^ tl;
+    CamelliaSubkeyR(7) = subr(6) ^ tr;
+    CamelliaSubkeyL(8) = subl(8);
+    CamelliaSubkeyR(8) = subr(8);
+    CamelliaSubkeyL(9) = subl(9);
+    CamelliaSubkeyR(9) = subr(9);
+    tl = subl(7) ^ (subr(7) & ~subr(9));
+    dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw);
+    CamelliaSubkeyL(10) = tl ^ subl(11);
+    CamelliaSubkeyR(10) = tr ^ subr(11);
+    CamelliaSubkeyL(11) = subl(10) ^ subl(12);
+    CamelliaSubkeyR(11) = subr(10) ^ subr(12);
+    CamelliaSubkeyL(12) = subl(11) ^ subl(13);
+    CamelliaSubkeyR(12) = subr(11) ^ subr(13);
+    CamelliaSubkeyL(13) = subl(12) ^ subl(14);
+    CamelliaSubkeyR(13) = subr(12) ^ subr(14);
+    CamelliaSubkeyL(14) = subl(13) ^ subl(15);
+    CamelliaSubkeyR(14) = subr(13) ^ subr(15);
+    tl = subl(18) ^ (subr(18) & ~subr(16));
+    dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw);
+    CamelliaSubkeyL(15) = subl(14) ^ tl;
+    CamelliaSubkeyR(15) = subr(14) ^ tr;
+    CamelliaSubkeyL(16) = subl(16);
+    CamelliaSubkeyR(16) = subr(16);
+    CamelliaSubkeyL(17) = subl(17);
+    CamelliaSubkeyR(17) = subr(17);
+    tl = subl(15) ^ (subr(15) & ~subr(17));
+    dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw);
+    CamelliaSubkeyL(18) = tl ^ subl(19);
+    CamelliaSubkeyR(18) = tr ^ subr(19);
+    CamelliaSubkeyL(19) = subl(18) ^ subl(20);
+    CamelliaSubkeyR(19) = subr(18) ^ subr(20);
+    CamelliaSubkeyL(20) = subl(19) ^ subl(21);
+    CamelliaSubkeyR(20) = subr(19) ^ subr(21);
+    CamelliaSubkeyL(21) = subl(20) ^ subl(22);
+    CamelliaSubkeyR(21) = subr(20) ^ subr(22);
+    CamelliaSubkeyL(22) = subl(21) ^ subl(23);
+    CamelliaSubkeyR(22) = subr(21) ^ subr(23);
+    tl = subl(26) ^ (subr(26) & ~subr(24));
+    dw = tl & subl(24), tr = subr(26) ^ CAMELLIA_RL1(dw);
+    CamelliaSubkeyL(23) = subl(22) ^ tl;
+    CamelliaSubkeyR(23) = subr(22) ^ tr;
+    CamelliaSubkeyL(24) = subl(24);
+    CamelliaSubkeyR(24) = subr(24);
+    CamelliaSubkeyL(25) = subl(25);
+    CamelliaSubkeyR(25) = subr(25);
+    tl = subl(23) ^ (subr(23) & ~subr(25));
+    dw = tl & subl(25), tr = subr(23) ^ CAMELLIA_RL1(dw);
+    CamelliaSubkeyL(26) = tl ^ subl(27);
+    CamelliaSubkeyR(26) = tr ^ subr(27);
+    CamelliaSubkeyL(27) = subl(26) ^ subl(28);
+    CamelliaSubkeyR(27) = subr(26) ^ subr(28);
+    CamelliaSubkeyL(28) = subl(27) ^ subl(29);
+    CamelliaSubkeyR(28) = subr(27) ^ subr(29);
+    CamelliaSubkeyL(29) = subl(28) ^ subl(30);
+    CamelliaSubkeyR(29) = subr(28) ^ subr(30);
+    CamelliaSubkeyL(30) = subl(29) ^ subl(31);
+    CamelliaSubkeyR(30) = subr(29) ^ subr(31);
+    CamelliaSubkeyL(31) = subl(30);
+    CamelliaSubkeyR(31) = subr(30);
+    CamelliaSubkeyL(32) = subl(32) ^ subl(31);
+    CamelliaSubkeyR(32) = subr(32) ^ subr(31);
+
+    /* apply the inverse of the last half of P-function */
+    /*
+     * Applied to entries 2-7, 10-15, 18-23 and 26-31 only; entries 0, 8,
+     * 9, 16, 17, 24, 25 and 32 are left as written above.
+     */
+    dw = CamelliaSubkeyL(2) ^ CamelliaSubkeyR(2), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(2) = CamelliaSubkeyL(2) ^ dw, CamelliaSubkeyL(2) = dw;
+    dw = CamelliaSubkeyL(3) ^ CamelliaSubkeyR(3), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(3) = CamelliaSubkeyL(3) ^ dw, CamelliaSubkeyL(3) = dw;
+    dw = CamelliaSubkeyL(4) ^ CamelliaSubkeyR(4), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(4) = CamelliaSubkeyL(4) ^ dw, CamelliaSubkeyL(4) = dw;
+    dw = CamelliaSubkeyL(5) ^ CamelliaSubkeyR(5), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(5) = CamelliaSubkeyL(5) ^ dw, CamelliaSubkeyL(5) = dw;
+    dw = CamelliaSubkeyL(6) ^ CamelliaSubkeyR(6), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(6) = CamelliaSubkeyL(6) ^ dw, CamelliaSubkeyL(6) = dw;
+    dw = CamelliaSubkeyL(7) ^ CamelliaSubkeyR(7), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(7) = CamelliaSubkeyL(7) ^ dw, CamelliaSubkeyL(7) = dw;
+    dw = CamelliaSubkeyL(10) ^ CamelliaSubkeyR(10), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(10) = CamelliaSubkeyL(10) ^ dw, CamelliaSubkeyL(10) = dw;
+    dw = CamelliaSubkeyL(11) ^ CamelliaSubkeyR(11), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(11) = CamelliaSubkeyL(11) ^ dw, CamelliaSubkeyL(11) = dw;
+    dw = CamelliaSubkeyL(12) ^ CamelliaSubkeyR(12), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(12) = CamelliaSubkeyL(12) ^ dw, CamelliaSubkeyL(12) = dw;
+    dw = CamelliaSubkeyL(13) ^ CamelliaSubkeyR(13), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(13) = CamelliaSubkeyL(13) ^ dw, CamelliaSubkeyL(13) = dw;
+    dw = CamelliaSubkeyL(14) ^ CamelliaSubkeyR(14), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(14) = CamelliaSubkeyL(14) ^ dw, CamelliaSubkeyL(14) = dw;
+    dw = CamelliaSubkeyL(15) ^ CamelliaSubkeyR(15), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(15) = CamelliaSubkeyL(15) ^ dw, CamelliaSubkeyL(15) = dw;
+    dw = CamelliaSubkeyL(18) ^ CamelliaSubkeyR(18), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(18) = CamelliaSubkeyL(18) ^ dw, CamelliaSubkeyL(18) = dw;
+    dw = CamelliaSubkeyL(19) ^ CamelliaSubkeyR(19), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(19) = CamelliaSubkeyL(19) ^ dw, CamelliaSubkeyL(19) = dw;
+    dw = CamelliaSubkeyL(20) ^ CamelliaSubkeyR(20), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(20) = CamelliaSubkeyL(20) ^ dw, CamelliaSubkeyL(20) = dw;
+    dw = CamelliaSubkeyL(21) ^ CamelliaSubkeyR(21), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(21) = CamelliaSubkeyL(21) ^ dw, CamelliaSubkeyL(21) = dw;
+    dw = CamelliaSubkeyL(22) ^ CamelliaSubkeyR(22), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(22) = CamelliaSubkeyL(22) ^ dw, CamelliaSubkeyL(22) = dw;
+    dw = CamelliaSubkeyL(23) ^ CamelliaSubkeyR(23), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(23) = CamelliaSubkeyL(23) ^ dw, CamelliaSubkeyL(23) = dw;
+    dw = CamelliaSubkeyL(26) ^ CamelliaSubkeyR(26), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(26) = CamelliaSubkeyL(26) ^ dw, CamelliaSubkeyL(26) = dw;
+    dw = CamelliaSubkeyL(27) ^ CamelliaSubkeyR(27), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(27) = CamelliaSubkeyL(27) ^ dw, CamelliaSubkeyL(27) = dw;
+    dw = CamelliaSubkeyL(28) ^ CamelliaSubkeyR(28), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(28) = CamelliaSubkeyL(28) ^ dw, CamelliaSubkeyL(28) = dw;
+    dw = CamelliaSubkeyL(29) ^ CamelliaSubkeyR(29), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(29) = CamelliaSubkeyL(29) ^ dw, CamelliaSubkeyL(29) = dw;
+    dw = CamelliaSubkeyL(30) ^ CamelliaSubkeyR(30), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(30) = CamelliaSubkeyL(30) ^ dw, CamelliaSubkeyL(30) = dw;
+    dw = CamelliaSubkeyL(31) ^ CamelliaSubkeyR(31), dw = CAMELLIA_RL8(dw);
+    CamelliaSubkeyR(31) = CamelliaSubkeyL(31) ^ dw, CamelliaSubkeyL(31) = dw;
+
+    return;
+}
+
+/**
+ * Build the Camellia subkey schedule for a 192-bit key.
+ *
+ * key    - 24 key bytes.
+ * subkey - destination schedule, filled in by camellia_setup256.
+ *
+ * The key is widened to 32 bytes by appending the bitwise complement of
+ * its last 8 bytes (bytes 16..23), and the result is handed to
+ * camellia_setup256. The complement is taken byte by byte; this gives the
+ * same 8 bytes as complementing two 32-bit words copied in and out with
+ * memcpy, whatever the host byte order.
+ */
+void
+camellia_setup192(const unsigned char *key, PRUint32 *subkey)
+{
+    unsigned char kk[32];
+    /* volatile view of kk so the final wipe is not dropped as dead stores */
+    volatile unsigned char *wipe = kk;
+    size_t i;
+
+    memcpy(kk, key, 24);
+    for (i = 0; i < 8; i++) {
+        kk[24 + i] = (unsigned char)~key[16 + i];
+    }
+    camellia_setup256(kk, subkey);
+
+    /* kk is a copy of the caller's raw key: do not leave it on the stack */
+    for (i = 0; i < sizeof(kk); i++) {
+        wipe[i] = 0;
+    }
+    return;
+}
+
+/**
+ * Stuff related to camellia encryption/decryption
+ *
+ * camellia_encrypt128 encrypts exactly one 16-byte block.  It is the block
+ * function the ECB/CBC workers select when cx->keysize == 16.
+ *
+ * Structure: XOR with subkey 0, then three groups of six CAMELLIA_ROUNDSM
+ * steps (subkeys 2-7, 10-15, 18-23) separated by CAMELLIA_FLS steps
+ * (subkeys 8/9 and 16/17), then XOR with subkey 24.
+ *
+ * subkey - expanded key; not named in the body, so presumably read through
+ *          the CamelliaSubkeyL/CamelliaSubkeyR macros (defined elsewhere)
+ * output - receives the 16 result bytes
+ * input  - the 16 bytes to encrypt
+ *
+ * Always returns SECSuccess.
+ */
+SECStatus NO_SANITIZE_ALIGNMENT
+camellia_encrypt128(const PRUint32 *subkey,
+                    unsigned char *output,
+                    const unsigned char *input)
+{
+    PRUint32 il, ir, t0, t1; /* scratch words handed to the round macros */
+    PRUint32 io[4];          /* the block being processed, as four 32-bit words */
+#if defined(CAMELLIA_NEED_TMP_VARIABLE)
+    PRUint32 tmp; /* not referenced directly here; presumably used inside the macros */
+#endif
+
+    /* load the block as four 32-bit words */
+    io[0] = GETU32(input);
+    io[1] = GETU32(input + 4);
+    io[2] = GETU32(input + 8);
+    io[3] = GETU32(input + 12);
+
+    /* pre-whitening of the first two words only (original note: kw2 is absorbed) */
+    io[0] ^= CamelliaSubkeyL(0);
+    io[1] ^= CamelliaSubkeyR(0);
+    /* main iteration: first group of six steps, subkeys 2-7 */
+
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(2), CamelliaSubkeyR(2),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(3), CamelliaSubkeyR(3),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(4), CamelliaSubkeyR(4),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(5), CamelliaSubkeyR(5),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(6), CamelliaSubkeyR(6),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(7), CamelliaSubkeyR(7),
+                     io[0], io[1], il, ir, t0, t1);
+
+    /* FLS step between groups, subkeys 8 and 9 */
+    CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+                 CamelliaSubkeyL(8), CamelliaSubkeyR(8),
+                 CamelliaSubkeyL(9), CamelliaSubkeyR(9),
+                 t0, t1, il, ir);
+
+    /* second group of six steps, subkeys 10-15 */
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(10), CamelliaSubkeyR(10),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(11), CamelliaSubkeyR(11),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(12), CamelliaSubkeyR(12),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(13), CamelliaSubkeyR(13),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(14), CamelliaSubkeyR(14),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(15), CamelliaSubkeyR(15),
+                     io[0], io[1], il, ir, t0, t1);
+
+    /* FLS step between groups, subkeys 16 and 17 */
+    CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+                 CamelliaSubkeyL(16), CamelliaSubkeyR(16),
+                 CamelliaSubkeyL(17), CamelliaSubkeyR(17),
+                 t0, t1, il, ir);
+
+    /* third group of six steps, subkeys 18-23 */
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(18), CamelliaSubkeyR(18),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(19), CamelliaSubkeyR(19),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(20), CamelliaSubkeyR(20),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(21), CamelliaSubkeyR(21),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(22), CamelliaSubkeyR(22),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(23), CamelliaSubkeyR(23),
+                     io[0], io[1], il, ir, t0, t1);
+
+    /* post-whitening of the last two words only (original note: "but kw4") */
+    io[2] ^= CamelliaSubkeyL(24);
+    io[3] ^= CamelliaSubkeyR(24);
+
+    /* exchange the two 64-bit halves (io[0..1] <-> io[2..3]) before output */
+    t0 = io[0];
+    t1 = io[1];
+    io[0] = io[2];
+    io[1] = io[3];
+    io[2] = t0;
+    io[3] = t1;
+
+    /* store the four words to the 16-byte output */
+    PUTU32(output, io[0]);
+    PUTU32(output + 4, io[1]);
+    PUTU32(output + 8, io[2]);
+    PUTU32(output + 12, io[3]);
+
+    return SECSuccess;
+}
+
+/*
+ * camellia_decrypt128 decrypts exactly one 16-byte block.  It is the block
+ * function the ECB/CBC workers select when cx->keysize == 16.
+ *
+ * Same step sequence as camellia_encrypt128 with the subkeys consumed in
+ * the opposite order: XOR with subkey 24, steps with subkeys 23-18, FLS
+ * with 17/16, steps 15-10, FLS with 9/8, steps 7-2, XOR with subkey 0.
+ *
+ * subkey - expanded key (presumably read through CamelliaSubkeyL/R)
+ * output - receives the 16 result bytes
+ * input  - the 16 bytes to decrypt
+ *
+ * Always returns SECSuccess.
+ */
+SECStatus NO_SANITIZE_ALIGNMENT
+camellia_decrypt128(const PRUint32 *subkey,
+                    unsigned char *output,
+                    const unsigned char *input)
+{
+    PRUint32 il, ir, t0, t1; /* temporary variables */
+    PRUint32 io[4];          /* the block being processed, as four 32-bit words */
+#if defined(CAMELLIA_NEED_TMP_VARIABLE)
+    PRUint32 tmp; /* not referenced directly here; presumably used inside the macros */
+#endif
+
+    /* load the block as four 32-bit words */
+    io[0] = GETU32(input);
+    io[1] = GETU32(input + 4);
+    io[2] = GETU32(input + 8);
+    io[3] = GETU32(input + 12);
+
+    /* pre-whitening with the last subkey (24); first two words only */
+    io[0] ^= CamelliaSubkeyL(24);
+    io[1] ^= CamelliaSubkeyR(24);
+
+    /* main iteration: first group of six steps, subkeys 23 down to 18 */
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(23), CamelliaSubkeyR(23),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(22), CamelliaSubkeyR(22),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(21), CamelliaSubkeyR(21),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(20), CamelliaSubkeyR(20),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(19), CamelliaSubkeyR(19),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(18), CamelliaSubkeyR(18),
+                     io[0], io[1], il, ir, t0, t1);
+
+    /* FLS step; note the subkey pair is passed as 17 then 16 */
+    CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+                 CamelliaSubkeyL(17), CamelliaSubkeyR(17),
+                 CamelliaSubkeyL(16), CamelliaSubkeyR(16),
+                 t0, t1, il, ir);
+
+    /* second group of six steps, subkeys 15 down to 10 */
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(15), CamelliaSubkeyR(15),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(14), CamelliaSubkeyR(14),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(13), CamelliaSubkeyR(13),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(12), CamelliaSubkeyR(12),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(11), CamelliaSubkeyR(11),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(10), CamelliaSubkeyR(10),
+                     io[0], io[1], il, ir, t0, t1);
+
+    /* FLS step; subkey pair passed as 9 then 8 */
+    CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+                 CamelliaSubkeyL(9), CamelliaSubkeyR(9),
+                 CamelliaSubkeyL(8), CamelliaSubkeyR(8),
+                 t0, t1, il, ir);
+
+    /* third group of six steps, subkeys 7 down to 2 */
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(7), CamelliaSubkeyR(7),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(6), CamelliaSubkeyR(6),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(5), CamelliaSubkeyR(5),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(4), CamelliaSubkeyR(4),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(3), CamelliaSubkeyR(3),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(2), CamelliaSubkeyR(2),
+                     io[0], io[1], il, ir, t0, t1);
+
+    /* post-whitening with subkey 0; last two words only */
+    io[2] ^= CamelliaSubkeyL(0);
+    io[3] ^= CamelliaSubkeyR(0);
+
+    /* exchange the two 64-bit halves (io[0..1] <-> io[2..3]) before output */
+    t0 = io[0];
+    t1 = io[1];
+    io[0] = io[2];
+    io[1] = io[3];
+    io[2] = t0;
+    io[3] = t1;
+
+    /* store the four words to the 16-byte output */
+    PUTU32(output, io[0]);
+    PUTU32(output + 4, io[1]);
+    PUTU32(output + 8, io[2]);
+    PUTU32(output + 12, io[3]);
+
+    return SECSuccess;
+}
+
+/**
+ * stuff for 192 and 256bit encryption/decryption
+ *
+ * camellia_encrypt256 encrypts exactly one 16-byte block.  The ECB/CBC
+ * workers select it for every cx->keysize other than 16, i.e. for both
+ * 24- and 32-byte keys.
+ *
+ * Structure: XOR with subkey 0, then four groups of six CAMELLIA_ROUNDSM
+ * steps (subkeys 2-7, 10-15, 18-23, 26-31) separated by CAMELLIA_FLS steps
+ * (subkeys 8/9, 16/17, 24/25), then XOR with subkey 32.
+ *
+ * subkey - expanded key (presumably read through CamelliaSubkeyL/R)
+ * output - receives the 16 result bytes
+ * input  - the 16 bytes to encrypt
+ *
+ * Always returns SECSuccess.
+ */
+SECStatus NO_SANITIZE_ALIGNMENT
+camellia_encrypt256(const PRUint32 *subkey,
+                    unsigned char *output,
+                    const unsigned char *input)
+{
+    PRUint32 il, ir, t0, t1; /* temporary variables */
+    PRUint32 io[4];          /* the block being processed, as four 32-bit words */
+#if defined(CAMELLIA_NEED_TMP_VARIABLE)
+    PRUint32 tmp; /* not referenced directly here; presumably used inside the macros */
+#endif
+
+    /* load the block as four 32-bit words */
+    io[0] = GETU32(input);
+    io[1] = GETU32(input + 4);
+    io[2] = GETU32(input + 8);
+    io[3] = GETU32(input + 12);
+
+    /* pre-whitening of the first two words only (original note: kw2 is absorbed) */
+    io[0] ^= CamelliaSubkeyL(0);
+    io[1] ^= CamelliaSubkeyR(0);
+
+    /* main iteration: first group of six steps, subkeys 2-7 */
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(2), CamelliaSubkeyR(2),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(3), CamelliaSubkeyR(3),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(4), CamelliaSubkeyR(4),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(5), CamelliaSubkeyR(5),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(6), CamelliaSubkeyR(6),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(7), CamelliaSubkeyR(7),
+                     io[0], io[1], il, ir, t0, t1);
+
+    /* FLS step between groups, subkeys 8 and 9 */
+    CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+                 CamelliaSubkeyL(8), CamelliaSubkeyR(8),
+                 CamelliaSubkeyL(9), CamelliaSubkeyR(9),
+                 t0, t1, il, ir);
+
+    /* second group of six steps, subkeys 10-15 */
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(10), CamelliaSubkeyR(10),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(11), CamelliaSubkeyR(11),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(12), CamelliaSubkeyR(12),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(13), CamelliaSubkeyR(13),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(14), CamelliaSubkeyR(14),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(15), CamelliaSubkeyR(15),
+                     io[0], io[1], il, ir, t0, t1);
+
+    /* FLS step between groups, subkeys 16 and 17 */
+    CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+                 CamelliaSubkeyL(16), CamelliaSubkeyR(16),
+                 CamelliaSubkeyL(17), CamelliaSubkeyR(17),
+                 t0, t1, il, ir);
+
+    /* third group of six steps, subkeys 18-23 */
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(18), CamelliaSubkeyR(18),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(19), CamelliaSubkeyR(19),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(20), CamelliaSubkeyR(20),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(21), CamelliaSubkeyR(21),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(22), CamelliaSubkeyR(22),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(23), CamelliaSubkeyR(23),
+                     io[0], io[1], il, ir, t0, t1);
+
+    /* FLS step between groups, subkeys 24 and 25 */
+    CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+                 CamelliaSubkeyL(24), CamelliaSubkeyR(24),
+                 CamelliaSubkeyL(25), CamelliaSubkeyR(25),
+                 t0, t1, il, ir);
+
+    /* fourth group of six steps, subkeys 26-31 */
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(26), CamelliaSubkeyR(26),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(27), CamelliaSubkeyR(27),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(28), CamelliaSubkeyR(28),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(29), CamelliaSubkeyR(29),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(30), CamelliaSubkeyR(30),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(31), CamelliaSubkeyR(31),
+                     io[0], io[1], il, ir, t0, t1);
+
+    /* post-whitening of the last two words only (original note: "but kw4") */
+    io[2] ^= CamelliaSubkeyL(32);
+    io[3] ^= CamelliaSubkeyR(32);
+
+    /* exchange the two 64-bit halves (io[0..1] <-> io[2..3]) before output */
+    t0 = io[0];
+    t1 = io[1];
+    io[0] = io[2];
+    io[1] = io[3];
+    io[2] = t0;
+    io[3] = t1;
+
+    /* store the four words to the 16-byte output */
+    PUTU32(output, io[0]);
+    PUTU32(output + 4, io[1]);
+    PUTU32(output + 8, io[2]);
+    PUTU32(output + 12, io[3]);
+
+    return SECSuccess;
+}
+
+/*
+ * camellia_decrypt256 decrypts exactly one 16-byte block.  The ECB/CBC
+ * workers select it for every cx->keysize other than 16, i.e. for both
+ * 24- and 32-byte keys.
+ *
+ * Same step sequence as camellia_encrypt256 with the subkeys consumed in
+ * the opposite order: XOR with subkey 32, steps 31-26, FLS 25/24,
+ * steps 23-18, FLS 17/16, steps 15-10, FLS 9/8, steps 7-2, XOR with
+ * subkey 0.
+ *
+ * subkey - expanded key (presumably read through CamelliaSubkeyL/R)
+ * output - receives the 16 result bytes
+ * input  - the 16 bytes to decrypt
+ *
+ * Always returns SECSuccess.
+ */
+SECStatus NO_SANITIZE_ALIGNMENT
+camellia_decrypt256(const PRUint32 *subkey,
+                    unsigned char *output,
+                    const unsigned char *input)
+{
+    PRUint32 il, ir, t0, t1; /* temporary variables */
+    PRUint32 io[4];          /* the block being processed, as four 32-bit words */
+#if defined(CAMELLIA_NEED_TMP_VARIABLE)
+    PRUint32 tmp; /* not referenced directly here; presumably used inside the macros */
+#endif
+
+    /* load the block as four 32-bit words */
+    io[0] = GETU32(input);
+    io[1] = GETU32(input + 4);
+    io[2] = GETU32(input + 8);
+    io[3] = GETU32(input + 12);
+
+    /* pre-whitening with the last subkey (32); first two words only */
+    io[0] ^= CamelliaSubkeyL(32);
+    io[1] ^= CamelliaSubkeyR(32);
+
+    /* main iteration: first group of six steps, subkeys 31 down to 26 */
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(31), CamelliaSubkeyR(31),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(30), CamelliaSubkeyR(30),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(29), CamelliaSubkeyR(29),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(28), CamelliaSubkeyR(28),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(27), CamelliaSubkeyR(27),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(26), CamelliaSubkeyR(26),
+                     io[0], io[1], il, ir, t0, t1);
+
+    /* FLS step; subkey pair passed as 25 then 24 */
+    CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+                 CamelliaSubkeyL(25), CamelliaSubkeyR(25),
+                 CamelliaSubkeyL(24), CamelliaSubkeyR(24),
+                 t0, t1, il, ir);
+
+    /* second group of six steps, subkeys 23 down to 18 */
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(23), CamelliaSubkeyR(23),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(22), CamelliaSubkeyR(22),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(21), CamelliaSubkeyR(21),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(20), CamelliaSubkeyR(20),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(19), CamelliaSubkeyR(19),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(18), CamelliaSubkeyR(18),
+                     io[0], io[1], il, ir, t0, t1);
+
+    /* FLS step; subkey pair passed as 17 then 16 */
+    CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+                 CamelliaSubkeyL(17), CamelliaSubkeyR(17),
+                 CamelliaSubkeyL(16), CamelliaSubkeyR(16),
+                 t0, t1, il, ir);
+
+    /* third group of six steps, subkeys 15 down to 10 */
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(15), CamelliaSubkeyR(15),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(14), CamelliaSubkeyR(14),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(13), CamelliaSubkeyR(13),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(12), CamelliaSubkeyR(12),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(11), CamelliaSubkeyR(11),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(10), CamelliaSubkeyR(10),
+                     io[0], io[1], il, ir, t0, t1);
+
+    /* FLS step; subkey pair passed as 9 then 8 */
+    CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+                 CamelliaSubkeyL(9), CamelliaSubkeyR(9),
+                 CamelliaSubkeyL(8), CamelliaSubkeyR(8),
+                 t0, t1, il, ir);
+
+    /* fourth group of six steps, subkeys 7 down to 2 */
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(7), CamelliaSubkeyR(7),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(6), CamelliaSubkeyR(6),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(5), CamelliaSubkeyR(5),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(4), CamelliaSubkeyR(4),
+                     io[0], io[1], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[0], io[1],
+                     CamelliaSubkeyL(3), CamelliaSubkeyR(3),
+                     io[2], io[3], il, ir, t0, t1);
+    CAMELLIA_ROUNDSM(io[2], io[3],
+                     CamelliaSubkeyL(2), CamelliaSubkeyR(2),
+                     io[0], io[1], il, ir, t0, t1);
+
+    /* post-whitening with subkey 0; last two words only */
+    io[2] ^= CamelliaSubkeyL(0);
+    io[3] ^= CamelliaSubkeyR(0);
+
+    /* exchange the two 64-bit halves (io[0..1] <-> io[2..3]) before output */
+    t0 = io[0];
+    t1 = io[1];
+    io[0] = io[2];
+    io[1] = io[3];
+    io[2] = t0;
+    io[3] = t1;
+
+    /* store the four words to the 16-byte output */
+    PUTU32(output, io[0]);
+    PUTU32(output + 4, io[1]);
+    PUTU32(output + 8, io[2]);
+    PUTU32(output + 12, io[3]);
+
+    return SECSuccess;
+}
+
+/**************************************************************************
+ *
+ * Stuff related to the Camellia key schedule
+ *
+ *************************************************************************/
+
+/*
+ * camellia_key_expansion
+ *
+ * Fill cx->expandedKey from |key| using the setup routine that matches
+ * |keysize|, and record the key length in cx->keysize.
+ *
+ * cx      - context to receive the schedule; must not be NULL
+ * key     - raw key material, |keysize| bytes long
+ * keysize - key length in bytes: 16, 24 or 32
+ *
+ * Returns SECSuccess on success.  For any other key length nothing in |cx|
+ * is modified, the error is set to SEC_ERROR_INVALID_ARGS and SECFailure is
+ * returned, so a context is never left claiming a key size for which no
+ * schedule was generated.
+ */
+SECStatus
+camellia_key_expansion(CamelliaContext *cx,
+                       const unsigned char *key,
+                       const unsigned int keysize)
+{
+    switch (keysize) {
+        case 16:
+            camellia_setup128(key, cx->expandedKey);
+            break;
+        case 24:
+            camellia_setup192(key, cx->expandedKey);
+            break;
+        case 32:
+            camellia_setup256(key, cx->expandedKey);
+            break;
+        default:
+            /* unsupported length: report it instead of pretending success */
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+    }
+
+    cx->keysize = keysize;
+    return SECSuccess;
+}
+
+/**************************************************************************
+ *
+ *  Camellia modes of operation (ECB and CBC)
+ *
+ *************************************************************************/
+
+/*
+ * ECB worker, encrypt direction: every CAMELLIA_BLOCK_SIZE-byte block of
+ * |input| is encrypted independently into the same position of |output|.
+ *
+ * |outputLen| and |maxOutputLen| are part of the common worker signature
+ * and are not used here.  |inputLen| is expected to be a multiple of
+ * CAMELLIA_BLOCK_SIZE (Camellia_Encrypt checks this before dispatching).
+ * Always returns SECSuccess.
+ */
+SECStatus
+camellia_encryptECB(CamelliaContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen)
+{
+    /* 16-byte keys have their own block function; all others share one */
+    CamelliaBlockFunc *blockFn = &camellia_encrypt256;
+
+    if (cx->keysize == 16) {
+        blockFn = &camellia_encrypt128;
+    }
+
+    for (; inputLen > 0; inputLen -= CAMELLIA_BLOCK_SIZE) {
+        (*blockFn)(cx->expandedKey, output, input);
+        input += CAMELLIA_BLOCK_SIZE;
+        output += CAMELLIA_BLOCK_SIZE;
+    }
+    return SECSuccess;
+}
+
+/*
+ * CBC worker, encrypt direction.  Each plaintext block is XORed with the
+ * previous ciphertext block (cx->iv for the first one) and then encrypted.
+ * On return cx->iv holds the last ciphertext block produced, so a later
+ * call continues the chain.
+ *
+ * A zero |inputLen| returns immediately and leaves cx->iv untouched.
+ * |outputLen| and |maxOutputLen| are not used here.  Always returns
+ * SECSuccess.
+ */
+SECStatus
+camellia_encryptCBC(CamelliaContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen)
+{
+    const unsigned char *chain = cx->iv; /* block XORed into the next plaintext */
+    unsigned char xored[CAMELLIA_BLOCK_SIZE];
+    CamelliaBlockFunc *blockFn;
+    unsigned int k;
+
+    if (inputLen == 0) {
+        return SECSuccess;
+    }
+
+    blockFn = &camellia_encrypt256;
+    if (cx->keysize == 16) {
+        blockFn = &camellia_encrypt128;
+    }
+
+    do {
+        /* build the cipher input in a scratch buffer so |input| stays const */
+        for (k = 0; k < CAMELLIA_BLOCK_SIZE; ++k) {
+            xored[k] = input[k] ^ chain[k];
+        }
+        (*blockFn)(cx->expandedKey, output, xored);
+
+        /* the ciphertext just written chains into the following block */
+        chain = output;
+        input += CAMELLIA_BLOCK_SIZE;
+        output += CAMELLIA_BLOCK_SIZE;
+        inputLen -= CAMELLIA_BLOCK_SIZE;
+    } while (inputLen > 0);
+
+    memcpy(cx->iv, chain, CAMELLIA_BLOCK_SIZE);
+    return SECSuccess;
+}
+
+/*
+ * ECB worker, decrypt direction: every CAMELLIA_BLOCK_SIZE-byte block of
+ * |input| is decrypted independently into the same position of |output|.
+ *
+ * |outputLen| and |maxOutputLen| are part of the common worker signature
+ * and are not used here.  |inputLen| is expected to be a multiple of
+ * CAMELLIA_BLOCK_SIZE (Camellia_Decrypt checks this before dispatching).
+ * Always returns SECSuccess.
+ */
+SECStatus
+camellia_decryptECB(CamelliaContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen)
+{
+    /* 16-byte keys have their own block function; all others share one */
+    CamelliaBlockFunc *blockFn = &camellia_decrypt256;
+
+    if (cx->keysize == 16) {
+        blockFn = &camellia_decrypt128;
+    }
+
+    for (; inputLen > 0; inputLen -= CAMELLIA_BLOCK_SIZE) {
+        (*blockFn)(cx->expandedKey, output, input);
+        input += CAMELLIA_BLOCK_SIZE;
+        output += CAMELLIA_BLOCK_SIZE;
+    }
+    return SECSuccess;
+}
+
+/*
+ * CBC worker, decrypt direction.
+ *
+ * Blocks are processed from the LAST one back to the first.  Each output
+ * block needs the preceding ciphertext block, so when |output| and |input|
+ * are the same buffer that ciphertext must still be intact when it is
+ * read; walking backwards guarantees this.  The final ciphertext block is
+ * saved up front and becomes the new cx->iv, so a later call continues the
+ * chain.
+ *
+ * A zero |inputLen| returns immediately and leaves cx->iv untouched.
+ * |inputLen| is expected to be a multiple of CAMELLIA_BLOCK_SIZE
+ * (Camellia_Decrypt checks this before dispatching).  |outputLen| and
+ * |maxOutputLen| are not used here.  Always returns SECSuccess.
+ */
+SECStatus
+camellia_decryptCBC(CamelliaContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen)
+{
+    const unsigned char *in;
+    const unsigned char *prev; /* ciphertext block preceding |in| */
+    unsigned char *out;
+    unsigned int j;
+    unsigned char newIV[CAMELLIA_BLOCK_SIZE];
+    CamelliaBlockFunc *decryptor;
+
+    if (!inputLen)
+        return SECSuccess;
+
+    /* either in place / output after input, or entirely disjoint buffers */
+    PORT_Assert(output - input >= 0 || input - output >= (int)inputLen);
+
+    in = input + (inputLen - CAMELLIA_BLOCK_SIZE);
+    /* save the last ciphertext block now: in-place use overwrites it below */
+    memcpy(newIV, in, CAMELLIA_BLOCK_SIZE);
+    out = output + (inputLen - CAMELLIA_BLOCK_SIZE);
+
+    decryptor = (cx->keysize == 16)
+                    ? &camellia_decrypt128
+                    : &camellia_decrypt256;
+
+    /* every block except the first chains to the ciphertext before it */
+    while (inputLen > CAMELLIA_BLOCK_SIZE) {
+        (*decryptor)(cx->expandedKey, out, in);
+
+        /*
+         * Address the previous block through its own pointer rather than
+         * with a negative index computed from an unsigned loop counter.
+         */
+        prev = in - CAMELLIA_BLOCK_SIZE;
+        for (j = 0; j < CAMELLIA_BLOCK_SIZE; ++j)
+            out[j] ^= prev[j];
+
+        out -= CAMELLIA_BLOCK_SIZE;
+        in -= CAMELLIA_BLOCK_SIZE;
+        inputLen -= CAMELLIA_BLOCK_SIZE;
+    }
+    /* the first block chains to the IV held in the context */
+    if (in == input) {
+        (*decryptor)(cx->expandedKey, out, in);
+
+        for (j = 0; j < CAMELLIA_BLOCK_SIZE; ++j)
+            out[j] ^= cx->iv[j];
+    }
+    memcpy(cx->iv, newIV, CAMELLIA_BLOCK_SIZE);
+    return SECSuccess;
+}
+
+/**************************************************************************
+ *
+ * BLAPI Interface functions
+ *
+ *************************************************************************/
+
+/*
+ * Camellia_AllocateContext
+ *
+ * Allocate a CamelliaContext with PORT_ZNew.  The context carries no key or
+ * mode yet; Camellia_InitContext fills those in.  The result may be NULL
+ * (Camellia_CreateContext checks the same allocation for failure).
+ */
+CamelliaContext *
+Camellia_AllocateContext(void)
+{
+    CamelliaContext *cx = PORT_ZNew(CamelliaContext);
+
+    return cx;
+}
+
+/*
+ * Camellia_InitContext
+ *
+ * Set up an already-allocated context: choose the worker for the requested
+ * mode and direction, copy the IV for CBC, and expand the key.
+ *
+ * cx      - context to initialize; must not be NULL
+ * key     - raw key; must not be NULL
+ * keysize - key length in bytes: 16, 24 or 32
+ * iv      - CAMELLIA_BLOCK_SIZE bytes; required for NSS_CAMELLIA_CBC,
+ *           not read for NSS_CAMELLIA
+ * mode    - NSS_CAMELLIA (ECB worker) or NSS_CAMELLIA_CBC
+ * encrypt - non-zero selects the encrypt worker, zero the decrypt worker
+ * unused  - ignored
+ *
+ * Returns SECSuccess, or SECFailure with SEC_ERROR_INVALID_ARGS set when
+ * any argument is rejected.  Also returns SECFailure if key expansion
+ * fails.
+ */
+SECStatus
+Camellia_InitContext(CamelliaContext *cx, const unsigned char *key,
+                     unsigned int keysize,
+                     const unsigned char *iv, int mode, unsigned int encrypt,
+                     unsigned int unused)
+{
+    const int cbc = (mode == NSS_CAMELLIA_CBC);
+    const int badKey = (key == NULL) ||
+                       !(keysize == 16 || keysize == 24 || keysize == 32);
+    const int badMode = !cbc && (mode != NSS_CAMELLIA);
+    const int missingIV = cbc && (iv == NULL);
+
+    /* every rejected argument reports the same error */
+    if (badKey || badMode || missingIV || cx == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (cbc) {
+        memcpy(cx->iv, iv, CAMELLIA_BLOCK_SIZE);
+        if (encrypt) {
+            cx->worker = &camellia_encryptCBC;
+        } else {
+            cx->worker = &camellia_decryptCBC;
+        }
+    } else if (encrypt) {
+        cx->worker = &camellia_encryptECB;
+    } else {
+        cx->worker = &camellia_decryptECB;
+    }
+
+    /* Generate expanded key */
+    if (camellia_key_expansion(cx, key, keysize) == SECSuccess) {
+        return SECSuccess;
+    }
+    return SECFailure;
+}
+
+/*
+ * Camellia_CreateContext
+ *
+ * Allocate a context and initialize it in one step.
+ *
+ * key     - raw key; must not be NULL
+ * iv      - CAMELLIA_BLOCK_SIZE bytes; required for NSS_CAMELLIA_CBC,
+ *           not read for NSS_CAMELLIA
+ * mode    - NSS_CAMELLIA (ECB worker) or NSS_CAMELLIA_CBC
+ * encrypt - non-zero selects the encrypt worker, zero the decrypt worker
+ * keysize - key length in bytes: 16, 24 or 32
+ *
+ * Returns the new context, or NULL with SEC_ERROR_INVALID_ARGS set for a
+ * rejected argument or SEC_ERROR_NO_MEMORY set when allocation fails.  If
+ * key expansion fails the context is released with PORT_ZFree and NULL is
+ * returned.
+ */
+
+CamelliaContext *
+Camellia_CreateContext(const unsigned char *key, const unsigned char *iv,
+                       int mode, int encrypt,
+                       unsigned int keysize)
+{
+    CamelliaContext *cx;
+    const int cbc = (mode == NSS_CAMELLIA_CBC);
+
+    /* every rejected argument reports the same error */
+    if (key == NULL ||
+        !(keysize == 16 || keysize == 24 || keysize == 32) ||
+        (!cbc && mode != NSS_CAMELLIA) ||
+        (cbc && iv == NULL)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return NULL;
+    }
+
+    cx = PORT_ZNew(CamelliaContext);
+    if (cx == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return NULL;
+    }
+
+    cx->keysize = keysize;
+
+    /* pick the worker; only CBC needs the IV copied in */
+    if (cbc) {
+        memcpy(cx->iv, iv, CAMELLIA_BLOCK_SIZE);
+        if (encrypt) {
+            cx->worker = &camellia_encryptCBC;
+        } else {
+            cx->worker = &camellia_decryptCBC;
+        }
+    } else if (encrypt) {
+        cx->worker = &camellia_encryptECB;
+    } else {
+        cx->worker = &camellia_decryptECB;
+    }
+
+    /* Generate expanded key */
+    if (camellia_key_expansion(cx, key, keysize) == SECSuccess) {
+        return cx;
+    }
+
+    PORT_ZFree(cx, sizeof *cx);
+    return NULL;
+}
+
+/*
+ * Camellia_DestroyContext
+ *
+ * Zero a Camellia cipher context so that the expanded key and IV do not
+ * linger in memory.  If freeit is true, also free the context.
+ *
+ * cx     - context to wipe; NULL is accepted and is a no-op
+ * freeit - PR_TRUE to release the memory after wiping it
+ *
+ * The free path uses PORT_ZFree, the same call Camellia_CreateContext uses
+ * on its cleanup path, so the wipe and the release are one operation.
+ */
+void
+Camellia_DestroyContext(CamelliaContext *cx, PRBool freeit)
+{
+    if (cx == NULL) {
+        return;
+    }
+    if (freeit) {
+        PORT_ZFree(cx, sizeof *cx);
+    } else {
+        /* caller keeps ownership of the storage; just clear it */
+        memset(cx, 0, sizeof *cx);
+    }
+}
+
+/*
+ * Camellia_Encrypt
+ *
+ * Validate the arguments and hand the buffer to the worker stored in the
+ * context.  The output buffer must already be allocated to at least
+ * inputLen bytes.
+ *
+ * Failures (SECFailure is returned and nothing is written):
+ *   SEC_ERROR_INVALID_ARGS - cx, output, input or outputLen is NULL
+ *   SEC_ERROR_INPUT_LEN    - inputLen is not a multiple of
+ *                            CAMELLIA_BLOCK_SIZE
+ *   SEC_ERROR_OUTPUT_LEN   - maxOutputLen is smaller than inputLen
+ *
+ * Otherwise *outputLen is set to inputLen and the worker's status is
+ * returned.
+ */
+SECStatus
+Camellia_Encrypt(CamelliaContext *cx, unsigned char *output,
+                 unsigned int *outputLen, unsigned int maxOutputLen,
+                 const unsigned char *input, unsigned int inputLen)
+{
+    /* all four pointers are mandatory */
+    if (!cx || !output || !input || !outputLen) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* only whole blocks can be processed */
+    if ((inputLen % CAMELLIA_BLOCK_SIZE) != 0) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    /* output is exactly as long as the input */
+    if (inputLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    *outputLen = inputLen;
+    return (*cx->worker)(cx, output, outputLen, maxOutputLen,
+                         input, inputLen);
+}
+
+/*
+ * Camellia_Decrypt
+ *
+ * Decrypt an arbitrary-length buffer by validating the arguments and
+ * handing the buffer to the worker stored in the context.  The output
+ * buffer must already be allocated to at least inputLen bytes.
+ *
+ * Failures (SECFailure is returned and nothing is written):
+ *   SEC_ERROR_INVALID_ARGS - cx, output, input or outputLen is NULL
+ *   SEC_ERROR_INPUT_LEN    - inputLen is not a multiple of
+ *                            CAMELLIA_BLOCK_SIZE
+ *   SEC_ERROR_OUTPUT_LEN   - maxOutputLen is smaller than inputLen
+ *
+ * Otherwise *outputLen is set to inputLen and the worker's status is
+ * returned.
+ */
+SECStatus
+Camellia_Decrypt(CamelliaContext *cx, unsigned char *output,
+                 unsigned int *outputLen, unsigned int maxOutputLen,
+                 const unsigned char *input, unsigned int inputLen)
+{
+    const int missingArg = (cx == NULL) || (output == NULL) ||
+                           (input == NULL) || (outputLen == NULL);
+
+    if (missingArg) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* only whole blocks can be processed */
+    if ((inputLen % CAMELLIA_BLOCK_SIZE) != 0) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    /* output is exactly as long as the input */
+    if (inputLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    *outputLen = inputLen;
+    return (*cx->worker)(cx, output, outputLen, maxOutputLen,
+                         input, inputLen);
+}
diff --git a/security/nss/lib/freebl/camellia.h b/security/nss/lib/freebl/camellia.h
new file mode 100644
index 000000000..15114db9a
--- /dev/null
+++ b/security/nss/lib/freebl/camellia.h
@@ -0,0 +1,42 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _CAMELLIA_H_
+#define _CAMELLIA_H_ 1
+
+#define CAMELLIA_BLOCK_SIZE 16  /* bytes */
+#define CAMELLIA_MIN_KEYSIZE 16 /* bytes */
+#define CAMELLIA_MAX_KEYSIZE 32 /* bytes */
+
+#define CAMELLIA_MAX_EXPANDEDKEY (34 * 2) /* size of the key schedule, counted in 32-bit words */
+
+typedef PRUint32 KEY_TABLE_TYPE[CAMELLIA_MAX_EXPANDEDKEY]; /* storage for one expanded key */
+
+/* Signature of a mode worker (the function stored in CamelliaContextStr.worker). */
+typedef SECStatus CamelliaFunc(CamelliaContext *cx, unsigned char *output,
+                               unsigned int *outputLen,
+                               unsigned int maxOutputLen,
+                               const unsigned char *input,
+                               unsigned int inputLen);
+
+/* Signature of a single-block primitive: reads one block from input, writes one to output. */
+typedef SECStatus CamelliaBlockFunc(const PRUint32 *subkey,
+                                    unsigned char *output,
+                                    const unsigned char *input);
+
+/* CamelliaContextStr
+ *
+ * Values which maintain the state for Camellia encryption/decryption.
+ *
+ * keysize     - the key length in bytes (16, 24 or 32)
+ * worker      - the encryption/decryption function to use with this context
+ * iv          - initialization vector for CBC mode; updated by the CBC
+ *               workers so that successive calls continue the chain
+ * expandedKey - the round keys in 4-byte words
+ */
+struct CamelliaContextStr {
+    PRUint32 keysize; /* bytes */
+    CamelliaFunc *worker;
+    PRUint32 expandedKey[CAMELLIA_MAX_EXPANDEDKEY];
+    PRUint8 iv[CAMELLIA_BLOCK_SIZE];
+};
+
+#endif /* _CAMELLIA_H_ */
diff --git a/security/nss/lib/freebl/chacha20.c b/security/nss/lib/freebl/chacha20.c
new file mode 100644
index 000000000..f55d1e670
--- /dev/null
+++ b/security/nss/lib/freebl/chacha20.c
@@ -0,0 +1,119 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Adopted from the public domain code in NaCl by djb. */
+
+#include <string.h>
+#include <stdio.h>
+
+#include "prtypes.h"
+#include "secport.h"
+#include "chacha20.h"
+
+/*
+ * ROTL32(x, n): rotate x left by n bits.  The portable form derives the
+ * width from sizeof(x), so x must be a 32-bit operand for a 32-bit rotate,
+ * and n must satisfy 0 < n < width (n == 0 would shift by the full width).
+ * Both arguments are fully parenthesized so that expression arguments
+ * expand correctly; x is evaluated more than once.
+ */
+#if defined(_MSC_VER)
+#pragma intrinsic(_lrotl)
+#define ROTL32(x, n) _lrotl(x, n)
+#else
+#define ROTL32(x, n) (((x) << (n)) | ((x) >> ((8 * sizeof(x)) - (n))))
+#endif
+
+#define ROTATE(v, c) ROTL32((v), (c))
+
+/* Store the 32-bit value v at p[0..3], least significant byte first. */
+#define U32TO8_LITTLE(p, v)          \
+    {                                \
+        (p)[0] = ((v)) & 0xff;       \
+        (p)[1] = ((v) >> 8) & 0xff;  \
+        (p)[2] = ((v) >> 16) & 0xff; \
+        (p)[3] = ((v) >> 24) & 0xff; \
+    }
+/* Assemble a 32-bit value from p[0..3], least significant byte first. */
+#define U8TO32_LITTLE(p)                                \
+    (((PRUint32)((p)[0])) | ((PRUint32)((p)[1]) << 8) | \
+     ((PRUint32)((p)[2]) << 16) | ((PRUint32)((p)[3]) << 24))
+
+/*
+ * One ChaCha quarter round on words a, b, c, d of the array x.  Expands to
+ * a bare statement sequence ending in ';', so it must only be used as a
+ * full statement inside a braced block, written without a trailing ';'.
+ */
+#define QUARTERROUND(x, a, b, c, d) \
+    x[a] = x[a] + x[b];             \
+    x[d] = ROTATE(x[d] ^ x[a], 16); \
+    x[c] = x[c] + x[d];             \
+    x[b] = ROTATE(x[b] ^ x[c], 12); \
+    x[a] = x[a] + x[b];             \
+    x[d] = ROTATE(x[d] ^ x[a], 8);  \
+    x[c] = x[c] + x[d];             \
+    x[b] = ROTATE(x[b] ^ x[c], 7);
+
+/*
+ * Produce one 64-byte keystream block.
+ *
+ * output     - receives 64 bytes: each final word stored least significant
+ *              byte first
+ * input      - the 16-word ChaCha state; not modified
+ * num_rounds - number of rounds; each loop pass performs two (one set of
+ *              column quarter rounds, one set of diagonal quarter rounds)
+ *
+ * The final words are the permuted working copy plus the original input,
+ * word by word.
+ */
+static void
+ChaChaCore(unsigned char output[64], const PRUint32 input[16], int num_rounds)
+{
+    PRUint32 state[16];
+    int remaining = num_rounds;
+    int w;
+
+    PORT_Memcpy(state, input, sizeof state);
+
+    while (remaining > 0) {
+        /* columns */
+        QUARTERROUND(state, 0, 4, 8, 12)
+        QUARTERROUND(state, 1, 5, 9, 13)
+        QUARTERROUND(state, 2, 6, 10, 14)
+        QUARTERROUND(state, 3, 7, 11, 15)
+        /* diagonals */
+        QUARTERROUND(state, 0, 5, 10, 15)
+        QUARTERROUND(state, 1, 6, 11, 12)
+        QUARTERROUND(state, 2, 7, 8, 13)
+        QUARTERROUND(state, 3, 4, 9, 14)
+        remaining -= 2;
+    }
+
+    /* feed-forward and serialization in a single pass over the words */
+    for (w = 0; w < 16; ++w) {
+        const PRUint32 sum = state[w] + input[w];
+        U32TO8_LITTLE(output + 4 * w, sum);
+    }
+}
+
+/* The 16 constant bytes that fill words 0-3 of the ChaCha state. */
+static const unsigned char sigma[16] = "expand 32-byte k";
+
+/*
+ * XOR |inLen| bytes of |in| with the ChaCha20 keystream and write the
+ * result to |out|.
+ *
+ * State layout (16 words): 0-3 constant, 4-11 key, 12 block counter,
+ * 13-15 nonce.  Key and nonce bytes are read least significant byte first.
+ * The block counter starts at |counter| and is advanced once per 64-byte
+ * keystream block; a trailing partial block uses only the first bytes of
+ * its keystream block.
+ */
+void
+ChaCha20XOR(unsigned char *out, const unsigned char *in, unsigned int inLen,
+            const unsigned char key[32], const unsigned char nonce[12],
+            uint32_t counter)
+{
+    unsigned char keystream[64];
+    PRUint32 state[16];
+    unsigned int w;
+
+    for (w = 0; w < 4; w++) {
+        state[w] = U8TO32_LITTLE(sigma + 4 * w);
+    }
+    for (w = 0; w < 8; w++) {
+        state[4 + w] = U8TO32_LITTLE(key + 4 * w);
+    }
+    state[12] = counter;
+    for (w = 0; w < 3; w++) {
+        state[13 + w] = U8TO32_LITTLE(nonce + 4 * w);
+    }
+
+    while (inLen > 0) {
+        /* a full block, or whatever is left at the tail */
+        const unsigned int chunk = (inLen < 64) ? inLen : 64;
+        unsigned int b;
+
+        ChaChaCore(keystream, state, 20);
+        for (b = 0; b < chunk; b++) {
+            out[b] = in[b] ^ keystream[b];
+        }
+
+        state[12]++;
+        inLen -= chunk;
+        in += chunk;
+        out += chunk;
+    }
+}
diff --git a/security/nss/lib/freebl/chacha20.h b/security/nss/lib/freebl/chacha20.h
new file mode 100644
index 000000000..7e396fa8c
--- /dev/null
+++ b/security/nss/lib/freebl/chacha20.h
@@ -0,0 +1,26 @@
+/*
+ * chacha20.h - header file for ChaCha20 implementation.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef FREEBL_CHACHA20_H_
+#define FREEBL_CHACHA20_H_
+
+#if defined(_MSC_VER) && _MSC_VER < 1600
+#include "prtypes.h"
+typedef PRUint32 uint32_t;
+typedef PRUint64 uint64_t;
+#else
+#include <stdint.h>
+#endif
+
+/* ChaCha20XOR encrypts |inLen| bytes from |in| with the given key and
+ * nonce and writes the result to |out|, which may be equal to |in|. The
+ * initial block counter is specified by |counter|. */
+extern void ChaCha20XOR(unsigned char *out, const unsigned char *in,
+                        unsigned int inLen, const unsigned char key[32],
+                        const unsigned char nonce[12], uint32_t counter);
+
+#endif /* FREEBL_CHACHA20_H_ */
diff --git a/security/nss/lib/freebl/chacha20_vec.c b/security/nss/lib/freebl/chacha20_vec.c
new file mode 100644
index 000000000..12f94d897
--- /dev/null
+++ b/security/nss/lib/freebl/chacha20_vec.c
@@ -0,0 +1,327 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This implementation is by Ted Krovetz and was submitted to SUPERCOP and
+ * marked as public domain. It has been altered to allow for non-aligned inputs
+ * and to allow the block counter to be passed in specifically. */
+
+#include <string.h>
+
+#include "chacha20.h"
+#include "blapii.h"
+
+#ifndef CHACHA_RNDS
+#define CHACHA_RNDS 20 /* 8 (high speed), 20 (conservative), 12 (middle) */
+#endif
+
+/* Architecture-neutral way to specify 16-byte vector of ints              */
+typedef unsigned vec __attribute__((vector_size(16)));
+
+/* This implementation is designed for Neon, SSE and AltiVec machines. The
+ * following specify how to do certain vector operations efficiently on
+ * each architecture, using intrinsics.
+ * This implementation supports parallel processing of multiple blocks,
+ * including potentially using general-purpose registers.
+ */
+#if __ARM_NEON__
+#include <arm_neon.h>
+#define GPR_TOO 1
+#define VBPI 2
+#define ONE (vec) vsetq_lane_u32(1, vdupq_n_u32(0), 0)
+#define LOAD(m) (vec)(*((vec *)(m)))
+#define STORE(m, r) (*((vec *)(m))) = (r)
+#define ROTV1(x) (vec) vextq_u32((uint32x4_t)x, (uint32x4_t)x, 1)
+#define ROTV2(x) (vec) vextq_u32((uint32x4_t)x, (uint32x4_t)x, 2)
+#define ROTV3(x) (vec) vextq_u32((uint32x4_t)x, (uint32x4_t)x, 3)
+#define ROTW16(x) (vec) vrev32q_u16((uint16x8_t)x)
+#if __clang__
+#define ROTW7(x) (x << ((vec){ 7, 7, 7, 7 })) ^ (x >> ((vec){ 25, 25, 25, 25 }))
+#define ROTW8(x) (x << ((vec){ 8, 8, 8, 8 })) ^ (x >> ((vec){ 24, 24, 24, 24 }))
+#define ROTW12(x) (x << ((vec){ 12, 12, 12, 12 })) ^ (x >> ((vec){ 20, 20, 20, 20 }))
+#else
+#define ROTW7(x) (vec) vsriq_n_u32(vshlq_n_u32((uint32x4_t)x, 7), (uint32x4_t)x, 25)
+#define ROTW8(x) (vec) vsriq_n_u32(vshlq_n_u32((uint32x4_t)x, 8), (uint32x4_t)x, 24)
+#define ROTW12(x) (vec) vsriq_n_u32(vshlq_n_u32((uint32x4_t)x, 12), (uint32x4_t)x, 20)
+#endif
+#elif __SSE2__
+#include <emmintrin.h>
+#define GPR_TOO 0
+#if __clang__
+#define VBPI 4
+#else
+#define VBPI 3
+#endif
+#define ONE (vec) _mm_set_epi32(0, 0, 0, 1)
+#define LOAD(m) (vec) _mm_loadu_si128((__m128i *)(m))
+#define STORE(m, r) _mm_storeu_si128((__m128i *)(m), (__m128i)(r))
+#define ROTV1(x) (vec) _mm_shuffle_epi32((__m128i)x, _MM_SHUFFLE(0, 3, 2, 1))
+#define ROTV2(x) (vec) _mm_shuffle_epi32((__m128i)x, _MM_SHUFFLE(1, 0, 3, 2))
+#define ROTV3(x) (vec) _mm_shuffle_epi32((__m128i)x, _MM_SHUFFLE(2, 1, 0, 3))
+#define ROTW7(x) (vec)(_mm_slli_epi32((__m128i)x, 7) ^ _mm_srli_epi32((__m128i)x, 25))
+#define ROTW12(x) (vec)(_mm_slli_epi32((__m128i)x, 12) ^ _mm_srli_epi32((__m128i)x, 20))
+#if __SSSE3__
+#include <tmmintrin.h>
+#define ROTW8(x) (vec) _mm_shuffle_epi8((__m128i)x, _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3))
+#define ROTW16(x) (vec) _mm_shuffle_epi8((__m128i)x, _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2))
+#else
+#define ROTW8(x) (vec)(_mm_slli_epi32((__m128i)x, 8) ^ _mm_srli_epi32((__m128i)x, 24))
+#define ROTW16(x) (vec)(_mm_slli_epi32((__m128i)x, 16) ^ _mm_srli_epi32((__m128i)x, 16))
+#endif
+#else
+#error-- Implementation supports only machines with neon or SSE2
+#endif
+
+#ifndef REVV_BE
+#define REVV_BE(x) (x)
+#endif
+
+#ifndef REVW_BE
+#define REVW_BE(x) (x)
+#endif
+
+#define BPI (VBPI + GPR_TOO) /* Blocks computed per loop iteration   */
+
+#define DQROUND_VECTORS(a, b, c, d) \
+    a += b;                         \
+    d ^= a;                         \
+    d = ROTW16(d);                  \
+    c += d;                         \
+    b ^= c;                         \
+    b = ROTW12(b);                  \
+    a += b;                         \
+    d ^= a;                         \
+    d = ROTW8(d);                   \
+    c += d;                         \
+    b ^= c;                         \
+    b = ROTW7(b);                   \
+    b = ROTV1(b);                   \
+    c = ROTV2(c);                   \
+    d = ROTV3(d);                   \
+    a += b;                         \
+    d ^= a;                         \
+    d = ROTW16(d);                  \
+    c += d;                         \
+    b ^= c;                         \
+    b = ROTW12(b);                  \
+    a += b;                         \
+    d ^= a;                         \
+    d = ROTW8(d);                   \
+    c += d;                         \
+    b ^= c;                         \
+    b = ROTW7(b);                   \
+    b = ROTV3(b);                   \
+    c = ROTV2(c);                   \
+    d = ROTV1(d);
+
+#define QROUND_WORDS(a, b, c, d) \
+    a = a + b;                   \
+    d ^= a;                      \
+    d = d << 16 | d >> 16;       \
+    c = c + d;                   \
+    b ^= c;                      \
+    b = b << 12 | b >> 20;       \
+    a = a + b;                   \
+    d ^= a;                      \
+    d = d << 8 | d >> 24;        \
+    c = c + d;                   \
+    b ^= c;                      \
+    b = b << 7 | b >> 25;
+
+#define WRITE_XOR(in, op, d, v0, v1, v2, v3)           \
+    STORE(op + d + 0, LOAD(in + d + 0) ^ REVV_BE(v0)); \
+    STORE(op + d + 4, LOAD(in + d + 4) ^ REVV_BE(v1)); \
+    STORE(op + d + 8, LOAD(in + d + 8) ^ REVV_BE(v2)); \
+    STORE(op + d + 12, LOAD(in + d + 12) ^ REVV_BE(v3));
+
+void NO_SANITIZE_ALIGNMENT
+ChaCha20XOR(unsigned char *out, const unsigned char *in, unsigned int inlen,
+            const unsigned char key[32], const unsigned char nonce[12],
+            uint32_t counter)
+{
+    unsigned iters, i, *op = (unsigned *)out, *ip = (unsigned *)in, *kp; /* op/ip: word views of out/in */
+#if defined(__ARM_NEON__)
+    unsigned *np;
+#endif
+    vec s0, s1, s2, s3;
+#if !defined(__ARM_NEON__) && !defined(__SSE2__)
+    __attribute__((aligned(16))) unsigned key[8], nonce[4];
+#endif
+    __attribute__((aligned(16))) unsigned chacha_const[] =
+        { 0x61707865, 0x3320646E, 0x79622D32, 0x6B206574 }; /* "expand 32-byte k" as little-endian words */
+#if defined(__ARM_NEON__) || defined(__SSE2__)
+    kp = (unsigned *)key;
+#else
+    ((vec *)key)[0] = REVV_BE(((vec *)key)[0]);
+    ((vec *)key)[1] = REVV_BE(((vec *)key)[1]);
+    ((unsigned *)nonce)[0] = REVW_BE(((unsigned *)nonce)[0]);
+    ((unsigned *)nonce)[1] = REVW_BE(((unsigned *)nonce)[1]);
+    ((unsigned *)nonce)[2] = REVW_BE(((unsigned *)nonce)[2]);
+    ((unsigned *)nonce)[3] = REVW_BE(((unsigned *)nonce)[3]);
+    kp = (unsigned *)key;
+    np = (unsigned *)nonce;
+#endif
+#if defined(__ARM_NEON__)
+    np = (unsigned *)nonce;
+#endif
+    s0 = LOAD(chacha_const);
+    s1 = LOAD(&((vec *)kp)[0]);
+    s2 = LOAD(&((vec *)kp)[1]);
+    s3 = (vec){
+        counter, /* lane 0 is the block counter; lanes 1-3 are the nonce words */
+        ((uint32_t *)nonce)[0],
+        ((uint32_t *)nonce)[1],
+        ((uint32_t *)nonce)[2]
+    };
+
+    for (iters = 0; iters < inlen / (BPI * 64); iters++) { /* bulk loop: BPI 64-byte blocks per pass */
+#if GPR_TOO
+        register unsigned x0, x1, x2, x3, x4, x5, x6, x7, x8,
+            x9, x10, x11, x12, x13, x14, x15;
+#endif
+#if VBPI > 2
+        vec v8, v9, v10, v11;
+#endif
+#if VBPI > 3
+        vec v12, v13, v14, v15;
+#endif
+
+        vec v0, v1, v2, v3, v4, v5, v6, v7;
+        v4 = v0 = s0;
+        v5 = v1 = s1;
+        v6 = v2 = s2;
+        v3 = s3;
+        v7 = v3 + ONE; /* second block of the pass: counter lane + 1 */
+#if VBPI > 2
+        v8 = v4;
+        v9 = v5;
+        v10 = v6;
+        v11 = v7 + ONE;
+#endif
+#if VBPI > 3
+        v12 = v8;
+        v13 = v9;
+        v14 = v10;
+        v15 = v11 + ONE;
+#endif
+#if GPR_TOO
+        x0 = chacha_const[0];
+        x1 = chacha_const[1];
+        x2 = chacha_const[2];
+        x3 = chacha_const[3];
+        x4 = kp[0];
+        x5 = kp[1];
+        x6 = kp[2];
+        x7 = kp[3];
+        x8 = kp[4];
+        x9 = kp[5];
+        x10 = kp[6];
+        x11 = kp[7];
+        x12 = counter + BPI * iters + (BPI - 1); /* counter of the last block in this pass */
+        x13 = np[0];
+        x14 = np[1];
+        x15 = np[2];
+#endif
+        for (i = CHACHA_RNDS / 2; i; i--) {
+            DQROUND_VECTORS(v0, v1, v2, v3)
+            DQROUND_VECTORS(v4, v5, v6, v7)
+#if VBPI > 2
+            DQROUND_VECTORS(v8, v9, v10, v11)
+#endif
+#if VBPI > 3
+            DQROUND_VECTORS(v12, v13, v14, v15)
+#endif
+#if GPR_TOO
+            QROUND_WORDS(x0, x4, x8, x12)
+            QROUND_WORDS(x1, x5, x9, x13)
+            QROUND_WORDS(x2, x6, x10, x14)
+            QROUND_WORDS(x3, x7, x11, x15)
+            QROUND_WORDS(x0, x5, x10, x15)
+            QROUND_WORDS(x1, x6, x11, x12)
+            QROUND_WORDS(x2, x7, x8, x13)
+            QROUND_WORDS(x3, x4, x9, x14)
+#endif
+        }
+
+        WRITE_XOR(ip, op, 0, v0 + s0, v1 + s1, v2 + s2, v3 + s3)
+        s3 += ONE; /* s3 tracks the counter of the next block to emit */
+        WRITE_XOR(ip, op, 16, v4 + s0, v5 + s1, v6 + s2, v7 + s3)
+        s3 += ONE;
+#if VBPI > 2
+        WRITE_XOR(ip, op, 32, v8 + s0, v9 + s1, v10 + s2, v11 + s3)
+        s3 += ONE;
+#endif
+#if VBPI > 3
+        WRITE_XOR(ip, op, 48, v12 + s0, v13 + s1, v14 + s2, v15 + s3)
+        s3 += ONE;
+#endif
+        ip += VBPI * 16;
+        op += VBPI * 16;
+#if GPR_TOO
+        op[0] = REVW_BE(REVW_BE(ip[0]) ^ (x0 + chacha_const[0]));
+        op[1] = REVW_BE(REVW_BE(ip[1]) ^ (x1 + chacha_const[1]));
+        op[2] = REVW_BE(REVW_BE(ip[2]) ^ (x2 + chacha_const[2]));
+        op[3] = REVW_BE(REVW_BE(ip[3]) ^ (x3 + chacha_const[3]));
+        op[4] = REVW_BE(REVW_BE(ip[4]) ^ (x4 + kp[0]));
+        op[5] = REVW_BE(REVW_BE(ip[5]) ^ (x5 + kp[1]));
+        op[6] = REVW_BE(REVW_BE(ip[6]) ^ (x6 + kp[2]));
+        op[7] = REVW_BE(REVW_BE(ip[7]) ^ (x7 + kp[3]));
+        op[8] = REVW_BE(REVW_BE(ip[8]) ^ (x8 + kp[4]));
+        op[9] = REVW_BE(REVW_BE(ip[9]) ^ (x9 + kp[5]));
+        op[10] = REVW_BE(REVW_BE(ip[10]) ^ (x10 + kp[6]));
+        op[11] = REVW_BE(REVW_BE(ip[11]) ^ (x11 + kp[7]));
+        op[12] = REVW_BE(REVW_BE(ip[12]) ^ (x12 + counter + BPI * iters + (BPI - 1)));
+        op[13] = REVW_BE(REVW_BE(ip[13]) ^ (x13 + np[0]));
+        op[14] = REVW_BE(REVW_BE(ip[14]) ^ (x14 + np[1]));
+        op[15] = REVW_BE(REVW_BE(ip[15]) ^ (x15 + np[2]));
+        s3 += ONE;
+        ip += 16;
+        op += 16;
+#endif
+    }
+
+    for (iters = inlen % (BPI * 64) / 64; iters != 0; iters--) { /* remaining whole blocks, one at a time */
+        vec v0 = s0, v1 = s1, v2 = s2, v3 = s3;
+        for (i = CHACHA_RNDS / 2; i; i--) {
+            DQROUND_VECTORS(v0, v1, v2, v3);
+        }
+        WRITE_XOR(ip, op, 0, v0 + s0, v1 + s1, v2 + s2, v3 + s3)
+        s3 += ONE;
+        ip += 16;
+        op += 16;
+    }
+
+    inlen = inlen % 64; /* bytes left over for a final partial block */
+    if (inlen) {
+        __attribute__((aligned(16))) vec buf[4];
+        vec v0, v1, v2, v3;
+        v0 = s0;
+        v1 = s1;
+        v2 = s2;
+        v3 = s3;
+        for (i = CHACHA_RNDS / 2; i; i--) {
+            DQROUND_VECTORS(v0, v1, v2, v3);
+        }
+
+        if (inlen >= 16) { /* full 16-byte chunks are stored directly; only the chunk holding the tail goes to buf */
+            STORE(op + 0, LOAD(ip + 0) ^ REVV_BE(v0 + s0));
+            if (inlen >= 32) {
+                STORE(op + 4, LOAD(ip + 4) ^ REVV_BE(v1 + s1));
+                if (inlen >= 48) {
+                    STORE(op + 8, LOAD(ip + 8) ^ REVV_BE(v2 + s2));
+                    buf[3] = REVV_BE(v3 + s3);
+                } else {
+                    buf[2] = REVV_BE(v2 + s2);
+                }
+            } else {
+                buf[1] = REVV_BE(v1 + s1);
+            }
+        } else {
+            buf[0] = REVV_BE(v0 + s0);
+        }
+
+        for (i = inlen & ~15; i < inlen; i++) { /* bytes past the last full 16-byte chunk */
+            ((char *)op)[i] = ((char *)ip)[i] ^ ((char *)buf)[i];
+        }
+    }
+}
diff --git a/security/nss/lib/freebl/chacha20poly1305.c b/security/nss/lib/freebl/chacha20poly1305.c
new file mode 100644
index 000000000..cd265e1ff
--- /dev/null
+++ b/security/nss/lib/freebl/chacha20poly1305.c
@@ -0,0 +1,198 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include <string.h>
+#include <stdio.h>
+
+#include "seccomon.h"
+#include "secerr.h"
+#include "blapit.h"
+
+#ifndef NSS_DISABLE_CHACHAPOLY
+#include "poly1305.h"
+#include "chacha20.h"
+#include "chacha20poly1305.h"
+#endif
+
+/* Poly1305Do writes the Poly1305 authenticator of the given additional data
+ * and ciphertext to |out|. */
+#ifndef NSS_DISABLE_CHACHAPOLY
+static void
+Poly1305Do(unsigned char *out, const unsigned char *ad, unsigned int adLen,
+           const unsigned char *ciphertext, unsigned int ciphertextLen,
+           const unsigned char key[32])
+{
+    static const unsigned char zeros[15];
+    poly1305_state state;
+    unsigned char lengthBytes[8];
+    unsigned int rest, pos;
+
+    /* MAC input: ad, zero fill up to a 16-byte boundary, ciphertext, zero
+     * fill up to a 16-byte boundary, then both lengths (8 bytes each). */
+    Poly1305Init(&state, key);
+    Poly1305Update(&state, ad, adLen);
+    if ((adLen & 15) != 0) {
+        Poly1305Update(&state, zeros, 16 - (adLen & 15));
+    }
+    Poly1305Update(&state, ciphertext, ciphertextLen);
+    if ((ciphertextLen & 15) != 0) {
+        Poly1305Update(&state, zeros, 16 - (ciphertextLen & 15));
+    }
+    /* Lengths are emitted least-significant byte first. */
+    for (pos = 0, rest = adLen; pos < sizeof(lengthBytes); pos++) {
+        lengthBytes[pos] = rest;
+        rest >>= 8;
+    }
+    Poly1305Update(&state, lengthBytes, sizeof(lengthBytes));
+    for (pos = 0, rest = ciphertextLen; pos < sizeof(lengthBytes); pos++) {
+        lengthBytes[pos] = rest;
+        rest >>= 8;
+    }
+    Poly1305Update(&state, lengthBytes, sizeof(lengthBytes));
+    Poly1305Finish(&state, out);
+}
+#endif
+
+SECStatus
+ChaCha20Poly1305_InitContext(ChaCha20Poly1305Context *ctx,
+                             const unsigned char *key, unsigned int keyLen,
+                             unsigned int tagLen)
+{
+#ifdef NSS_DISABLE_CHACHAPOLY
+    return SECFailure;
+#else
+    /* The key length is validated first, then the tag length; |ctx| is
+     * not written unless both checks pass. */
+    if (keyLen != 32) {
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    }
+    if (tagLen < 1 || tagLen > 16) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    ctx->tagLen = tagLen;
+    PORT_Memcpy(ctx->key, key, sizeof(ctx->key));
+    return SECSuccess;
+#endif
+}
+
+ChaCha20Poly1305Context *
+ChaCha20Poly1305_CreateContext(const unsigned char *key, unsigned int keyLen,
+                               unsigned int tagLen)
+{
+#ifdef NSS_DISABLE_CHACHAPOLY
+    return NULL;
+#else
+    /* Allocate, then let ChaCha20Poly1305_InitContext validate and fill in
+     * the context; the allocation is released if the parameters are bad. */
+    ChaCha20Poly1305Context *fresh = PORT_New(ChaCha20Poly1305Context);
+
+    if (fresh == NULL) {
+        return NULL;
+    }
+    if (ChaCha20Poly1305_InitContext(fresh, key, keyLen, tagLen) == SECSuccess) {
+        return fresh;
+    }
+
+    PORT_Free(fresh);
+    return NULL;
+#endif
+}
+
+void
+ChaCha20Poly1305_DestroyContext(ChaCha20Poly1305Context *ctx, PRBool freeit)
+{
+#ifndef NSS_DISABLE_CHACHAPOLY
+    PORT_Memset(ctx, 0, sizeof(*ctx)); /* clear the stored key and tag length */
+    if (freeit) { /* the memory itself is released only on request */
+        PORT_Free(ctx);
+    }
+#endif
+}
+
+SECStatus
+ChaCha20Poly1305_Seal(const ChaCha20Poly1305Context *ctx, unsigned char *output,
+                      unsigned int *outputLen, unsigned int maxOutputLen,
+                      const unsigned char *input, unsigned int inputLen,
+                      const unsigned char *nonce, unsigned int nonceLen,
+                      const unsigned char *ad, unsigned int adLen)
+{
+#ifdef NSS_DISABLE_CHACHAPOLY
+    return SECFailure;
+#else
+    unsigned char block[64];
+    unsigned char tag[16];
+
+    /* Writes inputLen ciphertext bytes followed by ctx->tagLen tag bytes to
+     * |output|. *outputLen is assigned before the size check is made. */
+    if (nonceLen != 12) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    /* inputLen + tagLen can wrap; a wrapped sum must not pass the size check. */
+    *outputLen = inputLen + ctx->tagLen;
+    if (*outputLen < inputLen || maxOutputLen < *outputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    /* Generate a block of keystream. The first 32 bytes will be the poly1305
+     * key. The remainder of the block is discarded. */
+    PORT_Memset(block, 0, sizeof(block));
+    ChaCha20XOR(block, block, sizeof(block), ctx->key, nonce, 0);
+    ChaCha20XOR(output, input, inputLen, ctx->key, nonce, 1); /* payload starts at block 1 */
+    Poly1305Do(tag, ad, adLen, output, inputLen, block);
+    PORT_Memcpy(output + inputLen, tag, ctx->tagLen);
+    return SECSuccess;
+#endif
+}
+
+SECStatus
+ChaCha20Poly1305_Open(const ChaCha20Poly1305Context *ctx, unsigned char *output,
+                      unsigned int *outputLen, unsigned int maxOutputLen,
+                      const unsigned char *input, unsigned int inputLen,
+                      const unsigned char *nonce, unsigned int nonceLen,
+                      const unsigned char *ad, unsigned int adLen)
+{
+#ifdef NSS_DISABLE_CHACHAPOLY
+    return SECFailure;
+#else
+    unsigned char polyKey[64];
+    unsigned char expected[16];
+    unsigned int bodyLen;
+
+    /* The nonce must be 12 bytes and the input must at least hold a tag. */
+    if (nonceLen != 12 || inputLen < ctx->tagLen) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    /* *outputLen is assigned before the output size check is made. */
+    bodyLen = inputLen - ctx->tagLen;
+    *outputLen = bodyLen;
+    if (bodyLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    /* Generate a block of keystream. The first 32 bytes will be the poly1305
+     * key. The remainder of the block is discarded. */
+    PORT_Memset(polyKey, 0, sizeof(polyKey));
+    ChaCha20XOR(polyKey, polyKey, sizeof(polyKey), ctx->key, nonce, 0);
+
+    /* The tag is checked first; |output| is only written once the
+     * recomputed tag matches the one that trails the ciphertext. */
+    Poly1305Do(expected, ad, adLen, input, bodyLen, polyKey);
+    if (NSS_SecureMemcmp(expected, input + bodyLen, ctx->tagLen) != 0) {
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        return SECFailure;
+    }
+    ChaCha20XOR(output, input, bodyLen, ctx->key, nonce, 1);
+    return SECSuccess;
+#endif
+}
diff --git a/security/nss/lib/freebl/chacha20poly1305.h b/security/nss/lib/freebl/chacha20poly1305.h
new file mode 100644
index 000000000..c77632aa1
--- /dev/null
+++ b/security/nss/lib/freebl/chacha20poly1305.h
@@ -0,0 +1,15 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _CHACHA20_POLY1305_H_
+#define _CHACHA20_POLY1305_H_ 1
+
+/* ChaCha20Poly1305ContextStr saves the key and tag length for a
+ * ChaCha20+Poly1305 AEAD operation. */
+struct ChaCha20Poly1305ContextStr {
+    unsigned char key[32]; /* ChaCha20 key, 32 bytes */
+    unsigned char tagLen; /* number of tag bytes written and checked */
+};
+
+#endif /* _CHACHA20_POLY1305_H_ */
diff --git a/security/nss/lib/freebl/config.mk b/security/nss/lib/freebl/config.mk
new file mode 100644
index 000000000..918a66363
--- /dev/null
+++ b/security/nss/lib/freebl/config.mk
@@ -0,0 +1,97 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# only do this in the outermost freebl build.
+ifndef FREEBL_CHILD_BUILD
+
+# We're going to change this build so that it builds libfreebl.a with
+# just loader.c.  Then we have to build this directory twice again to 
+# build the two DSOs.
+# To build libfreebl.a with just loader.c, we must now override many
+# of the make variables setup by the prior inclusion of CORECONF's config.mk
+
+CSRCS		= loader.c 
+SIMPLE_OBJS 	= $(CSRCS:.c=$(OBJ_SUFFIX))
+OBJS 		= $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(SIMPLE_OBJS))
+ALL_TRASH :=    $(TARGETS) $(OBJS) $(OBJDIR) LOGS TAGS $(GARBAGE) \
+                $(NOSUCHFILE) so_locations 
+
+# this is not a recursive child make.  We make a static lib. (archive)
+
+# Override the values defined in coreconf's ruleset.mk.
+#
+# - (1) LIBRARY: a static (archival) library
+# - (2) SHARED_LIBRARY: a shared (dynamic link) library
+# - (3) IMPORT_LIBRARY: an import library, used only on Windows
+# - (4) PROGRAM: an executable binary
+#
+# override these variables to prevent building a DSO/DLL.
+  TARGETS        = $(LIBRARY)
+  SHARED_LIBRARY =
+  IMPORT_LIBRARY =
+  PROGRAM        =
+
+else
+
+# This is a recursive child make. We build the shared lib.
+
+TARGETS      = $(SHARED_LIBRARY)
+LIBRARY      =
+IMPORT_LIBRARY =
+PROGRAM      =
+
+ifeq ($(OS_TARGET), SunOS)
+OS_LIBS += -lkstat
+endif
+
+ifeq (,$(filter-out WIN%,$(OS_TARGET)))
+
+# don't want the 32 in the shared library name
+SHARED_LIBRARY = $(OBJDIR)/$(DLL_PREFIX)$(LIBRARY_NAME)$(LIBRARY_VERSION).$(DLL_SUFFIX)
+
+RES     = $(OBJDIR)/$(LIBRARY_NAME).res
+RESNAME = freebl.rc
+
+ifdef NS_USE_GCC
+OS_LIBS += -ladvapi32
+else
+OS_LIBS += advapi32.lib
+endif
+
+ifdef NS_USE_GCC
+EXTRA_SHARED_LIBS += \
+	-L$(DIST)/lib \
+	-L$(NSSUTIL_LIB_DIR) \
+	-lnssutil3 \
+	-L$(NSPR_LIB_DIR) \
+	-lnspr4 \
+	$(NULL)
+else # ! NS_USE_GCC
+EXTRA_SHARED_LIBS += \
+	$(DIST)/lib/nssutil3.lib \
+	$(NSPR_LIB_DIR)/$(NSPR31_LIB_PREFIX)nspr4.lib \
+	$(NULL)
+endif # NS_USE_GCC
+
+else
+
+ifeq ($(FREEBL_NO_DEPEND),1)
+#drop pthreads as well
+OS_PTHREAD=
+else
+EXTRA_SHARED_LIBS += \
+	-L$(DIST)/lib \
+	-L$(NSSUTIL_LIB_DIR) \
+	-lnssutil3 \
+	-L$(NSPR_LIB_DIR) \
+	-lnspr4 \
+	$(NULL)
+endif
+endif
+
+ifeq ($(OS_ARCH), Darwin)
+EXTRA_SHARED_LIBS += -dylib_file @executable_path/libplc4.dylib:$(DIST)/lib/libplc4.dylib -dylib_file @executable_path/libplds4.dylib:$(DIST)/lib/libplds4.dylib
+endif
+
+endif
diff --git a/security/nss/lib/freebl/ctr.c b/security/nss/lib/freebl/ctr.c
new file mode 100644
index 000000000..d5715a505
--- /dev/null
+++ b/security/nss/lib/freebl/ctr.c
@@ -0,0 +1,246 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+#include "prtypes.h"
+#include "blapit.h"
+#include "blapii.h"
+#include "ctr.h"
+#include "pkcs11t.h"
+#include "secerr.h"
+
+#ifdef USE_HW_AES
+#include "intel-aes.h"
+#include "rijndael.h"
+#endif
+
+SECStatus
+CTR_InitContext(CTRContext *ctr, void *context, freeblCipherFunc cipher,
+                const unsigned char *param, unsigned int blocksize)
+{
+    const CK_AES_CTR_PARAMS *ctrParams = (const CK_AES_CTR_PARAMS *)param;
+    /* The counter field must be at least one bit wide and fit in a block. */
+    if (ctrParams->ulCounterBits == 0 ||
+        ctrParams->ulCounterBits > blocksize * PR_BITS_PER_BYTE) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Invariant: 0 < ctr->bufPtr <= blocksize */
+    ctr->checkWrap = PR_FALSE;
+    ctr->bufPtr = blocksize; /* no unused data in the buffer */
+    ctr->cipher = cipher;
+    ctr->context = context;
+    ctr->counterBits = ctrParams->ulCounterBits;
+    if (blocksize > sizeof(ctr->counter) ||
+        blocksize > sizeof(ctrParams->cb)) { /* the block must fit both arrays copied below */
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    PORT_Memcpy(ctr->counter, ctrParams->cb, blocksize);
+    if (ctr->counterBits < 64) { /* keep the start value so CTR_Update can compare against it */
+        PORT_Memcpy(ctr->counterFirst, ctr->counter, blocksize);
+        ctr->checkWrap = PR_TRUE;
+    }
+    return SECSuccess;
+}
+
+CTRContext *
+CTR_CreateContext(void *context, freeblCipherFunc cipher,
+                  const unsigned char *param, unsigned int blocksize)
+{
+    /* first obtain the Counter context, then fill it in */
+    CTRContext *fresh = PORT_ZNew(CTRContext);
+
+    if (fresh == NULL) {
+        return NULL;
+    }
+    if (CTR_InitContext(fresh, context, cipher, param, blocksize) == SECSuccess) {
+        return fresh;
+    }
+
+    /* Initialization failed: CTR_DestroyContext clears the context and,
+     * because PR_TRUE is passed, frees it as well. */
+    CTR_DestroyContext(fresh, PR_TRUE);
+    return NULL;
+}
+
+void
+CTR_DestroyContext(CTRContext *ctr, PRBool freeit)
+{
+    PORT_Memset(ctr, 0, sizeof(CTRContext)); /* clear counter, keystream buffer and the rest */
+    if (freeit) { /* the memory itself is released only on request */
+        PORT_Free(ctr);
+    }
+}
+
+/*
+ * Used by counter mode. Increment the counter block in place. Only the low
+ * counterBits bits of the blocksize-byte, big-endian block form the counter;
+ * the remaining high bits are left untouched, including when the counter
+ * wraps around to zero.
+ *
+ * XXX Rollover is not reported here; callers must detect it themselves.
+ */
+static void
+ctr_GetNextCtr(unsigned char *counter, unsigned int counterBits,
+               unsigned int blocksize)
+{
+    unsigned char *counterPtr = counter + blocksize - 1;
+    unsigned char mask, count;
+
+    PORT_Assert(counterBits <= blocksize * PR_BITS_PER_BYTE);
+    while (counterBits >= PR_BITS_PER_BYTE) {
+        if (++(*(counterPtr--))) {
+            return; /* no carry out of this byte */
+        }
+        counterBits -= PR_BITS_PER_BYTE;
+    }
+    if (counterBits == 0) {
+        return;
+    }
+    /* Add the carry to the low counterBits of the last, partial byte only. */
+    mask = (1 << counterBits) - 1;
+    count = (*counterPtr + 1) & mask;
+    *counterPtr = ((*counterPtr) & ~mask) | count;
+    return;
+}
+
+static void
+ctr_xor(unsigned char *target, const unsigned char *x,
+        const unsigned char *y, unsigned int count)
+{
+    unsigned int k; /* byte index; bytes are combined in ascending order */
+    for (k = 0; k != count; ++k) {
+        target[k] = x[k] ^ y[k];
+    }
+}
+
+SECStatus
+CTR_Update(CTRContext *ctr, unsigned char *outbuf,
+           unsigned int *outlen, unsigned int maxout,
+           const unsigned char *inbuf, unsigned int inlen,
+           unsigned int blocksize)
+{
+    unsigned int tmp;
+    SECStatus rv;
+
+    if (maxout < inlen) {
+        *outlen = inlen; /* report the size the caller needs */
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    *outlen = 0;
+    if (ctr->bufPtr != blocksize) { /* unused keystream is left in ctr->buffer */
+        unsigned int needed = PR_MIN(blocksize - ctr->bufPtr, inlen);
+        ctr_xor(outbuf, inbuf, ctr->buffer + ctr->bufPtr, needed);
+        ctr->bufPtr += needed;
+        outbuf += needed;
+        inbuf += needed;
+        *outlen += needed;
+        inlen -= needed;
+        if (inlen == 0) {
+            return SECSuccess;
+        }
+        PORT_Assert(ctr->bufPtr == blocksize);
+    }
+
+    while (inlen >= blocksize) { /* one keystream block per full input block */
+        rv = (*ctr->cipher)(ctr->context, ctr->buffer, &tmp, blocksize,
+                            ctr->counter, blocksize, blocksize);
+        ctr_GetNextCtr(ctr->counter, ctr->counterBits, blocksize);
+        if (ctr->checkWrap) { /* fail once the counter is back at its first value */
+            if (PORT_Memcmp(ctr->counter, ctr->counterFirst, blocksize) == 0) {
+                PORT_SetError(SEC_ERROR_INVALID_ARGS);
+                return SECFailure;
+            }
+        }
+        if (rv != SECSuccess) {
+            return SECFailure;
+        }
+        ctr_xor(outbuf, inbuf, ctr->buffer, blocksize);
+        outbuf += blocksize;
+        inbuf += blocksize;
+        *outlen += blocksize;
+        inlen -= blocksize;
+    }
+    if (inlen == 0) {
+        return SECSuccess;
+    }
+    rv = (*ctr->cipher)(ctr->context, ctr->buffer, &tmp, blocksize,
+                        ctr->counter, blocksize, blocksize); /* keystream for the trailing partial block */
+    ctr_GetNextCtr(ctr->counter, ctr->counterBits, blocksize);
+    if (ctr->checkWrap) {
+        if (PORT_Memcmp(ctr->counter, ctr->counterFirst, blocksize) == 0) {
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+        }
+    }
+    if (rv != SECSuccess) {
+        return SECFailure;
+    }
+    ctr_xor(outbuf, inbuf, ctr->buffer, inlen);
+    ctr->bufPtr = inlen; /* the rest of ctr->buffer is used by the next call */
+    *outlen += inlen;
+    return SECSuccess;
+}
+
+#if defined(USE_HW_AES) && defined(_MSC_VER)
+SECStatus
+CTR_Update_HW_AES(CTRContext *ctr, unsigned char *outbuf,
+                  unsigned int *outlen, unsigned int maxout,
+                  const unsigned char *inbuf, unsigned int inlen,
+                  unsigned int blocksize)
+{
+    unsigned int fullblocks;
+    unsigned int tmp;
+    SECStatus rv;
+
+    if (maxout < inlen) {
+        *outlen = inlen; /* report the size the caller needs */
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    *outlen = 0;
+    if (ctr->bufPtr != blocksize) { /* unused keystream is left in ctr->buffer */
+        unsigned int needed = PR_MIN(blocksize - ctr->bufPtr, inlen);
+        ctr_xor(outbuf, inbuf, ctr->buffer + ctr->bufPtr, needed);
+        ctr->bufPtr += needed;
+        outbuf += needed;
+        inbuf += needed;
+        *outlen += needed;
+        inlen -= needed;
+        if (inlen == 0) {
+            return SECSuccess;
+        }
+        PORT_Assert(ctr->bufPtr == blocksize);
+    }
+
+    intel_aes_ctr_worker(((AESContext *)(ctr->context))->Nr)(
+        ctr, outbuf, outlen, maxout, inbuf, inlen, blocksize);
+    /* XXX intel_aes_ctr_worker should set *outlen. */
+    PORT_Assert(*outlen == 0);
+    fullblocks = (inlen / blocksize) * blocksize; /* byte count of the whole blocks */
+    *outlen += fullblocks;
+    outbuf += fullblocks;
+    inbuf += fullblocks;
+    inlen -= fullblocks;
+
+    if (inlen == 0) {
+        return SECSuccess;
+    }
+    rv = (*ctr->cipher)(ctr->context, ctr->buffer, &tmp, blocksize,
+                        ctr->counter, blocksize, blocksize); /* keystream for the trailing partial block */
+    ctr_GetNextCtr(ctr->counter, ctr->counterBits, blocksize); /* NOTE(review): no checkWrap test here, unlike CTR_Update */
+    if (rv != SECSuccess) {
+        return SECFailure;
+    }
+    ctr_xor(outbuf, inbuf, ctr->buffer, inlen);
+    ctr->bufPtr = inlen; /* the rest of ctr->buffer is used by the next call */
+    *outlen += inlen;
+    return SECSuccess;
+}
+#endif
diff --git a/security/nss/lib/freebl/ctr.h b/security/nss/lib/freebl/ctr.h
new file mode 100644
index 000000000..a97da144e
--- /dev/null
+++ b/security/nss/lib/freebl/ctr.h
@@ -0,0 +1,53 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef CTR_H
+#define CTR_H 1
+
+#include "blapii.h"
+
+/* This structure is defined in this header because both ctr.c and gcm.c
+ * need it. */
+struct CTRContextStr {
+    freeblCipherFunc cipher; /* invoked on the counter block to fill buffer */
+    void *context;           /* passed to cipher as its first argument */
+    unsigned char counter[MAX_BLOCK_SIZE]; /* current counter block */
+    unsigned char buffer[MAX_BLOCK_SIZE];  /* most recent cipher output */
+    unsigned char counterFirst[MAX_BLOCK_SIZE]; /* counter overflow value */
+    PRBool checkWrap;                           /*check for counter overflow*/
+    unsigned long counterBits; /* width of the counter field, in bits */
+    unsigned int bufPtr;       /* offset of the first unused byte in buffer */
+};
+
+typedef struct CTRContextStr CTRContext;
+
+SECStatus CTR_InitContext(CTRContext *ctr, void *context,
+                          freeblCipherFunc cipher, const unsigned char *param,
+                          unsigned int blocksize);
+
+/*
+ * The context argument is the inner cipher context to use with cipher. The
+ * CTRContext does not own context. context needs to remain valid for as long
+ * as the CTRContext is valid.
+ *
+ * The cipher argument is a block cipher in the ECB encrypt mode.
+ */
+CTRContext *CTR_CreateContext(void *context, freeblCipherFunc cipher,
+                              const unsigned char *param, unsigned int blocksize);
+
+void CTR_DestroyContext(CTRContext *ctr, PRBool freeit);
+
+SECStatus CTR_Update(CTRContext *ctr, unsigned char *outbuf,
+                     unsigned int *outlen, unsigned int maxout,
+                     const unsigned char *inbuf, unsigned int inlen,
+                     unsigned int blocksize);
+
+#ifdef USE_HW_AES
+SECStatus CTR_Update_HW_AES(CTRContext *ctr, unsigned char *outbuf,
+                            unsigned int *outlen, unsigned int maxout,
+                            const unsigned char *inbuf, unsigned int inlen,
+                            unsigned int blocksize);
+#endif
+
+#endif
diff --git a/security/nss/lib/freebl/cts.c b/security/nss/lib/freebl/cts.c
new file mode 100644
index 000000000..99ccebb60
--- /dev/null
+++ b/security/nss/lib/freebl/cts.c
@@ -0,0 +1,307 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+#include "blapit.h"
+#include "blapii.h"
+#include "cts.h"
+#include "secerr.h"
+
+struct CTSContextStr {
+    freeblCipherFunc cipher; /* inner cipher, invoked as cipher(context, ...) */
+    void *context;           /* handed to cipher; never freed by the CTS code */
+    /* iv is seeded from the caller's IV; decrypt overwrites it with Cn after
+     * handling a partial final block. Only read by decrypt. */
+    unsigned char iv[MAX_BLOCK_SIZE];
+};
+
+CTSContext *
+CTS_CreateContext(void *context, freeblCipherFunc cipher,
+                  const unsigned char *iv, unsigned int blocksize)
+{
+    CTSContext *fresh = NULL;
+
+    /* the iv field is a MAX_BLOCK_SIZE array, so a larger block cannot fit */
+    if (blocksize > MAX_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return NULL;
+    }
+    fresh = PORT_ZNew(CTSContext);
+    if (fresh != NULL) { /* on allocation failure NULL is handed back as is */
+        fresh->context = context; /* borrowed: the caller keeps ownership */
+        fresh->cipher = cipher;
+        PORT_Memcpy(fresh->iv, iv, blocksize);
+    }
+    return fresh;
+}
+
+void
+CTS_DestroyContext(CTSContext *cts, PRBool freeit)
+{
+    if (freeit) { /* only cts itself is released; cts->context is untouched */
+        PORT_Free(cts);
+    }
+}
+
+/*
+ * See addendum to NIST SP 800-38A
+ * Generically handle cipher text stealing. Basically this is doing CBC
+ * operations except someone can pass us a partial block.
+ *
+ *  Output Order:
+ *  CS-1:  C1||C2||C3..Cn-1(could be partial)||Cn   (NIST)
+ *  CS-2: pad == 0 C1||C2||C3...Cn-1(is full)||Cn   (Schneier)
+ *  CS-2: pad != 0 C1||C2||C3...Cn||Cn-1(is partial)(Schneier)
+ *  CS-3: C1||C2||C3...Cn||Cn-1(could be partial)   (Kerberos)
+ *
+ * The characteristics of these three options:
+ *  - NIST & Schneier (CS-1 & CS-2) are identical to CBC if there are no
+ * partial blocks on input.
+ *  - Schneier and Kerberos (CS-2 and CS-3) have no embedded partial blocks,
+ * which make decoding easier.
+ *  - NIST & Kerberos (CS-1 and CS-3) have consistent block order independent
+ * of padding.
+ *
+ * PKCS #11 did not specify which version to implement, but points to the NIST
+ * spec, so this code implements CTS-CS-1 from NIST.
+ *
+ * To convert the returned buffer to:
+ *   CS-2 (Schneier): do
+ *       unsigned char tmp[MAX_BLOCK_SIZE];
+ *       pad = *outlen % blocksize;
+ *       if (pad) {
+ *          memcpy(tmp, outbuf+*outlen-blocksize, blocksize);
+ *          memcpy(outbuf+*outlen-pad,outbuf+*outlen-blocksize-pad, pad);
+ *          memcpy(outbuf+*outlen-blocksize-pad, tmp, blocksize);
+ *       }
+ *   CS-3 (Kerberos): do
+ *       unsigned char tmp[MAX_BLOCK_SIZE];
+ *       pad = *outlen % blocksize;
+ *       if (pad == 0) {
+ *           pad = blocksize;
+ *       }
+ *       memcpy(tmp, outbuf+*outlen-blocksize, blocksize);
+ *       memcpy(outbuf+*outlen-pad,outbuf+*outlen-blocksize-pad, pad);
+ *       memcpy(outbuf+*outlen-blocksize-pad, tmp, blocksize);
+ */
+SECStatus
+CTS_EncryptUpdate(CTSContext *cts, unsigned char *outbuf,
+                  unsigned int *outlen, unsigned int maxout,
+                  const unsigned char *inbuf, unsigned int inlen,
+                  unsigned int blocksize)
+{
+    unsigned char lastBlock[MAX_BLOCK_SIZE]; /* zero-padded final block */
+    unsigned int tmp, fullblocks, written;   /* byte counts: keep unsigned */
+    unsigned char *saveout = outbuf;         /* output start, wiped on error */
+    SECStatus rv;
+
+    /* blocksize is a divisor below and bounds the writes into lastBlock */
+    if (blocksize == 0 || blocksize > MAX_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    if (inlen < blocksize) { /* at least one full block is required */
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (maxout < inlen) { /* report the needed size through *outlen */
+        *outlen = inlen;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    fullblocks = (inlen / blocksize) * blocksize;
+    rv = (*cts->cipher)(cts->context, outbuf, outlen, maxout, inbuf,
+                        fullblocks, blocksize);
+    if (rv != SECSuccess) {
+        return SECFailure;
+    }
+    *outlen = fullblocks; /* AES low level doesn't set outlen */
+    inbuf += fullblocks;
+    inlen -= fullblocks; /* inlen is now the length of the partial block */
+    if (inlen == 0) {
+        return SECSuccess;
+    }
+    written = *outlen - (blocksize - inlen); /* offset where Cn is written */
+    outbuf += written;
+    maxout -= written;
+    /*
+     * Here's the CTS magic: we pad our final block with zeros, then do a
+     * CBC encrypt. CBC will xor our plain text with the previous block
+     * (Cn-1), capturing part of that block (Cn-1**) as it xors with the
+     * zero pad. We then write this full block, overwriting (Cn-1**) in our
+     * buffer. This keeps input length == output length, since Cn contains
+     * enough information to recover Cn-1** when we decrypt (at the cost of
+     * some complexity, as you can see in decrypt below). */
+    PORT_Memcpy(lastBlock, inbuf, inlen);
+    PORT_Memset(lastBlock + inlen, 0, blocksize - inlen);
+    rv = (*cts->cipher)(cts->context, outbuf, &tmp, maxout, lastBlock,
+                        blocksize, blocksize);
+    PORT_Memset(lastBlock, 0, blocksize);
+    if (rv == SECSuccess) {
+        *outlen = written + blocksize;
+    } else {
+        PORT_Memset(saveout, 0, written + blocksize);
+    }
+    return rv;
+}
+
+#define XOR_BLOCK(x, y, count)    \
+    for (i = 0; i < (count); i++) \
+    (x)[i] = (x)[i] ^ (y)[i] /* x ^= y bytewise; uses the caller's 'i' */
+
+/*
+ * See addendum to NIST SP 800-38A
+ * Decrypt. Expects CS-1 input. See the comment on the encrypt side
+ * to understand what CS-2 and CS-3 mean.
+ *
+ * To convert the input buffer to CS-1 from ...
+ *   CS-2 (Schneier): do
+ *       unsigned char tmp[MAX_BLOCK_SIZE];
+ *       pad = inlen % blocksize;
+ *       if (pad) {
+ *          memcpy(tmp, inbuf+inlen-blocksize-pad, blocksize);
+ *          memcpy(inbuf+inlen-blocksize-pad,inbuf+inlen-pad, pad);
+ *          memcpy(inbuf+inlen-blocksize, tmp, blocksize);
+ *       }
+ *   CS-3 (Kerberos): do
+ *       unsigned char tmp[MAX_BLOCK_SIZE];
+ *       pad = inlen % blocksize;
+ *       if (pad == 0) {
+ *           pad = blocksize;
+ *       }
+ *       memcpy(tmp, inbuf+inlen-blocksize-pad, blocksize);
+ *       memcpy(inbuf+inlen-blocksize-pad,inbuf+inlen-pad, pad);
+ *       memcpy(inbuf+inlen-blocksize, tmp, blocksize);
+ */
+SECStatus
+CTS_DecryptUpdate(CTSContext *cts, unsigned char *outbuf,
+                  unsigned int *outlen, unsigned int maxout,
+                  const unsigned char *inbuf, unsigned int inlen,
+                  unsigned int blocksize)
+{
+    unsigned char *Pn;
+    unsigned char Cn_2[MAX_BLOCK_SIZE]; /* block Cn-2 */
+    unsigned char Cn_1[MAX_BLOCK_SIZE]; /* block Cn-1 */
+    unsigned char Cn[MAX_BLOCK_SIZE];   /* block Cn   */
+    unsigned char lastBlock[MAX_BLOCK_SIZE];
+    const unsigned char *tmp;
+    unsigned char *saveout = outbuf;
+    unsigned int tmpLen, fullblocks, pad, i;
+    SECStatus rv;
+
+    /* blocksize is a divisor below and bounds every MAX_BLOCK_SIZE buffer */
+    if (blocksize == 0 || blocksize > MAX_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    if (inlen < blocksize) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (maxout < inlen) {
+        *outlen = inlen;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    fullblocks = (inlen / blocksize) * blocksize;
+    /* even though we expect the input to be CS-1, CS-2 is easier to parse,
+     * so convert to CS-2 immediately. NOTE: this is the same code as in
+     * the comment for encrypt. NOTE2: since we can't modify inbuf unless
+     * inbuf and outbuf overlap, just copy inbuf to outbuf and modify it there
+     */
+    pad = inlen - fullblocks;
+    if (pad != 0) {
+        if (inbuf != outbuf) {
+            memcpy(outbuf, inbuf, inlen);
+            /* keep the names so we logically know how we are using the
+             * buffers */
+            inbuf = outbuf;
+        }
+        memcpy(lastBlock, inbuf + inlen - blocksize, blocksize);
+        /* we know inbuf == outbuf now, inbuf is declared const and can't
+         * be the target, so use outbuf for the target here */
+        memcpy(outbuf + inlen - pad, inbuf + inlen - blocksize - pad, pad);
+        memcpy(outbuf + inlen - blocksize - pad, lastBlock, blocksize);
+    }
+    /* save the previous to last block so we can undo the misordered
+     * chaining */
+    tmp = (fullblocks < blocksize * 2) ? cts->iv : inbuf + fullblocks - blocksize * 2;
+    PORT_Memcpy(Cn_2, tmp, blocksize);
+    PORT_Memcpy(Cn, inbuf + fullblocks - blocksize, blocksize);
+    rv = (*cts->cipher)(cts->context, outbuf, outlen, maxout, inbuf,
+                        fullblocks, blocksize);
+    if (rv != SECSuccess) {
+        return SECFailure;
+    }
+    *outlen = fullblocks; /* AES low level doesn't set outlen */
+    inbuf += fullblocks;
+    inlen -= fullblocks;
+    if (inlen == 0) {
+        return SECSuccess;
+    }
+    outbuf += fullblocks;
+
+    /* recover the stolen text */
+    PORT_Memset(lastBlock, 0, blocksize);
+    PORT_Memcpy(lastBlock, inbuf, inlen);
+    PORT_Memcpy(Cn_1, inbuf, inlen);
+    Pn = outbuf - blocksize;
+    /* inbuf points to Cn-1* in the input buffer */
+    /* NOTE: below there are 2 sections marked "make up for the out of order
+     * cbc decryption". You may ask, what is going on here.
+     *   Short answer: CBC automatically xors the plain text with the previous
+     * encrypted block. We are decrypting the last 2 blocks out of order, so
+     * we have to 'back out' the decrypt xor and 'add back' the encrypt xor.
+     *   Long answer: When we encrypted, we encrypted as follows:
+     *       Pn-2, Pn-1, (Pn || 0), but on decryption we can't
+     *  decrypt Cn-1 until we decrypt Cn because part of Cn-1 is stored in
+     *  Cn (see below).  So above we decrypted all the full blocks:
+     *       Cn-2, Cn,
+     *  to get:
+     *       Pn-2, Pn, Except that Pn is not yet correct. On encrypt, we
+     *  xor'd Pn || 0  with Cn-1, but on decrypt we xor'd it with Cn-2
+     *  To recover Pn, we xor the block with Cn-1* || 0 (in last block) and
+     *  Cn-2 to get Pn || Cn-1**. Pn can then be written to the output buffer
+     *  and we can now reunite Cn-1. With the full Cn-1 we can decrypt it,
+     *  but now decrypt is going to xor the decrypted data with Cn instead of
+     *  Cn-2. xoring Cn and Cn-2 restores the original Pn-1 and we can now
+     *  write that out to the buffer */
+
+    /* make up for the out of order CBC decryption */
+    XOR_BLOCK(lastBlock, Cn_2, blocksize);
+    XOR_BLOCK(lastBlock, Pn, blocksize);
+    /* last buf now has Pn || Cn-1**, copy out Pn */
+    PORT_Memcpy(outbuf, lastBlock, inlen);
+    *outlen += inlen;
+    /* copy Cn-1* into last buf to recover Cn-1 */
+    PORT_Memcpy(lastBlock, Cn_1, inlen);
+    /* note: because Cn and Cn-1 were out of order, our pointer to Pn also
+     * points to where Pn-1 needs to reside. From here on out read Pn in
+     * the code as really Pn-1. */
+    rv = (*cts->cipher)(cts->context, Pn, &tmpLen, blocksize, lastBlock,
+                        blocksize, blocksize);
+    if (rv != SECSuccess) {
+        PORT_Memset(lastBlock, 0, blocksize);
+        PORT_Memset(saveout, 0, *outlen);
+        return SECFailure;
+    }
+    /* make up for the out of order CBC decryption */
+    XOR_BLOCK(Pn, Cn_2, blocksize);
+    XOR_BLOCK(Pn, Cn, blocksize);
+    /* reset iv to Cn  */
+    PORT_Memcpy(cts->iv, Cn, blocksize);
+    /* This makes Cn the last block for the next decrypt operation, which
+     * matches the encrypt. We don't care about the contents of last block,
+     * only the side effect of setting the internal IV */
+    (void)(*cts->cipher)(cts->context, lastBlock, &tmpLen, blocksize, Cn,
+                         blocksize, blocksize);
+    /* clear last block. At this point last block contains Pn xor Cn_1 xor
+     * Cn_2, both of which an attacker would know, so we need to clear this
+     * buffer out */
+    PORT_Memset(lastBlock, 0, blocksize);
+    /* Cn, Cn_1, and Cn_2 have encrypted data, so no need to clear them */
+    return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/cts.h b/security/nss/lib/freebl/cts.h
new file mode 100644
index 000000000..a3ec180af
--- /dev/null
+++ b/security/nss/lib/freebl/cts.h
@@ -0,0 +1,33 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef CTS_H
+#define CTS_H 1
+
+#include "blapii.h"
+
+typedef struct CTSContextStr CTSContext;
+
+/*
+ * The context argument is the inner cipher context to use with cipher. The
+ * CTSContext does not own context. context needs to remain valid for as long
+ * as the CTSContext is valid.
+ *
+ * The cipher argument is a block cipher in the CBC mode.
+ */
+CTSContext *CTS_CreateContext(void *context, freeblCipherFunc cipher,
+                              const unsigned char *iv, unsigned int blocksize);
+
+void CTS_DestroyContext(CTSContext *cts, PRBool freeit);
+
+SECStatus CTS_EncryptUpdate(CTSContext *cts, unsigned char *outbuf,
+                            unsigned int *outlen, unsigned int maxout,
+                            const unsigned char *inbuf, unsigned int inlen,
+                            unsigned int blocksize);
+SECStatus CTS_DecryptUpdate(CTSContext *cts, unsigned char *outbuf,
+                            unsigned int *outlen, unsigned int maxout,
+                            const unsigned char *inbuf, unsigned int inlen,
+                            unsigned int blocksize);
+
+#endif
diff --git a/security/nss/lib/freebl/des.c b/security/nss/lib/freebl/des.c
new file mode 100644
index 000000000..fd433bbb2
--- /dev/null
+++ b/security/nss/lib/freebl/des.c
@@ -0,0 +1,676 @@
+/*
+ *  des.c
+ *
+ *  core source file for DES-150 library
+ *  Make key schedule from DES key.
+ *  Encrypt/Decrypt one 8-byte block.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "des.h"
+#include "blapii.h"
+#include <stddef.h> /* for ptrdiff_t */
+/* #define USE_INDEXING 1 */
+
+/*
+ * The tables below are the 8 sbox functions, with the 6-bit input permutation
+ * and the 32-bit output permutation pre-computed.
+ * They are shifted circularly to the left 3 bits, which removes 2 shifts
+ * and an or from each round by reducing the number of sboxes whose
+ * indices cross word boundaries from 2 to 1.
+ */
+
+static const HALF SP[8][64] = {
+    /* Box S1 */
+    { 0x04041000, 0x00000000, 0x00040000, 0x04041010,
+      0x04040010, 0x00041010, 0x00000010, 0x00040000,
+      0x00001000, 0x04041000, 0x04041010, 0x00001000,
+      0x04001010, 0x04040010, 0x04000000, 0x00000010,
+      0x00001010, 0x04001000, 0x04001000, 0x00041000,
+      0x00041000, 0x04040000, 0x04040000, 0x04001010,
+      0x00040010, 0x04000010, 0x04000010, 0x00040010,
+      0x00000000, 0x00001010, 0x00041010, 0x04000000,
+      0x00040000, 0x04041010, 0x00000010, 0x04040000,
+      0x04041000, 0x04000000, 0x04000000, 0x00001000,
+      0x04040010, 0x00040000, 0x00041000, 0x04000010,
+      0x00001000, 0x00000010, 0x04001010, 0x00041010,
+      0x04041010, 0x00040010, 0x04040000, 0x04001010,
+      0x04000010, 0x00001010, 0x00041010, 0x04041000,
+      0x00001010, 0x04001000, 0x04001000, 0x00000000,
+      0x00040010, 0x00041000, 0x00000000, 0x04040010 },
+    /* Box S2 */
+    { 0x00420082, 0x00020002, 0x00020000, 0x00420080,
+      0x00400000, 0x00000080, 0x00400082, 0x00020082,
+      0x00000082, 0x00420082, 0x00420002, 0x00000002,
+      0x00020002, 0x00400000, 0x00000080, 0x00400082,
+      0x00420000, 0x00400080, 0x00020082, 0x00000000,
+      0x00000002, 0x00020000, 0x00420080, 0x00400002,
+      0x00400080, 0x00000082, 0x00000000, 0x00420000,
+      0x00020080, 0x00420002, 0x00400002, 0x00020080,
+      0x00000000, 0x00420080, 0x00400082, 0x00400000,
+      0x00020082, 0x00400002, 0x00420002, 0x00020000,
+      0x00400002, 0x00020002, 0x00000080, 0x00420082,
+      0x00420080, 0x00000080, 0x00020000, 0x00000002,
+      0x00020080, 0x00420002, 0x00400000, 0x00000082,
+      0x00400080, 0x00020082, 0x00000082, 0x00400080,
+      0x00420000, 0x00000000, 0x00020002, 0x00020080,
+      0x00000002, 0x00400082, 0x00420082, 0x00420000 },
+    /* Box S3 */
+    { 0x00000820, 0x20080800, 0x00000000, 0x20080020,
+      0x20000800, 0x00000000, 0x00080820, 0x20000800,
+      0x00080020, 0x20000020, 0x20000020, 0x00080000,
+      0x20080820, 0x00080020, 0x20080000, 0x00000820,
+      0x20000000, 0x00000020, 0x20080800, 0x00000800,
+      0x00080800, 0x20080000, 0x20080020, 0x00080820,
+      0x20000820, 0x00080800, 0x00080000, 0x20000820,
+      0x00000020, 0x20080820, 0x00000800, 0x20000000,
+      0x20080800, 0x20000000, 0x00080020, 0x00000820,
+      0x00080000, 0x20080800, 0x20000800, 0x00000000,
+      0x00000800, 0x00080020, 0x20080820, 0x20000800,
+      0x20000020, 0x00000800, 0x00000000, 0x20080020,
+      0x20000820, 0x00080000, 0x20000000, 0x20080820,
+      0x00000020, 0x00080820, 0x00080800, 0x20000020,
+      0x20080000, 0x20000820, 0x00000820, 0x20080000,
+      0x00080820, 0x00000020, 0x20080020, 0x00080800 },
+    /* Box S4 */
+    { 0x02008004, 0x00008204, 0x00008204, 0x00000200,
+      0x02008200, 0x02000204, 0x02000004, 0x00008004,
+      0x00000000, 0x02008000, 0x02008000, 0x02008204,
+      0x00000204, 0x00000000, 0x02000200, 0x02000004,
+      0x00000004, 0x00008000, 0x02000000, 0x02008004,
+      0x00000200, 0x02000000, 0x00008004, 0x00008200,
+      0x02000204, 0x00000004, 0x00008200, 0x02000200,
+      0x00008000, 0x02008200, 0x02008204, 0x00000204,
+      0x02000200, 0x02000004, 0x02008000, 0x02008204,
+      0x00000204, 0x00000000, 0x00000000, 0x02008000,
+      0x00008200, 0x02000200, 0x02000204, 0x00000004,
+      0x02008004, 0x00008204, 0x00008204, 0x00000200,
+      0x02008204, 0x00000204, 0x00000004, 0x00008000,
+      0x02000004, 0x00008004, 0x02008200, 0x02000204,
+      0x00008004, 0x00008200, 0x02000000, 0x02008004,
+      0x00000200, 0x02000000, 0x00008000, 0x02008200 },
+    /* Box S5 */
+    { 0x00000400, 0x08200400, 0x08200000, 0x08000401,
+      0x00200000, 0x00000400, 0x00000001, 0x08200000,
+      0x00200401, 0x00200000, 0x08000400, 0x00200401,
+      0x08000401, 0x08200001, 0x00200400, 0x00000001,
+      0x08000000, 0x00200001, 0x00200001, 0x00000000,
+      0x00000401, 0x08200401, 0x08200401, 0x08000400,
+      0x08200001, 0x00000401, 0x00000000, 0x08000001,
+      0x08200400, 0x08000000, 0x08000001, 0x00200400,
+      0x00200000, 0x08000401, 0x00000400, 0x08000000,
+      0x00000001, 0x08200000, 0x08000401, 0x00200401,
+      0x08000400, 0x00000001, 0x08200001, 0x08200400,
+      0x00200401, 0x00000400, 0x08000000, 0x08200001,
+      0x08200401, 0x00200400, 0x08000001, 0x08200401,
+      0x08200000, 0x00000000, 0x00200001, 0x08000001,
+      0x00200400, 0x08000400, 0x00000401, 0x00200000,
+      0x00000000, 0x00200001, 0x08200400, 0x00000401 },
+    /* Box S6 */
+    { 0x80000040, 0x81000000, 0x00010000, 0x81010040,
+      0x81000000, 0x00000040, 0x81010040, 0x01000000,
+      0x80010000, 0x01010040, 0x01000000, 0x80000040,
+      0x01000040, 0x80010000, 0x80000000, 0x00010040,
+      0x00000000, 0x01000040, 0x80010040, 0x00010000,
+      0x01010000, 0x80010040, 0x00000040, 0x81000040,
+      0x81000040, 0x00000000, 0x01010040, 0x81010000,
+      0x00010040, 0x01010000, 0x81010000, 0x80000000,
+      0x80010000, 0x00000040, 0x81000040, 0x01010000,
+      0x81010040, 0x01000000, 0x00010040, 0x80000040,
+      0x01000000, 0x80010000, 0x80000000, 0x00010040,
+      0x80000040, 0x81010040, 0x01010000, 0x81000000,
+      0x01010040, 0x81010000, 0x00000000, 0x81000040,
+      0x00000040, 0x00010000, 0x81000000, 0x01010040,
+      0x00010000, 0x01000040, 0x80010040, 0x00000000,
+      0x81010000, 0x80000000, 0x01000040, 0x80010040 },
+    /* Box S7 */
+    { 0x00800000, 0x10800008, 0x10002008, 0x00000000,
+      0x00002000, 0x10002008, 0x00802008, 0x10802000,
+      0x10802008, 0x00800000, 0x00000000, 0x10000008,
+      0x00000008, 0x10000000, 0x10800008, 0x00002008,
+      0x10002000, 0x00802008, 0x00800008, 0x10002000,
+      0x10000008, 0x10800000, 0x10802000, 0x00800008,
+      0x10800000, 0x00002000, 0x00002008, 0x10802008,
+      0x00802000, 0x00000008, 0x10000000, 0x00802000,
+      0x10000000, 0x00802000, 0x00800000, 0x10002008,
+      0x10002008, 0x10800008, 0x10800008, 0x00000008,
+      0x00800008, 0x10000000, 0x10002000, 0x00800000,
+      0x10802000, 0x00002008, 0x00802008, 0x10802000,
+      0x00002008, 0x10000008, 0x10802008, 0x10800000,
+      0x00802000, 0x00000000, 0x00000008, 0x10802008,
+      0x00000000, 0x00802008, 0x10800000, 0x00002000,
+      0x10000008, 0x10002000, 0x00002000, 0x00800008 },
+    /* Box S8 */
+    { 0x40004100, 0x00004000, 0x00100000, 0x40104100,
+      0x40000000, 0x40004100, 0x00000100, 0x40000000,
+      0x00100100, 0x40100000, 0x40104100, 0x00104000,
+      0x40104000, 0x00104100, 0x00004000, 0x00000100,
+      0x40100000, 0x40000100, 0x40004000, 0x00004100,
+      0x00104000, 0x00100100, 0x40100100, 0x40104000,
+      0x00004100, 0x00000000, 0x00000000, 0x40100100,
+      0x40000100, 0x40004000, 0x00104100, 0x00100000,
+      0x00104100, 0x00100000, 0x40104000, 0x00004000,
+      0x00000100, 0x40100100, 0x00004000, 0x00104100,
+      0x40004000, 0x00000100, 0x40000100, 0x40100000,
+      0x40100100, 0x40000000, 0x00100000, 0x40004100,
+      0x00000000, 0x40104100, 0x00100100, 0x40000100,
+      0x40100000, 0x40004000, 0x40004100, 0x00000000,
+      0x40104100, 0x00104000, 0x00104000, 0x00004100,
+      0x00004100, 0x00100100, 0x40000000, 0x40104000 }
+};
+
+static const HALF PC2[8][64] = {
+    /* table 0 */
+    { 0x00000000, 0x00001000, 0x04000000, 0x04001000,
+      0x00100000, 0x00101000, 0x04100000, 0x04101000,
+      0x00008000, 0x00009000, 0x04008000, 0x04009000,
+      0x00108000, 0x00109000, 0x04108000, 0x04109000,
+      0x00000004, 0x00001004, 0x04000004, 0x04001004,
+      0x00100004, 0x00101004, 0x04100004, 0x04101004,
+      0x00008004, 0x00009004, 0x04008004, 0x04009004,
+      0x00108004, 0x00109004, 0x04108004, 0x04109004,
+      0x08000000, 0x08001000, 0x0c000000, 0x0c001000,
+      0x08100000, 0x08101000, 0x0c100000, 0x0c101000,
+      0x08008000, 0x08009000, 0x0c008000, 0x0c009000,
+      0x08108000, 0x08109000, 0x0c108000, 0x0c109000,
+      0x08000004, 0x08001004, 0x0c000004, 0x0c001004,
+      0x08100004, 0x08101004, 0x0c100004, 0x0c101004,
+      0x08008004, 0x08009004, 0x0c008004, 0x0c009004,
+      0x08108004, 0x08109004, 0x0c108004, 0x0c109004 },
+    /* table 1 */
+    { 0x00000000, 0x00002000, 0x80000000, 0x80002000,
+      0x00000008, 0x00002008, 0x80000008, 0x80002008,
+      0x00200000, 0x00202000, 0x80200000, 0x80202000,
+      0x00200008, 0x00202008, 0x80200008, 0x80202008,
+      0x20000000, 0x20002000, 0xa0000000, 0xa0002000,
+      0x20000008, 0x20002008, 0xa0000008, 0xa0002008,
+      0x20200000, 0x20202000, 0xa0200000, 0xa0202000,
+      0x20200008, 0x20202008, 0xa0200008, 0xa0202008,
+      0x00000400, 0x00002400, 0x80000400, 0x80002400,
+      0x00000408, 0x00002408, 0x80000408, 0x80002408,
+      0x00200400, 0x00202400, 0x80200400, 0x80202400,
+      0x00200408, 0x00202408, 0x80200408, 0x80202408,
+      0x20000400, 0x20002400, 0xa0000400, 0xa0002400,
+      0x20000408, 0x20002408, 0xa0000408, 0xa0002408,
+      0x20200400, 0x20202400, 0xa0200400, 0xa0202400,
+      0x20200408, 0x20202408, 0xa0200408, 0xa0202408 },
+    /* table 2 */
+    { 0x00000000, 0x00004000, 0x00000020, 0x00004020,
+      0x00080000, 0x00084000, 0x00080020, 0x00084020,
+      0x00000800, 0x00004800, 0x00000820, 0x00004820,
+      0x00080800, 0x00084800, 0x00080820, 0x00084820,
+      0x00000010, 0x00004010, 0x00000030, 0x00004030,
+      0x00080010, 0x00084010, 0x00080030, 0x00084030,
+      0x00000810, 0x00004810, 0x00000830, 0x00004830,
+      0x00080810, 0x00084810, 0x00080830, 0x00084830,
+      0x00400000, 0x00404000, 0x00400020, 0x00404020,
+      0x00480000, 0x00484000, 0x00480020, 0x00484020,
+      0x00400800, 0x00404800, 0x00400820, 0x00404820,
+      0x00480800, 0x00484800, 0x00480820, 0x00484820,
+      0x00400010, 0x00404010, 0x00400030, 0x00404030,
+      0x00480010, 0x00484010, 0x00480030, 0x00484030,
+      0x00400810, 0x00404810, 0x00400830, 0x00404830,
+      0x00480810, 0x00484810, 0x00480830, 0x00484830 },
+    /* table 3 */
+    { 0x00000000, 0x40000000, 0x00000080, 0x40000080,
+      0x00040000, 0x40040000, 0x00040080, 0x40040080,
+      0x00000040, 0x40000040, 0x000000c0, 0x400000c0,
+      0x00040040, 0x40040040, 0x000400c0, 0x400400c0,
+      0x10000000, 0x50000000, 0x10000080, 0x50000080,
+      0x10040000, 0x50040000, 0x10040080, 0x50040080,
+      0x10000040, 0x50000040, 0x100000c0, 0x500000c0,
+      0x10040040, 0x50040040, 0x100400c0, 0x500400c0,
+      0x00800000, 0x40800000, 0x00800080, 0x40800080,
+      0x00840000, 0x40840000, 0x00840080, 0x40840080,
+      0x00800040, 0x40800040, 0x008000c0, 0x408000c0,
+      0x00840040, 0x40840040, 0x008400c0, 0x408400c0,
+      0x10800000, 0x50800000, 0x10800080, 0x50800080,
+      0x10840000, 0x50840000, 0x10840080, 0x50840080,
+      0x10800040, 0x50800040, 0x108000c0, 0x508000c0,
+      0x10840040, 0x50840040, 0x108400c0, 0x508400c0 },
+    /* table 4 */
+    { 0x00000000, 0x00000008, 0x08000000, 0x08000008,
+      0x00040000, 0x00040008, 0x08040000, 0x08040008,
+      0x00002000, 0x00002008, 0x08002000, 0x08002008,
+      0x00042000, 0x00042008, 0x08042000, 0x08042008,
+      0x80000000, 0x80000008, 0x88000000, 0x88000008,
+      0x80040000, 0x80040008, 0x88040000, 0x88040008,
+      0x80002000, 0x80002008, 0x88002000, 0x88002008,
+      0x80042000, 0x80042008, 0x88042000, 0x88042008,
+      0x00080000, 0x00080008, 0x08080000, 0x08080008,
+      0x000c0000, 0x000c0008, 0x080c0000, 0x080c0008,
+      0x00082000, 0x00082008, 0x08082000, 0x08082008,
+      0x000c2000, 0x000c2008, 0x080c2000, 0x080c2008,
+      0x80080000, 0x80080008, 0x88080000, 0x88080008,
+      0x800c0000, 0x800c0008, 0x880c0000, 0x880c0008,
+      0x80082000, 0x80082008, 0x88082000, 0x88082008,
+      0x800c2000, 0x800c2008, 0x880c2000, 0x880c2008 },
+    /* table 5 */
+    { 0x00000000, 0x00400000, 0x00008000, 0x00408000,
+      0x40000000, 0x40400000, 0x40008000, 0x40408000,
+      0x00000020, 0x00400020, 0x00008020, 0x00408020,
+      0x40000020, 0x40400020, 0x40008020, 0x40408020,
+      0x00001000, 0x00401000, 0x00009000, 0x00409000,
+      0x40001000, 0x40401000, 0x40009000, 0x40409000,
+      0x00001020, 0x00401020, 0x00009020, 0x00409020,
+      0x40001020, 0x40401020, 0x40009020, 0x40409020,
+      0x00100000, 0x00500000, 0x00108000, 0x00508000,
+      0x40100000, 0x40500000, 0x40108000, 0x40508000,
+      0x00100020, 0x00500020, 0x00108020, 0x00508020,
+      0x40100020, 0x40500020, 0x40108020, 0x40508020,
+      0x00101000, 0x00501000, 0x00109000, 0x00509000,
+      0x40101000, 0x40501000, 0x40109000, 0x40509000,
+      0x00101020, 0x00501020, 0x00109020, 0x00509020,
+      0x40101020, 0x40501020, 0x40109020, 0x40509020 },
+    /* table 6 */
+    { 0x00000000, 0x00000040, 0x04000000, 0x04000040,
+      0x00000800, 0x00000840, 0x04000800, 0x04000840,
+      0x00800000, 0x00800040, 0x04800000, 0x04800040,
+      0x00800800, 0x00800840, 0x04800800, 0x04800840,
+      0x10000000, 0x10000040, 0x14000000, 0x14000040,
+      0x10000800, 0x10000840, 0x14000800, 0x14000840,
+      0x10800000, 0x10800040, 0x14800000, 0x14800040,
+      0x10800800, 0x10800840, 0x14800800, 0x14800840,
+      0x00000080, 0x000000c0, 0x04000080, 0x040000c0,
+      0x00000880, 0x000008c0, 0x04000880, 0x040008c0,
+      0x00800080, 0x008000c0, 0x04800080, 0x048000c0,
+      0x00800880, 0x008008c0, 0x04800880, 0x048008c0,
+      0x10000080, 0x100000c0, 0x14000080, 0x140000c0,
+      0x10000880, 0x100008c0, 0x14000880, 0x140008c0,
+      0x10800080, 0x108000c0, 0x14800080, 0x148000c0,
+      0x10800880, 0x108008c0, 0x14800880, 0x148008c0 },
+    /* table 7 */
+    { 0x00000000, 0x00000010, 0x00000400, 0x00000410,
+      0x00000004, 0x00000014, 0x00000404, 0x00000414,
+      0x00004000, 0x00004010, 0x00004400, 0x00004410,
+      0x00004004, 0x00004014, 0x00004404, 0x00004414,
+      0x20000000, 0x20000010, 0x20000400, 0x20000410,
+      0x20000004, 0x20000014, 0x20000404, 0x20000414,
+      0x20004000, 0x20004010, 0x20004400, 0x20004410,
+      0x20004004, 0x20004014, 0x20004404, 0x20004414,
+      0x00200000, 0x00200010, 0x00200400, 0x00200410,
+      0x00200004, 0x00200014, 0x00200404, 0x00200414,
+      0x00204000, 0x00204010, 0x00204400, 0x00204410,
+      0x00204004, 0x00204014, 0x00204404, 0x00204414,
+      0x20200000, 0x20200010, 0x20200400, 0x20200410,
+      0x20200004, 0x20200014, 0x20200404, 0x20200414,
+      0x20204000, 0x20204010, 0x20204400, 0x20204410,
+      0x20204004, 0x20204014, 0x20204404, 0x20204414 }
+};
+
+/*
+ * The PC-1 Permutation
+ * If we number the bits of the 8 bytes of key input like this (in octal):
+ *     00 01 02 03 04 05 06 07
+ *     10 11 12 13 14 15 16 17
+ *     20 21 22 23 24 25 26 27
+ *     30 31 32 33 34 35 36 37
+ *     40 41 42 43 44 45 46 47
+ *     50 51 52 53 54 55 56 57
+ *     60 61 62 63 64 65 66 67
+ *     70 71 72 73 74 75 76 77
+ * then after the PC-1 permutation,
+ * C0 is
+ *     70 60 50 40 30 20 10 00
+ *     71 61 51 41 31 21 11 01
+ *     72 62 52 42 32 22 12 02
+ *     73 63 53 43
+ * D0 is
+ *     76 66 56 46 36 26 16 06
+ *     75 65 55 45 35 25 15 05
+ *     74 64 54 44 34 24 14 04
+ *                 33 23 13 03
+ * and these parity bits have been discarded:
+ *     77 67 57 47 37 27 17 07
+ *
+ * We achieve this by flipping the input matrix about the diagonal from 70-07,
+ * getting left =
+ *     77 67 57 47 37 27 17 07  (these are the parity bits)
+ *     76 66 56 46 36 26 16 06
+ *     75 65 55 45 35 25 15 05
+ *     74 64 54 44 34 24 14 04
+ * right =
+ *     73 63 53 43 33 23 13 03
+ *     72 62 52 42 32 22 12 02
+ *     71 61 51 41 31 21 11 01
+ *     70 60 50 40 30 20 10 00
+ * then byte swap right, ala htonl() on a little endian machine.
+ * right =
+ *     70 60 50 40 30 20 10 00
+ *     71 61 51 41 31 21 11 01
+ *     72 62 52 42 32 22 12 02
+ *     73 63 53 43 33 23 13 03
+ * then
+ *     c0 = right >> 4;
+ *     d0 = ((left & 0x00ffffff) << 4) | (right & 0xf);
+*/
+
+#define FLIP_RIGHT_DIAGONAL(word, temp) /* two masked xor-swap passes */     \
+    temp = (word ^ (word >> 18)) & 0x00003333; /* bits differing 18 apart */ \
+    word ^= temp | (temp << 18);               /* exchange those bits */     \
+    temp = (word ^ (word >> 9)) & 0x00550055;  /* bits differing 9 apart */  \
+    word ^= temp | (temp << 9);                /* exchange those bits */
+
+#if defined(__GNUC__) && defined(NSS_X86_OR_X64)
+#define BYTESWAP(word, temp) /* x86/x64: bswap instruction; temp unused */ \
+    __asm("bswap  %0"        \
+          : "+r"(word));
+#elif (_MSC_VER >= 1300) && defined(NSS_X86_OR_X64)
+#include <stdlib.h> /* declares _byteswap_ulong */
+#pragma intrinsic(_byteswap_ulong)
+#define BYTESWAP(word, temp) /* MSVC intrinsic; temp unused */ \
+    word = _byteswap_ulong(word);
+#elif defined(__GNUC__) && (defined(__thumb2__) ||         \
+                            (!defined(__thumb__) &&        \
+                             (defined(__ARM_ARCH_6__) ||   \
+                              defined(__ARM_ARCH_6J__) ||  \
+                              defined(__ARM_ARCH_6K__) ||  \
+                              defined(__ARM_ARCH_6Z__) ||  \
+                              defined(__ARM_ARCH_6ZK__) || \
+                              defined(__ARM_ARCH_6T2__) || \
+                              defined(__ARM_ARCH_7__) ||   \
+                              defined(__ARM_ARCH_7A__) ||  \
+                              defined(__ARM_ARCH_7R__))))
+#define BYTESWAP(word, temp) /* ARM: rev instruction; temp unused */ \
+    __asm("rev %0, %0"       \
+          : "+r"(word));
+#else /* portable: swap the 16-bit halves, then the bytes inside each half */
+#define BYTESWAP(word, temp)            \
+    word = (word >> 16) | (word << 16); \
+    temp = 0x00ff00ff;                  \
+    word = ((word & temp) << 8) | ((word >> 8) & temp);
+#endif
+
+#define PC1(left, right, c0, d0, temp) /* key words -> c0/d0 */              \
+    right ^= temp = ((left >> 4) ^ right) & 0x0f0f0f0f; /* swap left's high */ \
+    left ^= temp << 4; /* nibbles with right's low nibbles, byte by byte */  \
+    FLIP_RIGHT_DIAGONAL(left, temp);                    \
+    FLIP_RIGHT_DIAGONAL(right, temp);                   \
+    BYTESWAP(right, temp);                              \
+    c0 = right >> 4; /* everything in right above its low 4 bits */          \
+    d0 = ((left & 0x00ffffff) << 4) | (right & 0xf); /* parity byte dropped */
+
+#define LEFT_SHIFT_1(reg) (((reg << 1) | (reg >> 27)) & 0x0FFFFFFF) /* rotate a 28-bit value left by 1 */
+#define LEFT_SHIFT_2(reg) (((reg << 2) | (reg >> 26)) & 0x0FFFFFFF) /* rotate a 28-bit value left by 2 */
+
+/*
+ * DES_MakeSchedule: expand the 8-byte key into 16 round-key pairs (32 HALFs)
+ * in ks; written first-to-last for DES_ENCRYPT, last-to-first otherwise. */
+
+void
+DES_MakeSchedule(HALF *ks, const BYTE *key, DESDirection direction)
+{
+    register HALF left, right;
+    register HALF c0, d0;
+    register HALF temp;
+    int delta;       /* signed byte stride from one ks pair to the next */
+    unsigned int ls; /* shift schedule: one bit per round, consumed LSB first */
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+    left = HALFPTR(key)[0];
+    right = HALFPTR(key)[1];
+#if defined(IS_LITTLE_ENDIAN)
+    BYTESWAP(left, temp);
+    BYTESWAP(right, temp);
+#endif
+#else
+    if (((ptrdiff_t)key & 0x03) == 0) { /* 4-byte aligned key: load it as two HALFs */
+        left = HALFPTR(key)[0];
+        right = HALFPTR(key)[1];
+#if defined(IS_LITTLE_ENDIAN)
+        BYTESWAP(left, temp);
+        BYTESWAP(right, temp);
+#endif
+    } else { /* otherwise assemble the two big-endian words byte by byte */
+        left = ((HALF)key[0] << 24) | ((HALF)key[1] << 16) |
+               ((HALF)key[2] << 8) | key[3];
+        right = ((HALF)key[4] << 24) | ((HALF)key[5] << 16) |
+                ((HALF)key[6] << 8) | key[7];
+    }
+#endif
+
+    PC1(left, right, c0, d0, temp);
+
+    if (direction == DES_ENCRYPT) {
+        delta = 2 * (int)sizeof(HALF);
+    } else {
+        ks += 30; /* start at the last pair and walk backwards */
+        delta = (-2) * (int)sizeof(HALF);
+    }
+
+    for (ls = 0x8103; ls; ls >>= 1) { /* 16 passes; a set bit selects the 1-bit rotate */
+        if (ls & 1) {
+            c0 = LEFT_SHIFT_1(c0);
+            d0 = LEFT_SHIFT_1(d0);
+        } else {
+            c0 = LEFT_SHIFT_2(c0);
+            d0 = LEFT_SHIFT_2(d0);
+        }
+
+#ifdef USE_INDEXING
+#define PC2LOOKUP(b, c) PC2[b][c]
+
+        left = PC2LOOKUP(0, ((c0 >> 22) & 0x3F));
+        left |= PC2LOOKUP(1, ((c0 >> 13) & 0x3F));
+        left |= PC2LOOKUP(2, ((c0 >> 4) & 0x38) | (c0 & 0x7));
+        left |= PC2LOOKUP(3, ((c0 >> 18) & 0xC) | ((c0 >> 11) & 0x3) | (c0 & 0x30));
+
+        right = PC2LOOKUP(4, ((d0 >> 22) & 0x3F));
+        right |= PC2LOOKUP(5, ((d0 >> 15) & 0x30) | ((d0 >> 14) & 0xf));
+        right |= PC2LOOKUP(6, ((d0 >> 7) & 0x3F));
+        right |= PC2LOOKUP(7, ((d0 >> 1) & 0x3C) | (d0 & 0x3));
+#else
+#define PC2LOOKUP(b, c) *(HALF *)((BYTE *)&PC2[b][0] + (c))
+
+        left = PC2LOOKUP(0, ((c0 >> 20) & 0xFC));
+        left |= PC2LOOKUP(1, ((c0 >> 11) & 0xFC));
+        left |= PC2LOOKUP(2, ((c0 >> 2) & 0xE0) | ((c0 << 2) & 0x1C));
+        left |= PC2LOOKUP(3, ((c0 >> 16) & 0x30) | ((c0 >> 9) & 0xC) | ((c0 << 2) & 0xC0));
+
+        right = PC2LOOKUP(4, ((d0 >> 20) & 0xFC));
+        right |= PC2LOOKUP(5, ((d0 >> 13) & 0xC0) | ((d0 >> 12) & 0x3C));
+        right |= PC2LOOKUP(6, ((d0 >> 5) & 0xFC));
+        right |= PC2LOOKUP(7, ((d0 << 1) & 0xF0) | ((d0 << 2) & 0x0C));
+#endif
+        /* left  contains key bits for S1 S3 S2 S4 */
+        /* right contains key bits for S6 S8 S5 S7 */
+        temp = (left << 16)     /* S2 S4 XX XX */
+               | (right >> 16); /* XX XX S6 S8 */
+        ks[0] = temp;
+
+        temp = (left & 0xffff0000)     /* S1 S3 XX XX */
+               | (right & 0x0000ffff); /* XX XX S5 S7 */
+        ks[1] = temp;
+
+        ks = (HALF *)((BYTE *)ks + delta); /* move one pair forward or backward */
+    }
+}
+
+/*
+ * The DES Initial Permutation
+ * if we number the bits of the 8 bytes of input like this (in octal):
+ *     00 01 02 03 04 05 06 07
+ *     10 11 12 13 14 15 16 17
+ *     20 21 22 23 24 25 26 27
+ *     30 31 32 33 34 35 36 37
+ *     40 41 42 43 44 45 46 47
+ *     50 51 52 53 54 55 56 57
+ *     60 61 62 63 64 65 66 67
+ *     70 71 72 73 74 75 76 77
+ * then after the initial permutation, they will be in this order.
+ *     71 61 51 41 31 21 11 01
+ *     73 63 53 43 33 23 13 03
+ *     75 65 55 45 35 25 15 05
+ *     77 67 57 47 37 27 17 07
+ *     70 60 50 40 30 20 10 00
+ *     72 62 52 42 32 22 12 02
+ *     74 64 54 44 34 24 14 04
+ *     76 66 56 46 36 26 16 06
+ *
+ * One way to do this is in two steps:
+ * 1. Flip this matrix about the diagonal from 70-07 as done for PC1.
+ * 2. Rearrange the bytes (rows in the matrix above) with the following code.
+ *
+ * #define swapHiLo(word, temp) \
+ *   temp  = (word ^ (word >> 24)) & 0x000000ff; \
+ *   word ^=  temp | (temp << 24);
+ *
+ *   right ^= temp = ((left << 8) ^ right) & 0xff00ff00;
+ *   left  ^= temp >> 8;
+ *   swapHiLo(left, temp);
+ *   swapHiLo(right,temp);
+ *
+ * However, the two steps can be combined, so that the rows are rearranged
+ * while the matrix is being flipped, reducing the number of bit exchange
+ * operations from 8 to 5.
+ *
+ * Initial Permutation */
+#define IP(left, right, temp) /* five masked exchanges between left and right; temp is scratch */ \
+    right ^= temp = ((left >> 4) ^ right) & 0x0f0f0f0f;  \
+    left ^= temp << 4;                                   \
+    right ^= temp = ((left >> 16) ^ right) & 0x0000ffff; \
+    left ^= temp << 16;                                  \
+    right ^= temp = ((left << 2) ^ right) & 0xcccccccc;  \
+    left ^= temp >> 2;                                   \
+    right ^= temp = ((left << 8) ^ right) & 0xff00ff00;  \
+    left ^= temp >> 8;                                   \
+    right ^= temp = ((left >> 1) ^ right) & 0x55555555;  \
+    left ^= temp << 1;
+
+/* The Final (Inverse Initial) permutation is done by reversing the
+** steps of the Initial Permutation
+*/
+
+#define FP(left, right, temp) /* the same five exchanges as IP, applied in the opposite order */ \
+    right ^= temp = ((left >> 1) ^ right) & 0x55555555;  \
+    left ^= temp << 1;                                   \
+    right ^= temp = ((left << 8) ^ right) & 0xff00ff00;  \
+    left ^= temp >> 8;                                   \
+    right ^= temp = ((left << 2) ^ right) & 0xcccccccc;  \
+    left ^= temp >> 2;                                   \
+    right ^= temp = ((left >> 16) ^ right) & 0x0000ffff; \
+    left ^= temp << 16;                                  \
+    right ^= temp = ((left >> 4) ^ right) & 0x0f0f0f0f;  \
+    left ^= temp << 4;
+
+void NO_SANITIZE_ALIGNMENT
+DES_Do1Block(HALF *ks, const BYTE *inbuf, BYTE *outbuf)
+{ /* transforms exactly one 8-byte block from inbuf into outbuf using schedule ks */
+    register HALF left, right;
+    register HALF temp;
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+    left = HALFPTR(inbuf)[0];
+    right = HALFPTR(inbuf)[1];
+#if defined(IS_LITTLE_ENDIAN)
+    BYTESWAP(left, temp);
+    BYTESWAP(right, temp);
+#endif
+#else
+    if (((ptrdiff_t)inbuf & 0x03) == 0) { /* 4-byte aligned input: load as two HALFs */
+        left = HALFPTR(inbuf)[0];
+        right = HALFPTR(inbuf)[1];
+#if defined(IS_LITTLE_ENDIAN)
+        BYTESWAP(left, temp);
+        BYTESWAP(right, temp);
+#endif
+    } else { /* otherwise assemble the two big-endian words byte by byte */
+        left = ((HALF)inbuf[0] << 24) | ((HALF)inbuf[1] << 16) |
+               ((HALF)inbuf[2] << 8) | inbuf[3];
+        right = ((HALF)inbuf[4] << 24) | ((HALF)inbuf[5] << 16) |
+                ((HALF)inbuf[6] << 8) | inbuf[7];
+    }
+#endif
+
+    IP(left, right, temp);
+
+    /* shift the values left circularly 3 bits. */
+    left = (left << 3) | (left >> 29);
+    right = (right << 3) | (right >> 29);
+
+#ifdef USE_INDEXING
+#define KSLOOKUP(s, b) SP[s][((temp >> (b + 2)) & 0x3f)]
+#else
+#define KSLOOKUP(s, b) *(HALF *)((BYTE *)&SP[s][0] + ((temp >> b) & 0xFC))
+#endif
+#define ROUND(out, in, r) /* out ^= eight SP lookups of in mixed with key pair r */ \
+    temp = in ^ ks[2 * r];                           \
+    out ^= KSLOOKUP(1, 24);                          \
+    out ^= KSLOOKUP(3, 16);                          \
+    out ^= KSLOOKUP(5, 8);                           \
+    out ^= KSLOOKUP(7, 0);                           \
+    temp = ((in >> 4) | (in << 28)) ^ ks[2 * r + 1]; \
+    out ^= KSLOOKUP(0, 24);                          \
+    out ^= KSLOOKUP(2, 16);                          \
+    out ^= KSLOOKUP(4, 8);                           \
+    out ^= KSLOOKUP(6, 0);
+
+    /* Do the 16 Feistel rounds */
+    ROUND(left, right, 0)
+    ROUND(right, left, 1)
+    ROUND(left, right, 2)
+    ROUND(right, left, 3)
+    ROUND(left, right, 4)
+    ROUND(right, left, 5)
+    ROUND(left, right, 6)
+    ROUND(right, left, 7)
+    ROUND(left, right, 8)
+    ROUND(right, left, 9)
+    ROUND(left, right, 10)
+    ROUND(right, left, 11)
+    ROUND(left, right, 12)
+    ROUND(right, left, 13)
+    ROUND(left, right, 14)
+    ROUND(right, left, 15)
+
+    /* now shift circularly right 3 bits to undo the shifting done
+    ** above.  switch left and right here.
+    */
+    temp = (left >> 3) | (left << 29);
+    left = (right >> 3) | (right << 29);
+    right = temp;
+
+    FP(left, right, temp);
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+#if defined(IS_LITTLE_ENDIAN)
+    BYTESWAP(left, temp);
+    BYTESWAP(right, temp);
+#endif
+    HALFPTR(outbuf)
+    [0] = left;
+    HALFPTR(outbuf)
+    [1] = right;
+#else
+    if (((ptrdiff_t)outbuf & 0x03) == 0) { /* 4-byte aligned output: store as two HALFs */
+#if defined(IS_LITTLE_ENDIAN)
+        BYTESWAP(left, temp);
+        BYTESWAP(right, temp);
+#endif
+        HALFPTR(outbuf)
+        [0] = left;
+        HALFPTR(outbuf)
+        [1] = right;
+    } else { /* otherwise write the two words out big-endian, byte by byte */
+        outbuf[0] = (BYTE)(left >> 24);
+        outbuf[1] = (BYTE)(left >> 16);
+        outbuf[2] = (BYTE)(left >> 8);
+        outbuf[3] = (BYTE)(left);
+
+        outbuf[4] = (BYTE)(right >> 24);
+        outbuf[5] = (BYTE)(right >> 16);
+        outbuf[6] = (BYTE)(right >> 8);
+        outbuf[7] = (BYTE)(right);
+    }
+#endif
+}
+
+/* Acknowledgements:
+** Two ideas used in this implementation were shown to me by Dennis Ferguson
+** in 1990.  He credits them to Richard Outerbridge and Dan Hoey.  They were:
+** 1. The method of computing the Initial and Final permutations.
+** 2. Circularly rotating the SP tables and the initial values of left and
+**  right to reduce the number of shifts required during the 16 rounds.
+*/
diff --git a/security/nss/lib/freebl/des.h b/security/nss/lib/freebl/des.h
new file mode 100644
index 000000000..70a17e510
--- /dev/null
+++ b/security/nss/lib/freebl/des.h
@@ -0,0 +1,43 @@
+/*
+ *  des.h
+ *
+ *  header file for DES-150 library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _DES_H_
+#define _DES_H_ 1
+
+#include "blapi.h"
+
+typedef unsigned char BYTE; /* one byte of key, IV or data */
+typedef unsigned int HALF;  /* half of an 8-byte block; the code loads 8 bytes as two HALFs */
+
+#define HALFPTR(x) ((HALF *)(x))            /* view x as an array of HALF */
+#define SHORTPTR(x) ((unsigned short *)(x)) /* view x as an array of unsigned short */
+#define BYTEPTR(x) ((BYTE *)(x))            /* view x as an array of BYTE */
+
+typedef enum {
+    DES_ENCRYPT = 0x5555, /* DES_MakeSchedule writes the round keys first-to-last */
+    DES_DECRYPT = 0xAAAA  /* DES_MakeSchedule writes the round keys last-to-first */
+} DESDirection;
+
+typedef void DESFunc(struct DESContextStr *cx, BYTE *out, const BYTE *in,
+                     unsigned int len); /* signature of the per-mode routines stored in worker */
+
+struct DESContextStr {
+    /* key schedule, 16 internal keys, each with 8 6-bit parts */
+    HALF ks0[32];           /* filled for every mode */
+    HALF ks1[32];           /* filled only for the EDE3 modes */
+    HALF ks2[32];           /* filled only for the EDE3 modes */
+    HALF iv[2];             /* CBC chaining block, loaded from the caller's IV */
+    DESDirection direction; /* checked by DES_Encrypt and DES_Decrypt */
+    DESFunc *worker;        /* mode routine chosen by DES_InitContext */
+};
+
+void DES_MakeSchedule(HALF *ks, const BYTE *key, DESDirection direction); /* writes 32 HALFs to ks from an 8-byte key */
+void DES_Do1Block(HALF *ks, const BYTE *inbuf, BYTE *outbuf);             /* one 8-byte block, inbuf -> outbuf */
+
+#endif
diff --git a/security/nss/lib/freebl/desblapi.c b/security/nss/lib/freebl/desblapi.c
new file mode 100644
index 000000000..c03ab27cc
--- /dev/null
+++ b/security/nss/lib/freebl/desblapi.c
@@ -0,0 +1,256 @@
+/*
+ *  desblapi.c
+ *
+ *  core source file for DES-150 library
+ *  Implement DES Modes of Operation and Triple-DES.
+ *  Adapt DES-150 to blapi API.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "des.h"
+#include "blapii.h"
+#include <stddef.h>
+#include "secerr.h"
+
+#if defined(NSS_X86_OR_X64)
+/* Intel X86 CPUs do unaligned loads and stores without complaint, so copy 8 bytes as two HALFs. */
+#define COPY8B(to, from, ptr) \
+    HALFPTR(to)               \
+    [0] = HALFPTR(from)[0];   \
+    HALFPTR(to)               \
+    [1] = HALFPTR(from)[1];
+#else /* elsewhere copy the 8 bytes with memcpy; ptr is unused by both expansions */
+#define COPY8B(to, from, ptr) memcpy(to, from, 8)
+#endif
+#define COPY8BTOHALF(to, from) COPY8B(to, from, from) /* used below with a HALF[2] destination */
+#define COPY8BFROMHALF(to, from) COPY8B(to, from, to) /* used below with a HALF[2] source */
+
+static void
+DES_ECB(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len)
+{
+    /* Single DES in ECB mode: every 8-byte block is transformed on its own
+     * with schedule ks0.  The loop only stops when len reaches exactly
+     * zero, so len has to be a multiple of 8. */
+    for (; len != 0; len -= 8, in += 8, out += 8) {
+        DES_Do1Block(cx->ks0, in, out);
+    }
+}
+
+static void
+DES_EDE3_ECB(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len)
+{
+    /* Triple DES in ECB mode: schedule ks0 maps the input block into out,
+     * then ks1 and ks2 are applied to that output block in place.  len has
+     * to be a multiple of 8 for the loop to terminate. */
+    for (; len != 0; len -= 8, in += 8, out += 8) {
+        DES_Do1Block(cx->ks0, in, out);
+        DES_Do1Block(cx->ks1, out, out);
+        DES_Do1Block(cx->ks2, out, out);
+    }
+}
+
+static void NO_SANITIZE_ALIGNMENT
+DES_CBCEn(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len)
+{
+    const BYTE *const stop = in + len;
+    HALF block[2];
+
+    /* cx->iv always holds the previous ciphertext block (initially the IV) */
+    for (; in != stop; in += 8, out += 8) {
+        COPY8BTOHALF(block, in);
+        block[0] ^= cx->iv[0];
+        block[1] ^= cx->iv[1];
+        /* encrypt straight into cx->iv so the result chains into the next block */
+        DES_Do1Block(cx->ks0, (BYTE *)block, (BYTE *)cx->iv);
+        COPY8BFROMHALF(out, cx->iv);
+    }
+}
+
+static void NO_SANITIZE_ALIGNMENT
+DES_CBCDe(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len)
+{
+    const BYTE *const stop = in + len;
+    HALF prev[2];  /* ciphertext block preceding the current one */
+    HALF clear[2]; /* block being decrypted */
+
+    while (in != stop) {
+        prev[0] = cx->iv[0];
+        prev[1] = cx->iv[1];
+        COPY8BTOHALF(cx->iv, in); /* current ciphertext chains into the next block */
+        DES_Do1Block(cx->ks0, (BYTE *)cx->iv, (BYTE *)clear);
+        clear[0] ^= prev[0];
+        clear[1] ^= prev[1];
+        COPY8BFROMHALF(out, clear);
+        in += 8;
+        out += 8;
+    }
+}
+
+static void NO_SANITIZE_ALIGNMENT
+DES_EDE3CBCEn(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len)
+{
+    const BYTE *const stop = in + len;
+    BYTE *const chain = (BYTE *)cx->iv; /* previous ciphertext block */
+    HALF block[2];
+
+    for (; in != stop; in += 8, out += 8) {
+        COPY8BTOHALF(block, in);
+        block[0] ^= cx->iv[0];
+        block[1] ^= cx->iv[1];
+        /* three passes; the last two transform the chaining block in place */
+        DES_Do1Block(cx->ks0, (BYTE *)block, chain);
+        DES_Do1Block(cx->ks1, chain, chain);
+        DES_Do1Block(cx->ks2, chain, chain);
+        COPY8BFROMHALF(out, cx->iv);
+    }
+}
+
+static void NO_SANITIZE_ALIGNMENT
+DES_EDE3CBCDe(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len)
+{
+    const BYTE *const stop = in + len;
+    HALF prev[2];  /* ciphertext block preceding the current one */
+    HALF clear[2]; /* block being decrypted */
+
+    while (in != stop) {
+        prev[0] = cx->iv[0];
+        prev[1] = cx->iv[1];
+        COPY8BTOHALF(cx->iv, in); /* current ciphertext chains into the next block */
+        DES_Do1Block(cx->ks0, (BYTE *)cx->iv, (BYTE *)clear);
+        DES_Do1Block(cx->ks1, (BYTE *)clear, (BYTE *)clear);
+        DES_Do1Block(cx->ks2, (BYTE *)clear, (BYTE *)clear);
+        clear[0] ^= prev[0];
+        clear[1] ^= prev[1];
+        COPY8BFROMHALF(out, clear);
+        in += 8;
+        out += 8;
+    }
+}
+
+DESContext *
+DES_AllocateContext(void)
+{
+    return PORT_ZNew(DESContext); /* presumably zero-filled (ZNew) - TODO confirm; DES_InitContext still has to run */
+}
+
+SECStatus
+DES_InitContext(DESContext *cx, const unsigned char *key, unsigned int keylen,
+                const unsigned char *iv, int mode, unsigned int encrypt,
+                unsigned int unused) /* keylen and unused are not consulted */
+{
+    DESDirection opposite = encrypt ? DES_DECRYPT : DES_ENCRYPT;
+    PRBool needIV = (mode == NSS_DES_CBC || mode == NSS_DES_EDE3_CBC);
+    if (!cx || !key || (needIV && !iv)) { /* each of these is dereferenced below */
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    cx->direction = encrypt ? DES_ENCRYPT : DES_DECRYPT;
+    switch (mode) {
+        case NSS_DES: /* DES ECB: 8-byte key, single schedule */
+            DES_MakeSchedule(cx->ks0, key, cx->direction);
+            cx->worker = &DES_ECB;
+            break;
+
+        case NSS_DES_EDE3: /* DES EDE ECB: 24-byte key, middle pass runs the opposite way */
+            cx->worker = &DES_EDE3_ECB;
+            if (encrypt) {
+                DES_MakeSchedule(cx->ks0, key, cx->direction);
+                DES_MakeSchedule(cx->ks1, key + 8, opposite);
+                DES_MakeSchedule(cx->ks2, key + 16, cx->direction);
+            } else { /* key thirds swapped so the worker can still run ks0, ks1, ks2 */
+                DES_MakeSchedule(cx->ks2, key, cx->direction);
+                DES_MakeSchedule(cx->ks1, key + 8, opposite);
+                DES_MakeSchedule(cx->ks0, key + 16, cx->direction);
+            }
+            break;
+
+        case NSS_DES_CBC: /* DES CBC: 8-byte key plus 8-byte IV */
+            COPY8BTOHALF(cx->iv, iv);
+            cx->worker = encrypt ? &DES_CBCEn : &DES_CBCDe;
+            DES_MakeSchedule(cx->ks0, key, cx->direction);
+            break;
+
+        case NSS_DES_EDE3_CBC: /* DES EDE CBC: 24-byte key plus 8-byte IV */
+            COPY8BTOHALF(cx->iv, iv);
+            if (encrypt) {
+                cx->worker = &DES_EDE3CBCEn;
+                DES_MakeSchedule(cx->ks0, key, cx->direction);
+                DES_MakeSchedule(cx->ks1, key + 8, opposite);
+                DES_MakeSchedule(cx->ks2, key + 16, cx->direction);
+            } else { /* key thirds swapped so the worker can still run ks0, ks1, ks2 */
+                cx->worker = &DES_EDE3CBCDe;
+                DES_MakeSchedule(cx->ks2, key, cx->direction);
+                DES_MakeSchedule(cx->ks1, key + 8, opposite);
+                DES_MakeSchedule(cx->ks0, key + 16, cx->direction);
+            }
+            break;
+
+        default: /* unknown mode */
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+    }
+    return SECSuccess;
+}
+
+DESContext *
+DES_CreateContext(const BYTE *key, const BYTE *iv, int mode, PRBool encrypt)
+{
+    DESContext *ctx = PORT_ZNew(DESContext);
+
+    /* a failed allocation is passed straight on; DES_InitContext rejects NULL */
+    if (DES_InitContext(ctx, key, 0, iv, mode, encrypt, 0) == SECSuccess) {
+        return ctx;
+    }
+    PORT_ZFree(ctx, sizeof *ctx);
+    return NULL;
+}
+
+void
+DES_DestroyContext(DESContext *cx, PRBool freeit)
+{
+    if (cx == NULL)
+        return;
+    memset(cx, 0, sizeof *cx); /* wipe key schedules and IV before any release */
+    if (freeit)
+        PORT_Free(cx);
+}
+
+SECStatus
+DES_Encrypt(DESContext *cx, BYTE *out, unsigned int *outLen,
+            unsigned int maxOutLen, const BYTE *in, unsigned int inLen)
+{
+    /* need an encrypt context, whole 8-byte blocks, and room for all of them */
+    if (!cx || cx->direction != DES_ENCRYPT || (inLen % 8) != 0 ||
+        inLen > maxOutLen) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    cx->worker(cx, out, in, inLen);
+    if (outLen != NULL)
+        *outLen = inLen;
+    return SECSuccess;
+}
+
+SECStatus
+DES_Decrypt(DESContext *cx, BYTE *out, unsigned int *outLen,
+            unsigned int maxOutLen, const BYTE *in, unsigned int inLen)
+{
+    /* need a decrypt context, whole 8-byte blocks, and room for all of them */
+    if (!cx || cx->direction != DES_DECRYPT || (inLen % 8) != 0 ||
+        inLen > maxOutLen) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    cx->worker(cx, out, in, inLen);
+    if (outLen != NULL)
+        *outLen = inLen;
+    return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/det_rng.c b/security/nss/lib/freebl/det_rng.c
new file mode 100644
index 000000000..fcbf9b34a
--- /dev/null
+++ b/security/nss/lib/freebl/det_rng.c
@@ -0,0 +1,67 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "blapi.h"
+#include "blapit.h"
+#include "chacha20.h"
+#include "nssilock.h"
+#include "seccomon.h"
+#include "secerr.h"
+
+static unsigned long globalNumCalls = 0;
+
+SECStatus
+prng_ResetForFuzzing(PZLock *rng_lock)
+{
+    SECStatus status = SECFailure;
+
+    /* Check for a valid RNG lock. */
+    PORT_Assert(rng_lock != NULL);
+    if (rng_lock) {
+        /* restart the counter that feeds the generator's nonce, under the lock */
+        PZ_Lock(rng_lock);
+        globalNumCalls = 0;
+        PZ_Unlock(rng_lock);
+        status = SECSuccess;
+    } else {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+    }
+    return status;
+}
+
+SECStatus
+prng_GenerateDeterministicRandomBytes(PZLock *rng_lock, void *dest, size_t len)
+{
+    static const uint8_t key[32]; /* static storage, so the key is all zero */
+    uint8_t nonce[12] = { 0 };
+    ChaCha20Poly1305Context *cx;
+    SECStatus status = SECFailure;
+
+    /* Check for a valid RNG lock. */
+    PORT_Assert(rng_lock != NULL);
+    if (!rng_lock) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return status;
+    }
+
+    /* --- LOCKED --- */
+    PZ_Lock(rng_lock);
+    /* the call counter is the only thing that varies the nonce between calls */
+    memcpy(nonce, &globalNumCalls, sizeof(globalNumCalls));
+    globalNumCalls++;
+    /* NOTE(review): cx is created and destroyed but never handed to
+     * ChaCha20XOR; it appears to act only as an allocation check - confirm. */
+    cx = ChaCha20Poly1305_CreateContext(key, sizeof(key), 16);
+    if (cx) {
+        memset(dest, 0, len);
+        ChaCha20XOR(dest, dest, len, key, nonce, 0);
+        ChaCha20Poly1305_DestroyContext(cx, PR_TRUE);
+        status = SECSuccess;
+    } else {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+    }
+    PZ_Unlock(rng_lock);
+    /* --- UNLOCKED --- */
+    return status;
+}
diff --git a/security/nss/lib/freebl/det_rng.h b/security/nss/lib/freebl/det_rng.h
new file mode 100644
index 000000000..599d726ca
--- /dev/null
+++ b/security/nss/lib/freebl/det_rng.h
@@ -0,0 +1,12 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __det_rng_h_
+#define __det_rng_h_
+
+SECStatus prng_ResetForFuzzing(PZLock *rng_lock); /* zeroes the call counter while holding rng_lock */
+SECStatus prng_GenerateDeterministicRandomBytes(PZLock *rng_lock, void *dest,
+                                                size_t len); /* writes len bytes to dest */
+
+#endif /* __det_rng_h_ */
diff --git a/security/nss/lib/freebl/dh.c b/security/nss/lib/freebl/dh.c
new file mode 100644
index 000000000..97025c7e2
--- /dev/null
+++ b/security/nss/lib/freebl/dh.c
@@ -0,0 +1,452 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Diffie-Hellman parameter generation, key generation, and secret derivation.
+ * KEA secret generation and verification.
+ */
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerr.h"
+#include "secerr.h"
+
+#include "blapi.h"
+#include "secitem.h"
+#include "mpi.h"
+#include "mpprime.h"
+#include "secmpi.h"
+
+#define KEA_DERIVED_SECRET_LEN 128
+
+/*
+ * Pick the private-key length for a DH prime of primeLen bytes.
+ * Lengths are in bytes.
+ */
+static unsigned int
+dh_GetSecretKeyLen(unsigned int primeLen)
+{
+    /* Based on Table 2 in NIST SP 800-57: larger primes get longer keys. */
+    unsigned int secretLen = 20; /* 160 bits, for primes under 2048 bits */
+    if (primeLen >= 256) /* 2048 bits */
+        secretLen = 28;  /* 224 bits */
+    if (primeLen >= 384) /* 3072 bits */
+        secretLen = 32;  /* 256 bits */
+    if (primeLen >= 960) /* 7680 bits */
+        secretLen = 48;  /* 384 bits */
+    if (primeLen >= 1920) /* 15360 bits */
+        secretLen = 64;   /* 512 bits */
+    return secretLen;
+}
+
+/* Generate DH parameters (a primeLen-byte prime and a base) in a new arena. */
+SECStatus
+DH_GenParam(int primeLen, DHParams **params)
+{
+    PLArenaPool *arena;
+    DHParams *dhparams;
+    unsigned char *pb = NULL, *ab = NULL; /* random-byte scratch buffers */
+    unsigned long counter = 0;
+    mp_int p, q, a, psub1, test;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    if (!params || primeLen <= 0) { /* 0 would turn pb[primeLen - 1] into pb[-1] */
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE);
+    if (!arena) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    dhparams = (DHParams *)PORT_ArenaZAlloc(arena, sizeof(DHParams));
+    if (!dhparams) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        PORT_FreeArena(arena, PR_TRUE);
+        return SECFailure;
+    }
+    dhparams->arena = arena;
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&a) = 0;
+    MP_DIGITS(&psub1) = 0;
+    MP_DIGITS(&test) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&a));
+    CHECK_MPI_OK(mp_init(&psub1));
+    CHECK_MPI_OK(mp_init(&test));
+    pb = PORT_Alloc(primeLen);
+    ab = PORT_Alloc(primeLen);
+    if (!pb || !ab) { /* both buffers are written below, so fail before use */
+        err = MP_MEM;
+        goto cleanup;
+    }
+    /* generate prime with MPI, uses Miller-Rabin to generate strong prime. */
+    CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(pb, primeLen));
+    pb[0] |= 0x80;            /* set high-order bit */
+    pb[primeLen - 1] |= 0x01; /* set low-order bit  */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&p, pb, primeLen));
+    CHECK_MPI_OK(mpp_make_prime(&p, primeLen * 8, PR_TRUE, &counter));
+    /* construct Sophie-Germain prime q = (p-1)/2. */
+    CHECK_MPI_OK(mp_sub_d(&p, 1, &psub1));
+    CHECK_MPI_OK(mp_div_2(&psub1, &q));
+    /* construct a generator: start from a random candidate a in p's field */
+    CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(ab, primeLen));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&a, ab, primeLen));
+    /* force a < p (note that quot(a/p) <= 1) */
+    if (mp_cmp(&a, &p) > 0)
+        CHECK_MPI_OK(mp_sub(&a, &p, &a));
+    do {
+        /* check that a is in the range [2..p-1] */
+        if (mp_cmp_d(&a, 2) < 0 || mp_cmp(&a, &psub1) >= 0) {
+            /* a is outside of the allowed range.  Set a=3 and keep going. */
+            mp_set(&a, 3);
+        }
+        /* if a**q mod p != 1 then a is a generator */
+        CHECK_MPI_OK(mp_exptmod(&a, &q, &p, &test));
+        if (mp_cmp_d(&test, 1) != 0)
+            break;
+        /* increment the candidate and try again. */
+        CHECK_MPI_OK(mp_add_d(&a, 1, &a));
+    } while (PR_TRUE);
+    MPINT_TO_SECITEM(&p, &dhparams->prime, arena);
+    MPINT_TO_SECITEM(&a, &dhparams->base, arena);
+    *params = dhparams;
+cleanup:
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&a);
+    mp_clear(&psub1);
+    mp_clear(&test);
+    if (pb)
+        PORT_ZFree(pb, primeLen);
+    if (ab)
+        PORT_ZFree(ab, primeLen);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv)
+        PORT_FreeArena(arena, PR_TRUE);
+    return rv;
+}
+
+/* Generate a DH key pair for params; on success *privKey lives in a new arena. */
+SECStatus
+DH_NewKey(DHParams *params, DHPrivateKey **privKey)
+{
+    PLArenaPool *arena;
+    DHPrivateKey *key;
+    mp_int g, xa, p, Ya;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    if (!params || !privKey) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE);
+    if (!arena) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    key = (DHPrivateKey *)PORT_ArenaZAlloc(arena, sizeof(DHPrivateKey));
+    if (!key) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        PORT_FreeArena(arena, PR_TRUE);
+        return SECFailure;
+    }
+    key->arena = arena;
+    MP_DIGITS(&g) = 0;
+    MP_DIGITS(&xa) = 0;
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&Ya) = 0;
+    CHECK_MPI_OK(mp_init(&g));
+    CHECK_MPI_OK(mp_init(&xa));
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&Ya));
+    /* Copy p and g into the key's arena and load them as mp_ints */
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->prime, &params->prime));
+    SECITEM_TO_MPINT(key->prime, &p);
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->base, &params->base));
+    SECITEM_TO_MPINT(key->base, &g);
+    /* Generate private key xa; stop here if its buffer cannot be allocated */
+    CHECK_SEC_OK(SECITEM_AllocItem(arena, &key->privateValue,
+                                   dh_GetSecretKeyLen(params->prime.len))
+                     ? SECSuccess : SECFailure);
+    CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(key->privateValue.data,
+                                               key->privateValue.len));
+    SECITEM_TO_MPINT(key->privateValue, &xa);
+    CHECK_MPI_OK(mp_mod(&xa, &p, &xa)); /* xa < p */
+    /* Compute public key Ya = g ** xa mod p */
+    CHECK_MPI_OK(mp_exptmod(&g, &xa, &p, &Ya));
+    MPINT_TO_SECITEM(&Ya, &key->publicValue, key->arena);
+    *privKey = key;
+cleanup:
+    mp_clear(&g);
+    mp_clear(&xa);
+    mp_clear(&p);
+    mp_clear(&Ya);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv) {
+        *privKey = NULL;
+        PORT_FreeArena(arena, PR_TRUE);
+    }
+    return rv;
+}
+
+/* Derive ZZ = publicValue ** privateValue mod prime into a newly allocated derivedSecret. */
+SECStatus
+DH_Derive(SECItem *publicValue,
+          SECItem *prime,
+          SECItem *privateValue,
+          SECItem *derivedSecret,
+          unsigned int outBytes)
+{
+    mp_int p, Xa, Yb, ZZ, psub1;
+    mp_err err = MP_OKAY;
+    unsigned int len = 0;
+    unsigned int nb;
+    unsigned char *secret = NULL;
+    if (!publicValue || !prime || !privateValue || !derivedSecret) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    memset(derivedSecret, 0, sizeof *derivedSecret);
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&Xa) = 0;
+    MP_DIGITS(&Yb) = 0;
+    MP_DIGITS(&ZZ) = 0;
+    MP_DIGITS(&psub1) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&Xa));
+    CHECK_MPI_OK(mp_init(&Yb));
+    CHECK_MPI_OK(mp_init(&ZZ));
+    CHECK_MPI_OK(mp_init(&psub1));
+    SECITEM_TO_MPINT(*publicValue, &Yb);
+    SECITEM_TO_MPINT(*privateValue, &Xa);
+    SECITEM_TO_MPINT(*prime, &p);
+    CHECK_MPI_OK(mp_sub_d(&p, 1, &psub1));
+
+    /* We assume that the modulus, p, is a safe prime. That is, p = 2q+1 where
+     * q is also a prime. Thus the orders of the subgroups are factors of 2q:
+     * namely 1, 2, q and 2q.
+     *
+     * We check that the peer's public value isn't zero (which isn't in the
+     * group), one (subgroup of order one) or p-1 (subgroup of order 2). We
+     * also check that the public value is less than p, to avoid being fooled
+     * by values like p+1 or 2*p-1.
+     *
+     * Thus we must be operating in the subgroup of size q or 2q. */
+    if (mp_cmp_d(&Yb, 1) <= 0 ||
+        mp_cmp(&Yb, &psub1) >= 0) {
+        err = MP_BADARG;
+        goto cleanup;
+    }
+
+    /* ZZ = (Yb)**Xa mod p */
+    CHECK_MPI_OK(mp_exptmod(&Yb, &Xa, &p, &ZZ));
+    /* number of bytes in the derived secret */
+    len = mp_unsigned_octet_size(&ZZ);
+    if (len <= 0) {
+        err = MP_BADARG;
+        goto cleanup;
+    }
+
+    /*
+     * We check to make sure that ZZ is not equal to 1 or -1 mod p.
+     * This helps guard against small subgroup attacks, since an attacker
+     * using a subgroup of size N will produce 1 or -1 with probability 1/N.
+     * When the protocol is executed within a properly large subgroup, the
+     * probability of this result will be negligibly small.  For example,
+     * with a strong prime of the form 2p+1, the probability will be 1/p.
+     *
+     * We return MP_BADARG because this is probably the result of a bad
+     * public value or a bad prime having been provided.
+     */
+    if (mp_cmp_d(&ZZ, 1) == 0 ||
+        mp_cmp(&ZZ, &psub1) == 0) {
+        err = MP_BADARG;
+        goto cleanup;
+    }
+
+    /* allocate a buffer which can hold the entire derived secret. */
+    secret = PORT_Alloc(len);
+    if (secret == NULL) {
+        err = MP_MEM;
+        goto cleanup;
+    }
+    /* grab the derived secret; a negative result is an error, so stop there */
+    err = mp_to_unsigned_octets(&ZZ, secret, len);
+    if (err < 0)
+        goto cleanup;
+    err = MP_OKAY;
+    /*
+    ** if outBytes is 0 take all of the bytes from the derived secret.
+    ** if outBytes is not 0 take exactly outBytes from the derived secret, zero
+    ** pad at the beginning if necessary, and truncate beginning bytes
+    ** if necessary.
+    */
+    nb = (outBytes > 0) ? outBytes : len;
+    if (SECITEM_AllocItem(NULL, derivedSecret, nb) == NULL) {
+        err = MP_MEM;
+        goto cleanup;
+    }
+    if (len < nb) {
+        unsigned int offset = nb - len;
+        memset(derivedSecret->data, 0, offset);
+        memcpy(derivedSecret->data + offset, secret, len);
+    } else {
+        memcpy(derivedSecret->data, secret + len - nb, nb);
+    }
+cleanup:
+    mp_clear(&p);
+    mp_clear(&Xa);
+    mp_clear(&Yb);
+    mp_clear(&ZZ);
+    mp_clear(&psub1);
+    if (secret) {
+        /* free the buffer allocated for the full secret. */
+        PORT_ZFree(secret, len);
+    }
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        if (derivedSecret->data)
+            PORT_ZFree(derivedSecret->data, derivedSecret->len);
+        memset(derivedSecret, 0, sizeof *derivedSecret); /* leave no dangling data/len */
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/* Derive the 128-byte KEA secret w = (Y**r + R**x) mod p into a newly allocated derivedSecret. */
+SECStatus
+KEA_Derive(SECItem *prime,
+           SECItem *public1,
+           SECItem *public2,
+           SECItem *private1,
+           SECItem *private2,
+           SECItem *derivedSecret)
+{
+    mp_int p, Y, R, r, x, t, u, w;
+    mp_err err = MP_OKAY;
+    unsigned char *secret = NULL;
+    unsigned int len = 0, offset;
+    if (!prime || !public1 || !public2 || !private1 || !private2 ||
+        !derivedSecret) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    memset(derivedSecret, 0, sizeof *derivedSecret);
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&Y) = 0;
+    MP_DIGITS(&R) = 0;
+    MP_DIGITS(&r) = 0;
+    MP_DIGITS(&x) = 0;
+    MP_DIGITS(&t) = 0;
+    MP_DIGITS(&u) = 0;
+    MP_DIGITS(&w) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&Y));
+    CHECK_MPI_OK(mp_init(&R));
+    CHECK_MPI_OK(mp_init(&r));
+    CHECK_MPI_OK(mp_init(&x));
+    CHECK_MPI_OK(mp_init(&t));
+    CHECK_MPI_OK(mp_init(&u));
+    CHECK_MPI_OK(mp_init(&w));
+    SECITEM_TO_MPINT(*prime, &p);
+    SECITEM_TO_MPINT(*public1, &Y);
+    SECITEM_TO_MPINT(*public2, &R);
+    SECITEM_TO_MPINT(*private1, &r);
+    SECITEM_TO_MPINT(*private2, &x);
+    /* t = DH(Y, r, p) = Y ** r mod p */
+    CHECK_MPI_OK(mp_exptmod(&Y, &r, &p, &t));
+    /* u = DH(R, x, p) = R ** x mod p */
+    CHECK_MPI_OK(mp_exptmod(&R, &x, &p, &u));
+    /* w = (t + u) mod p */
+    CHECK_MPI_OK(mp_addmod(&t, &u, &p, &w));
+    /* allocate a buffer for the full derived secret */
+    len = mp_unsigned_octet_size(&w);
+    secret = PORT_Alloc(len);
+    if (secret == NULL) {
+        err = MP_MEM;
+        goto cleanup;
+    }
+    err = mp_to_unsigned_octets(&w, secret, len); /* grab the secret */
+    if (err > 0)
+        err = MP_OKAY;
+    /* allocate output buffer */
+    if (SECITEM_AllocItem(NULL, derivedSecret, KEA_DERIVED_SECRET_LEN) == NULL) {
+        err = MP_MEM;
+        goto cleanup;
+    }
+    memset(derivedSecret->data, 0, derivedSecret->len);
+    if (len >= KEA_DERIVED_SECRET_LEN) { /* keep the 128 least significant bytes */
+        memcpy(derivedSecret->data, secret + (len - KEA_DERIVED_SECRET_LEN),
+               KEA_DERIVED_SECRET_LEN);
+    } else {
+        offset = KEA_DERIVED_SECRET_LEN - len;
+        memcpy(derivedSecret->data + offset, secret, len);
+    }
+cleanup:
+    mp_clear(&p);
+    mp_clear(&Y);
+    mp_clear(&R);
+    mp_clear(&r);
+    mp_clear(&x);
+    mp_clear(&t);
+    mp_clear(&u);
+    mp_clear(&w);
+    if (secret)
+        PORT_ZFree(secret, len);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        if (derivedSecret->data)
+            PORT_ZFree(derivedSecret->data, derivedSecret->len);
+        memset(derivedSecret, 0, sizeof *derivedSecret); /* leave no dangling data/len */
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/* Return PR_TRUE only when Y ** subPrime mod prime == 1; bad arguments or MPI errors give PR_FALSE. */
+PRBool
+KEA_Verify(SECItem *Y, SECItem *prime, SECItem *subPrime)
+{
+    mp_int p, q, y, r;
+    mp_err err = MP_OKAY;
+    int cmp = 1; /* default is false */
+    if (!Y || !prime || !subPrime) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return PR_FALSE; /* SECFailure is non-zero and would read as "verified" */
+    }
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&y) = 0;
+    MP_DIGITS(&r) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&y));
+    CHECK_MPI_OK(mp_init(&r));
+    SECITEM_TO_MPINT(*prime, &p);
+    SECITEM_TO_MPINT(*subPrime, &q);
+    SECITEM_TO_MPINT(*Y, &y);
+    /* compute r = y**q mod p */
+    CHECK_MPI_OK(mp_exptmod(&y, &q, &p, &r));
+    cmp = mp_cmp_d(&r, 1); /* compare to 1 */
+cleanup:
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&y);
+    mp_clear(&r);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        return PR_FALSE;
+    }
+    return (cmp == 0) ? PR_TRUE : PR_FALSE;
+}
diff --git a/security/nss/lib/freebl/drbg.c b/security/nss/lib/freebl/drbg.c
new file mode 100644
index 000000000..ac0bba6e0
--- /dev/null
+++ b/security/nss/lib/freebl/drbg.c
@@ -0,0 +1,968 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerror.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "prinit.h"
+#include "blapi.h"
+#include "blapii.h"
+#include "nssilock.h"
+#include "secitem.h"
+#include "sha_fast.h"
+#include "sha256.h"
+#include "secrng.h" /* for RNG_SystemRNG() */
+#include "secmpi.h"
+
+#ifdef UNSAFE_FUZZER_MODE
+#include "det_rng.h"
+#endif
+
+/* PRNG_SEEDLEN defined in NIST SP 800-90 section 10.1
+ * for SHA-1, SHA-224, and SHA-256 it's 440 bits.
+ * for SHA-384 and SHA-512 it's 888 bits */
+#define PRNG_SEEDLEN (440 / PR_BITS_PER_BYTE)
+#define PRNG_MAX_ADDITIONAL_BYTES PR_INT64(0x100000000)
+/* 2^35 bits or 2^32 bytes */
+#define PRNG_MAX_REQUEST_SIZE 0x10000             /* 2^19 bits or 2^16 bytes */
+#define PRNG_ADDITONAL_DATA_CACHE_SIZE (8 * 1024) /* must be less than          \
+                                                   *  PRNG_MAX_ADDITIONAL_BYTES \
+                                                   */
+
+/* RESEED_COUNT is how many calls to the prng before we need to reseed
+ * under normal NIST rules, you must return an error. In the NSS case, we
+ * self-reseed with RNG_SystemRNG(). Count can be a large number. For code
+ * simplicity, we specify count with 2 components: RESEED_BYTE (which is
+ * the same as LOG256(RESEED_COUNT)) and RESEED_VALUE (which is the same as
+ * RESEED_COUNT / (256 ^ RESEED_BYTE)). Another way to look at this is
+ * RESEED_COUNT = RESEED_VALUE * (256 ^ RESEED_BYTE). For Hash based DRBG
+ * we use the maximum count value, 2^48, or RESEED_BYTE=6 and RESEED_VALUE=1
+ */
+#define RESEED_BYTE 6
+#define RESEED_VALUE 1
+
+#define PRNG_RESET_RESEED_COUNT(rng)                                    \
+    PORT_Memset((rng)->reseed_counter, 0, sizeof(rng)->reseed_counter); \
+    (rng)->reseed_counter[RESEED_BYTE] = 1;
+
+/*
+ * The actual values of this enum are specified in SP 800-90, 10.1.1.*
+ * The spec does not name the types, it only uses bare values
+ */
+typedef enum {
+    prngCGenerateType = 0,      /* used when creating a new 'C' */
+    prngReseedType = 1,         /* used in reseeding */
+    prngAdditionalDataType = 2, /* used in mixing additional data */
+    prngGenerateByteType = 3    /* used when mixing internal state while
+                 * generating bytes */
+} prngVTypes;
+
+/*
+ * Global RNG context
+ */
+struct RNGContextStr {
+    PZLock *lock; /* Lock to serialize access to global rng */
+    /*
+     * NOTE, a number of steps in the drbg algorithm need to hash
+     * V_type || V. The code, therefore, depends on the V array following
+     * immediately after V_type to avoid extra copies. To accomplish this
+     * in a way that compilers can't perturb, we declare V_type and V
+     * as a V_Data array and reference them by macros */
+    PRUint8 V_Data[PRNG_SEEDLEN + 1]; /* V_type byte followed by V */
+#define V_type V_Data[0]
+#define V(rng) (((rng)->V_Data) + 1)
+#define VSize(rng) ((sizeof(rng)->V_Data) - 1)
+    PRUint8 C[PRNG_SEEDLEN];           /* internal state variables */
+    PRUint8 lastOutput[SHA256_LENGTH]; /* for continuous rng checking */
+    /* If we get calls for the PRNG to return less than the length of our
+     * hash, we extend the request for a full hash (since we'll be doing
+     * the full hash anyway). Future requests for random numbers are fulfilled
+     * from the remainder of the bytes we generated (see data and dataAvail
+     * below). Requests for at least the hash size are fulfilled directly
+     * by the generator without going through that cache. */
+    PRUint8 reseed_counter[RESEED_BYTE + 1]; /* number of requests since the
+                                              * last reseed. Need only be
+                                              * big enough to hold the whole
+                                              * reseed count */
+    PRUint8 data[SHA256_LENGTH];             /* when we request less than a block
+                                              * save the rest of the rng output for
+                                              * another partial block */
+    PRUint8 dataAvail;                       /* # bytes of output available in our cache,
+                                              * [0...SHA256_LENGTH] */
+    /* store additional data that has been shovelled off to us by
+     * RNG_RandomUpdate. */
+    PRUint8 additionalDataCache[PRNG_ADDITONAL_DATA_CACHE_SIZE];
+    PRUint32 additionalAvail; /* # bytes currently held in additionalDataCache */
+    PRBool isValid;   /* false if RNG reaches an invalid state */
+    PRBool isKatTest; /* true if running NIST PRNG KAT tests */
+};
+
+typedef struct RNGContextStr RNGContext;
+static RNGContext *globalrng = NULL;
+static RNGContext theGlobalRng;
+
+/*
+ * The next several functions are derived from the NIST SP 800-90
+ * spec. In these functions, an attempt was made to use names consistent
+ * with the names in the spec, even if they differ from normal NSS usage.
+ */
+
+/*
+ * Hash Derive function defined in NIST SP 800-90 Section 10.4.1.
+ * This function is used in the Instantiate and Reseed functions.
+ *
+ * NOTE: requested_bytes cannot overlap with input_string_1 or input_string_2.
+ * input_string_1 and input_string_2 are logically concatenated.
+ * input_string_1 must be supplied.
+ * if input_string_2 is not supplied, NULL should be passed for this parameter.
+ */
+static SECStatus
+prng_Hash_df(PRUint8 *requested_bytes, unsigned int no_of_bytes_to_return,
+             const PRUint8 *input_string_1, unsigned int input_string_1_len,
+             const PRUint8 *input_string_2, unsigned int input_string_2_len)
+{
+    SHA256Context ctx;
+    PRUint32 tmp;
+    PRUint8 counter;
+
+    tmp = SHA_HTONL(no_of_bytes_to_return * 8); /* requested length in bits */
+
+    for (counter = 1; no_of_bytes_to_return > 0; counter++) {
+        unsigned int hash_return_len;
+        SHA256_Begin(&ctx);
+        SHA256_Update(&ctx, &counter, 1);
+        SHA256_Update(&ctx, (unsigned char *)&tmp, sizeof tmp);
+        SHA256_Update(&ctx, input_string_1, input_string_1_len);
+        if (input_string_2) {
+            SHA256_Update(&ctx, input_string_2, input_string_2_len);
+        }
+        SHA256_End(&ctx, requested_bytes, &hash_return_len,
+                   no_of_bytes_to_return);
+        requested_bytes += hash_return_len; /* advance by what this round wrote */
+        no_of_bytes_to_return -= hash_return_len;
+    }
+    return SECSuccess; /* this function has no failure path */
+}
+
+/*
+ * Hash_DRBG Instantiate NIST SP 800-90 10.1.1.2
+ * Derives V and C from the seed; short non-KAT seeds fail (NEED_RANDOM).
+ * NOTE: bytes & len are entropy || nonce || personalization_string. In
+ * normal operation, NSS calculates them all together in a single call.
+ */
+static SECStatus
+prng_instantiate(RNGContext *rng, const PRUint8 *bytes, unsigned int len)
+{
+    if (!rng->isKatTest && len < PRNG_SEEDLEN) {
+        /* If the seedlen is too small, it's probably because we failed to get
+         * enough random data.
+         * This is stricter than NIST SP800-90A requires. Don't enforce it for
+         * tests. */
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        return SECFailure;
+    }
+    prng_Hash_df(V(rng), VSize(rng), bytes, len, NULL, 0);
+    rng->V_type = prngCGenerateType; /* prefix byte hashed together with V */
+    prng_Hash_df(rng->C, sizeof rng->C, rng->V_Data, sizeof rng->V_Data, NULL, 0);
+    PRNG_RESET_RESEED_COUNT(rng)
+    return SECSuccess;
+}
+
+/*
+ * Update the given random number generator (the global one or the test
+ * context) with more seeding material. Use the Hash_DRBG reseed algorithm
+ * from NIST SP-800-90 section 10.1.1.3
+ *
+ * If entropy is NULL, it is fetched from RNG_SystemRNG().
+ */
+static SECStatus
+prng_reseed(RNGContext *rng, const PRUint8 *entropy, unsigned int entropy_len,
+            const PRUint8 *additional_input, unsigned int additional_input_len)
+{
+    PRUint8 noiseData[(sizeof rng->V_Data) + PRNG_SEEDLEN];
+    PRUint8 *noise = &noiseData[0];
+
+    /* if entropy wasn't supplied, fetch it. (normal operation case) */
+    if (entropy == NULL) {
+        entropy_len = (unsigned int)RNG_SystemRNG(
+            &noiseData[sizeof rng->V_Data], PRNG_SEEDLEN);
+    } else {
+        /* NOTE: this code is only available for testing, not to applications */
+        /* if entropy was too big for the stack variable, get it from malloc */
+        if (entropy_len > PRNG_SEEDLEN) {
+            noise = PORT_Alloc(entropy_len + (sizeof rng->V_Data));
+            if (noise == NULL) {
+                return SECFailure;
+            }
+        }
+        PORT_Memcpy(&noise[sizeof rng->V_Data], entropy, entropy_len);
+    }
+
+    if (entropy_len < 256 / PR_BITS_PER_BYTE) {
+        /* a heap 'noise' implies entropy_len > PRNG_SEEDLEN: nothing to free */
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        return SECFailure;
+    }
+
+    rng->V_type = prngReseedType;
+    PORT_Memcpy(noise, rng->V_Data, sizeof rng->V_Data); /* V_type || V goes first */
+    prng_Hash_df(V(rng), VSize(rng), noise, (sizeof rng->V_Data) + entropy_len,
+                 additional_input, additional_input_len);
+    /* clear potential CSP */
+    PORT_Memset(noise, 0, (sizeof rng->V_Data) + entropy_len);
+    rng->V_type = prngCGenerateType;
+    prng_Hash_df(rng->C, sizeof rng->C, rng->V_Data, sizeof rng->V_Data, NULL, 0);
+    PRNG_RESET_RESEED_COUNT(rng)
+
+    if (noise != &noiseData[0]) {
+        PORT_Free(noise);
+    }
+    return SECSuccess;
+}
+
+/*
+ * SP 800-90 requires we rerun our health tests on reseed
+ */
+static SECStatus
+prng_reseed_test(RNGContext *rng, const PRUint8 *entropy,
+                 unsigned int entropy_len, const PRUint8 *additional_input,
+                 unsigned int additional_input_len)
+{
+    SECStatus rv;
+
+    /* run the health checks before every reseed */
+    rv = PRNGTEST_RunHealthTests();
+    if (rv != SECSuccess) {
+        /* error set by PRNGTEST_RunHealthTests() */
+        rng->isValid = PR_FALSE;
+        return SECFailure;
+    }
+    return prng_reseed(rng, entropy, entropy_len,
+                       additional_input, additional_input_len);
+}
+
+/*
+ * Macros for adding to big-endian byte arrays (index 0 is most significant).
+ */
+#define PRNG_ADD_CARRY_ONLY(dest, start, carry)    \
+    {                                              \
+        int k1;                                    \
+        for (k1 = start; carry && k1 >= 0; k1--) { \
+            carry = !(++dest[k1]);                 \
+        }                                          \
+    }
+
+/*
+ * NOTE: dest must be an array for the following to work.
+ */
+#define PRNG_ADD_BITS(dest, dest_len, add, len, carry)               \
+    carry = 0;                                                       \
+    PORT_Assert((dest_len) >= (len));                                \
+    {                                                                \
+        int k1, k2;                                                  \
+        for (k1 = dest_len - 1, k2 = len - 1; k2 >= 0; --k1, --k2) { \
+            carry += dest[k1] + add[k2];                             \
+            dest[k1] = (PRUint8)carry;                               \
+            carry >>= 8;                                             \
+        }                                                            \
+    }
+
+#define PRNG_ADD_BITS_AND_CARRY(dest, dest_len, add, len, carry) \
+    PRNG_ADD_BITS(dest, dest_len, add, len, carry)               \
+    PRNG_ADD_CARRY_ONLY(dest, dest_len - len - 1, carry)
+
+/*
+ * This function expands the internal state of the prng to fulfill any number
+ * of bytes we need for this request. prng_generateNewBytes uses this call
+ * for every request whose length is not exactly SHA256_LENGTH.
+ *
+ * This function is specified in NIST SP 800-90 section 10.1.1.4, Hashgen
+ */
+static void
+prng_Hashgen(RNGContext *rng, PRUint8 *returned_bytes,
+             unsigned int no_of_returned_bytes)
+{
+    PRUint8 data[VSize(rng)];
+    PRUint8 thisHash[SHA256_LENGTH];
+    PRUint8 *lastHash = rng->lastOutput;
+
+    PORT_Memcpy(data, V(rng), VSize(rng)); /* work on a copy; V itself is untouched */
+    while (no_of_returned_bytes) {
+        SHA256Context ctx;
+        unsigned int len;
+        unsigned int carry;
+
+        SHA256_Begin(&ctx);
+        SHA256_Update(&ctx, data, sizeof data);
+        SHA256_End(&ctx, thisHash, &len, SHA256_LENGTH);
+        if (PORT_Memcmp(lastHash, thisHash, len) == 0) { /* continuous rng check */
+            rng->isValid = PR_FALSE;
+            break;
+        }
+        if (no_of_returned_bytes < SHA256_LENGTH) {
+            len = no_of_returned_bytes;
+        }
+        PORT_Memcpy(returned_bytes, thisHash, len);
+        lastHash = returned_bytes;
+        returned_bytes += len;
+        no_of_returned_bytes -= len;
+        /* The carry parameter is a bool (increment or not).
+         * This increments data if no_of_returned_bytes is not zero */
+        carry = no_of_returned_bytes;
+        PRNG_ADD_CARRY_ONLY(data, (sizeof data) - 1, carry);
+    }
+    PORT_Memcpy(rng->lastOutput, thisHash, SHA256_LENGTH); /* NOTE(review): thisHash is unset if the loop never ran */
+    PORT_Memset(data, 0, sizeof data);
+    PORT_Memset(thisHash, 0, sizeof thisHash);
+}
+
+/*
+ * Generates new random bytes and advances the internal prng state.
+ * additional_input is optional; pass NULL when there is none.
+ * On failure no output is returned (the buffer is zeroed once invalid).
+ * This function is specified in NIST SP 800-90 section 10.1.1.4
+ */
+static SECStatus
+prng_generateNewBytes(RNGContext *rng,
+                      PRUint8 *returned_bytes, unsigned int no_of_returned_bytes,
+                      const PRUint8 *additional_input,
+                      unsigned int additional_input_len)
+{
+    PRUint8 H[SHA256_LENGTH]; /* both H and w since they
+                               * aren't used concurrently */
+    unsigned int carry;
+
+    if (!rng->isValid) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    /* additional_input comes from the PRNGTEST_* interface, or from
+     * prng_GenerateGlobalRandomBytes when cached update data is pending */
+    if (additional_input) {
+        SHA256Context ctx;
+    /* NIST SP 800-90 defines two temporaries in their calculations,
+     * w and H. These temporaries are the same lengths, and used
+     * at different times, so we use the following macro to collapse
+     * them to the same variable, but keeping their unique names for
+     * easy comparison to the spec */
+#define w H
+        rng->V_type = prngAdditionalDataType;
+        SHA256_Begin(&ctx);
+        SHA256_Update(&ctx, rng->V_Data, sizeof rng->V_Data);
+        SHA256_Update(&ctx, additional_input, additional_input_len);
+        SHA256_End(&ctx, w, NULL, sizeof w);
+        PRNG_ADD_BITS_AND_CARRY(V(rng), VSize(rng), w, sizeof w, carry)
+        PORT_Memset(w, 0, sizeof w);
+#undef w
+    }
+
+    if (no_of_returned_bytes == SHA256_LENGTH) {
+        /* short_cut to hashbuf and a couple of copies and clears */
+        SHA256_HashBuf(returned_bytes, V(rng), VSize(rng));
+        /* continuous rng check */
+        if (memcmp(rng->lastOutput, returned_bytes, SHA256_LENGTH) == 0) {
+            rng->isValid = PR_FALSE;
+        }
+        PORT_Memcpy(rng->lastOutput, returned_bytes, sizeof rng->lastOutput);
+    } else {
+        prng_Hashgen(rng, returned_bytes, no_of_returned_bytes);
+    }
+    /* advance our internal state... */
+    rng->V_type = prngGenerateByteType;
+    SHA256_HashBuf(H, rng->V_Data, sizeof rng->V_Data);
+    PRNG_ADD_BITS_AND_CARRY(V(rng), VSize(rng), H, sizeof H, carry)
+    PRNG_ADD_BITS(V(rng), VSize(rng), rng->C, sizeof rng->C, carry);
+    PRNG_ADD_BITS_AND_CARRY(V(rng), VSize(rng), rng->reseed_counter,
+                            sizeof rng->reseed_counter, carry)
+    carry = 1; /* increment the reseed counter by one */
+    PRNG_ADD_CARRY_ONLY(rng->reseed_counter, (sizeof rng->reseed_counter) - 1, carry);
+
+    /* if the prng failed, don't return any output, signal softoken */
+    if (!rng->isValid) {
+        PORT_Memset(returned_bytes, 0, no_of_returned_bytes);
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/* Use NSPR to prevent RNG_RNGInit from being called from separate
+ * threads, creating a race condition. rng_init is the PR_CallOnce body:
+ * on any failure it leaves globalrng NULL so RNG_RNGInit reports it. */
+static const PRCallOnceType pristineCallOnce;
+static PRCallOnceType coRNGInit;
+static PRStatus
+rng_init(void)
+{
+    PRUint8 bytes[PRNG_SEEDLEN * 2]; /* entropy + nonce */
+    unsigned int numBytes;
+    SECStatus rv = SECSuccess;
+
+    if (globalrng == NULL) {
+        /* bytes needs to have enough space to hold
+         * a SHA256 hash value. Blow up at compile time if this isn't true */
+        PR_STATIC_ASSERT(sizeof(bytes) >= SHA256_LENGTH);
+        /* create a new global RNG context */
+        globalrng = &theGlobalRng;
+        PORT_Assert(NULL == globalrng->lock);
+        /* create a lock for it */
+        globalrng->lock = PZ_NewLock(nssILockOther);
+        if (globalrng->lock == NULL) {
+            globalrng = NULL;
+            PORT_SetError(PR_OUT_OF_MEMORY_ERROR);
+            return PR_FAILURE;
+        }
+
+        /* Try to get some seed data for the RNG */
+        numBytes = (unsigned int)RNG_SystemRNG(bytes, sizeof bytes);
+        PORT_Assert(numBytes == 0 || numBytes == sizeof bytes);
+        if (numBytes != 0) {
+            /* if this is our first call,  instantiate, otherwise reseed
+             * prng_instantiate gets a new clean state, we want to mix
+             * any previous entropy we may have collected */
+            if (V(globalrng)[0] == 0) {
+                rv = prng_instantiate(globalrng, bytes, numBytes);
+            } else {
+                rv = prng_reseed_test(globalrng, bytes, numBytes, NULL, 0);
+            }
+            memset(bytes, 0, numBytes);
+        } else {
+            rv = SECFailure; /* no seed material: handle like a seeding error */
+        }
+
+        if (rv != SECSuccess) {
+            PZ_DestroyLock(globalrng->lock); /* undo the setup done above */
+            globalrng->lock = NULL;
+            globalrng = NULL;
+            return PR_FAILURE;
+        }
+        /* the RNG is in a valid state */
+        globalrng->isValid = PR_TRUE;
+        globalrng->isKatTest = PR_FALSE;
+
+        /* fetch one random value so that we can populate rng->lastOutput for
+         * our continuous random number test. */
+        prng_generateNewBytes(globalrng, bytes, SHA256_LENGTH, NULL, 0);
+
+        /* Fetch more entropy into the PRNG */
+        RNG_SystemInfoForRNG();
+    }
+    return PR_SUCCESS;
+}
+
+/*
+ * Clean up an RNG context: destroy its lock and wipe it, keeping hashed C & V
+ */
+static void
+prng_freeRNGContext(RNGContext *rng)
+{
+    PRUint8 inputhash[VSize(rng) + (sizeof rng->C)];
+
+    /* destroy the lock of the context being freed */
+    SKIP_AFTER_FORK(PZ_DestroyLock(rng->lock));
+
+    /* zero global RNG context except for C & V to preserve entropy */
+    prng_Hash_df(inputhash, sizeof rng->C, rng->C, sizeof rng->C, NULL, 0);
+    prng_Hash_df(&inputhash[sizeof rng->C], VSize(rng), V(rng), VSize(rng),
+                 NULL, 0);
+    memset(rng, 0, sizeof *rng); /* also resets lock, isValid and the caches */
+    memcpy(rng->C, inputhash, sizeof rng->C);
+    memcpy(V(rng), &inputhash[sizeof rng->C], VSize(rng));
+
+    memset(inputhash, 0, sizeof inputhash);
+}
+
+/*
+ * Public functions
+ */
+
+/*
+ * Initialize the global RNG context and give it some seed input taken
+ * from the system.  This function is thread-safe and will only allow
+ * the global context to be initialized once.  The seed input is likely
+ * small, so it is imperative that RNG_RandomUpdate() be called with
+ * additional seed data before the generator is used.  A good way to
+ * provide the generator with additional entropy is to call
+ * RNG_SystemInfoForRNG().  Note that C_Initialize() does exactly that.
+ */
+SECStatus
+RNG_RNGInit(void)
+{
+    /* Allow only one call to initialize the context */
+    PR_CallOnce(&coRNGInit, rng_init);
+    /* Make sure there is a context; the PR_CallOnce status is not consulted */
+    return (globalrng != NULL) ? SECSuccess : SECFailure;
+}
+
+/*
+** Update the global random number generator with more seeding
+** material. Small inputs are cached; a full cache triggers a reseed.
+*/
+SECStatus
+RNG_RandomUpdate(const void *data, size_t bytes)
+{
+    SECStatus rv;
+
+    /* Make sure our assumption that size_t is unsigned is true */
+    PR_STATIC_ASSERT(((size_t)-1) > (size_t)1);
+
+#if defined(NS_PTR_GT_32) || (defined(NSS_USE_64) && !defined(NS_PTR_LE_32))
+    /*
+     * NIST 800-90 requires us to verify our inputs. This value can
+     * come from the application, so we need to make sure it's within the
+     * spec. The spec says it must be less than 2^32 bytes (2^35 bits).
+     * This can only happen if size_t is greater than 32 bits (i.e. on
+     * most 64 bit platforms). The 90% case (perhaps 100% case), size_t
+     * is less than or equal to 32 bits if the platform is not 64 bits, and
+     * greater than 32 bits if it is a 64 bit platform. The corner
+     * cases are handled with explicit defines NS_PTR_GT_32 and NS_PTR_LE_32.
+     *
+     * In general, neither NS_PTR_GT_32 nor NS_PTR_LE_32 will need to be
+     * defined. If you trip over the next two size ASSERTS at compile time,
+     * you will need to define them for your platform.
+     *
+     * if 'sizeof(size_t) > 4' is triggered it means that we were expecting
+     *   sizeof(size_t) to be greater than 4, but it wasn't. Setting
+     *   NS_PTR_LE_32 will correct that mistake.
+     *
+     * if 'sizeof(size_t) <= 4' is triggered, it means that we were expecting
+     *   sizeof(size_t) to be less than or equal to 4, but it wasn't. Setting
+     *   NS_PTR_GT_32 will correct that mistake.
+     */
+
+    PR_STATIC_ASSERT(sizeof(size_t) > 4);
+
+    if (bytes >= (size_t)PRNG_MAX_ADDITIONAL_BYTES) {
+        bytes = (size_t)PRNG_MAX_ADDITIONAL_BYTES - 1; /* must survive the (unsigned int) cast below */
+    }
+#else
+    PR_STATIC_ASSERT(sizeof(size_t) <= 4);
+#endif
+
+    PZ_Lock(globalrng->lock);
+    /* if we're passed more than our additionalDataCache, simply
+     * call reseed with that data */
+    if (bytes > sizeof(globalrng->additionalDataCache)) {
+        rv = prng_reseed_test(globalrng, NULL, 0, data, (unsigned int)bytes);
+        /* if we aren't going to fill or overflow the buffer, just cache it */
+    } else if (bytes < ((sizeof globalrng->additionalDataCache) - globalrng->additionalAvail)) {
+        PORT_Memcpy(globalrng->additionalDataCache + globalrng->additionalAvail,
+                    data, bytes);
+        globalrng->additionalAvail += (PRUint32)bytes;
+        rv = SECSuccess;
+    } else {
+        /* we are going to fill or overflow the buffer. In this case we will
+         * fill the entropy buffer, reseed with it, start a new buffer with the
+         * remainder. We know the remainder will fit in the buffer because
+         * we already handled the case where bytes > the size of the buffer.
+         */
+        size_t bufRemain = (sizeof globalrng->additionalDataCache) - globalrng->additionalAvail;
+        /* fill the rest of the buffer */
+        if (bufRemain) {
+            PORT_Memcpy(globalrng->additionalDataCache + globalrng->additionalAvail,
+                        data, bufRemain);
+            data = ((unsigned char *)data) + bufRemain;
+            bytes -= bufRemain;
+        }
+        /* reseed from buffer */
+        rv = prng_reseed_test(globalrng, NULL, 0,
+                              globalrng->additionalDataCache,
+                              sizeof globalrng->additionalDataCache);
+
+        /* copy the rest into the cache */
+        PORT_Memcpy(globalrng->additionalDataCache, data, bytes);
+        globalrng->additionalAvail = (PRUint32)bytes;
+    }
+
+    PZ_Unlock(globalrng->lock);
+    return rv;
+}
+
+/*
+** Generate some random bytes from the given context, serving small requests
+** from its cache of leftover output. Fails if len > PRNG_MAX_REQUEST_SIZE.
+*/
+static SECStatus
+prng_GenerateGlobalRandomBytes(RNGContext *rng,
+                               void *dest, size_t len)
+{
+    SECStatus rv = SECSuccess;
+    PRUint8 *output = dest;
+    /* check for a valid global RNG context */
+    PORT_Assert(rng != NULL);
+    if (rng == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* FIPS limits the amount of entropy available in a single request */
+    if (len > PRNG_MAX_REQUEST_SIZE) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* --- LOCKED --- */
+    PZ_Lock(rng->lock);
+    /* If the reseed counter has reached its limit, reseed (after rerunning
+     * the health tests) before generating. If that fails, don't produce
+     * any data. */
+    if (rng->reseed_counter[0] >= RESEED_VALUE) {
+        rv = prng_reseed_test(rng, NULL, 0, NULL, 0);
+        PZ_Unlock(rng->lock);
+        if (rv != SECSuccess) {
+            return rv;
+        }
+        RNG_SystemInfoForRNG(); /* called with the lock released */
+        PZ_Lock(rng->lock);
+    }
+    /*
+     * see if we have enough bytes to fulfill the request.
+     */
+    if (len <= rng->dataAvail) {
+        memcpy(output, rng->data + ((sizeof rng->data) - rng->dataAvail), len);
+        memset(rng->data + ((sizeof rng->data) - rng->dataAvail), 0, len);
+        rng->dataAvail -= len;
+        rv = SECSuccess;
+        /* if we are asking for a small number of bytes, cache the rest of
+         * the bytes */
+    } else if (len < sizeof rng->data) {
+        rv = prng_generateNewBytes(rng, rng->data, sizeof rng->data,
+                                   rng->additionalAvail ? rng->additionalDataCache : NULL,
+                                   rng->additionalAvail);
+        rng->additionalAvail = 0;
+        if (rv == SECSuccess) {
+            memcpy(output, rng->data, len);
+            memset(rng->data, 0, len); /* wipe the bytes just handed out */
+            rng->dataAvail = (sizeof rng->data) - len;
+        }
+        /* we are asking for lots of bytes, just ask the generator to pass them */
+    } else {
+        rv = prng_generateNewBytes(rng, output, len,
+                                   rng->additionalAvail ? rng->additionalDataCache : NULL,
+                                   rng->additionalAvail);
+        rng->additionalAvail = 0;
+    }
+    PZ_Unlock(rng->lock);
+    /* --- UNLOCKED --- */
+    return rv;
+}
+
+/*
+** Public entry point: generate len random bytes into dest using the global
+** generator. UNSAFE_FUZZER_MODE builds use the deterministic generator.
+*/
+SECStatus
+RNG_GenerateGlobalRandomBytes(void *dest, size_t len)
+{
+#ifdef UNSAFE_FUZZER_MODE
+    return prng_GenerateDeterministicRandomBytes(globalrng->lock, dest, len);
+#else
+    return prng_GenerateGlobalRandomBytes(globalrng, dest, len);
+#endif
+}
+
+SECStatus
+RNG_ResetForFuzzing(void)
+{
+#ifdef UNSAFE_FUZZER_MODE
+    return prng_ResetForFuzzing(globalrng->lock);
+#else
+    return SECFailure; /* always fails unless built with UNSAFE_FUZZER_MODE */
+#endif
+}
+
+void
+RNG_RNGShutdown(void)
+{
+    /* check for a valid global RNG context */
+    PORT_Assert(globalrng != NULL);
+    if (globalrng == NULL) {
+        /* Should set a "not initialized" error code. */
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return;
+    }
+    /* destroy the lock and wipe the context, keeping only hashed C and V */
+    prng_freeRNGContext(globalrng);
+    globalrng = NULL;
+    /* reset the callonce struct to allow a new call to RNG_RNGInit() */
+    coRNGInit = pristineCallOnce;
+}
+
+/*
+ * Test case interface. Used by FIPS testing and the power-on self test.
+ */
+/* Make sure the test context is separate from the global context. This
+ * allows us to test the internal random number generator without losing
+ * entropy we may have previously collected. */
+RNGContext testContext;
+
+SECStatus
+PRNGTEST_Instantiate_Kat(const PRUint8 *entropy, unsigned int entropy_len,
+                         const PRUint8 *nonce, unsigned int nonce_len,
+                         const PRUint8 *personal_string, unsigned int ps_len)
+{
+    testContext.isKatTest = PR_TRUE; /* lets prng_instantiate accept a short seed */
+    return PRNGTEST_Instantiate(entropy, entropy_len,
+                                nonce, nonce_len,
+                                personal_string, ps_len);
+}
+
+/*
+ * Test vector API. Use NIST SP 800-90 general interface so one of the
+ * other NIST SP 800-90 algorithms may be used in the future.
+ */
+SECStatus
+PRNGTEST_Instantiate(const PRUint8 *entropy, unsigned int entropy_len,
+                     const PRUint8 *nonce, unsigned int nonce_len,
+                     const PRUint8 *personal_string, unsigned int ps_len)
+{
+    unsigned int bytes_len = entropy_len + nonce_len + ps_len; /* unsigned, like its terms */
+    PRUint8 *bytes = NULL;
+    SECStatus rv;
+
+    if (entropy_len < 256 / PR_BITS_PER_BYTE) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        return SECFailure;
+    }
+
+    bytes = PORT_Alloc(bytes_len);
+    if (bytes == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    /* concatenate the various inputs, internally NSS only instantiates with
+     * a single long string */
+    PORT_Memcpy(bytes, entropy, entropy_len);
+    if (nonce) {
+        PORT_Memcpy(&bytes[entropy_len], nonce, nonce_len);
+    } else {
+        PORT_Assert(nonce_len == 0);
+    }
+    if (personal_string) {
+        PORT_Memcpy(&bytes[entropy_len + nonce_len], personal_string, ps_len);
+    } else {
+        PORT_Assert(ps_len == 0);
+    }
+    rv = prng_instantiate(&testContext, bytes, bytes_len);
+    PORT_ZFree(bytes, bytes_len); /* the seed material is zeroed before release */
+    if (rv == SECFailure) {
+        return SECFailure;
+    }
+    testContext.isValid = PR_TRUE;
+    return SECSuccess;
+}
+
+SECStatus
+PRNGTEST_Reseed(const PRUint8 *entropy, unsigned int entropy_len,
+                const PRUint8 *additional, unsigned int additional_len)
+{
+    if (!testContext.isValid) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    /* This magic input tells us to set the reseed count to its max count,
+     * so we can simulate PRNGTEST_Generate reaching max reseed count */
+    if ((entropy == NULL) && (entropy_len == 0) &&
+        (additional == NULL) && (additional_len == 0)) {
+        testContext.reseed_counter[0] = RESEED_VALUE;
+        return SECSuccess;
+    }
+    return prng_reseed(&testContext, entropy, entropy_len, additional,
+                       additional_len);
+}
+
+SECStatus
+PRNGTEST_Generate(PRUint8 *bytes, unsigned int bytes_len,
+                  const PRUint8 *additional, unsigned int additional_len)
+{
+    SECStatus rv;
+    if (!testContext.isValid) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    /* replicate reseed test from prng_GenerateGlobalRandomBytes (no health tests) */
+    if (testContext.reseed_counter[0] >= RESEED_VALUE) {
+        rv = prng_reseed(&testContext, NULL, 0, NULL, 0);
+        if (rv != SECSuccess) {
+            return rv;
+        }
+    }
+    return prng_generateNewBytes(&testContext, bytes, bytes_len,
+                                 additional, additional_len);
+}
+
+SECStatus
+PRNGTEST_Uninstantiate(void)
+{
+    if (!testContext.isValid) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    PORT_Memset(&testContext, 0, sizeof testContext); /* also clears isValid and isKatTest */
+    return SECSuccess;
+}
+
+SECStatus
+PRNGTEST_RunHealthTests()
+{
+    static const PRUint8 entropy[] = {
+        0x8e, 0x9c, 0x0d, 0x25, 0x75, 0x22, 0x04, 0xf9,
+        0xc5, 0x79, 0x10, 0x8b, 0x23, 0x79, 0x37, 0x14,
+        0x9f, 0x2c, 0xc7, 0x0b, 0x39, 0xf8, 0xee, 0xef,
+        0x95, 0x0c, 0x97, 0x59, 0xfc, 0x0a, 0x85, 0x41,
+        0x76, 0x9d, 0x6d, 0x67, 0x00, 0x4e, 0x19, 0x12,
+        0x02, 0x16, 0x53, 0xea, 0xf2, 0x73, 0xd7, 0xd6,
+        0x7f, 0x7e, 0xc8, 0xae, 0x9c, 0x09, 0x99, 0x7d,
+        0xbb, 0x9e, 0x48, 0x7f, 0xbb, 0x96, 0x46, 0xb3,
+        0x03, 0x75, 0xf8, 0xc8, 0x69, 0x45, 0x3f, 0x97,
+        0x5e, 0x2e, 0x48, 0xe1, 0x5d, 0x58, 0x97, 0x4c
+    };
+    static const PRUint8 rng_known_result[] = {
+        0x16, 0xe1, 0x8c, 0x57, 0x21, 0xd8, 0xf1, 0x7e,
+        0x5a, 0xa0, 0x16, 0x0b, 0x7e, 0xa6, 0x25, 0xb4,
+        0x24, 0x19, 0xdb, 0x54, 0xfa, 0x35, 0x13, 0x66,
+        0xbb, 0xaa, 0x2a, 0x1b, 0x22, 0x33, 0x2e, 0x4a,
+        0x14, 0x07, 0x9d, 0x52, 0xfc, 0x73, 0x61, 0x48,
+        0xac, 0xc1, 0x22, 0xfc, 0xa4, 0xfc, 0xac, 0xa4,
+        0xdb, 0xda, 0x5b, 0x27, 0x33, 0xc4, 0xb3
+    };
+    static const PRUint8 reseed_entropy[] = {
+        0xc6, 0x0b, 0x0a, 0x30, 0x67, 0x07, 0xf4, 0xe2,
+        0x24, 0xa7, 0x51, 0x6f, 0x5f, 0x85, 0x3e, 0x5d,
+        0x67, 0x97, 0xb8, 0x3b, 0x30, 0x9c, 0x7a, 0xb1,
+        0x52, 0xc6, 0x1b, 0xc9, 0x46, 0xa8, 0x62, 0x79
+    };
+    static const PRUint8 additional_input[] = {
+        0x86, 0x82, 0x28, 0x98, 0xe7, 0xcb, 0x01, 0x14,
+        0xae, 0x87, 0x4b, 0x1d, 0x99, 0x1b, 0xc7, 0x41,
+        0x33, 0xff, 0x33, 0x66, 0x40, 0x95, 0x54, 0xc6,
+        0x67, 0x4d, 0x40, 0x2a, 0x1f, 0xf9, 0xeb, 0x65
+    };
+    static const PRUint8 rng_reseed_result[] = {
+        0x02, 0x0c, 0xc6, 0x17, 0x86, 0x49, 0xba, 0xc4,
+        0x7b, 0x71, 0x35, 0x05, 0xf0, 0xdb, 0x4a, 0xc2,
+        0x2c, 0x38, 0xc1, 0xa4, 0x42, 0xe5, 0x46, 0x4a,
+        0x7d, 0xf0, 0xbe, 0x47, 0x88, 0xb8, 0x0e, 0xc6,
+        0x25, 0x2b, 0x1d, 0x13, 0xef, 0xa6, 0x87, 0x96,
+        0xa3, 0x7d, 0x5b, 0x80, 0xc2, 0x38, 0x76, 0x61,
+        0xc7, 0x80, 0x5d, 0x0f, 0x05, 0x76, 0x85
+    };
+    static const PRUint8 rng_no_reseed_result[] = {
+        0xc4, 0x40, 0x41, 0x8c, 0xbf, 0x2f, 0x70, 0x23,
+        0x88, 0xf2, 0x7b, 0x30, 0xc3, 0xca, 0x1e, 0xf3,
+        0xef, 0x53, 0x81, 0x5d, 0x30, 0xed, 0x4c, 0xf1,
+        0xff, 0x89, 0xa5, 0xee, 0x92, 0xf8, 0xc0, 0x0f,
+        0x88, 0x53, 0xdf, 0xb6, 0x76, 0xf0, 0xaa, 0xd3,
+        0x2e, 0x1d, 0x64, 0x37, 0x3e, 0xe8, 0x4a, 0x02,
+        0xff, 0x0a, 0x7f, 0xe5, 0xe9, 0x2b, 0x6d
+    };
+    /* Known-answer checks of the PRNGTEST_* calls, using the vectors above. */
+    SECStatus rng_status = SECSuccess;
+    PR_STATIC_ASSERT(sizeof(rng_known_result) >= sizeof(rng_reseed_result));
+    PRUint8 result[sizeof(rng_known_result)];
+
+    /********************************************/
+    /*   First test instantiate error path.     */
+    /*   In this case we supply enough entropy, */
+    /*   but not enough seed. This will trigger */
+    /*   the code that checks for an entropy    */
+    /*   source failure.                        */
+    /********************************************/
+    rng_status = PRNGTEST_Instantiate(entropy, 256 / PR_BITS_PER_BYTE,
+                                      NULL, 0, NULL, 0);
+    if (rng_status == SECSuccess) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    if (PORT_GetError() != SEC_ERROR_NEED_RANDOM) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    /* we failed with the proper error code, we can continue */
+
+    /********************************************/
+    /* Generate random bytes with a known seed. */
+    /********************************************/
+    rng_status = PRNGTEST_Instantiate(entropy, sizeof entropy,
+                                      NULL, 0, NULL, 0);
+    if (rng_status != SECSuccess) {
+        /* Error set by PRNGTEST_Instantiate */
+        return SECFailure;
+    }
+    rng_status = PRNGTEST_Generate(result, sizeof rng_known_result, NULL, 0);
+    if ((rng_status != SECSuccess) ||
+        (PORT_Memcmp(result, rng_known_result,
+                     sizeof rng_known_result) != 0)) {
+        PRNGTEST_Uninstantiate();
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    rng_status = PRNGTEST_Reseed(reseed_entropy, sizeof reseed_entropy,
+                                 additional_input, sizeof additional_input);
+    if (rng_status != SECSuccess) {
+        /* Error set by PRNGTEST_Reseed */
+        PRNGTEST_Uninstantiate();
+        return SECFailure;
+    }
+    rng_status = PRNGTEST_Generate(result, sizeof rng_reseed_result, NULL, 0);
+    if ((rng_status != SECSuccess) ||
+        (PORT_Memcmp(result, rng_reseed_result,
+                     sizeof rng_reseed_result) != 0)) {
+        PRNGTEST_Uninstantiate();
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    /* This magic forces the reseed count to its max count, so we can see if
+     * PRNGTEST_Generate will actually reseed when it reaches its count */
+    rng_status = PRNGTEST_Reseed(NULL, 0, NULL, 0);
+    if (rng_status != SECSuccess) {
+        PRNGTEST_Uninstantiate();
+        /* Error set by PRNGTEST_Reseed */
+        return SECFailure;
+    }
+    /* This generate should now reseed */
+    rng_status = PRNGTEST_Generate(result, sizeof rng_reseed_result, NULL, 0);
+    if ((rng_status != SECSuccess) ||
+        /* NOTE we fail if the result is equal to the no_reseed_result.
+         * no_reseed_result is the value we would have gotten if we didn't
+         * do an automatic reseed in PRNGTEST_Generate */
+        (PORT_Memcmp(result, rng_no_reseed_result,
+                     sizeof rng_no_reseed_result) == 0)) {
+        PRNGTEST_Uninstantiate();
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    /* make sure reseed fails when we don't supply enough entropy */
+    rng_status = PRNGTEST_Reseed(reseed_entropy, 4, NULL, 0);
+    if (rng_status == SECSuccess) {
+        PRNGTEST_Uninstantiate();
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    if (PORT_GetError() != SEC_ERROR_NEED_RANDOM) {
+        PRNGTEST_Uninstantiate();
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    rng_status = PRNGTEST_Uninstantiate();
+    if (rng_status != SECSuccess) {
+        /* Error set by PRNGTEST_Uninstantiate */
+        return rng_status;
+    }
+    /* make sure uninstantiate fails if the context is not instantiated (also
+     * tests if the context was cleared in the previous Uninstantiate) */
+    rng_status = PRNGTEST_Uninstantiate();
+    if (rng_status == SECSuccess) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    if (PORT_GetError() != SEC_ERROR_LIBRARY_FAILURE) {
+        return rng_status;
+    }
+
+    return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/dsa.c b/security/nss/lib/freebl/dsa.c
new file mode 100644
index 000000000..9324d306b
--- /dev/null
+++ b/security/nss/lib/freebl/dsa.c
@@ -0,0 +1,647 @@
+/*
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerror.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "prinit.h"
+#include "blapi.h"
+#include "nssilock.h"
+#include "secitem.h"
+#include "blapi.h"
+#include "mpi.h"
+#include "secmpi.h"
+#include "pqg.h"
+
+/* XXX to be replaced by define in blapit.h */
+#define NSS_FREEBL_DSA_DEFAULT_CHUNKSIZE 2048
+
+/*
+ * Reduce a double-width random value mod q, as FIPS 186-2 requires when
+ * generating random numbers for DSA.
+ *
+ * Input: w, 2*qLen bytes
+ *        q, qLen bytes
+ * Output: xj, qLen bytes; SECFailure (MPI error translated) on failure
+ */
+static SECStatus
+fips186Change_ReduceModQForDSA(const PRUint8 *w, const PRUint8 *q,
+                               unsigned int qLen, PRUint8 *xj)
+{
+    mp_int wide, modulus, reduced;
+    mp_err err;
+
+    /* Null the digit pointers first, then allocate each integer. */
+    MP_DIGITS(&wide) = 0;
+    MP_DIGITS(&modulus) = 0;
+    MP_DIGITS(&reduced) = 0;
+    CHECK_MPI_OK(mp_init(&wide));
+    CHECK_MPI_OK(mp_init(&modulus));
+    CHECK_MPI_OK(mp_init(&reduced));
+
+    /* Load the octet strings: w is twice as long as q. */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&wide, w, 2 * qLen));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&modulus, q, qLen));
+
+    /*
+     * Algorithm 1 of FIPS 186-2 Change Notice 1, Step 3.3
+     *
+     * xj = (w0 || w1) mod q
+     */
+    CHECK_MPI_OK(mp_mod(&wide, &modulus, &reduced));
+    /* Write the remainder out as qLen bytes. */
+    CHECK_MPI_OK(mp_to_fixlen_octets(&reduced, xj, qLen));
+cleanup:
+    /* Reached on success and after any failed step; err holds the status. */
+    mp_clear(&wide);
+    mp_clear(&modulus);
+    mp_clear(&reduced);
+    if (!err) {
+        return SECSuccess;
+    }
+    MP_TO_SEC_ERROR(err);
+    return SECFailure;
+}
+
+/*
+ * Exported form of the reduction for the fixed DSA1 subprime size: w is
+ * 2 * DSA1_SUBPRIME_LEN bytes, q and xj are DSA1_SUBPRIME_LEN bytes each.
+ */
+SECStatus
+FIPS186Change_ReduceModQForDSA(const unsigned char *w, const unsigned char *q,
+                               unsigned char *xj)
+{
+    const unsigned int qLen = DSA1_SUBPRIME_LEN;
+    return fips186Change_ReduceModQForDSA(w, q, qLen, xj);
+}
+
+/*
+ * Stub for the core of Algorithm 1 of FIPS 186-2 Change Notice 1. The FIPS
+ * 186-2 RNG is no longer supported; this export (once used by power-up self
+ * tests and FIPS tests) always fails, signalling that to test code.
+ */
+SECStatus
+FIPS186Change_GenerateX(PRUint8 *XKEY, const PRUint8 *XSEEDj, PRUint8 *x_j)
+{
+    /* Inputs and output buffer are deliberately left untouched. */
+    (void)XKEY;
+    (void)XSEEDj;
+    (void)x_j;
+    PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+    return SECFailure;
+}
+
+/*
+ * Specialized RNG for DSA
+ *
+ * As per Algorithm 1 of FIPS 186-2 Change Notice 1, in step 3.3 the value
+ * Xj should be reduced mod q, a 160-bit prime number.  Since this parameter
+ * is only meaningful in the context of DSA, the above RNG functions
+ * were implemented without it.  They are re-implemented below for use
+ * with DSA.
+ */
+
+/*
+** Generate some random bytes, using the global random number generator
+** object.  In DSA mode, so there is a q (the bytes are reduced mod q).
+*/
+static SECStatus
+dsa_GenerateGlobalRandomBytes(const SECItem *qItem, PRUint8 *dest,
+                              unsigned int *destLen, unsigned int maxDestLen)
+{
+    SECStatus rv;
+    SECItem w;
+    const PRUint8 *q = qItem->data;
+    unsigned int qLen = qItem->len;
+
+    if (qLen > 0 && *q == 0) { /* skip one leading zero byte, if any */
+        ++q;
+        --qLen;
+    }
+    if (qLen == 0 || maxDestLen < qLen) {
+        /* Empty q, or DSA_SignDigest was passed a group with a subprime
+           that is larger than DSA_MAX_SUBPRIME_LEN. */
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    w.data = NULL; /* otherwise SECITEM_AllocItem asserts */
+    if (!SECITEM_AllocItem(NULL, &w, 2 * qLen)) {
+        return SECFailure;
+    }
+    *destLen = qLen;
+
+    rv = RNG_GenerateGlobalRandomBytes(w.data, w.len);
+    if (rv == SECSuccess) {
+        rv = fips186Change_ReduceModQForDSA(w.data, q, qLen, dest);
+    }
+    PORT_Memset(w.data, 0, w.len); /* w seeds keys and nonces: wipe it */
+    SECITEM_FreeItem(&w, PR_FALSE);
+    return rv;
+}
+
+static void
+translate_mpi_error(mp_err err)
+{
+    MP_TO_SEC_ERROR(err); /* presumably sets the matching PORT error */
+}
+
+/* Build *privKey in a fresh arena: x = seed bytes, y = g**x mod p. */
+static SECStatus
+dsa_NewKeyExtended(const PQGParams *params, const SECItem *seed,
+                   DSAPrivateKey **privKey)
+{
+    mp_int p, g, x, y;
+    mp_err err;
+    PLArenaPool *arena;
+    DSAPrivateKey *key;
+    if (!params || !privKey || !seed || !seed->data) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* Initialize an arena for the DSA key. */
+    arena = PORT_NewArena(NSS_FREEBL_DSA_DEFAULT_CHUNKSIZE);
+    if (!arena) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    key = (DSAPrivateKey *)PORT_ArenaZAlloc(arena, sizeof(DSAPrivateKey));
+    if (!key) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        PORT_FreeArena(arena, PR_TRUE);
+        return SECFailure;
+    }
+    key->params.arena = arena;
+    /* Initialize MPI integers. */
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&g) = 0;
+    MP_DIGITS(&x) = 0;
+    MP_DIGITS(&y) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&g));
+    CHECK_MPI_OK(mp_init(&x));
+    CHECK_MPI_OK(mp_init(&y));
+    /* Copy over the PQG params */
+    CHECK_MPI_OK(SECITEM_CopyItem(arena, &key->params.prime, &params->prime));
+    CHECK_MPI_OK(SECITEM_CopyItem(arena, &key->params.subPrime,
+                                  &params->subPrime));
+    CHECK_MPI_OK(SECITEM_CopyItem(arena, &key->params.base, &params->base));
+    /* Convert stored p, g, and received x into MPI integers. */
+    SECITEM_TO_MPINT(params->prime, &p);
+    SECITEM_TO_MPINT(params->base, &g);
+    OCTETS_TO_MPINT(seed->data, &x, seed->len);
+    /* Store x in private key; bail out before the copy if allocation failed */
+    if (!SECITEM_AllocItem(arena, &key->privateValue, seed->len)) {
+        err = MP_MEM;
+        goto cleanup;
+    }
+    PORT_Memcpy(key->privateValue.data, seed->data, seed->len);
+    /* Compute public key y = g**x mod p and store it in the public key */
+    CHECK_MPI_OK(mp_exptmod(&g, &x, &p, &y));
+    MPINT_TO_SECITEM(&y, &key->publicValue, arena);
+    *privKey = key;
+    key = NULL;
+cleanup:
+    mp_clear(&p);
+    mp_clear(&g);
+    mp_clear(&x);
+    mp_clear(&y);
+    if (key)
+        PORT_FreeArena(key->params.arena, PR_TRUE);
+    if (err) {
+        translate_mpi_error(err);
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/* Fill seed with a random value reduced mod q that is neither 0 nor 1. */
+SECStatus
+DSA_NewRandom(PLArenaPool *arena, const SECItem *q, SECItem *seed)
+{
+    int retries = 10;
+    unsigned int i;
+    PRBool good;
+
+    if (q == NULL || q->data == NULL || q->len == 0 ||
+        (q->data[0] == 0 && q->len == 1)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if (!SECITEM_AllocItem(arena, seed, q->len)) {
+        return SECFailure;
+    }
+
+    do {
+        /* Generate seed bytes for x according to FIPS 186-1 appendix 3 */
+        if (dsa_GenerateGlobalRandomBytes(q, seed->data, &seed->len,
+                                          seed->len)) {
+            goto loser;
+        }
+        /* Disallow values of 0 and 1 for x. */
+        good = PR_FALSE;
+        for (i = 0; i < seed->len - 1; i++) {
+            if (seed->data[i] != 0) {
+                good = PR_TRUE;
+                break;
+            }
+        }
+        if (!good && seed->data[i] > 1) {
+            good = PR_TRUE;
+        }
+    } while (!good && --retries > 0);
+
+    if (!good) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+    loser:
+        /* Free only a heap-allocated item; arena data belongs to the arena. */
+        if (arena == NULL) {
+            SECITEM_FreeItem(seed, PR_FALSE);
+        }
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+** Generate and return a new DSA public and private key pair, both of which
+**  are encoded into a single DSAPrivateKey struct.  "params" is a pointer to
+**  the PQG parameters for the domain.  Uses a random seed.
+*/
+SECStatus
+DSA_NewKey(const PQGParams *params, DSAPrivateKey **privKey)
+{
+    SECItem seed = { siBuffer, NULL, 0 };
+    SECStatus rv;
+
+    rv = PQG_Check(params);
+    if (rv != SECSuccess) {
+        return rv;
+    }
+    rv = DSA_NewRandom(NULL, &params->subPrime, &seed);
+    if (rv == SECSuccess) {
+        if (seed.len != PQG_GetLength(&params->subPrime)) {
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            rv = SECFailure;
+        } else {
+            rv = dsa_NewKeyExtended(params, &seed, privKey);
+        }
+    }
+    if (seed.data != NULL) {
+        PORT_Memset(seed.data, 0, seed.len); /* seed is the private value x */
+    }
+    SECITEM_FreeItem(&seed, PR_FALSE);
+    return rv;
+}
+
+/* For FIPS compliance testing. Seed must be exactly the size of subPrime  */
+SECStatus
+DSA_NewKeyFromSeed(const PQGParams *params, const unsigned char *seed,
+                   DSAPrivateKey **privKey)
+{
+    SECItem seedItem;
+    seedItem.data = (unsigned char *)seed;
+    /* A NULL params is left for dsa_NewKeyExtended to reject. */
+    seedItem.len = params ? PQG_GetLength(&params->subPrime) : 0;
+    return dsa_NewKeyExtended(params, &seedItem, privKey);
+}
+
+/* Sign digest with key. kb supplies dsa_subprime_len bytes used as the value
+ * k; on success signature holds r || s and signature->len is their length. */
+static SECStatus
+dsa_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest,
+               const unsigned char *kb)
+{
+    mp_int p, q, g; /* PQG parameters */
+    mp_int x, k;    /* private key & pseudo-random integer */
+    mp_int r, s;    /* tuple (r, s) is the signature */
+    mp_int t;       /* holding tmp values */
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    unsigned int dsa_subprime_len, dsa_signature_len, offset;
+    SECItem localDigest;
+    unsigned char localDigestData[DSA_MAX_SUBPRIME_LEN];
+    SECItem t2 = { siBuffer, NULL, 0 };
+
+    /* Check args. FIPS-compliance dictates that digest is a SHA hash. */
+    if (!key || !signature || !digest) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    dsa_subprime_len = PQG_GetLength(&key->params.subPrime);
+    dsa_signature_len = dsa_subprime_len * 2;
+    if ((signature->len < dsa_signature_len) ||
+        (dsa_subprime_len > DSA_MAX_SUBPRIME_LEN) || /* localDigestData size */
+        (digest->len > HASH_LENGTH_MAX) ||
+        (digest->len < SHA1_LENGTH)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* DSA accepts digests not equal to dsa_subprime_len. If the digest is
+     * longer, it is truncated to dsa_subprime_len, keeping the leftmost
+     * bytes. If it is shorter, it is padded with zero bytes on the
+     * left. */
+    PORT_Memset(localDigestData, 0, dsa_subprime_len);
+    offset = (digest->len < dsa_subprime_len) ? (dsa_subprime_len - digest->len) : 0;
+    PORT_Memcpy(localDigestData + offset, digest->data,
+                dsa_subprime_len - offset);
+    localDigest.data = localDigestData;
+    localDigest.len = dsa_subprime_len;
+
+    /* Initialize MPI integers. */
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&g) = 0;
+    MP_DIGITS(&x) = 0;
+    MP_DIGITS(&k) = 0;
+    MP_DIGITS(&r) = 0;
+    MP_DIGITS(&s) = 0;
+    MP_DIGITS(&t) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&g));
+    CHECK_MPI_OK(mp_init(&x));
+    CHECK_MPI_OK(mp_init(&k));
+    CHECK_MPI_OK(mp_init(&r));
+    CHECK_MPI_OK(mp_init(&s));
+    CHECK_MPI_OK(mp_init(&t));
+    /* Convert stored PQG and private key into MPI integers. */
+    SECITEM_TO_MPINT(key->params.prime, &p);
+    SECITEM_TO_MPINT(key->params.subPrime, &q);
+    SECITEM_TO_MPINT(key->params.base, &g);
+    SECITEM_TO_MPINT(key->privateValue, &x);
+    OCTETS_TO_MPINT(kb, &k, dsa_subprime_len);
+    /*
+    ** FIPS 186-1, Section 5, Step 1
+    **
+    ** r = (g**k mod p) mod q
+    */
+    CHECK_MPI_OK(mp_exptmod(&g, &k, &p, &r)); /* r = g**k mod p */
+    CHECK_MPI_OK(mp_mod(&r, &q, &r));         /* r = r mod q    */
+    /*
+    ** FIPS 186-1, Section 5, Step 2
+    **
+    ** s = (k**-1 * (HASH(M) + x*r)) mod q
+    */
+    if (DSA_NewRandom(NULL, &key->params.subPrime, &t2) != SECSuccess) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    SECITEM_TO_MPINT(t2, &t);                /* t <-$ Zq */
+    CHECK_MPI_OK(mp_mulmod(&k, &t, &q, &k)); /* k = k * t mod q */
+    CHECK_MPI_OK(mp_invmod(&k, &q, &k));     /* k = k**-1 mod q */
+    CHECK_MPI_OK(mp_mulmod(&k, &t, &q, &k)); /* k = k * t mod q */
+    SECITEM_TO_MPINT(localDigest, &s);       /* s = HASH(M)     */
+    CHECK_MPI_OK(mp_mulmod(&x, &r, &q, &x)); /* x = x * r mod q */
+    CHECK_MPI_OK(mp_addmod(&s, &x, &q, &s)); /* s = s + x mod q */
+    CHECK_MPI_OK(mp_mulmod(&s, &k, &q, &s)); /* s = s * k mod q */
+    /*
+    ** verify r != 0 and s != 0
+    ** mentioned as optional in FIPS 186-1.
+    */
+    if (mp_cmp_z(&r) == 0 || mp_cmp_z(&s) == 0) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    /*
+    ** Step 4
+    **
+    ** Signature is tuple (r, s)
+    */
+    err = mp_to_fixlen_octets(&r, signature->data, dsa_subprime_len);
+    if (err < 0)
+        goto cleanup;
+    err = mp_to_fixlen_octets(&s, signature->data + dsa_subprime_len,
+                              dsa_subprime_len);
+    if (err < 0)
+        goto cleanup;
+    err = MP_OKAY;
+    signature->len = dsa_signature_len;
+cleanup:
+    PORT_Memset(localDigestData, 0, DSA_MAX_SUBPRIME_LEN);
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&g);
+    mp_clear(&x);
+    mp_clear(&k);
+    mp_clear(&r);
+    mp_clear(&s);
+    mp_clear(&t);
+    SECITEM_FreeItem(&t2, PR_FALSE);
+    if (err) {
+        translate_mpi_error(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/* signature is caller-supplied buffer of at least twice the subprime length.
+** On input,  signature->len == size of buffer to hold signature.
+**            digest->len    == size of digest.
+** On output, signature->len == size of signature in buffer.
+** Uses a random seed, retried while signing reports SEC_ERROR_NEED_RANDOM.
+*/
+SECStatus
+DSA_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest)
+{
+    SECStatus rv;
+    int retries = 10;
+    unsigned char kSeed[DSA_MAX_SUBPRIME_LEN];
+    unsigned int kSeedLen = 0;
+    unsigned int i;
+    unsigned int dsa_subprime_len;
+
+    if (!key) { /* checked here because key is dereferenced just below */
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    dsa_subprime_len = PQG_GetLength(&key->params.subPrime);
+    PORT_SetError(0);
+    do {
+        rv = dsa_GenerateGlobalRandomBytes(&key->params.subPrime,
+                                           kSeed, &kSeedLen, sizeof kSeed);
+        if (rv != SECSuccess)
+            break;
+        if (kSeedLen != dsa_subprime_len) {
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            rv = SECFailure;
+            break;
+        }
+        /* Disallow a value of 0 for k: skip past every leading zero byte. */
+        for (i = 0; i < kSeedLen && kSeed[i] == 0; i++) {
+        }
+        if (i == kSeedLen) {
+            PORT_SetError(SEC_ERROR_NEED_RANDOM);
+            rv = SECFailure;
+            continue;
+        }
+        rv = dsa_SignDigest(key, signature, digest, kSeed);
+    } while (rv != SECSuccess && PORT_GetError() == SEC_ERROR_NEED_RANDOM &&
+             --retries > 0);
+    PORT_Memset(kSeed, 0, sizeof kSeed); /* k must not linger on the stack */
+    return rv;
+}
+
+/* For FIPS compliance testing: the caller supplies the per-signature value
+ * instead of DSA_SignDigest's random one. seed must provide as many bytes
+ * as the key's subprime length, which is what dsa_SignDigest reads. */
+SECStatus
+DSA_SignDigestWithSeed(DSAPrivateKey *key,
+                       SECItem *signature,
+                       const SECItem *digest,
+                       const unsigned char *seed)
+{
+    return dsa_SignDigest(key, signature, digest, seed);
+}
+
+/* Verify a DSA signature (r || s) over digest with the public key in key.
+** signature->len must be exactly twice the subprime length; digest->len
+** must be between SHA1_LENGTH and HASH_LENGTH_MAX. */
+SECStatus
+DSA_VerifyDigest(DSAPublicKey *key, const SECItem *signature,
+                 const SECItem *digest)
+{
+    /* FIPS-compliance dictates that digest is a SHA hash. */
+    mp_int p, q, g;      /* PQG parameters */
+    mp_int r_, s_;       /* tuple (r', s') is the received signature */
+    mp_int u1, u2, v, w; /* intermediate values used in verification */
+    mp_int y;            /* public key */
+    mp_err err;
+    unsigned int dsa_subprime_len, dsa_signature_len, offset;
+    SECItem localDigest;
+    unsigned char localDigestData[DSA_MAX_SUBPRIME_LEN];
+    SECStatus verified = SECFailure;
+
+    /* Check args. */
+    if (!key || !signature || !digest) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    dsa_subprime_len = PQG_GetLength(&key->params.subPrime);
+    dsa_signature_len = dsa_subprime_len * 2;
+    if ((signature->len != dsa_signature_len) ||
+        (dsa_subprime_len > DSA_MAX_SUBPRIME_LEN) || /* localDigestData size */
+        (digest->len > HASH_LENGTH_MAX) ||
+        (digest->len < SHA1_LENGTH)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* DSA accepts digests not equal to dsa_subprime_len. If the digest is
+     * longer, it is truncated to dsa_subprime_len, keeping the leftmost
+     * bytes. If it is shorter, it is padded with zero bytes on the
+     * left. */
+    PORT_Memset(localDigestData, 0, dsa_subprime_len);
+    offset = (digest->len < dsa_subprime_len) ? (dsa_subprime_len - digest->len) : 0;
+    PORT_Memcpy(localDigestData + offset, digest->data,
+                dsa_subprime_len - offset);
+    localDigest.data = localDigestData;
+    localDigest.len = dsa_subprime_len;
+
+    /* Initialize MPI integers. */
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&g) = 0;
+    MP_DIGITS(&y) = 0;
+    MP_DIGITS(&r_) = 0;
+    MP_DIGITS(&s_) = 0;
+    MP_DIGITS(&u1) = 0;
+    MP_DIGITS(&u2) = 0;
+    MP_DIGITS(&v) = 0;
+    MP_DIGITS(&w) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&g));
+    CHECK_MPI_OK(mp_init(&y));
+    CHECK_MPI_OK(mp_init(&r_));
+    CHECK_MPI_OK(mp_init(&s_));
+    CHECK_MPI_OK(mp_init(&u1));
+    CHECK_MPI_OK(mp_init(&u2));
+    CHECK_MPI_OK(mp_init(&v));
+    CHECK_MPI_OK(mp_init(&w));
+    /*
+    ** Convert stored PQG and public key into MPI integers.
+    */
+    SECITEM_TO_MPINT(key->params.prime, &p);
+    SECITEM_TO_MPINT(key->params.subPrime, &q);
+    SECITEM_TO_MPINT(key->params.base, &g);
+    SECITEM_TO_MPINT(key->publicValue, &y);
+    /*
+    ** Convert received signature (r', s') into MPI integers.
+    */
+    OCTETS_TO_MPINT(signature->data, &r_, dsa_subprime_len);
+    OCTETS_TO_MPINT(signature->data + dsa_subprime_len, &s_, dsa_subprime_len);
+    /*
+    ** Verify that 0 < r' < q and 0 < s' < q
+    */
+    if (mp_cmp_z(&r_) <= 0 || mp_cmp_z(&s_) <= 0 ||
+        mp_cmp(&r_, &q) >= 0 || mp_cmp(&s_, &q) >= 0) {
+        /* err is zero here. */
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        goto cleanup; /* will return verified == SECFailure */
+    }
+    /*
+    ** FIPS 186-1, Section 6, Step 1
+    **
+    ** w = (s')**-1 mod q
+    */
+    CHECK_MPI_OK(mp_invmod(&s_, &q, &w)); /* w = (s')**-1 mod q */
+    /*
+    ** FIPS 186-1, Section 6, Step 2
+    **
+    ** u1 = ((Hash(M')) * w) mod q
+    */
+    SECITEM_TO_MPINT(localDigest, &u1);        /* u1 = HASH(M')     */
+    CHECK_MPI_OK(mp_mulmod(&u1, &w, &q, &u1)); /* u1 = u1 * w mod q */
+    /*
+    ** FIPS 186-1, Section 6, Step 3
+    **
+    ** u2 = ((r') * w) mod q
+    */
+    CHECK_MPI_OK(mp_mulmod(&r_, &w, &q, &u2));
+    /*
+    ** FIPS 186-1, Section 6, Step 4
+    **
+    ** v = ((g**u1 * y**u2) mod p) mod q
+    */
+    CHECK_MPI_OK(mp_exptmod(&g, &u1, &p, &g)); /* g = g**u1 mod p */
+    CHECK_MPI_OK(mp_exptmod(&y, &u2, &p, &y)); /* y = y**u2 mod p */
+    CHECK_MPI_OK(mp_mulmod(&g, &y, &p, &v));   /* v = g * y mod p */
+    CHECK_MPI_OK(mp_mod(&v, &q, &v));          /* v = v mod q     */
+    /*
+    ** Verification:  v == r'
+    */
+    if (mp_cmp(&v, &r_)) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        verified = SECFailure; /* Signature failed to verify. */
+    } else {
+        verified = SECSuccess; /* Signature verified. */
+    }
+cleanup:
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&g);
+    mp_clear(&y);
+    mp_clear(&r_);
+    mp_clear(&s_);
+    mp_clear(&u1);
+    mp_clear(&u2);
+    mp_clear(&v);
+    mp_clear(&w);
+    if (err) {
+        translate_mpi_error(err);
+    }
+    return verified;
+}
diff --git a/security/nss/lib/freebl/ec.c b/security/nss/lib/freebl/ec.c
new file mode 100644
index 000000000..12bfeed41
--- /dev/null
+++ b/security/nss/lib/freebl/ec.c
@@ -0,0 +1,1159 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "prerr.h"
+#include "secerr.h"
+#include "secmpi.h"
+#include "secitem.h"
+#include "mplogic.h"
+#include "ec.h"
+#include "ecl.h"
+
+#ifndef NSS_DISABLE_ECC
+
+static const ECMethod kMethods[] = { /* curve-specific routines, searched by curve name */
+    { ECCurve25519,               /* ECMethod.name: the lookup key */
+      ec_Curve25519_pt_mul,       /* ECMethod.mul */
+      ec_Curve25519_pt_validate } /* ECMethod.validate */
+};
+
+static const ECMethod * /* the kMethods entry for `name`, or NULL when there is none */
+ec_get_method_from_name(ECCurveName name)
+{
+    unsigned int i; /* unsigned: avoids a signed/unsigned compare with sizeof */
+    for (i = 0; i < sizeof(kMethods) / sizeof(kMethods[0]); ++i) {
+        if (kMethods[i].name == name) {
+            return &kMethods[i];
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Reports whether pointP encodes the point at infinity: PR_TRUE when every
+ * octet after the first is zero, PR_FALSE as soon as a non-zero one is seen.
+ */
+PRBool
+ec_point_at_infinity(SECItem *pointP)
+{
+    unsigned int idx = 1; /* octet 0 is never examined */
+    while (idx < pointP->len) {
+        if (pointP->data[idx++] != 0x00) {
+            return PR_FALSE;
+        }
+    }
+    return PR_TRUE;
+}
+
+/*
+ * Computes scalar point multiplication pointQ = k1 * G + k2 * pointP for
+ * the curve whose parameters are encoded in params with base point G.
+ *
+ * - params: the group is built from params->name alone, so a curve named
+ *   ECCurve_noName always fails.
+ * - k1: scalar for G; callers in this file pass NULL when only the
+ *   k2 * pointP term is wanted.
+ * - k2, pointP: second term, used only when both are non-NULL. A non-NULL
+ *   pointP must be an uncompressed point of 2 * field length + 1 octets.
+ * - pointQ: caller-allocated output. 2 * field length + 1 octets are
+ *   written to pointQ->data; pointQ->len is not checked or updated.
+ *
+ * Returns SECSuccess, or SECFailure for an unsupported point form, an
+ * unavailable group, or an MPI error (reported via MP_TO_SEC_ERROR).
+ * After a late failure pointQ->data may already be partly written, so
+ * callers must not use it unless SECSuccess is returned.
+ */
+SECStatus
+ec_points_mul(const ECParams *params, const mp_int *k1, const mp_int *k2,
+              const SECItem *pointP, SECItem *pointQ)
+{
+    mp_int Px, Py, Qx, Qy;
+    ECGroup *group = NULL;
+    SECStatus rv = SECFailure;
+    mp_err err = MP_OKAY;
+    int len;
+
+#if EC_DEBUG
+    int i;
+    char mpstr[256];
+
+    printf("ec_points_mul: params [len=%d]:", params->DEREncoding.len);
+    for (i = 0; i < params->DEREncoding.len; i++)
+        printf("%02x:", params->DEREncoding.data[i]);
+    printf("\n");
+
+    if (k1 != NULL) {
+        mp_tohex((mp_int *)k1, mpstr);
+        printf("ec_points_mul: scalar k1: %s\n", mpstr);
+        mp_todecimal((mp_int *)k1, mpstr);
+        printf("ec_points_mul: scalar k1: %s (dec)\n", mpstr);
+    }
+
+    if (k2 != NULL) {
+        mp_tohex((mp_int *)k2, mpstr);
+        printf("ec_points_mul: scalar k2: %s\n", mpstr);
+        mp_todecimal((mp_int *)k2, mpstr);
+        printf("ec_points_mul: scalar k2: %s (dec)\n", mpstr);
+    }
+
+    if (pointP != NULL) {
+        printf("ec_points_mul: pointP [len=%d]:", pointP->len);
+        for (i = 0; i < pointP->len; i++)
+            printf("%02x:", pointP->data[i]);
+        printf("\n");
+    }
+#endif
+
+    /* NOTE: We only support uncompressed points for now */
+    len = (params->fieldID.size + 7) >> 3;
+    if (pointP != NULL) {
+        /* Test the length first so data[0] of an empty item is never read. */
+        if ((pointP->len != (2 * len + 1)) ||
+            (pointP->data[0] != EC_POINT_FORM_UNCOMPRESSED)) {
+            PORT_SetError(SEC_ERROR_UNSUPPORTED_EC_POINT_FORM);
+            return SECFailure;
+        }
+    }
+
+    /* Zero the digit pointers before any jump to cleanup can happen, so
+     * that every mp_clear below sees initialised state. */
+    MP_DIGITS(&Px) = 0;
+    MP_DIGITS(&Py) = 0;
+    MP_DIGITS(&Qx) = 0;
+    MP_DIGITS(&Qy) = 0;
+    CHECK_MPI_OK(mp_init(&Px));
+    CHECK_MPI_OK(mp_init(&Py));
+    CHECK_MPI_OK(mp_init(&Qx));
+    CHECK_MPI_OK(mp_init(&Qy));
+
+    if ((k2 != NULL) && (pointP != NULL)) {
+        /* Initialize Px and Py */
+        CHECK_MPI_OK(mp_read_unsigned_octets(&Px, pointP->data + 1, (mp_size)len));
+        CHECK_MPI_OK(mp_read_unsigned_octets(&Py, pointP->data + 1 + len, (mp_size)len));
+    }
+
+    /* construct from named params, if possible */
+    if (params->name != ECCurve_noName) {
+        group = ECGroup_fromName(params->name);
+    }
+
+    /* NOTE(review): no error code is set on this path; rv stays SECFailure. */
+    if (group == NULL)
+        goto cleanup;
+
+    /* Without a usable (k2, pointP) pair only the k1 term is passed on. */
+    if ((k2 != NULL) && (pointP != NULL)) {
+        CHECK_MPI_OK(ECPoints_mul(group, k1, k2, &Px, &Py, &Qx, &Qy));
+    } else {
+        CHECK_MPI_OK(ECPoints_mul(group, k1, NULL, NULL, NULL, &Qx, &Qy));
+    }
+
+    /* Construct the SECItem representation of point Q */
+    pointQ->data[0] = EC_POINT_FORM_UNCOMPRESSED;
+    CHECK_MPI_OK(mp_to_fixlen_octets(&Qx, pointQ->data + 1,
+                                     (mp_size)len));
+    CHECK_MPI_OK(mp_to_fixlen_octets(&Qy, pointQ->data + 1 + len,
+                                     (mp_size)len));
+
+    rv = SECSuccess;
+
+#if EC_DEBUG
+    printf("ec_points_mul: pointQ [len=%d]:", pointQ->len);
+    for (i = 0; i < pointQ->len; i++)
+        printf("%02x:", pointQ->data[i]);
+    printf("\n");
+#endif
+
+cleanup:
+    ECGroup_free(group);
+    mp_clear(&Px);
+    mp_clear(&Py);
+    mp_clear(&Qx);
+    mp_clear(&Qy);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+
+    return rv;
+}
+#endif /* NSS_DISABLE_ECC */
+
+/* Generates a new EC key pair. The private key is a supplied
+ * value and the public key is the result of performing a scalar
+ * point multiplication of that value with the curve's base point.
+ * The value is fitted to order.len octets (left zero-padded, or cut to its
+ * first order.len octets). *privKey is written only on success.
+ */
+SECStatus
+ec_NewKey(ECParams *ecParams, ECPrivateKey **privKey,
+          const unsigned char *privKeyBytes, int privKeyLen)
+{
+    SECStatus rv = SECFailure;
+#ifndef NSS_DISABLE_ECC
+    PLArenaPool *arena;
+    ECPrivateKey *key;
+    mp_int k;
+    mp_err err = MP_OKAY;
+    int len;
+
+#if EC_DEBUG
+    printf("ec_NewKey called\n");
+#endif
+    MP_DIGITS(&k) = 0;
+
+    if (!ecParams || !privKey || !privKeyBytes || (privKeyLen < 0) ||
+        !ecParams->name) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Initialize an arena for the EC key. */
+    if (!(arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE)))
+        return SECFailure;
+
+    key = (ECPrivateKey *)PORT_ArenaZAlloc(arena, sizeof(ECPrivateKey));
+    if (!key) {
+        PORT_FreeArena(arena, PR_TRUE);
+        return SECFailure;
+    }
+
+    /* Set the version number (SEC 1 section C.4 says it should be 1) */
+    if (SECITEM_AllocItem(arena, &key->version, 1) == NULL) {
+        goto cleanup; /* rv is still SECFailure, so the arena is freed */
+    }
+    key->version.data[0] = 1;
+
+    /* Copy every field of the ECParams argument into the key's ECParams. */
+    key->ecParams.arena = arena;
+    key->ecParams.type = ecParams->type;
+    key->ecParams.fieldID.size = ecParams->fieldID.size;
+    key->ecParams.fieldID.type = ecParams->fieldID.type;
+    if (ecParams->fieldID.type == ec_field_GFp ||
+        ecParams->fieldID.type == ec_field_plain) {
+        CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.fieldID.u.prime,
+                                      &ecParams->fieldID.u.prime));
+    } else {
+        CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.fieldID.u.poly,
+                                      &ecParams->fieldID.u.poly));
+    }
+    key->ecParams.fieldID.k1 = ecParams->fieldID.k1;
+    key->ecParams.fieldID.k2 = ecParams->fieldID.k2;
+    key->ecParams.fieldID.k3 = ecParams->fieldID.k3;
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.curve.a,
+                                  &ecParams->curve.a));
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.curve.b,
+                                  &ecParams->curve.b));
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.curve.seed,
+                                  &ecParams->curve.seed));
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.base,
+                                  &ecParams->base));
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.order,
+                                  &ecParams->order));
+    key->ecParams.cofactor = ecParams->cofactor;
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.DEREncoding,
+                                  &ecParams->DEREncoding));
+    key->ecParams.name = ecParams->name;
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.curveOID,
+                                  &ecParams->curveOID));
+
+    len = ecParams->order.len;
+    if (!SECITEM_AllocItem(arena, &key->publicValue, EC_GetPointSize(ecParams)) ||
+        !SECITEM_AllocItem(arena, &key->privateValue, len)) {
+        rv = SECFailure; /* the copies above left rv at SECSuccess */
+        goto cleanup;
+    }
+    /* Copy private key */
+    if (privKeyLen >= len) {
+        memcpy(key->privateValue.data, privKeyBytes, len);
+    } else {
+        memset(key->privateValue.data, 0, (len - privKeyLen));
+        memcpy(key->privateValue.data + (len - privKeyLen), privKeyBytes, privKeyLen);
+    }
+    /* Compute corresponding public key; plain-field curves use their own code */
+    if (ecParams->fieldID.type == ec_field_plain) {
+        const ECMethod *method = ec_get_method_from_name(ecParams->name);
+        if (method == NULL || method->mul == NULL) {
+            /* unknown curve */
+            rv = SECFailure;
+            goto cleanup;
+        }
+        rv = method->mul(&key->publicValue, &key->privateValue, NULL);
+    } else {
+        CHECK_MPI_OK(mp_init(&k));
+        CHECK_MPI_OK(mp_read_unsigned_octets(&k, key->privateValue.data,
+                                             (mp_size)len));
+        rv = ec_points_mul(ecParams, &k, NULL, NULL, &(key->publicValue));
+    }
+    if (rv != SECSuccess) {
+        goto cleanup;
+    }
+    *privKey = key; /* published only once the public value exists */
+
+cleanup:
+    mp_clear(&k);
+    if (err < MP_OKAY) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure; /* an MPI failure would otherwise leave SECSuccess */
+    }
+    if (rv != SECSuccess) {
+        PORT_FreeArena(arena, PR_TRUE);
+    }
+#if EC_DEBUG
+    printf("ec_NewKey returning %s\n",
+           (rv == SECSuccess) ? "success" : "failure");
+#endif
+#else
+    PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG);
+#endif /* NSS_DISABLE_ECC */
+
+    return rv;
+}
+
+/* Builds an EC key pair from caller-supplied private key material.
+ * The seed octets are handed unchanged to ec_NewKey, which derives the
+ * public value. Without ECC support (NSS_DISABLE_ECC) this only sets
+ * SEC_ERROR_UNSUPPORTED_KEYALG and fails.
+ */
+SECStatus
+EC_NewKeyFromSeed(ECParams *ecParams, ECPrivateKey **privKey,
+                  const unsigned char *seed, int seedlen)
+{
+#ifdef NSS_DISABLE_ECC
+    PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG);
+    return SECFailure;
+#else
+    /* Thin public wrapper around the shared key builder. */
+    return ec_NewKey(ecParams, privKey, seed, seedlen);
+#endif /* NSS_DISABLE_ECC */
+}
+
+#ifndef NSS_DISABLE_ECC
+/* Generate a random private key using the algorithm A.4.1 of ANSI X9.62,
+ * modified a la FIPS 186-2 Change Notice 1 to eliminate the bias in the
+ * random number generator.
+ *
+ * Parameters
+ * - order: a buffer that holds the curve's group order
+ * - len: the length in octets of the order buffer
+ *
+ * Return Value
+ * Returns a 2*len octet allocation, or NULL on failure. The first len
+ * octets hold the private key, a value in [1, order-1]; the rest are zero.
+ * The caller is responsible for freeing the buffer with PORT_ZFree.
+ */
+static unsigned char *
+ec_GenerateRandomPrivateKey(const unsigned char *order, int len)
+{
+    SECStatus rv = SECSuccess;
+    mp_err err = MP_OKAY; /* initialised like every other mp_err in this file */
+    unsigned char *privKeyBytes = NULL;
+    mp_int privKeyVal, order_1, one;
+
+    MP_DIGITS(&privKeyVal) = 0;
+    MP_DIGITS(&order_1) = 0;
+    MP_DIGITS(&one) = 0;
+    CHECK_MPI_OK(mp_init(&privKeyVal));
+    CHECK_MPI_OK(mp_init(&order_1));
+    CHECK_MPI_OK(mp_init(&one));
+
+    /* Generates 2*len random bytes using the global random bit generator
+     * (which implements Algorithm 1 of FIPS 186-2 Change Notice 1), then
+     * maps them into [1, order-1] by reducing mod (order - 1) and adding 1. */
+    if ((privKeyBytes = PORT_Alloc(2 * len)) == NULL)
+        goto cleanup;
+    CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(privKeyBytes, 2 * len));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&privKeyVal, privKeyBytes, 2 * len));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&order_1, order, len));
+    CHECK_MPI_OK(mp_set_int(&one, 1));
+    CHECK_MPI_OK(mp_sub(&order_1, &one, &order_1));
+    CHECK_MPI_OK(mp_mod(&privKeyVal, &order_1, &privKeyVal));
+    CHECK_MPI_OK(mp_add(&privKeyVal, &one, &privKeyVal));
+    CHECK_MPI_OK(mp_to_fixlen_octets(&privKeyVal, privKeyBytes, len));
+    memset(privKeyBytes + len, 0, len); /* wipe the unused upper half */
+cleanup:
+    mp_clear(&privKeyVal);
+    mp_clear(&order_1);
+    mp_clear(&one);
+    if (err < MP_OKAY) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv != SECSuccess && privKeyBytes) {
+        PORT_ZFree(privKeyBytes, 2 * len);
+        privKeyBytes = NULL;
+    }
+    return privKeyBytes;
+}
+#endif /* NSS_DISABLE_ECC */
+
+/* Generates a new EC key pair. The private value is produced by
+ * ec_GenerateRandomPrivateKey from the curve order, and ec_NewKey then
+ * derives the public value by multiplying the curve's base point.
+ * A NULL ecParams is rejected with SEC_ERROR_INVALID_ARGS; without ECC
+ * support only SEC_ERROR_UNSUPPORTED_KEYALG is set.
+ */
+SECStatus
+EC_NewKey(ECParams *ecParams, ECPrivateKey **privKey)
+{
+    SECStatus rv = SECFailure;
+#ifndef NSS_DISABLE_ECC
+    unsigned char *scalar;
+    int orderLen;
+
+    if (ecParams == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* draw the random private scalar */
+    orderLen = ecParams->order.len;
+    scalar = ec_GenerateRandomPrivateKey(ecParams->order.data, orderLen);
+    if (scalar != NULL) {
+        /* generate public key, then wipe the temporary scalar */
+        rv = ec_NewKey(ecParams, privKey, scalar, orderLen);
+        PORT_ZFree(scalar, orderLen);
+    }
+
+#if EC_DEBUG
+    printf("EC_NewKey returning %s\n",
+           (rv == SECSuccess) ? "success" : "failure");
+#endif
+#else
+    PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG);
+#endif /* NSS_DISABLE_ECC */
+
+    return rv;
+}
+
+/* Validates an EC public key as described in Section 5.2.2 of
+ * X9.62. The ECDH primitive when used without the cofactor does
+ * not address small subgroup attacks, which may occur when the
+ * public key is not valid. These attacks can be prevented by
+ * validating the public key before using ECDH.
+ * Returns SECSuccess for a valid point; otherwise SECFailure with an
+ * error code set here or translated from the MPI layer.
+ */
+SECStatus
+EC_ValidatePublicKey(ECParams *ecParams, SECItem *publicValue)
+{
+#ifndef NSS_DISABLE_ECC
+    mp_int Px, Py;
+    ECGroup *group = NULL;
+    SECStatus rv = SECFailure;
+    mp_err err = MP_OKAY;
+    int len;
+
+    if (!ecParams || !publicValue || !ecParams->name) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Uses curve specific code for point validation. */
+    if (ecParams->fieldID.type == ec_field_plain) {
+        const ECMethod *method = ec_get_method_from_name(ecParams->name);
+        if (method == NULL || method->validate == NULL) {
+            /* unknown curve */
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+        }
+        return method->validate(publicValue);
+    }
+
+    /* NOTE: We only support uncompressed points for now */
+    len = (ecParams->fieldID.size + 7) >> 3;
+    if (publicValue->data == NULL || publicValue->len == 0) { /* never read data[0] of an empty item */
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    } else if (publicValue->data[0] != EC_POINT_FORM_UNCOMPRESSED) {
+        PORT_SetError(SEC_ERROR_UNSUPPORTED_EC_POINT_FORM);
+        return SECFailure;
+    } else if (publicValue->len != (2 * len + 1)) {
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    }
+
+    MP_DIGITS(&Px) = 0;
+    MP_DIGITS(&Py) = 0;
+    CHECK_MPI_OK(mp_init(&Px));
+    CHECK_MPI_OK(mp_init(&Py));
+    /* Initialize Px and Py */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&Px, publicValue->data + 1, (mp_size)len));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&Py, publicValue->data + 1 + len, (mp_size)len));
+
+    /* construct from named params */
+    group = ECGroup_fromName(ecParams->name);
+    if (group == NULL) {
+        /*
+         * ECGroup_fromName does not say why it failed. A name outside the
+         * ECCurveName range is a bad argument; for a valid name (out of
+         * memory, perhaps) use MP_UNDEF, as ECGroup_fromName does internally.
+         */
+        if ((ecParams->name <= ECCurve_noName) ||
+            (ecParams->name >= ECCurve_pastLastCurve)) {
+            err = MP_BADARG;
+        } else {
+            err = MP_UNDEF;
+        }
+        goto cleanup;
+    }
+
+    /* validate public point */
+    if ((err = ECPoint_validate(group, &Px, &Py)) < MP_YES) {
+        if (err == MP_NO) {
+            PORT_SetError(SEC_ERROR_BAD_KEY);
+            rv = SECFailure;
+            err = MP_OKAY; /* don't change the error code */
+        }
+        goto cleanup;
+    }
+
+    rv = SECSuccess;
+
+cleanup:
+    ECGroup_free(group);
+    mp_clear(&Px);
+    mp_clear(&Py);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+#else
+    PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG);
+    return SECFailure;
+#endif /* NSS_DISABLE_ECC */
+}
+
+/*
+** Performs an ECDH key derivation by computing the scalar point
+** multiplication of privateValue and publicValue (with or without the
+** cofactor) and returns the x-coordinate of the resulting elliptic
+** curve point in derivedSecret.  If successful, derivedSecret->data
+** points to a newly allocated buffer holding the secret and
+** derivedSecret->len is its size; the caller must free that buffer.
+** On failure this function leaves no allocation behind in derivedSecret.
+** Curves with a plain field ID are handled by their ECMethod routines.
+*/
+SECStatus
+ECDH_Derive(SECItem *publicValue,
+            ECParams *ecParams,
+            SECItem *privateValue,
+            PRBool withCofactor,
+            SECItem *derivedSecret)
+{
+    SECStatus rv = SECFailure;
+#ifndef NSS_DISABLE_ECC
+    unsigned int len = 0;
+    SECItem pointQ = { siBuffer, NULL, 0 };
+    mp_int k; /* to hold the private value */
+    mp_int cofactor;
+    mp_err err = MP_OKAY;
+#if EC_DEBUG
+    int i;
+#endif
+
+    if (!publicValue || !ecParams || !privateValue || !derivedSecret ||
+        !ecParams->name) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Perform curve specific multiplication using ECMethod */
+    if (ecParams->fieldID.type == ec_field_plain) {
+        const ECMethod *method = ec_get_method_from_name(ecParams->name);
+        memset(derivedSecret, 0, sizeof(*derivedSecret));
+        if (!SECITEM_AllocItem(NULL, derivedSecret, EC_GetPointSize(ecParams))) {
+            PORT_SetError(SEC_ERROR_NO_MEMORY);
+            return SECFailure;
+        }
+        if (method == NULL || method->validate == NULL ||
+            method->mul == NULL) {
+            PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE);
+        } else if (method->validate(publicValue) != SECSuccess) {
+            PORT_SetError(SEC_ERROR_BAD_KEY);
+        } else {
+            rv = method->mul(derivedSecret, privateValue, publicValue);
+        }
+        if (rv != SECSuccess) {
+            SECITEM_ZfreeItem(derivedSecret, PR_FALSE); /* do not leak the output buffer */
+        }
+        return rv;
+    }
+
+    /*
+     * We fail if the public value is the point at infinity, since
+     * this produces predictable results.
+     */
+    if (ec_point_at_infinity(publicValue)) {
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    }
+
+    MP_DIGITS(&k) = 0;
+    MP_DIGITS(&cofactor) = 0; /* lets cleanup clear it even when unused */
+    memset(derivedSecret, 0, sizeof *derivedSecret);
+    len = (ecParams->fieldID.size + 7) >> 3;
+    pointQ.len = EC_GetPointSize(ecParams);
+    if ((pointQ.data = PORT_Alloc(pointQ.len)) == NULL)
+        goto cleanup;
+
+    CHECK_MPI_OK(mp_init(&k));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&k, privateValue->data,
+                                         (mp_size)privateValue->len));
+
+    if (withCofactor && (ecParams->cofactor != 1)) {
+        /* multiply k with the cofactor */
+        CHECK_MPI_OK(mp_init(&cofactor));
+        mp_set(&cofactor, ecParams->cofactor);
+        CHECK_MPI_OK(mp_mul(&k, &cofactor, &k));
+    }
+
+    /* Multiply our private key and peer's public point */
+    if (ec_points_mul(ecParams, NULL, &k, publicValue, &pointQ) != SECSuccess) {
+        goto cleanup;
+    }
+    if (ec_point_at_infinity(&pointQ)) {
+        PORT_SetError(SEC_ERROR_BAD_KEY); /* XXX better error code? */
+        goto cleanup;
+    }
+
+    /* Allocate memory for the derived secret and copy the
+     * x co-ordinate of pointQ into it. */
+    if (SECITEM_AllocItem(NULL, derivedSecret, len) == NULL) {
+        goto cleanup;
+    }
+    memcpy(derivedSecret->data, pointQ.data + 1, len);
+
+    rv = SECSuccess;
+
+#if EC_DEBUG
+    printf("derived_secret:\n");
+    for (i = 0; i < derivedSecret->len; i++)
+        printf("%02x:", derivedSecret->data[i]);
+    printf("\n");
+#endif
+
+cleanup:
+    mp_clear(&k);
+    mp_clear(&cofactor);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+    }
+    if (pointQ.data) {
+        PORT_ZFree(pointQ.data, pointQ.len);
+    }
+#else
+    PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG);
+#endif /* NSS_DISABLE_ECC */
+
+    return rv;
+}
+
+/* Computes the ECDSA signature on digest with the given key and the nonce
+ * kb (kblen octets; must lie in [1, n-1], else SEC_ERROR_NEED_RANDOM).
+ * The output is r followed by s, order.len octets each. A NULL
+ * signature->data only stores the required length in signature->len. */
+SECStatus
+ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature,
+                         const SECItem *digest, const unsigned char *kb, const int kblen)
+{
+    SECStatus rv = SECFailure;
+#ifndef NSS_DISABLE_ECC
+    mp_int x1;
+    mp_int d, k; /* private key, random integer */
+    mp_int r, s; /* tuple (r, s) is the signature */
+    mp_int t;    /* holding tmp values */
+    mp_int n;
+    mp_err err = MP_OKAY;
+    ECParams *ecParams = NULL;
+    SECItem kGpoint = { siBuffer, NULL, 0 };
+    int flen = 0;   /* length in bytes of the field size */
+    unsigned olen;  /* length in bytes of the base point order */
+    unsigned obits; /* length in bits  of the base point order */
+    unsigned char *t2 = NULL; /* random octets that become the blinding value t */
+
+#if EC_DEBUG
+    char mpstr[256];
+#endif
+
+    /* Initialize MPI integers. */
+    /* must happen before the first potential call to cleanup */
+    MP_DIGITS(&x1) = 0;
+    MP_DIGITS(&d) = 0;
+    MP_DIGITS(&k) = 0;
+    MP_DIGITS(&r) = 0;
+    MP_DIGITS(&s) = 0;
+    MP_DIGITS(&n) = 0;
+    MP_DIGITS(&t) = 0;
+
+    /* Check args */
+    if (!key || !signature || !digest || !kb || (kblen < 0)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        goto cleanup;
+    }
+
+    ecParams = &(key->ecParams);
+    flen = (ecParams->fieldID.size + 7) >> 3;
+    olen = ecParams->order.len;
+    if (signature->data == NULL) {
+        /* a call to get the signature length only */
+        goto finish;
+    }
+    if (signature->len < 2 * olen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        goto cleanup;
+    }
+
+    CHECK_MPI_OK(mp_init(&x1));
+    CHECK_MPI_OK(mp_init(&d));
+    CHECK_MPI_OK(mp_init(&k));
+    CHECK_MPI_OK(mp_init(&r));
+    CHECK_MPI_OK(mp_init(&s));
+    CHECK_MPI_OK(mp_init(&n));
+    CHECK_MPI_OK(mp_init(&t));
+
+    SECITEM_TO_MPINT(ecParams->order, &n);
+    SECITEM_TO_MPINT(key->privateValue, &d);
+
+    CHECK_MPI_OK(mp_read_unsigned_octets(&k, kb, kblen));
+    /* Make sure k is in the interval [1, n-1] */
+    if ((mp_cmp_z(&k) <= 0) || (mp_cmp(&k, &n) >= 0)) {
+#if EC_DEBUG
+        printf("k is outside [1, n-1]\n");
+        mp_tohex(&k, mpstr);
+        printf("k : %s \n", mpstr);
+        mp_tohex(&n, mpstr);
+        printf("n : %s \n", mpstr);
+#endif
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        goto cleanup;
+    }
+
+    /*
+    ** We do not want timing information to leak the length of k,
+    ** so we compute k*G using an equivalent scalar of fixed
+    ** bit-length.
+    ** Fix based on patch for ECDSA timing attack in the paper
+    ** by Billy Bob Brumley and Nicola Tuveri at
+    **   http://eprint.iacr.org/2011/232
+    **
+    ** How do we convert k to a value of a fixed bit-length?
+    ** k starts off as an integer satisfying 0 <= k < n.  Hence,
+    ** n <= k+n < 2n, which means k+n has either the same number
+    ** of bits as n or one more bit than n.  If k+n has the same
+    ** number of bits as n, the second addition ensures that the
+    ** final value has exactly one more bit than n.  Thus, we
+    ** always end up with a value that is exactly one more bit than n.
+    */
+    CHECK_MPI_OK(mp_add(&k, &n, &k));
+    if (mpl_significant_bits(&k) <= mpl_significant_bits(&n)) {
+        CHECK_MPI_OK(mp_add(&k, &n, &k));
+    }
+
+    /*
+    ** ANSI X9.62, Section 5.3.2, Step 2
+    **
+    ** Compute kG
+    */
+    kGpoint.len = EC_GetPointSize(ecParams);
+    kGpoint.data = PORT_Alloc(kGpoint.len);
+    if ((kGpoint.data == NULL) ||
+        (ec_points_mul(ecParams, &k, NULL, NULL, &kGpoint) != SECSuccess))
+        goto cleanup;
+
+    /*
+    ** ANSI X9.62, Section 5.3.3, Step 1
+    **
+    ** Extract the x co-ordinate of kG into x1
+    */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&x1, kGpoint.data + 1,
+                                         (mp_size)flen));
+
+    /*
+    ** ANSI X9.62, Section 5.3.3, Step 2
+    **
+    ** r = x1 mod n  NOTE: n is the order of the curve
+    */
+    CHECK_MPI_OK(mp_mod(&x1, &n, &r));
+
+    /*
+    ** ANSI X9.62, Section 5.3.3, Step 3
+    **
+    ** verify r != 0
+    */
+    if (mp_cmp_z(&r) == 0) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        goto cleanup;
+    }
+
+    /*
+    ** ANSI X9.62, Section 5.3.3, Step 4
+    **
+    ** s = (k**-1 * (HASH(M) + d*r)) mod n
+    */
+    SECITEM_TO_MPINT(*digest, &s); /* s = HASH(M)     */
+
+    /* In the definition of EC signing, digests are truncated
+     * to the length of n in bits.
+     * (see SEC 1 "Elliptic Curve Digital Signature Algorithm" section 4.1.*/
+    CHECK_MPI_OK((obits = mpl_significant_bits(&n)));
+    if (digest->len * 8 > obits) {
+        mpl_rsh(&s, &s, digest->len * 8 - obits);
+    }
+
+#if EC_DEBUG
+    mp_todecimal(&n, mpstr);
+    printf("n : %s (dec)\n", mpstr);
+    mp_todecimal(&d, mpstr);
+    printf("d : %s (dec)\n", mpstr);
+    mp_tohex(&x1, mpstr);
+    printf("x1: %s\n", mpstr);
+    mp_todecimal(&s, mpstr);
+    printf("digest: %s (decimal)\n", mpstr);
+    mp_todecimal(&r, mpstr);
+    printf("r : %s (dec)\n", mpstr);
+    mp_tohex(&r, mpstr);
+    printf("r : %s\n", mpstr);
+#endif
+
+    if ((t2 = PORT_Alloc(2 * ecParams->order.len)) == NULL) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+    if (RNG_GenerateGlobalRandomBytes(t2, 2 * ecParams->order.len) != SECSuccess) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    CHECK_MPI_OK(mp_read_unsigned_octets(&t, t2, 2 * ecParams->order.len)); /* t <-$ Zn */
+    CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k));                                /* k = k * t mod n */
+    CHECK_MPI_OK(mp_invmod(&k, &n, &k));                                    /* k = k**-1 mod n */
+    CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k));                                /* k = k * t mod n */
+    CHECK_MPI_OK(mp_mulmod(&d, &r, &n, &d));                                /* d = d * r mod n */
+    CHECK_MPI_OK(mp_addmod(&s, &d, &n, &s));                                /* s = s + d mod n */
+    CHECK_MPI_OK(mp_mulmod(&s, &k, &n, &s));                                /* s = s * k mod n */
+
+#if EC_DEBUG
+    mp_todecimal(&s, mpstr);
+    printf("s : %s (dec)\n", mpstr);
+    mp_tohex(&s, mpstr);
+    printf("s : %s\n", mpstr);
+#endif
+
+    /*
+    ** ANSI X9.62, Section 5.3.3, Step 5
+    **
+    ** verify s != 0
+    */
+    if (mp_cmp_z(&s) == 0) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        goto cleanup;
+    }
+
+    /*
+    **
+    ** Signature is tuple (r, s)
+    */
+    CHECK_MPI_OK(mp_to_fixlen_octets(&r, signature->data, olen));
+    CHECK_MPI_OK(mp_to_fixlen_octets(&s, signature->data + olen, olen));
+finish:
+    signature->len = 2 * olen;
+
+    rv = SECSuccess;
+    err = MP_OKAY;
+cleanup:
+    mp_clear(&x1);
+    mp_clear(&d);
+    mp_clear(&k);
+    mp_clear(&r);
+    mp_clear(&s);
+    mp_clear(&n);
+    mp_clear(&t);
+
+    if (t2) {
+        PORT_ZFree(t2, 2 * ecParams->order.len); /* wipe the blinding octets; ecParams is set whenever t2 is */
+    }
+
+    if (kGpoint.data) {
+        PORT_ZFree(kGpoint.data, kGpoint.len);
+    }
+
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+
+#if EC_DEBUG
+    printf("ECDSA signing with seed %s\n",
+           (rv == SECSuccess) ? "succeeded" : "failed");
+#endif
+#else
+    PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG);
+#endif /* NSS_DISABLE_ECC */
+
+    return rv;
+}
+
+/*
+** Computes the ECDSA signature on the digest using the given key
+** and a nonce freshly drawn by ec_GenerateRandomPrivateKey.
+**
+** Returns the status of ECDSA_SignDigestWithSeed, or SECFailure when
+** key is NULL or no nonce could be generated.
+*/
+SECStatus
+ECDSA_SignDigest(ECPrivateKey *key, SECItem *signature, const SECItem *digest)
+{
+    SECStatus rv = SECFailure;
+#ifndef NSS_DISABLE_ECC
+    unsigned char *nonce;
+    int orderLen;
+
+    if (key == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Generate random value k */
+    orderLen = key->ecParams.order.len;
+    nonce = ec_GenerateRandomPrivateKey(key->ecParams.order.data, orderLen);
+    if (nonce != NULL) {
+        /* Generate ECDSA signature with the specified k value */
+        rv = ECDSA_SignDigestWithSeed(key, signature, digest, nonce,
+                                      orderLen);
+        /* wipe the nonce once the signing call has returned */
+        PORT_ZFree(nonce, orderLen);
+    }
+
+#if EC_DEBUG
+    printf("ECDSA signing %s\n",
+           (rv == SECSuccess) ? "succeeded" : "failed");
+#endif
+#else
+    PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG);
+#endif /* NSS_DISABLE_ECC */
+
+    return rv;
+}
+
+/*
+** Checks the signature on the given digest using the key provided and
+** returns SECSuccess only if it verifies (a mismatch sets
+** SEC_ERROR_BAD_SIGNATURE).  The key argument must represent a valid EC
+** public key (a point on the relevant curve).  If it is not a valid
+** point, then the behavior of this function is undefined.  In cases where
+** a public key might not be valid, use EC_ValidatePublicKey to check.
+*/
+SECStatus
+ECDSA_VerifyDigest(ECPublicKey *key, const SECItem *signature,
+                   const SECItem *digest)
+{
+    SECStatus rv = SECFailure;
+#ifndef NSS_DISABLE_ECC
+    mp_int r_, s_;       /* tuple (r', s') is the received signature */
+    mp_int c, u1, u2, v; /* intermediate values used in verification */
+    mp_int x1;           /* x co-ordinate read back from pointC */
+    mp_int n;            /* order of the base point */
+    mp_err err = MP_OKAY;
+    ECParams *ecParams = NULL;
+    SECItem pointC = { siBuffer, NULL, 0 }; /* receives u1*G + u2*Q */
+    int slen;       /* length in bytes of a half signature (r or s) */
+    int flen;       /* length in bytes of the field size */
+    unsigned olen;  /* length in bytes of the base point order */
+    unsigned obits; /* length in bits  of the base point order */
+
+#if EC_DEBUG
+    char mpstr[256];
+    printf("ECDSA verification called\n");
+#endif
+
+    /* Initialize MPI integers. */
+    /* must happen before the first potential call to cleanup */
+    MP_DIGITS(&r_) = 0;
+    MP_DIGITS(&s_) = 0;
+    MP_DIGITS(&c) = 0;
+    MP_DIGITS(&u1) = 0;
+    MP_DIGITS(&u2) = 0;
+    MP_DIGITS(&x1) = 0;
+    MP_DIGITS(&v) = 0;
+    MP_DIGITS(&n) = 0;
+
+    /* Check args */
+    if (!key || !signature || !digest) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        goto cleanup;
+    }
+
+    ecParams = &(key->ecParams);
+    flen = (ecParams->fieldID.size + 7) >> 3;
+    olen = ecParams->order.len;
+    if (signature->len == 0 || signature->len % 2 != 0 ||
+        signature->len > 2 * olen) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        goto cleanup;
+    }
+    slen = signature->len / 2; /* r' and s' each occupy half of the signature */
+
+    SECITEM_AllocItem(NULL, &pointC, EC_GetPointSize(ecParams));
+    if (pointC.data == NULL)
+        goto cleanup;
+
+    CHECK_MPI_OK(mp_init(&r_));
+    CHECK_MPI_OK(mp_init(&s_));
+    CHECK_MPI_OK(mp_init(&c));
+    CHECK_MPI_OK(mp_init(&u1));
+    CHECK_MPI_OK(mp_init(&u2));
+    CHECK_MPI_OK(mp_init(&x1));
+    CHECK_MPI_OK(mp_init(&v));
+    CHECK_MPI_OK(mp_init(&n));
+
+    /*
+    ** Convert received signature (r', s') into MPI integers.
+    */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&r_, signature->data, slen));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&s_, signature->data + slen, slen));
+
+    /*
+    ** ANSI X9.62, Section 5.4.2, Steps 1 and 2
+    **
+    ** Verify that 0 < r' < n and 0 < s' < n
+    */
+    SECITEM_TO_MPINT(ecParams->order, &n);
+    if (mp_cmp_z(&r_) <= 0 || mp_cmp_z(&s_) <= 0 ||
+        mp_cmp(&r_, &n) >= 0 || mp_cmp(&s_, &n) >= 0) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        goto cleanup; /* will return rv == SECFailure */
+    }
+
+    /*
+    ** ANSI X9.62, Section 5.4.2, Step 3
+    **
+    ** c = (s')**-1 mod n
+    */
+    CHECK_MPI_OK(mp_invmod(&s_, &n, &c)); /* c = (s')**-1 mod n */
+
+    /*
+    ** ANSI X9.62, Section 5.4.2, Step 4
+    **
+    ** u1 = ((HASH(M')) * c) mod n
+    */
+    SECITEM_TO_MPINT(*digest, &u1); /* u1 = HASH(M)     */
+
+    /* In the definition of EC signing, digests are truncated
+     * to the length of n in bits.
+     * (see SEC 1 "Elliptic Curve Digital Signature Algorithm" section 4.1.*/
+    CHECK_MPI_OK((obits = mpl_significant_bits(&n)));
+    if (digest->len * 8 > obits) { /* u1 = HASH(M')     */
+        mpl_rsh(&u1, &u1, digest->len * 8 - obits);
+    }
+
+#if EC_DEBUG
+    mp_todecimal(&r_, mpstr);
+    printf("r_: %s (dec)\n", mpstr);
+    mp_todecimal(&s_, mpstr);
+    printf("s_: %s (dec)\n", mpstr);
+    mp_todecimal(&c, mpstr);
+    printf("c : %s (dec)\n", mpstr);
+    mp_todecimal(&u1, mpstr);
+    printf("digest: %s (dec)\n", mpstr);
+#endif
+
+    CHECK_MPI_OK(mp_mulmod(&u1, &c, &n, &u1)); /* u1 = u1 * c mod n */
+
+    /*
+    ** ANSI X9.62, Section 5.4.2, Step 4
+    **
+    ** u2 = ((r') * c) mod n
+    */
+    CHECK_MPI_OK(mp_mulmod(&r_, &c, &n, &u2));
+
+    /*
+    ** ANSI X9.62, Section 5.4.3, Step 1
+    **
+    ** Compute u1*G + u2*Q
+    ** Here, A = u1.G     B = u2.Q    and   C = A + B
+    ** If the result, C, is the point at infinity, reject the signature
+    */
+    if (ec_points_mul(ecParams, &u1, &u2, &key->publicValue, &pointC) != SECSuccess) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+    if (ec_point_at_infinity(&pointC)) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        rv = SECFailure;
+        goto cleanup;
+    }
+
+    CHECK_MPI_OK(mp_read_unsigned_octets(&x1, pointC.data + 1, flen)); /* skip the point-form octet */
+
+    /*
+    ** ANSI X9.62, Section 5.4.4, Step 2
+    **
+    ** v = x1 mod n
+    */
+    CHECK_MPI_OK(mp_mod(&x1, &n, &v));
+
+#if EC_DEBUG
+    mp_todecimal(&r_, mpstr);
+    printf("r_: %s (dec)\n", mpstr);
+    mp_todecimal(&v, mpstr);
+    printf("v : %s (dec)\n", mpstr);
+#endif
+
+    /*
+    ** ANSI X9.62, Section 5.4.4, Step 3
+    **
+    ** Verification:  v == r'
+    */
+    if (mp_cmp(&v, &r_)) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        rv = SECFailure; /* Signature failed to verify. */
+    } else {
+        rv = SECSuccess; /* Signature verified. */
+    }
+
+#if EC_DEBUG
+    mp_todecimal(&u1, mpstr);
+    printf("u1: %s (dec)\n", mpstr);
+    mp_todecimal(&u2, mpstr);
+    printf("u2: %s (dec)\n", mpstr);
+    mp_tohex(&x1, mpstr);
+    printf("x1: %s\n", mpstr);
+    mp_todecimal(&v, mpstr);
+    printf("v : %s (dec)\n", mpstr);
+#endif
+
+cleanup:
+    mp_clear(&r_);
+    mp_clear(&s_);
+    mp_clear(&c);
+    mp_clear(&u1);
+    mp_clear(&u2);
+    mp_clear(&x1);
+    mp_clear(&v);
+    mp_clear(&n);
+
+    if (pointC.data)
+        SECITEM_ZfreeItem(&pointC, PR_FALSE);
+    if (err) { /* an MPI error overrides any earlier result */
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+
+#if EC_DEBUG
+    printf("ECDSA verification %s\n",
+           (rv == SECSuccess) ? "succeeded" : "failed");
+#endif
+#else
+    PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG);
+#endif /* NSS_DISABLE_ECC */
+
+    return rv;
+}
diff --git a/security/nss/lib/freebl/ec.h b/security/nss/lib/freebl/ec.h
new file mode 100644
index 000000000..bb65e82cd
--- /dev/null
+++ b/security/nss/lib/freebl/ec.h
@@ -0,0 +1,21 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __ec_h_
+#define __ec_h_
+
+#define EC_DEBUG 0
+
+#define ANSI_X962_CURVE_OID_TOTAL_LEN 10
+#define SECG_CURVE_OID_TOTAL_LEN 7
+#define PKIX_NEWCURVES_OID_TOTAL_LEN 11
+
+struct ECMethodStr { /* one entry: a curve identifier plus two function pointers */
+    ECCurveName name; /* curve identifier */
+    SECStatus (*mul)(SECItem *result, SECItem *scalar, SECItem *point); /* presumably result = scalar * point -- TODO confirm */
+    SECStatus (*validate)(const SECItem *point); /* presumably checks point for this curve -- TODO confirm */
+};
+typedef struct ECMethodStr ECMethod;
+
+#endif /* __ec_h_ */
diff --git a/security/nss/lib/freebl/ecdecode.c b/security/nss/lib/freebl/ecdecode.c
new file mode 100644
index 000000000..e1f1eb8a5
--- /dev/null
+++ b/security/nss/lib/freebl/ecdecode.c
@@ -0,0 +1,311 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef NSS_DISABLE_ECC
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "secoid.h"
+#include "secitem.h"
+#include "secerr.h"
+#include "ec.h"
+#include "ecl-curve.h"
+
+#define CHECK_OK(func) /* jump to cleanup when func yields NULL */ \
+    do { if ((func) == NULL) goto cleanup; }                       \
+    while (0)
+#define CHECK_SEC_OK(func) /* store func's status in rv; jump to cleanup unless SECSuccess */ \
+    do { if (SECSuccess != (rv = (func))) goto cleanup; }                                     \
+    while (0)
+
+/*
+ * Decodes the hex string str into item, allocating the bytes from arena.
+ * Returns item, or NULL on odd length, a non-hex digit, or a failed
+ * allocation. Warning: leading "00" pairs are dropped (one "00" is kept),
+ * so any leading 00's in the hexadecimal string must be optional.
+ */
+static SECItem *
+hexString2SECItem(PLArenaPool *arena, SECItem *item, const char *str)
+{
+    int hexLen = PORT_Strlen(str);
+    int acc = 0; /* value of the byte being assembled */
+    int pos;
+
+    PORT_Assert(arena);
+    PORT_Assert(item);
+
+    if (hexLen % 2) /* every byte needs exactly two hex digits */
+        return NULL;
+
+    /* drop leading 00 pairs, always keeping the final pair */
+    for (; hexLen > 2 && str[0] == '0' && str[1] == '0'; hexLen -= 2)
+        str += 2;
+
+    item->data = (unsigned char *)PORT_ArenaAlloc(arena, hexLen / 2);
+    if (!item->data)
+        return NULL;
+    item->len = hexLen / 2;
+
+    for (pos = 0; str[pos] != '\0'; pos++) {
+        const char ch = str[pos];
+        int nibble;
+        if (ch >= '0' && ch <= '9') {
+            nibble = ch - '0';
+        } else if (ch >= 'a' && ch <= 'f') {
+            nibble = ch - 'a' + 10;
+        } else if (ch >= 'A' && ch <= 'F') {
+            nibble = ch - 'A' + 10;
+        } else {
+            return NULL;
+        }
+
+        acc = acc * 16 + nibble;
+        if (pos & 1) { /* second digit of a pair completes the byte */
+            item->data[pos / 2] = acc;
+            acc = 0;
+        }
+    }
+
+    return item;
+}
+
+/* Copy every field of srcParams into dstParams, duplicating each SECItem via
+ * SECITEM_CopyItem(arena, ...). Returns SECFailure as soon as one copy fails. */
+SECStatus
+EC_CopyParams(PLArenaPool *arena, ECParams *dstParams,
+              const ECParams *srcParams)
+{
+    SECStatus rv = SECFailure;
+
+    dstParams->arena = arena; /* the arena argument, not srcParams->arena */
+    dstParams->type = srcParams->type;
+    dstParams->fieldID.size = srcParams->fieldID.size;
+    dstParams->fieldID.type = srcParams->fieldID.type;
+    if (srcParams->fieldID.type == ec_field_GFp ||
+        srcParams->fieldID.type == ec_field_plain) {
+        CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->fieldID.u.prime,
+                                      &srcParams->fieldID.u.prime));
+    } else { /* every other field type is copied through u.poly */
+        CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->fieldID.u.poly,
+                                      &srcParams->fieldID.u.poly));
+    }
+    dstParams->fieldID.k1 = srcParams->fieldID.k1;
+    dstParams->fieldID.k2 = srcParams->fieldID.k2;
+    dstParams->fieldID.k3 = srcParams->fieldID.k3;
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->curve.a,
+                                  &srcParams->curve.a));
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->curve.b,
+                                  &srcParams->curve.b));
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->curve.seed,
+                                  &srcParams->curve.seed));
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->base,
+                                  &srcParams->base));
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->order,
+                                  &srcParams->order));
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->DEREncoding,
+                                  &srcParams->DEREncoding));
+    dstParams->name = srcParams->name;
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->curveOID,
+                                  &srcParams->curveOID));
+    dstParams->cofactor = srcParams->cofactor;
+
+    return SECSuccess;
+
+cleanup: /* reached only when a copy failed; dstParams may be partially filled */
+    return SECFailure;
+}
+
+static SECStatus
+gf_populate_params(ECCurveName name, ECFieldType field_type, ECParams *params)
+{ /* Fills params, allocating from params->arena, from the ecCurve_map entry for name. */
+    SECStatus rv = SECFailure;
+    const ECCurveParams *curveParams;
+    /* 2 ['0'+'4'] + MAX_ECKEY_LEN * 2 [x,y] * 2 [hex string] + 1 ['\0'] */
+    char genenc[3 + 2 * 2 * MAX_ECKEY_LEN];
+
+    if ((name < ECCurve_noName) || (name > ECCurve_pastLastCurve))
+        goto cleanup;
+    params->name = name;
+    curveParams = ecCurve_map[params->name];
+    CHECK_OK(curveParams); /* NULL table entry: fail with rv == SECFailure */
+    params->fieldID.size = curveParams->size;
+    params->fieldID.type = field_type;
+    if (field_type == ec_field_GFp ||
+        field_type == ec_field_plain) {
+        CHECK_OK(hexString2SECItem(params->arena, &params->fieldID.u.prime,
+                                   curveParams->irr));
+    } else { /* every other field type stores irr in u.poly */
+        CHECK_OK(hexString2SECItem(params->arena, &params->fieldID.u.poly,
+                                   curveParams->irr));
+    }
+    CHECK_OK(hexString2SECItem(params->arena, &params->curve.a,
+                               curveParams->curvea));
+    CHECK_OK(hexString2SECItem(params->arena, &params->curve.b,
+                               curveParams->curveb));
+    genenc[0] = '0'; /* base point hex string: "04" followed by genx then geny */
+    genenc[1] = '4';
+    genenc[2] = '\0';
+    strcat(genenc, curveParams->genx); /* NOTE(review): unbounded; relies on genx/geny fitting the size computed above */
+    strcat(genenc, curveParams->geny);
+    CHECK_OK(hexString2SECItem(params->arena, &params->base, genenc));
+    CHECK_OK(hexString2SECItem(params->arena, &params->order,
+                               curveParams->order));
+    params->cofactor = curveParams->cofactor; /* assigned last, after every conversion succeeded */
+
+    rv = SECSuccess;
+
+cleanup:
+    return rv;
+}
+
+SECStatus
+EC_FillParams(PLArenaPool *arena, const SECItem *encodedParams,
+              ECParams *params)
+{ /* Fills params, allocating from arena, for the named curve whose encoded OID is encodedParams. */
+    SECStatus rv = SECFailure; /* only a CHECK_SEC_OK call below can change this */
+    SECOidTag tag;
+    SECItem oid = { siBuffer, NULL, 0 };
+
+#if EC_DEBUG
+    int i;
+
+    printf("Encoded params in EC_DecodeParams: ");
+    for (i = 0; i < encodedParams->len; i++) {
+        printf("%02x:", encodedParams->data[i]);
+    }
+    printf("\n");
+#endif
+
+    if ((encodedParams->len != ANSI_X962_CURVE_OID_TOTAL_LEN) &&
+        (encodedParams->len != SECG_CURVE_OID_TOTAL_LEN) &&
+        (encodedParams->len != PKIX_NEWCURVES_OID_TOTAL_LEN)) {
+        PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE);
+        return SECFailure;
+    };
+
+    oid.len = encodedParams->len - 2; /* skip 2 leading bytes: the tag checked below and presumably the length byte */
+    oid.data = encodedParams->data + 2;
+    if ((encodedParams->data[0] != SEC_ASN1_OBJECT_ID) ||
+        ((tag = SECOID_FindOIDTag(&oid)) == SEC_OID_UNKNOWN)) {
+        PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE);
+        return SECFailure;
+    }
+
+    params->arena = arena;
+    params->cofactor = 0; /* sentinel tested at cleanup: still 0 means no curve was populated */
+    params->type = ec_params_named;
+    params->name = ECCurve_noName;
+
+    /* Fill out curveOID */
+    params->curveOID.len = oid.len;
+    params->curveOID.data = (unsigned char *)PORT_ArenaAlloc(arena, oid.len);
+    if (params->curveOID.data == NULL)
+        goto cleanup;
+    memcpy(params->curveOID.data, oid.data, oid.len);
+
+#if EC_DEBUG
+    printf("Curve: %s\n", SECOID_FindOIDTagDescription(tag));
+#endif
+
+    switch (tag) {
+        case SEC_OID_ANSIX962_EC_PRIME256V1:
+            /* Populate params for prime256v1 aka secp256r1
+             * (the NIST P-256 curve)
+             */
+            CHECK_SEC_OK(gf_populate_params(ECCurve_X9_62_PRIME_256V1, ec_field_GFp,
+                                            params));
+            break;
+
+        case SEC_OID_SECG_EC_SECP384R1:
+            /* Populate params for secp384r1
+             * (the NIST P-384 curve)
+             */
+            CHECK_SEC_OK(gf_populate_params(ECCurve_SECG_PRIME_384R1, ec_field_GFp,
+                                            params));
+            break;
+
+        case SEC_OID_SECG_EC_SECP521R1:
+            /* Populate params for secp521r1
+             * (the NIST P-521 curve)
+             */
+            CHECK_SEC_OK(gf_populate_params(ECCurve_SECG_PRIME_521R1, ec_field_GFp,
+                                            params));
+            break;
+
+        case SEC_OID_CURVE25519:
+            /* Populate params for Curve25519 */
+            CHECK_SEC_OK(gf_populate_params(ECCurve25519, ec_field_plain, params));
+            break;
+
+        default: /* OID is known to SECOID but has no case here: rv stays SECFailure */
+            break;
+    };
+
+cleanup:
+    if (!params->cofactor) { /* NOTE(review): also replaces any error set by a failed allocation above */
+        PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE);
+#if EC_DEBUG
+        printf("Unrecognized curve, returning NULL params\n");
+#endif
+    }
+
+    return rv;
+}
+
+/* Build *ecparams (in its own new arena) from encodedParams. On SECFailure
+ * the arena is freed and *ecparams is left untouched. */
+SECStatus
+EC_DecodeParams(const SECItem *encodedParams, ECParams **ecparams)
+{
+    PLArenaPool *arena;
+    ECParams *params;
+
+    /* Initialize an arena for the ECParams structure */
+    if (!(arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE)))
+        return SECFailure;
+
+    params = (ECParams *)PORT_ArenaZAlloc(arena, sizeof(ECParams));
+    if (!params)
+        goto loser;
+
+    /* Copy the encoded params; the allocation can fail, so check it
+     * before memcpy writes through DEREncoding.data */
+    if (!SECITEM_AllocItem(arena, &(params->DEREncoding),
+                           encodedParams->len))
+        goto loser;
+    memcpy(params->DEREncoding.data, encodedParams->data, encodedParams->len);
+
+    /* Fill out the rest of the ECParams structure based on
+     * the encoded params */
+    if (EC_FillParams(arena, encodedParams, params) == SECFailure)
+        goto loser;
+
+    *ecparams = params;
+    return SECSuccess;
+
+loser:
+    PORT_FreeArena(arena, PR_TRUE);
+    return SECFailure;
+}
+
+int
+EC_GetPointSize(const ECParams *params)
+{
+    const ECCurveName curve = params->name;
+    const ECCurveParams *known = NULL;
+
+    /* only index ecCurve_map with an in-range curve name */
+    if (curve >= ECCurve_noName && curve <= ECCurve_pastLastCurve)
+        known = ecCurve_map[curve];
+    if (known != NULL)
+        return known->pointSize;
+    /* Unknown curve: derive the size from the field size, assuming two
+     * field-element coordinates plus one leading encoding-indicator byte. */
+    return (int)((params->fieldID.size + 7) / 8) * 2 + 1;
+}
+
+#endif /* NSS_DISABLE_ECC */
diff --git a/security/nss/lib/freebl/ecl/README b/security/nss/lib/freebl/ecl/README
new file mode 100644
index 000000000..04a8b3b01
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/README
@@ -0,0 +1,267 @@
+This Source Code Form is subject to the terms of the Mozilla Public
+License, v. 2.0. If a copy of the MPL was not distributed with this
+file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ 
+The ECL exposes routines for constructing and converting curve
+parameters for internal use.
+
+
+HEADER FILES
+============
+
+ecl-exp.h - Exports data structures and curve names. For use by code
+that does not have access to mp_ints.
+
+ecl-curve.h - Provides hex encodings (in the form of ECCurveParams
+structs) of standardized elliptic curve domain parameters and mappings
+from ECCurveName to ECCurveParams. For use by code that does not have
+access to mp_ints.
+
+ecl.h - Interface to constructors for curve parameters and group object,
+and point multiplication operations. Used by higher level algorithms
+(like ECDH and ECDSA) to actually perform elliptic curve cryptography.
+
+ecl-priv.h - Data structures and functions for internal use within the
+library.
+
+ecp.h - Internal header file that contains all functions for point
+arithmetic over prime fields.
+
+DATA STRUCTURES AND TYPES
+=========================
+
+ECCurveName (from ecl-exp.h) - Opaque name for standardized elliptic
+curve domain parameters.
+
+ECCurveParams (from ecl-exp.h) - Provides hexadecimal encoding
+of elliptic curve domain parameters. Can be generated by a user
+and passed to ECGroup_fromHex or can be generated from a name by
+EC_GetNamedCurveParams. ecl-curve.h contains ECCurveParams structs for
+the standardized curves defined by ECCurveName.
+
+ECGroup (from ecl.h and ecl-priv.h) - Opaque data structure that
+represents a group of elliptic curve points for a particular set of
+elliptic curve domain parameters. Contains all domain parameters (curve
+a and b, field, base point) as well as pointers to the functions that
+should be used for point arithmetic and the underlying field GFMethod.
+Generated by either ECGroup_fromHex or ECGroup_fromName.
+
+GFMethod (from ecl-priv.h) - Represents a field underlying a set of
+elliptic curve domain parameters. Contains the irreducible that defines
+the field (either the prime or the binary polynomial) as well as
+pointers to the functions that should be used for field arithmetic.
+
+ARITHMETIC FUNCTIONS
+====================
+
+Higher-level algorithms (like ECDH and ECDSA) should call ECPoint_mul
+or ECPoints_mul (from ecl.h) to do point arithmetic. These functions
+will choose which underlying algorithms to use, based on the ECGroup
+structure.
+
+Point Multiplication
+--------------------
+
+ecl_mult.c provides the ECPoints_mul and ECPoint_mul wrappers.
+It also provides two implementations for the pts_mul operation -
+ec_pts_mul_basic (which computes kP, lQ, and then adds kP + lQ) and
+ec_pts_mul_simul_w2 (which does a simultaneous point multiplication
+using a table with window size 2*2).
+
+ec_naf.c provides an implementation of an algorithm to calculate a
+non-adjacent form of a scalar, minimizing the number of point
+additions that need to be done in a point multiplication.
+
+Point Arithmetic over Prime Fields
+----------------------------------
+
+ecp_aff.c provides point arithmetic using affine coordinates.
+
+ecp_jac.c provides point arithmetic using Jacobian projective
+coordinates and mixed Jacobian-affine coordinates. (Jacobian projective
+coordinates represent a point (x, y) as (X, Y, Z), where x=X/Z^2,
+y=Y/Z^3).
+
+ecp_jm.c provides point arithmetic using Modified Jacobian
+coordinates and mixed Modified_Jacobian-affine coordinates.
+(Modified Jacobian coordinates represent a point (x, y)
+as (X, Y, Z, a*Z^4), where x=X/Z^2, y=Y/Z^3, and a is
+the linear coefficient in the curve defining equation).
+
+ecp_192.c and ecp_224.c provide optimized field arithmetic.
+
+Point Arithmetic over Binary Polynomial Fields
+----------------------------------------------
+
+ec2_aff.c provides point arithmetic using affine coordinates.
+
+ec2_proj.c provides point arithmetic using projective coordinates.
+(Projective coordinates represent a point (x, y) as (X, Y, Z), where
+x=X/Z, y=Y/Z^2).
+
+ec2_mont.c provides point multiplication using Montgomery projective
+coordinates.
+
+ec2_163.c, ec2_193.c, and ec2_233.c provide optimized field arithmetic.
+
+Field Arithmetic
+----------------
+
+ecl_gf.c provides constructors for field objects (GFMethod) with the
+functions GFMethod_cons*. It also provides wrappers around the basic
+field operations.
+
+Prime Field Arithmetic
+----------------------
+
+The mpi library provides the basic prime field arithmetic.
+
+ecp_mont.c provides wrappers around the Montgomery multiplication
+functions from the mpi library and adds encoding and decoding functions.
+It also provides the function to construct a GFMethod object using
+Montgomery multiplication.
+
+ecp_192.c and ecp_224.c provide optimized modular reduction for the
+fields defined by nistp192 and nistp224 primes.
+
+ecl_gf.c provides wrappers around the basic field operations.
+
+Binary Polynomial Field Arithmetic
+----------------------------------
+
+../mpi/mp_gf2m.c provides basic binary polynomial field arithmetic,
+including addition, multiplication, squaring, mod, and division, as well
+as conversion of polynomial representations between bitstring and int[].
+
+ec2_163.c, ec2_193.c, and ec2_233.c provide optimized field mod, mul,
+and sqr operations.
+
+ecl_gf.c provides wrappers around the basic field operations.
+
+Field Encoding
+--------------
+
+By default, field elements are encoded in their basic form. It is
+possible to use an alternative encoding, however. For example, it is
+possible to use Montgomery representation of prime field elements and
+take advantage of the fast modular multiplication that Montgomery
+representation provides. The process of converting from basic form to
+Montgomery representation is called field encoding, and the opposite
+process would be field decoding. All internal point operations assume
+that the operands are field encoded as appropriate. By rewiring the
+underlying field arithmetic to perform operations on these encoded
+values, the same overlying point arithmetic operations can be used
+regardless of field representation.
+
+ALGORITHM WIRING
+================
+
+The EC library allows point and field arithmetic algorithms to be
+substituted ("wired-in") on a fine-grained basis. This allows for
+generic algorithms and algorithms that are optimized for a particular
+curve, field, or architecture, to coexist and to be automatically
+selected at runtime.
+
+Wiring Mechanism
+----------------
+
+The ECGroup and GFMethod structure contain pointers to the point and
+field arithmetic functions, respectively, that are to be used in
+operations.
+
+The selection of algorithms to use is handled in the function
+ecgroup_fromNameAndHex in ecl.c.
+
+Default Wiring
+--------------
+
+Curves over prime fields by default use Montgomery field arithmetic,
+point multiplication using 5-bit window non-adjacent-form with 
+Modified Jacobian coordinates, and 2*2-bit simultaneous point 
+multiplication using Jacobian coordinates.
+(Wiring in function ECGroup_consGFp_mont in ecl.c.)
+
+Curves over prime fields that have optimized modular reduction (i.e.,
+secp160r1, nistp192, and nistp224) do not use Montgomery field
+arithmetic. Instead, they use basic field arithmetic with their
+optimized reduction (as in ecp_192.c and ecp_224.c). They
+use the same point multiplication and simultaneous point multiplication
+algorithms as other curves over prime fields.
+
+Curves over binary polynomial fields by default use generic field
+arithmetic with Montgomery point multiplication and basic kP + lQ
+computation (multiply, multiply, and add). (Wiring in function
+ECGroup_cons_GF2m in ecl.c.)
+
+Curves over binary polynomial fields that have optimized field
+arithmetic (i.e., any 163-, 193-, or 233-bit field) use their optimized
+field arithmetic. They use the same point multiplication and
+simultaneous point multiplication algorithms as other curves over binary
+fields.
+
+Example
+-------
+
+We provide an example for plugging in an optimized implementation for
+the Koblitz curve nistk163.
+
+Suppose the file ec2_k163.c contains the optimized implementation. In
+particular it contains a point multiplication function:
+
+	mp_err ec_GF2m_nistk163_pt_mul(const mp_int *n, const mp_int *px, 
+		const mp_int *py, mp_int *rx, mp_int *ry, const ECGroup *group);
+
+Since only a pt_mul function is provided, the generic pt_add function
+will be used.
+
+There are two options for handling the optimized field arithmetic used
+by the ..._pt_mul function. Say the optimized field arithmetic includes
+the following functions:
+
+	mp_err ec_GF2m_nistk163_add(const mp_int *a, const mp_int *b,
+		mp_int *r, const GFMethod *meth);
+	mp_err ec_GF2m_nistk163_mul(const mp_int *a, const mp_int *b,
+		mp_int *r, const GFMethod *meth);
+	mp_err ec_GF2m_nistk163_sqr(const mp_int *a, const mp_int *b,
+		mp_int *r, const GFMethod *meth);
+	mp_err ec_GF2m_nistk163_div(const mp_int *a, const mp_int *b,
+		mp_int *r, const GFMethod *meth);
+
+First, the optimized field arithmetic could simply be called directly
+by the ..._pt_mul function. This would be accomplished by changing
+the ecgroup_fromNameAndHex function in ecl.c to include the following
+statements:
+
+	if (name == ECCurve_NIST_K163) {
+		group = ECGroup_consGF2m(&irr, NULL, &curvea, &curveb, &genx,
+			&geny, &order, params->cofactor);
+		if (group == NULL) { res = MP_UNDEF; goto CLEANUP; }
+		MP_CHECKOK( ec_group_set_nistk163(group) );
+	}
+
+and including in ec2_k163.c the following function:
+
+	mp_err ec_group_set_nistk163(ECGroup *group) {
+		group->point_mul = &ec_GF2m_nistk163_pt_mul;
+		return MP_OKAY;
+	}
+
+As a result, ec_GF2m_pt_add and similar functions would use the
+basic binary polynomial field arithmetic ec_GF2m_add, ec_GF2m_mul,
+ec_GF2m_sqr, and ec_GF2m_div.
+
+Alternatively, the optimized field arithmetic could be wired into the
+group's GFMethod. This would be accomplished by putting the following
+function in ec2_k163.c:
+
+	mp_err ec_group_set_nistk163(ECGroup *group) {
+		group->meth->field_add = &ec_GF2m_nistk163_add;
+		group->meth->field_mul = &ec_GF2m_nistk163_mul;
+		group->meth->field_sqr = &ec_GF2m_nistk163_sqr;
+		group->meth->field_div = &ec_GF2m_nistk163_div;
+		group->point_mul = &ec_GF2m_nistk163_pt_mul;
+		return MP_OKAY;
+	}
+
+For an example of functions that use special field encodings, take a
+look at ecp_mont.c.
diff --git a/security/nss/lib/freebl/ecl/curve25519_32.c b/security/nss/lib/freebl/ecl/curve25519_32.c
new file mode 100644
index 000000000..0122961e6
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/curve25519_32.c
@@ -0,0 +1,390 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Derived from public domain code by Matthew Dempsky and D. J. Bernstein.
+ */
+
+#include "ecl-priv.h"
+#include "mpi.h"
+
+#include <stdint.h>
+#include <stdio.h>
+
+typedef uint32_t elem[32];
+
+/*
+ * Field addition with byte-wise carry propagation: out = a + b.
+ * Limbs 0..30 are reduced to 8 bits; limb 31 keeps the leftover carry.
+ */
+static void
+add(elem out, const elem a, const elem b)
+{
+    uint32_t carry = 0;
+    uint32_t idx = 0;
+    while (idx < 31) {
+        carry += a[idx] + b[idx];
+        out[idx] = carry & 0xFF;
+        carry >>= 8;
+        ++idx;
+    }
+    out[31] = carry + (a[31] + b[31]);
+}
+
+/*
+ * Subtract two field elements, with a bias of 2 * (2^255 - 19) added in.
+ * out = a - b + (2^256 - 38)
+ */
+static void
+sub(elem out, const elem a, const elem b)
+{
+    uint32_t j;
+    uint32_t u;
+    u = 218; /* 218 here plus 0xFF00 per limb below adds up to 2^256 - 38 */
+    for (j = 0; j < 31; ++j) {
+        u += a[j] + 0xFF00 - b[j]; /* 0xFF00 is shifted down to a 255 added into the next limb */
+        out[j] = u & 0xFF;
+        u >>= 8;
+    }
+    u += a[31] - b[31];
+    out[31] = u; /* top limb is stored unmasked */
+}
+
+/*
+ * "Squeeze" an element after multiplication (and square): carry into bytes, fold bits >= 255 back in times 19.
+ */
+static void
+squeeze(elem a)
+{
+    uint32_t j;
+    uint32_t u;
+    u = 0;
+    for (j = 0; j < 31; ++j) { /* first carry pass over limbs 0..30 */
+        u += a[j];
+        a[j] = u & 0xFF;
+        u >>= 8;
+    }
+    u += a[31];
+    a[31] = u & 0x7F; /* top limb keeps 7 bits, so the limbs hold 255 bits */
+    u = 19 * (u >> 7); /* the excess above bit 255 re-enters at limb 0, multiplied by 19 */
+    for (j = 0; j < 31; ++j) { /* second carry pass absorbs the folded excess */
+        u += a[j];
+        a[j] = u & 0xFF;
+        u >>= 8;
+    }
+    a[31] += u;
+}
+
+static const elem minusp = { 19, 0, 0, 0, 0, 0, 0, 0, /* limb 0 = 19 */
+                             0, 0, 0, 0, 0, 0, 0, 0,
+                             0, 0, 0, 0, 0, 0, 0, 0,
+                             0, 0, 0, 0, 0, 0, 0, 128 }; /* limb 31 = 128: value is 2^255 + 19 = 2^256 - (2^255 - 19) */
+
+/*
+ * Reduce field element a by 2^255-19: one branch-free conditional subtraction
+ */
+static void
+reduce(elem a)
+{
+    elem aorig;
+    uint32_t j;
+    uint32_t negative;
+
+    for (j = 0; j < 32; ++j) { /* keep a copy to restore from */
+        aorig[j] = a[j];
+    }
+    add(a, a, minusp); /* a += 2^255 + 19 */
+    negative = 1 + ~((a[31] >> 7) & 1); /* all-ones when bit 7 of a[31] is set, else 0 */
+    for (j = 0; j < 32; ++j) {
+        a[j] ^= negative & (aorig[j] ^ a[j]); /* all-ones mask restores aorig[j]; zero mask keeps the sum */
+    }
+}
+
+/*
+ * Multiplication and squeeze (out must not alias a or b: out[i] is written while a and b are still read)
+ * out = a * b
+ */
+static void
+mult(elem out, const elem a, const elem b)
+{
+    uint32_t i;
+    uint32_t j;
+    uint32_t u;
+
+    for (i = 0; i < 32; ++i) {
+        u = 0;
+        for (j = 0; j <= i; ++j) { /* limb pairs whose indices sum to i */
+            u += a[j] * b[i - j];
+        }
+        for (j = i + 1; j < 32; ++j) { /* pairs summing to i + 32 wrap around, weighted by 38 = 2 * 19 */
+            u += 38 * a[j] * b[i + 32 - j];
+        }
+        out[i] = u; /* unreduced column sum; squeeze() below carries it */
+    }
+    squeeze(out);
+}
+
+/*
+ * Multiplication by the constant 121665, with the same carry/fold steps as squeeze()
+ * out = 121665 * a
+ */
+static void
+mult121665(elem out, const elem a)
+{
+    uint32_t j;
+    uint32_t u;
+
+    u = 0;
+    for (j = 0; j < 31; ++j) { /* scale each limb and carry into bytes */
+        u += 121665 * a[j];
+        out[j] = u & 0xFF;
+        u >>= 8;
+    }
+    u += 121665 * a[31];
+    out[31] = u & 0x7F; /* keep 7 bits in the top limb */
+    u = 19 * (u >> 7); /* excess above bit 255 re-enters at limb 0, multiplied by 19 */
+    for (j = 0; j < 31; ++j) {
+        u += out[j];
+        out[j] = u & 0xFF;
+        u >>= 8;
+    }
+    u += out[j]; /* j == 31 after the loop */
+    out[j] = u;
+}
+
+/*
+ * Square a and squeeze the result (same column sums as mult(), with symmetric terms computed once).
+ * out = a * a
+ */
+static void
+square(elem out, const elem a)
+{
+    uint32_t i;
+    uint32_t j;
+    uint32_t u;
+
+    for (i = 0; i < 32; ++i) {
+        u = 0;
+        for (j = 0; j < i - j; ++j) { /* pairs (j, i - j) with j strictly below i - j */
+            u += a[j] * a[i - j];
+        }
+        for (j = i + 1; j < i + 32 - j; ++j) { /* wrapped pairs (j, i + 32 - j), weighted by 38 */
+            u += 38 * a[j] * a[i + 32 - j];
+        }
+        u *= 2; /* each pair counted above occurs twice in the full product */
+        if ((i & 1) == 0) { /* even i also has the two unpaired square terms */
+            u += a[i / 2] * a[i / 2];
+            u += 38 * a[i / 2 + 16] * a[i / 2 + 16];
+        }
+        out[i] = u;
+    }
+    squeeze(out);
+}
+
+/*
+ * Constant time swap between p and q depending on b (1 swaps, 0 leaves both unchanged)
+ */
+static void
+cswap(uint32_t p[64], uint32_t q[64], uint32_t b)
+{
+    uint32_t j;
+    uint32_t swap = 1 + ~b; /* all-ones mask for b == 1, zero for b == 0 */
+
+    for (j = 0; j < 64; ++j) {
+        const uint32_t t = swap & (p[j] ^ q[j]); /* p ^ q under the mask, 0 when not swapping */
+        p[j] ^= t;
+        q[j] ^= t;
+    }
+}
+
+/*
+ * Montgomery ladder over scalar bits 254..0; writes the final (x_2, z_2) pair to the two outputs
+ */
+static void
+monty(elem x_2_out, elem z_2_out,
+      const elem point, const elem scalar)
+{
+    uint32_t x_3[64] = { 0 }; /* words 0..31 hold x, words 32..63 hold z */
+    uint32_t x_2[64] = { 0 }; /* same layout as x_3 */
+    uint32_t a0[64];
+    uint32_t a1[64];
+    uint32_t b0[64];
+    uint32_t b1[64];
+    uint32_t c1[64];
+    uint32_t r[32];
+    uint32_t s[32];
+    uint32_t t[32];
+    uint32_t u[32];
+    uint32_t swap = 0;
+    uint32_t k_t = 0;
+    int j;
+
+    for (j = 0; j < 32; ++j) {
+        x_3[j] = point[j];
+    }
+    x_3[32] = 1; /* x_3 starts as (point, 1) */
+    x_2[0] = 1; /* x_2 starts as (1, 0) */
+
+    for (j = 254; j >= 0; --j) {
+        k_t = (scalar[j >> 3] >> (j & 7)) & 1; /* bit j of the scalar */
+        swap ^= k_t; /* 1 only when this bit differs from the previous one */
+        cswap(x_2, x_3, swap);
+        swap = k_t;
+        add(a0, x_2, x_2 + 32); /* a0 = (x_2 + z_2, x_2 - z_2) */
+        sub(a0 + 32, x_2, x_2 + 32);
+        add(a1, x_3, x_3 + 32); /* a1 = (x_3 + z_3, x_3 - z_3) */
+        sub(a1 + 32, x_3, x_3 + 32);
+        square(b0, a0);
+        square(b0 + 32, a0 + 32);
+        mult(b1, a1, a0 + 32);
+        mult(b1 + 32, a1 + 32, a0);
+        add(c1, b1, b1 + 32);
+        sub(c1 + 32, b1, b1 + 32);
+        square(r, c1 + 32);
+        sub(s, b0, b0 + 32);
+        mult121665(t, s);
+        add(u, t, b0);
+        mult(x_2, b0, b0 + 32);
+        mult(x_2 + 32, s, u);
+        square(x_3, c1);
+        mult(x_3 + 32, r, point);
+    }
+
+    cswap(x_2, x_3, swap); /* apply the swap still pending from the last bit */
+    for (j = 0; j < 32; ++j) {
+        x_2_out[j] = x_2[j];
+    }
+    for (j = 0; j < 32; ++j) {
+        z_2_out[j] = x_2[j + 32];
+    }
+}
+
+static void
+recip(elem out, const elem z)
+{ /* out = z^(2^255 - 21) via a fixed square/multiply chain; the comments track the exponent reached */
+    elem z2;
+    elem z9;
+    elem z11;
+    elem z2_5_0;
+    elem z2_10_0;
+    elem z2_20_0;
+    elem z2_50_0;
+    elem z2_100_0;
+    elem t0;
+    elem t1;
+    int i;
+
+    /* 2 */ square(z2, z);
+    /* 4 */ square(t1, z2);
+    /* 8 */ square(t0, t1);
+    /* 9 */ mult(z9, t0, z);
+    /* 11 */ mult(z11, z9, z2);
+    /* 22 */ square(t0, z11);
+    /* 2^5 - 2^0 = 31 */ mult(z2_5_0, t0, z9);
+
+    /* 2^6 - 2^1 */ square(t0, z2_5_0);
+    /* 2^7 - 2^2 */ square(t1, t0);
+    /* 2^8 - 2^3 */ square(t0, t1);
+    /* 2^9 - 2^4 */ square(t1, t0);
+    /* 2^10 - 2^5 */ square(t0, t1);
+    /* 2^10 - 2^0 */ mult(z2_10_0, t0, z2_5_0);
+
+    /* 2^11 - 2^1 */ square(t0, z2_10_0);
+    /* 2^12 - 2^2 */ square(t1, t0);
+    /* 2^20 - 2^10 */
+    for (i = 2; i < 10; i += 2) {
+        square(t0, t1);
+        square(t1, t0);
+    }
+    /* 2^20 - 2^0 */ mult(z2_20_0, t1, z2_10_0);
+
+    /* 2^21 - 2^1 */ square(t0, z2_20_0);
+    /* 2^22 - 2^2 */ square(t1, t0);
+    /* 2^40 - 2^20 */
+    for (i = 2; i < 20; i += 2) {
+        square(t0, t1);
+        square(t1, t0);
+    }
+    /* 2^40 - 2^0 */ mult(t0, t1, z2_20_0);
+
+    /* 2^41 - 2^1 */ square(t1, t0);
+    /* 2^42 - 2^2 */ square(t0, t1);
+    /* 2^50 - 2^10 */
+    for (i = 2; i < 10; i += 2) {
+        square(t1, t0);
+        square(t0, t1);
+    }
+    /* 2^50 - 2^0 */ mult(z2_50_0, t0, z2_10_0);
+
+    /* 2^51 - 2^1 */ square(t0, z2_50_0);
+    /* 2^52 - 2^2 */ square(t1, t0);
+    /* 2^100 - 2^50 */
+    for (i = 2; i < 50; i += 2) {
+        square(t0, t1);
+        square(t1, t0);
+    }
+    /* 2^100 - 2^0 */ mult(z2_100_0, t1, z2_50_0);
+
+    /* 2^101 - 2^1 */ square(t1, z2_100_0);
+    /* 2^102 - 2^2 */ square(t0, t1);
+    /* 2^200 - 2^100 */
+    for (i = 2; i < 100; i += 2) {
+        square(t1, t0);
+        square(t0, t1);
+    }
+    /* 2^200 - 2^0 */ mult(t1, t0, z2_100_0);
+
+    /* 2^201 - 2^1 */ square(t0, t1);
+    /* 2^202 - 2^2 */ square(t1, t0);
+    /* 2^250 - 2^50 */
+    for (i = 2; i < 50; i += 2) {
+        square(t0, t1);
+        square(t1, t0);
+    }
+    /* 2^250 - 2^0 */ mult(t0, t1, z2_50_0);
+
+    /* 2^251 - 2^1 */ square(t1, t0);
+    /* 2^252 - 2^2 */ square(t0, t1);
+    /* 2^253 - 2^3 */ square(t1, t0);
+    /* 2^254 - 2^4 */ square(t0, t1);
+    /* 2^255 - 2^5 */ square(t1, t0);
+    /* 2^255 - 21 */ mult(out, t1, z11);
+}
+
+/*
+ * Computes q = Curve25519(p, s); reads 32 bytes from s and p, writes 32 bytes to q, returns SECSuccess
+ */
+SECStatus
+ec_Curve25519_mul(PRUint8 *q, const PRUint8 *s, const PRUint8 *p)
+{
+    elem point = { 0 };
+    elem x_2 = { 0 };
+    elem z_2 = { 0 };
+    elem X = { 0 };
+    elem scalar = { 0 };
+    uint32_t i;
+
+    /* read and mask scalar */
+    for (i = 0; i < 32; ++i) {
+        scalar[i] = s[i];
+    }
+    scalar[0] &= 0xF8; /* clear the low 3 bits */
+    scalar[31] &= 0x7F; /* clear bit 255 */
+    scalar[31] |= 64; /* set bit 254 */
+
+    /* read and mask point */
+    for (i = 0; i < 32; ++i) {
+        point[i] = p[i];
+    }
+    point[31] &= 0x7F; /* drop the top bit of the input point */
+
+    monty(x_2, z_2, point, scalar);
+    recip(z_2, z_2); /* in place: z_2 becomes recip(z_2) */
+    mult(X, x_2, z_2); /* X = x_2 * recip(z_2) */
+    reduce(X);
+    for (i = 0; i < 32; ++i) {
+        q[i] = X[i]; /* each limb is narrowed to one output byte */
+    }
+    return SECSuccess; /* was a bare 0; no failure path exists in this function */
+}
diff --git a/security/nss/lib/freebl/ecl/curve25519_64.c b/security/nss/lib/freebl/ecl/curve25519_64.c
new file mode 100644
index 000000000..89327ad1c
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/curve25519_64.c
@@ -0,0 +1,514 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Derived from public domain C code by Adam Langley and Daniel J. Bernstein
+ */
+
+#include "uint128.h"
+
+#include "ecl-priv.h"
+#include "mpi.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+typedef uint8_t u8;
+typedef uint64_t felem;
+
+/* Sum two numbers limb by limb: output += in (both hold 5 limbs; no carry is propagated) */
+static void
+fsum(felem *output, const felem *in)
+{
+    unsigned i;
+    for (i = 0; i < 5; ++i) {
+        output[i] += in[i]; /* plain 64-bit add per limb */
+    }
+}
+
+/* Find the difference of two numbers: output = in - output
+ * (note the order of the arguments!). Limbs are subtracted as signed 64-bit
+ * values; a negative limb then borrows 2^51 from the limb above it. */
+static void
+fdifference_backwards(felem *ioutput, const felem *iin)
+{
+    static const int64_t twotothe51 = ((int64_t)1l << 51);
+    const int64_t *in = (const int64_t *)iin; /* the limbs are reinterpreted as signed */
+    int64_t *out = (int64_t *)ioutput;
+
+    out[0] = in[0] - out[0];
+    out[1] = in[1] - out[1];
+    out[2] = in[2] - out[2];
+    out[3] = in[3] - out[3];
+    out[4] = in[4] - out[4];
+
+    // An arithmetic shift right of 63 places turns a positive number to 0 and a
+    // negative number to all 1's. This gives us a bitmask that lets us avoid
+    // side-channel prone branches.
+    int64_t t;
+    /* NEGCHAIN(a, b): if out[a] is negative, add 2^51 to it and take 1 from out[b]; NEGCHAIN19 takes 19 instead. */
+#define NEGCHAIN(a, b)        \
+    t = out[a] >> 63;         \
+    out[a] += twotothe51 & t; \
+    out[b] -= 1 & t;
+
+#define NEGCHAIN19(a, b)      \
+    t = out[a] >> 63;         \
+    out[a] += twotothe51 & t; \
+    out[b] -= 19 & t;
+
+    NEGCHAIN(0, 1);
+    NEGCHAIN(1, 2);
+    NEGCHAIN(2, 3);
+    NEGCHAIN(3, 4);
+    NEGCHAIN19(4, 0); /* a borrow out of the top limb comes off limb 0, as 19 */
+    NEGCHAIN(0, 1);   /* second pass, in case that made limb 0 negative */
+    NEGCHAIN(1, 2);
+    NEGCHAIN(2, 3);
+    NEGCHAIN(3, 4);
+}
+
+/* Multiply a number by a scalar: output = in * scalar, carrying limb to limb and wrapping the top carry into limb 0 */
+static void
+fscalar_product(felem *output, const felem *in,
+                const felem scalar)
+{
+    uint128_t tmp, tmp2; /* tmp: running product plus carry; tmp2: the current limb's product */
+
+    tmp = mul6464(in[0], scalar);
+    output[0] = mask51(tmp);
+
+    tmp2 = mul6464(in[1], scalar);
+    tmp = add128(tmp2, rshift128(tmp, 51)); /* add what the previous limb left above bit 50 */
+    output[1] = mask51(tmp);
+
+    tmp2 = mul6464(in[2], scalar);
+    tmp = add128(tmp2, rshift128(tmp, 51));
+    output[2] = mask51(tmp);
+
+    tmp2 = mul6464(in[3], scalar);
+    tmp = add128(tmp2, rshift128(tmp, 51));
+    output[3] = mask51(tmp);
+
+    tmp2 = mul6464(in[4], scalar);
+    tmp = add128(tmp2, rshift128(tmp, 51));
+    output[4] = mask51(tmp);
+
+    output[0] += mask_lower(rshift128(tmp, 51)) * 19; /* carry out of limb 4 re-enters limb 0 times 19; limb 0 is not re-masked */
+}
+
+/* Multiply two numbers: output = in2 * in
+ *
+ * output must be distinct from both inputs. The inputs are reduced coefficient
+ * form, the output is not.
+ */
+static void
+fmul(felem *output, const felem *in2, const felem *in)
+{
+    uint128_t t0, t1, t2, t3, t4, t5, t6, t7, t8;
+    /* Schoolbook product: tk sums every in[i] * in2[j] with i + j == k. */
+    t0 = mul6464(in[0], in2[0]);
+    t1 = add128(mul6464(in[1], in2[0]), mul6464(in[0], in2[1]));
+    t2 = add128(add128(mul6464(in[0], in2[2]),
+                       mul6464(in[2], in2[0])),
+                mul6464(in[1], in2[1]));
+    t3 = add128(add128(add128(mul6464(in[0], in2[3]),
+                              mul6464(in[3], in2[0])),
+                       mul6464(in[1], in2[2])),
+                mul6464(in[2], in2[1]));
+    t4 = add128(add128(add128(add128(mul6464(in[0], in2[4]),
+                                     mul6464(in[4], in2[0])),
+                              mul6464(in[3], in2[1])),
+                       mul6464(in[1], in2[3])),
+                mul6464(in[2], in2[2]));
+    t5 = add128(add128(add128(mul6464(in[4], in2[1]),
+                              mul6464(in[1], in2[4])),
+                       mul6464(in[2], in2[3])),
+                mul6464(in[3], in2[2]));
+    t6 = add128(add128(mul6464(in[4], in2[2]),
+                       mul6464(in[2], in2[4])),
+                mul6464(in[3], in2[3]));
+    t7 = add128(mul6464(in[3], in2[4]), mul6464(in[4], in2[3]));
+    t8 = mul6464(in[4], in2[4]);
+    /* Fold t5..t8 onto t0..t3 through mul12819 (presumably a multiply by 19 -- confirm in uint128.h). */
+    t0 = add128(t0, mul12819(t5));
+    t1 = add128(t1, mul12819(t6));
+    t2 = add128(t2, mul12819(t7));
+    t3 = add128(t3, mul12819(t8));
+    /* Carry chain over the 51-bit limbs; the carry out of t4 re-enters t0 through mul12819. */
+    t1 = add128(t1, rshift128(t0, 51));
+    t0 = mask51full(t0);
+    t2 = add128(t2, rshift128(t1, 51));
+    t1 = mask51full(t1);
+    t3 = add128(t3, rshift128(t2, 51));
+    t4 = add128(t4, rshift128(t3, 51));
+    t0 = add128(t0, mul12819(rshift128(t4, 51)));
+    t1 = add128(t1, rshift128(t0, 51));
+    t2 = mask51full(t2); /* t2's carry already went into t3 above, so mask before adding t1's new carry */
+    t2 = add128(t2, rshift128(t1, 51));
+
+    output[0] = mask51(t0);
+    output[1] = mask51(t1);
+    output[2] = mask_lower(t2); /* took the last carry and is not re-masked to 51 bits */
+    output[3] = mask51(t3);
+    output[4] = mask51(t4);
+}
+
+static void
+fsquare(felem *output, const felem *in)
+{
+    uint128_t t0, t1, t2, t3, t4, t5, t6, t7, t8;
+    /* Square a number: output = in * in. Each cross term in[i] * in[j] (i != j) appears once, passed through lshift128(..., 1). */
+    t0 = mul6464(in[0], in[0]);
+    t1 = lshift128(mul6464(in[0], in[1]), 1);
+    t2 = add128(lshift128(mul6464(in[0], in[2]), 1),
+                mul6464(in[1], in[1]));
+    t3 = add128(lshift128(mul6464(in[0], in[3]), 1),
+                lshift128(mul6464(in[1], in[2]), 1));
+    t4 = add128(add128(lshift128(mul6464(in[0], in[4]), 1),
+                       lshift128(mul6464(in[3], in[1]), 1)),
+                mul6464(in[2], in[2]));
+    t5 = add128(lshift128(mul6464(in[4], in[1]), 1),
+                lshift128(mul6464(in[2], in[3]), 1));
+    t6 = add128(lshift128(mul6464(in[4], in[2]), 1),
+                mul6464(in[3], in[3]));
+    t7 = lshift128(mul6464(in[3], in[4]), 1);
+    t8 = mul6464(in[4], in[4]);
+    /* Fold t5..t8 onto t0..t3 through mul12819 (presumably a multiply by 19 -- confirm in uint128.h). */
+    t0 = add128(t0, mul12819(t5));
+    t1 = add128(t1, mul12819(t6));
+    t2 = add128(t2, mul12819(t7));
+    t3 = add128(t3, mul12819(t8));
+    /* Carry chain over the 51-bit limbs; the carry out of t4 re-enters t0 through mul12819. */
+    t1 = add128(t1, rshift128(t0, 51));
+    t0 = mask51full(t0);
+    t2 = add128(t2, rshift128(t1, 51));
+    t1 = mask51full(t1);
+    t3 = add128(t3, rshift128(t2, 51));
+    t4 = add128(t4, rshift128(t3, 51));
+    t0 = add128(t0, mul12819(rshift128(t4, 51)));
+    t1 = add128(t1, rshift128(t0, 51));
+
+    output[0] = mask51(t0);
+    output[1] = mask_lower(t1); /* took the last carry and is not re-masked to 51 bits */
+    output[2] = mask51(t2);
+    output[3] = mask51(t3);
+    output[4] = mask51(t4);
+}
+
+/* Take a 32-byte number and expand it into polynomial form (five limbs cut at 51-bit steps); all loads stay inside in[0..31] */
+static void NO_SANITIZE_ALIGNMENT
+fexpand(felem *output, const u8 *in)
+{
+    output[0] = *((const uint64_t *)(in)) & MASK51; /* NOTE(review): native 64-bit loads -- the offsets assume a little-endian host; confirm */
+    output[1] = (*((const uint64_t *)(in + 6)) >> 3) & MASK51;
+    output[2] = (*((const uint64_t *)(in + 12)) >> 6) & MASK51;
+    output[3] = (*((const uint64_t *)(in + 19)) >> 1) & MASK51;
+    output[4] = (*((const uint64_t *)(in + 24)) >> 12) & MASK51; /* same bits as (in + 25) >> 4, but an 8-byte load at in + 25 would touch in[32] */
+}
+
+/* Take a fully reduced polynomial form number and contract it into a
+ * 32-byte array
+ */
+static void
+fcontract(u8 *output, const felem *input)
+{
+    uint128_t t0 = init128x(input[0]);
+    uint128_t t1 = init128x(input[1]);
+    uint128_t t2 = init128x(input[2]);
+    uint128_t t3 = init128x(input[3]);
+    uint128_t t4 = init128x(input[4]);
+    uint128_t tmp = init128x(19);
+    /* First carry pass; the carry out of t4 wraps into t0 through mul12819. */
+    t1 = add128(t1, rshift128(t0, 51));
+    t0 = mask51full(t0);
+    t2 = add128(t2, rshift128(t1, 51));
+    t1 = mask51full(t1);
+    t3 = add128(t3, rshift128(t2, 51));
+    t2 = mask51full(t2);
+    t4 = add128(t4, rshift128(t3, 51));
+    t3 = mask51full(t3);
+    t0 = add128(t0, mul12819(rshift128(t4, 51)));
+    t4 = mask51full(t4);
+    /* Second carry pass, same shape, to absorb what the wrap added to t0. */
+    t1 = add128(t1, rshift128(t0, 51));
+    t0 = mask51full(t0);
+    t2 = add128(t2, rshift128(t1, 51));
+    t1 = mask51full(t1);
+    t3 = add128(t3, rshift128(t2, 51));
+    t2 = mask51full(t2);
+    t4 = add128(t4, rshift128(t3, 51));
+    t3 = mask51full(t3);
+    t0 = add128(t0, mul12819(rshift128(t4, 51)));
+    t4 = mask51full(t4);
+
+    /* now t is between 0 and 2^255-1, properly carried. */
+    /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
+
+    t0 = add128(t0, tmp); /* tmp is 19 here */
+
+    t1 = add128(t1, rshift128(t0, 51));
+    t0 = mask51full(t0);
+    t2 = add128(t2, rshift128(t1, 51));
+    t1 = mask51full(t1);
+    t3 = add128(t3, rshift128(t2, 51));
+    t2 = mask51full(t2);
+    t4 = add128(t4, rshift128(t3, 51));
+    t3 = mask51full(t3);
+    t0 = add128(t0, mul12819(rshift128(t4, 51)));
+    t4 = mask51full(t4);
+
+    /* now between 19 and 2^255-1 in both cases, and offset by 19. */
+
+    t0 = add128(t0, init128x(0x8000000000000 - 19)); /* 2^51 - 19 */
+    tmp = init128x(0x8000000000000 - 1);             /* 2^51 - 1 */
+    t1 = add128(t1, tmp);
+    t2 = add128(t2, tmp);
+    t3 = add128(t3, tmp);
+    t4 = add128(t4, tmp);
+
+    /* now between 2^255 and 2^256-20, and offset by 2^255. */
+
+    t1 = add128(t1, rshift128(t0, 51));
+    t0 = mask51full(t0);
+    t2 = add128(t2, rshift128(t1, 51));
+    t1 = mask51full(t1);
+    t3 = add128(t3, rshift128(t2, 51));
+    t2 = mask51full(t2);
+    t4 = add128(t4, rshift128(t3, 51));
+    t3 = mask51full(t3);
+    t4 = mask51full(t4); /* no wrap this time: whatever t4 holds above bit 50 is discarded */
+    /* Pack 5 x 51 bits into four 64-bit words. NOTE(review): stores go through uint64_t casts -- assumes a little-endian host and that unaligned stores to output are acceptable; confirm. */
+    *((uint64_t *)(output)) = mask_lower(t0) | mask_lower(t1) << 51;
+    *((uint64_t *)(output + 8)) = (mask_lower(t1) >> 13) | (mask_lower(t2) << 38);
+    *((uint64_t *)(output + 16)) = (mask_lower(t2) >> 26) | (mask_lower(t3) << 25);
+    *((uint64_t *)(output + 24)) = (mask_lower(t3) >> 39) | (mask_lower(t4) << 12);
+}
+
+/* Input: Q, Q', Q-Q'
+ * Output: 2Q, Q+Q'
+ *
+ *   x2 z2: long form
+ *   x3 z3: long form
+ *   x z: short form, destroyed
+ *   xprime zprime: short form, destroyed
+ *   qmqp: short form, preserved
+ */
+static void
+fmonty(felem *x2, felem *z2,         /* output 2Q */
+       felem *x3, felem *z3,         /* output Q + Q' */
+       felem *x, felem *z,           /* input Q */
+       felem *xprime, felem *zprime, /* input Q' */
+       const felem *qmqp /* input Q - Q' */)
+{
+    felem origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5], zzprime[5],
+        zzzprime[5];
+
+    memcpy(origx, x, 5 * sizeof(felem));
+    fsum(x, z);                      /* x = x + z */
+    fdifference_backwards(z, origx); // does x - z
+
+    memcpy(origxprime, xprime, sizeof(felem) * 5);
+    fsum(xprime, zprime);                           /* xprime = x' + z' */
+    fdifference_backwards(zprime, origxprime);      /* zprime = x' - z' */
+    fmul(xxprime, xprime, z);                       /* (x' + z') * (x - z) */
+    fmul(zzprime, x, zprime);                       /* (x + z) * (x' - z') */
+    memcpy(origxprime, xxprime, sizeof(felem) * 5); /* origxprime is reused as scratch for xxprime */
+    fsum(xxprime, zzprime);                         /* sum of the two products */
+    fdifference_backwards(zzprime, origxprime);     /* difference of the two products */
+    fsquare(x3, xxprime);                           /* x3 = sum^2 */
+    fsquare(zzzprime, zzprime);
+    fmul(z3, zzzprime, qmqp);                       /* z3 = difference^2 * qmqp */
+
+    fsquare(xx, x);                   /* xx = (x + z)^2 */
+    fsquare(zz, z);                   /* zz = (x - z)^2 */
+    fmul(x2, xx, zz);                 /* x2 = xx * zz */
+    fdifference_backwards(zz, xx); // does zz = xx - zz
+    fscalar_product(zzz, zz, 121665); /* zzz = 121665 * (xx - zz) */
+    fsum(zzz, xx);                    /* zzz += xx */
+    fmul(z2, zz, zzz);                /* z2 = (xx - zz) * zzz */
+}
+
+// -----------------------------------------------------------------------------
+// Maybe swap the contents of two felem arrays (@a and @b), each @len elements
+// long. @iswap must be 0 (leave as is) or 1 (swap): it is negated into an
+// all-zeros or all-ones mask, so any other value would not give a clean swap.
+//
+// The swap is branch-free: it XORs through that mask instead of testing @iswap.
+// -----------------------------------------------------------------------------
+static void
+swap_conditional(felem *a, felem *b, unsigned len, felem iswap)
+{
+    unsigned i;
+    const felem swap = 1 + ~iswap; /* two's-complement negation: 0 -> 0, 1 -> all ones */
+
+    for (i = 0; i < len; ++i) {
+        const felem x = swap & (a[i] ^ b[i]); /* a[i] ^ b[i] when swapping, 0 otherwise */
+        a[i] ^= x;
+        b[i] ^= x;
+    }
+}
+
+/* Calculates nQ where Q is the x-coordinate of a point on the curve
+ *
+ *   resultx/resultz: the x coordinate of the resulting curve point (short form)
+ *   n: a 32-byte number
+ *   q: a point of the curve (short form)
+ */
+static void
+cmult(felem *resultx, felem *resultz, const u8 *n, const felem *q)
+{
+    felem a[5] = { 0 }, b[5] = { 1 }, c[5] = { 1 }, d[5] = { 0 };
+    felem *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
+    felem e[5] = { 0 }, f[5] = { 1 }, g[5] = { 0 }, h[5] = { 1 };
+    felem *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
+    /* Start from (nqx : nqz) = (1 : 0) and, after the memcpy below, (nqpqx : nqpqz) = (q : 1). */
+    unsigned i, j;
+
+    memcpy(nqpqx, q, sizeof(felem) * 5);
+
+    for (i = 0; i < 32; ++i) {
+        u8 byte = n[31 - i]; /* last byte of n first */
+        for (j = 0; j < 8; ++j) {
+            const felem bit = byte >> 7; /* current top bit of byte: 0 or 1 */
+
+            swap_conditional(nqx, nqpqx, 5, bit);
+            swap_conditional(nqz, nqpqz, 5, bit);
+            fmonty(nqx2, nqz2, nqpqx2, nqpqz2, nqx, nqz, nqpqx, nqpqz, q);
+            swap_conditional(nqx2, nqpqx2, 5, bit);
+            swap_conditional(nqz2, nqpqz2, 5, bit);
+            /* Rotate the buffers: this round's outputs become the next round's inputs. */
+            t = nqx;
+            nqx = nqx2;
+            nqx2 = t;
+            t = nqz;
+            nqz = nqz2;
+            nqz2 = t;
+            t = nqpqx;
+            nqpqx = nqpqx2;
+            nqpqx2 = t;
+            t = nqpqz;
+            nqpqz = nqpqz2;
+            nqpqz2 = t;
+
+            byte <<= 1; /* bring the next lower bit to the top */
+        }
+    }
+
+    memcpy(resultx, nqx, sizeof(felem) * 5);
+    memcpy(resultz, nqz, sizeof(felem) * 5);
+}
+
+// -----------------------------------------------------------------------------
+// out = z^(2^255 - 21), built from the squarings and multiplications annotated below. Shamelessly copied from djb's code
+// -----------------------------------------------------------------------------
+static void
+crecip(felem *out, const felem *z)
+{
+    felem z2[5];
+    felem z9[5];
+    felem z11[5];
+    felem z2_5_0[5];
+    felem z2_10_0[5];
+    felem z2_20_0[5];
+    felem z2_50_0[5];
+    felem z2_100_0[5];
+    felem t0[5];
+    felem t1[5];
+    int i;
+
+    /* 2 */ fsquare(z2, z);
+    /* 4 */ fsquare(t1, z2);
+    /* 8 */ fsquare(t0, t1);
+    /* 9 */ fmul(z9, t0, z);
+    /* 11 */ fmul(z11, z9, z2);
+    /* 22 */ fsquare(t0, z11);
+    /* 2^5 - 2^0 = 31 */ fmul(z2_5_0, t0, z9);
+
+    /* 2^6 - 2^1 */ fsquare(t0, z2_5_0);
+    /* 2^7 - 2^2 */ fsquare(t1, t0);
+    /* 2^8 - 2^3 */ fsquare(t0, t1);
+    /* 2^9 - 2^4 */ fsquare(t1, t0);
+    /* 2^10 - 2^5 */ fsquare(t0, t1);
+    /* 2^10 - 2^0 */ fmul(z2_10_0, t0, z2_5_0);
+
+    /* 2^11 - 2^1 */ fsquare(t0, z2_10_0);
+    /* 2^12 - 2^2 */ fsquare(t1, t0);
+    /* 2^20 - 2^10 */ for (i = 2; i < 10; i += 2) {
+        fsquare(t0, t1);
+        fsquare(t1, t0);
+    }
+    /* 2^20 - 2^0 */ fmul(z2_20_0, t1, z2_10_0);
+
+    /* 2^21 - 2^1 */ fsquare(t0, z2_20_0);
+    /* 2^22 - 2^2 */ fsquare(t1, t0);
+    /* 2^40 - 2^20 */ for (i = 2; i < 20; i += 2) {
+        fsquare(t0, t1);
+        fsquare(t1, t0);
+    }
+    /* 2^40 - 2^0 */ fmul(t0, t1, z2_20_0);
+
+    /* 2^41 - 2^1 */ fsquare(t1, t0);
+    /* 2^42 - 2^2 */ fsquare(t0, t1);
+    /* 2^50 - 2^10 */ for (i = 2; i < 10; i += 2) {
+        fsquare(t1, t0);
+        fsquare(t0, t1);
+    }
+    /* 2^50 - 2^0 */ fmul(z2_50_0, t0, z2_10_0);
+
+    /* 2^51 - 2^1 */ fsquare(t0, z2_50_0);
+    /* 2^52 - 2^2 */ fsquare(t1, t0);
+    /* 2^100 - 2^50 */ for (i = 2; i < 50; i += 2) {
+        fsquare(t0, t1);
+        fsquare(t1, t0);
+    }
+    /* 2^100 - 2^0 */ fmul(z2_100_0, t1, z2_50_0);
+
+    /* 2^101 - 2^1 */ fsquare(t1, z2_100_0);
+    /* 2^102 - 2^2 */ fsquare(t0, t1);
+    /* 2^200 - 2^100 */ for (i = 2; i < 100; i += 2) {
+        fsquare(t1, t0);
+        fsquare(t0, t1);
+    }
+    /* 2^200 - 2^0 */ fmul(t1, t0, z2_100_0);
+
+    /* 2^201 - 2^1 */ fsquare(t0, t1);
+    /* 2^202 - 2^2 */ fsquare(t1, t0);
+    /* 2^250 - 2^50 */ for (i = 2; i < 50; i += 2) {
+        fsquare(t0, t1);
+        fsquare(t1, t0);
+    }
+    /* 2^250 - 2^0 */ fmul(t0, t1, z2_50_0);
+
+    /* 2^251 - 2^1 */ fsquare(t1, t0);
+    /* 2^252 - 2^2 */ fsquare(t0, t1);
+    /* 2^253 - 2^3 */ fsquare(t1, t0);
+    /* 2^254 - 2^4 */ fsquare(t0, t1);
+    /* 2^255 - 2^5 */ fsquare(t1, t0);
+    /* 2^255 - 21 */ fmul(out, t1, z11);
+}
+
+SECStatus
+ec_Curve25519_mul(uint8_t *mypublic, const uint8_t *secret,
+                  const uint8_t *basepoint)
+{
+    felem bp[5], x[5], z[5], zmone[5];
+    uint8_t e[32];
+    int i;
+    /* Scalar multiplication of basepoint by a masked copy of secret; each argument is accessed as a 32-byte buffer. */
+    for (i = 0; i < 32; ++i) {
+        e[i] = secret[i];
+    }
+    e[0] &= 248;  /* clear the three low bits */
+    e[31] &= 127; /* clear the top bit ... */
+    e[31] |= 64;  /* ... and set the bit just below it */
+    fexpand(bp, basepoint);
+    cmult(x, z, e, bp);     /* (x : z) = e * bp */
+    crecip(zmone, z);       /* zmone = z^(2^255 - 21) */
+    fmul(z, x, zmone);      /* z is reused for x * zmone */
+    fcontract(mypublic, z); /* NOTE(review): e, a copy of the secret, is not wiped before returning */
+
+    return 0; /* unconditional: this function has no failure path */
+}
diff --git a/security/nss/lib/freebl/ecl/ec_naf.c b/security/nss/lib/freebl/ecl/ec_naf.c
new file mode 100644
index 000000000..cad08cb27
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ec_naf.c
@@ -0,0 +1,68 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecl-priv.h"
+
+/* Returns 2^e as an int by doubling e times (1 when e <= 0). Intended
+ * for small exponents only. */
+int
+ec_twoTo(int e)
+{
+    int result = 1;
+    int remaining;
+
+    for (remaining = e; remaining > 0; remaining--) {
+        result *= 2;
+    }
+    return result;
+}
+
+/* Computes the windowed non-adjacent-form (NAF) of a scalar. Out should
+ * be an array of signed char's with room for bitsize + 1 entries (the tail
+ * is zero-filled up to index bitsize), in is the original scalar, and w is
+ * the window size. Returns res, MP_OKAY unless an MP_CHECKOK step fails.
+ * See D. Hankerson, J. Hernandez and A. Menezes, "Software implementation of
+ * elliptic curve cryptography over binary fields", Proc. CHES 2000. */
+mp_err
+ec_compute_wNAF(signed char *out, int bitsize, const mp_int *in, int w)
+{
+    mp_int k;
+    mp_err res = MP_OKAY;
+    int i, twowm1, mask;
+
+    twowm1 = ec_twoTo(w - 1); /* 2^(w-1) */
+    mask = 2 * twowm1 - 1;    /* 2^w - 1: selects the low w bits */
+
+    MP_DIGITS(&k) = 0; /* presumably so mp_clear at CLEANUP is safe if mp_init_copy fails -- confirm */
+    MP_CHECKOK(mp_init_copy(&k, in));
+
+    i = 0;
+    /* Compute wNAF form */
+    while (mp_cmp_z(&k) > 0) { /* NOTE(review): i is not checked against bitsize in this loop -- confirm callers size out for in */
+        if (mp_isodd(&k)) {
+            out[i] = MP_DIGIT(&k, 0) & mask;
+            if (out[i] >= twowm1)
+                out[i] -= 2 * twowm1; /* map [2^(w-1), 2^w) onto the negative range */
+
+            /* Subtract off out[i].  Note mp_sub_d only works with
+             * unsigned digits */
+            if (out[i] >= 0) {
+                MP_CHECKOK(mp_sub_d(&k, out[i], &k));
+            } else {
+                MP_CHECKOK(mp_add_d(&k, -(out[i]), &k));
+            }
+        } else {
+            out[i] = 0;
+        }
+        MP_CHECKOK(mp_div_2(&k, &k));
+        i++;
+    }
+    /* Zero out the remaining elements of the out array. */
+    for (; i < bitsize + 1; i++) {
+        out[i] = 0;
+    }
+CLEANUP:
+    mp_clear(&k);
+    return res;
+}
diff --git a/security/nss/lib/freebl/ecl/ecl-curve.h b/security/nss/lib/freebl/ecl/ecl-curve.h
new file mode 100644
index 000000000..df061396c
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl-curve.h
@@ -0,0 +1,123 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecl-exp.h"
+#include <stdlib.h>
+
+#ifndef __ecl_curve_h_
+#define __ecl_curve_h_
+
+/* copied from certt.h; used below as the usage field of each ECCurveParams */
+#define KU_DIGITAL_SIGNATURE (0x80) /* bit 0 */
+#define KU_KEY_AGREEMENT (0x08)     /* bit 4 */
+
+static const ECCurveParams ecCurve_NIST_P256 = {
+    "NIST-P256", ECField_GFp, 256, /* text, field, size */
+    "FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF", /* irr */
+    "FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFC", /* curvea */
+    "5AC635D8AA3A93E7B3EBBD55769886BC651D06B0CC53B0F63BCE3C3E27D2604B", /* curveb */
+    "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296", /* genx */
+    "4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5", /* geny */
+    "FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC632551", /* order */
+    1, 128, 65, KU_DIGITAL_SIGNATURE | KU_KEY_AGREEMENT /* cofactor, security, pointSize, usage */
+};
+
+static const ECCurveParams ecCurve_NIST_P384 = {
+    "NIST-P384", ECField_GFp, 384, /* text, field, size */
+    "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFF0000000000000000FFFFFFFF", /* irr */
+    "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFF0000000000000000FFFFFFFC", /* curvea */
+    "B3312FA7E23EE7E4988E056BE3F82D19181D9C6EFE8141120314088F5013875AC656398D8A2ED19D2A85C8EDD3EC2AEF", /* curveb */
+    "AA87CA22BE8B05378EB1C71EF320AD746E1D3B628BA79B9859F741E082542A385502F25DBF55296C3A545E3872760AB7", /* genx */
+    "3617DE4A96262C6F5D9E98BF9292DC29F8F41DBD289A147CE9DA3113B5F0B8C00A60B1CE1D7E819D7A431D7C90EA0E5F", /* geny */
+    "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC7634D81F4372DDF581A0DB248B0A77AECEC196ACCC52973", /* order */
+    1, 192, 97, KU_DIGITAL_SIGNATURE | KU_KEY_AGREEMENT /* cofactor, security, pointSize, usage */
+};
+
+static const ECCurveParams ecCurve_NIST_P521 = {
+    "NIST-P521", ECField_GFp, 521, /* text, field, size */
+    "01FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", /* irr */
+    "01FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC", /* curvea */
+    "0051953EB9618E1C9A1F929A21A0B68540EEA2DA725B99B315F3B8B489918EF109E156193951EC7E937B1652C0BD3BB1BF073573DF883D2C34F1EF451FD46B503F00", /* curveb */
+    "00C6858E06B70404E9CD9E3ECB662395B4429C648139053FB521F828AF606B4D3DBAA14B5E77EFE75928FE1DC127A2FFA8DE3348B3C1856A429BF97E7E31C2E5BD66", /* genx */
+    "011839296A789A3BC0045C8A5FB42C7D1BD998F54449579B446817AFBD17273E662C97EE72995EF42640C550B9013FAD0761353C7086A272C24088BE94769FD16650", /* geny */
+    "01FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFA51868783BF2F966B7FCC0148F709A5D03BB5C9B8899C47AEBB6FB71E91386409", /* order */
+    1, 256, 133, KU_DIGITAL_SIGNATURE | KU_KEY_AGREEMENT /* cofactor, security, pointSize, usage */
+};
+
+static const ECCurveParams ecCurve25519 = {
+    "Curve25519", ECField_GFp, 255, /* text, field, size */
+    "7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffed", /* irr */
+    "076D06", /* curvea */
+    "00", /* curveb */
+    "0900000000000000000000000000000000000000000000000000000000000000", /* genx -- NOTE(review): leading 09 suggests a different byte order from irr; confirm */
+    "20AE19A1B8A086B4E01EDD2C7748D14C923D4D7E6D7C61B229E9C5A27ECED3D9", /* geny */
+    "1000000000000000000000000000000014def9dea2f79cd65812631a5cf5d3ed", /* order */
+    8, 128, 32, KU_KEY_AGREEMENT /* cofactor, security, pointSize, usage (key agreement only) */
+};
+
+/* mapping between ECCurveName enum and pointers to ECCurveParams: one entry per enumerator, in enum order; NULL where no parameters are defined in this header */
+static const ECCurveParams *ecCurve_map[] = {
+    NULL,               /* ECCurve_noName */
+    NULL,               /* ECCurve_NIST_P192 */
+    NULL,               /* ECCurve_NIST_P224 */
+    &ecCurve_NIST_P256, /* ECCurve_NIST_P256 */
+    &ecCurve_NIST_P384, /* ECCurve_NIST_P384 */
+    &ecCurve_NIST_P521, /* ECCurve_NIST_P521 */
+    NULL,               /* ECCurve_NIST_K163 */
+    NULL,               /* ECCurve_NIST_B163 */
+    NULL,               /* ECCurve_NIST_K233 */
+    NULL,               /* ECCurve_NIST_B233 */
+    NULL,               /* ECCurve_NIST_K283 */
+    NULL,               /* ECCurve_NIST_B283 */
+    NULL,               /* ECCurve_NIST_K409 */
+    NULL,               /* ECCurve_NIST_B409 */
+    NULL,               /* ECCurve_NIST_K571 */
+    NULL,               /* ECCurve_NIST_B571 */
+    NULL,               /* ECCurve_X9_62_PRIME_192V2 */
+    NULL,               /* ECCurve_X9_62_PRIME_192V3 */
+    NULL,               /* ECCurve_X9_62_PRIME_239V1 */
+    NULL,               /* ECCurve_X9_62_PRIME_239V2 */
+    NULL,               /* ECCurve_X9_62_PRIME_239V3 */
+    NULL,               /* ECCurve_X9_62_CHAR2_PNB163V1 */
+    NULL,               /* ECCurve_X9_62_CHAR2_PNB163V2 */
+    NULL,               /* ECCurve_X9_62_CHAR2_PNB163V3 */
+    NULL,               /* ECCurve_X9_62_CHAR2_PNB176V1 */
+    NULL,               /* ECCurve_X9_62_CHAR2_TNB191V1 */
+    NULL,               /* ECCurve_X9_62_CHAR2_TNB191V2 */
+    NULL,               /* ECCurve_X9_62_CHAR2_TNB191V3 */
+    NULL,               /* ECCurve_X9_62_CHAR2_PNB208W1 */
+    NULL,               /* ECCurve_X9_62_CHAR2_TNB239V1 */
+    NULL,               /* ECCurve_X9_62_CHAR2_TNB239V2 */
+    NULL,               /* ECCurve_X9_62_CHAR2_TNB239V3 */
+    NULL,               /* ECCurve_X9_62_CHAR2_PNB272W1 */
+    NULL,               /* ECCurve_X9_62_CHAR2_PNB304W1 */
+    NULL,               /* ECCurve_X9_62_CHAR2_TNB359V1 */
+    NULL,               /* ECCurve_X9_62_CHAR2_PNB368W1 */
+    NULL,               /* ECCurve_X9_62_CHAR2_TNB431R1 */
+    NULL,               /* ECCurve_SECG_PRIME_112R1 */
+    NULL,               /* ECCurve_SECG_PRIME_112R2 */
+    NULL,               /* ECCurve_SECG_PRIME_128R1 */
+    NULL,               /* ECCurve_SECG_PRIME_128R2 */
+    NULL,               /* ECCurve_SECG_PRIME_160K1 */
+    NULL,               /* ECCurve_SECG_PRIME_160R1 */
+    NULL,               /* ECCurve_SECG_PRIME_160R2 */
+    NULL,               /* ECCurve_SECG_PRIME_192K1 */
+    NULL,               /* ECCurve_SECG_PRIME_224K1 */
+    NULL,               /* ECCurve_SECG_PRIME_256K1 */
+    NULL,               /* ECCurve_SECG_CHAR2_113R1 */
+    NULL,               /* ECCurve_SECG_CHAR2_113R2 */
+    NULL,               /* ECCurve_SECG_CHAR2_131R1 */
+    NULL,               /* ECCurve_SECG_CHAR2_131R2 */
+    NULL,               /* ECCurve_SECG_CHAR2_163R1 */
+    NULL,               /* ECCurve_SECG_CHAR2_193R1 */
+    NULL,               /* ECCurve_SECG_CHAR2_193R2 */
+    NULL,               /* ECCurve_SECG_CHAR2_239K1 */
+    NULL,               /* ECCurve_WTLS_1 */
+    NULL,               /* ECCurve_WTLS_8 */
+    NULL,               /* ECCurve_WTLS_9 */
+    &ecCurve25519,      /* ECCurve25519 */
+    NULL                /* ECCurve_pastLastCurve */
+};
+
+#endif
diff --git a/security/nss/lib/freebl/ecl/ecl-exp.h b/security/nss/lib/freebl/ecl/ecl-exp.h
new file mode 100644
index 000000000..44adb8a1c
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl-exp.h
@@ -0,0 +1,167 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __ecl_exp_h_
+#define __ecl_exp_h_
+
+/* Curve field type; stored in the field member of ECCurveParams */
+typedef enum {
+    ECField_GFp, /* GF(p) */
+    ECField_GF2m /* GF(2^m) */
+} ECField;
+
+/* Hexadecimal encoding of curve parameters; the tables in ecl-curve.h initialize these fields positionally, in this order */
+struct ECCurveParamsStr {
+    char *text;         /* curve name, e.g. "NIST-P256" */
+    ECField field;      /* field type */
+    unsigned int size;  /* e.g. 256 for NIST-P256 -- presumably the field size in bits; confirm */
+    char *irr;          /* hex string -- presumably the field prime or irreducible polynomial; confirm */
+    char *curvea;       /* hex string, curve coefficient a (per name) */
+    char *curveb;       /* hex string, curve coefficient b (per name) */
+    char *genx;         /* hex string, generator x (per name) */
+    char *geny;         /* hex string, generator y (per name) */
+    char *order;        /* hex string, order (per name) */
+    int cofactor;
+    int security;       /* NOTE(review): units not stated here; ecl-curve.h uses 128/192/256 */
+    int pointSize;      /* NOTE(review): ecl-curve.h uses 65/97/133/32 -- presumably an encoded point length in bytes; confirm */
+    unsigned int usage; /* KU_* bit mask in ecl-curve.h */
+};
+typedef struct ECCurveParamsStr ECCurveParams;
+
+/* Named curve parameters. ecCurve_map in ecl-curve.h lists one entry per enumerator in this same order, so the two must change together. */
+typedef enum {
+
+    ECCurve_noName = 0,
+
+    /* NIST prime curves */
+    ECCurve_NIST_P192, /* not supported */
+    ECCurve_NIST_P224, /* not supported */
+    ECCurve_NIST_P256,
+    ECCurve_NIST_P384,
+    ECCurve_NIST_P521,
+
+    /* NIST binary curves */
+    ECCurve_NIST_K163, /* not supported */
+    ECCurve_NIST_B163, /* not supported */
+    ECCurve_NIST_K233, /* not supported */
+    ECCurve_NIST_B233, /* not supported */
+    ECCurve_NIST_K283, /* not supported */
+    ECCurve_NIST_B283, /* not supported */
+    ECCurve_NIST_K409, /* not supported */
+    ECCurve_NIST_B409, /* not supported */
+    ECCurve_NIST_K571, /* not supported */
+    ECCurve_NIST_B571, /* not supported */
+
+    /* ANSI X9.62 prime curves */
+    /* ECCurve_X9_62_PRIME_192V1 == ECCurve_NIST_P192 */
+    ECCurve_X9_62_PRIME_192V2, /* not supported */
+    ECCurve_X9_62_PRIME_192V3, /* not supported */
+    ECCurve_X9_62_PRIME_239V1, /* not supported */
+    ECCurve_X9_62_PRIME_239V2, /* not supported */
+    ECCurve_X9_62_PRIME_239V3, /* not supported */
+    /* ECCurve_X9_62_PRIME_256V1 == ECCurve_NIST_P256 */
+
+    /* ANSI X9.62 binary curves */
+    ECCurve_X9_62_CHAR2_PNB163V1, /* not supported */
+    ECCurve_X9_62_CHAR2_PNB163V2, /* not supported */
+    ECCurve_X9_62_CHAR2_PNB163V3, /* not supported */
+    ECCurve_X9_62_CHAR2_PNB176V1, /* not supported */
+    ECCurve_X9_62_CHAR2_TNB191V1, /* not supported */
+    ECCurve_X9_62_CHAR2_TNB191V2, /* not supported */
+    ECCurve_X9_62_CHAR2_TNB191V3, /* not supported */
+    ECCurve_X9_62_CHAR2_PNB208W1, /* not supported */
+    ECCurve_X9_62_CHAR2_TNB239V1, /* not supported */
+    ECCurve_X9_62_CHAR2_TNB239V2, /* not supported */
+    ECCurve_X9_62_CHAR2_TNB239V3, /* not supported */
+    ECCurve_X9_62_CHAR2_PNB272W1, /* not supported */
+    ECCurve_X9_62_CHAR2_PNB304W1, /* not supported */
+    ECCurve_X9_62_CHAR2_TNB359V1, /* not supported */
+    ECCurve_X9_62_CHAR2_PNB368W1, /* not supported */
+    ECCurve_X9_62_CHAR2_TNB431R1, /* not supported */
+
+    /* SEC2 prime curves */
+    ECCurve_SECG_PRIME_112R1, /* not supported */
+    ECCurve_SECG_PRIME_112R2, /* not supported */
+    ECCurve_SECG_PRIME_128R1, /* not supported */
+    ECCurve_SECG_PRIME_128R2, /* not supported */
+    ECCurve_SECG_PRIME_160K1, /* not supported */
+    ECCurve_SECG_PRIME_160R1, /* not supported */
+    ECCurve_SECG_PRIME_160R2, /* not supported */
+    ECCurve_SECG_PRIME_192K1, /* not supported */
+    /* ECCurve_SECG_PRIME_192R1 == ECCurve_NIST_P192 */
+    ECCurve_SECG_PRIME_224K1, /* not supported */
+    /* ECCurve_SECG_PRIME_224R1 == ECCurve_NIST_P224 */
+    ECCurve_SECG_PRIME_256K1, /* not supported */
+    /* ECCurve_SECG_PRIME_256R1 == ECCurve_NIST_P256 */
+    /* ECCurve_SECG_PRIME_384R1 == ECCurve_NIST_P384 */
+    /* ECCurve_SECG_PRIME_521R1 == ECCurve_NIST_P521 */
+
+    /* SEC2 binary curves */
+    ECCurve_SECG_CHAR2_113R1, /* not supported */
+    ECCurve_SECG_CHAR2_113R2, /* not supported */
+    ECCurve_SECG_CHAR2_131R1, /* not supported */
+    ECCurve_SECG_CHAR2_131R2, /* not supported */
+    /* ECCurve_SECG_CHAR2_163K1 == ECCurve_NIST_K163 */
+    ECCurve_SECG_CHAR2_163R1, /* not supported */
+    /* ECCurve_SECG_CHAR2_163R2 == ECCurve_NIST_B163 */
+    ECCurve_SECG_CHAR2_193R1, /* not supported */
+    ECCurve_SECG_CHAR2_193R2, /* not supported */
+    /* ECCurve_SECG_CHAR2_233K1 == ECCurve_NIST_K233 */
+    /* ECCurve_SECG_CHAR2_233R1 == ECCurve_NIST_B233 */
+    ECCurve_SECG_CHAR2_239K1, /* not supported */
+    /* ECCurve_SECG_CHAR2_283K1 == ECCurve_NIST_K283 */
+    /* ECCurve_SECG_CHAR2_283R1 == ECCurve_NIST_B283 */
+    /* ECCurve_SECG_CHAR2_409K1 == ECCurve_NIST_K409 */
+    /* ECCurve_SECG_CHAR2_409R1 == ECCurve_NIST_B409 */
+    /* ECCurve_SECG_CHAR2_571K1 == ECCurve_NIST_K571 */
+    /* ECCurve_SECG_CHAR2_571R1 == ECCurve_NIST_B571 */
+
+    /* WTLS curves */
+    ECCurve_WTLS_1, /* not supported */
+    /* there is no WTLS 2 curve */
+    /* ECCurve_WTLS_3 == ECCurve_NIST_K163 */
+    /* ECCurve_WTLS_4 == ECCurve_SECG_CHAR2_113R1 */
+    /* ECCurve_WTLS_5 == ECCurve_X9_62_CHAR2_PNB163V1 */
+    /* ECCurve_WTLS_6 == ECCurve_SECG_PRIME_112R1 */
+    /* ECCurve_WTLS_7 == ECCurve_SECG_PRIME_160R1 */
+    ECCurve_WTLS_8, /* not supported */
+    ECCurve_WTLS_9, /* not supported */
+    /* ECCurve_WTLS_10 == ECCurve_NIST_K233 */
+    /* ECCurve_WTLS_11 == ECCurve_NIST_B233 */
+    /* ECCurve_WTLS_12 == ECCurve_NIST_P224 */
+
+    ECCurve25519,
+
+    ECCurve_pastLastCurve /* one past the last real curve; keep it last */
+} ECCurveName;
+
+/* Aliased named curves: alternative standard names that expand to ECCurveName enumerators */
+
+#define ECCurve_X9_62_PRIME_192V1 ECCurve_NIST_P192 /* not supported */
+#define ECCurve_X9_62_PRIME_256V1 ECCurve_NIST_P256
+#define ECCurve_SECG_PRIME_192R1 ECCurve_NIST_P192 /* not supported */
+#define ECCurve_SECG_PRIME_224R1 ECCurve_NIST_P224 /* not supported */
+#define ECCurve_SECG_PRIME_256R1 ECCurve_NIST_P256
+#define ECCurve_SECG_PRIME_384R1 ECCurve_NIST_P384
+#define ECCurve_SECG_PRIME_521R1 ECCurve_NIST_P521
+#define ECCurve_SECG_CHAR2_163K1 ECCurve_NIST_K163  /* not supported */
+#define ECCurve_SECG_CHAR2_163R2 ECCurve_NIST_B163  /* not supported */
+#define ECCurve_SECG_CHAR2_233K1 ECCurve_NIST_K233  /* not supported */
+#define ECCurve_SECG_CHAR2_233R1 ECCurve_NIST_B233  /* not supported */
+#define ECCurve_SECG_CHAR2_283K1 ECCurve_NIST_K283  /* not supported */
+#define ECCurve_SECG_CHAR2_283R1 ECCurve_NIST_B283  /* not supported */
+#define ECCurve_SECG_CHAR2_409K1 ECCurve_NIST_K409  /* not supported */
+#define ECCurve_SECG_CHAR2_409R1 ECCurve_NIST_B409  /* not supported */
+#define ECCurve_SECG_CHAR2_571K1 ECCurve_NIST_K571  /* not supported */
+#define ECCurve_SECG_CHAR2_571R1 ECCurve_NIST_B571  /* not supported */
+#define ECCurve_WTLS_3 ECCurve_NIST_K163            /* not supported */
+#define ECCurve_WTLS_4 ECCurve_SECG_CHAR2_113R1     /* not supported */
+#define ECCurve_WTLS_5 ECCurve_X9_62_CHAR2_PNB163V1 /* not supported */
+#define ECCurve_WTLS_6 ECCurve_SECG_PRIME_112R1     /* not supported */
+#define ECCurve_WTLS_7 ECCurve_SECG_PRIME_160R1     /* not supported */
+#define ECCurve_WTLS_10 ECCurve_NIST_K233           /* not supported */
+#define ECCurve_WTLS_11 ECCurve_NIST_B233           /* not supported */
+#define ECCurve_WTLS_12 ECCurve_NIST_P224           /* not supported */
+
+#endif /* __ecl_exp_h_ */
diff --git a/security/nss/lib/freebl/ecl/ecl-priv.h b/security/nss/lib/freebl/ecl/ecl-priv.h
new file mode 100644
index 000000000..f43f19327
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl-priv.h
@@ -0,0 +1,257 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __ecl_priv_h_
+#define __ecl_priv_h_
+
+#include "ecl.h"
+#include "mpi.h"
+#include "mplogic.h"
+#include "../blapii.h"
+
+/* ECL_MAX_FIELD_SIZE_DIGITS (defined below) is the maximum size of field
+ * element supported. The word-size switch that follows needs to go away... */
+#if defined(MP_USE_LONG_LONG_DIGIT) || defined(MP_USE_LONG_DIGIT)
+#define ECL_SIXTY_FOUR_BIT
+#else
+#define ECL_THIRTY_TWO_BIT
+#endif
+/* Number of mp_digits needed to hold curve_size_in_bits bits, rounded up. */
+#define ECL_CURVE_DIGITS(curve_size_in_bits) \
+    (((curve_size_in_bits) + (sizeof(mp_digit) * 8 - 1)) / (sizeof(mp_digit) * 8))
+#define ECL_BITS (sizeof(mp_digit) * 8) /* bits per mp_digit (8 per byte) */
+#define ECL_MAX_FIELD_SIZE_DIGITS (80 / sizeof(mp_digit)) /* 80 bytes' worth of mp_digits */
+
+/* Gets the i'th bit in the binary representation of a, or 0 if i >=
+ * length(a) (unlike mpl_get_bit, which causes an error in that case). The
+ * expansion is fully parenthesized; a and i may be evaluated more than once. */
+#define MP_GET_BIT(a, i) \
+    (((i) >= mpl_significant_bits((a))) ? 0 : mpl_get_bit((a), (i)))
+
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+#define MP_ADD_CARRY(a1, a2, s, carry)        \
+    {                                         \
+        mp_word w;                            \
+        w = ((mp_word)(carry)) + (a1) + (a2); \
+        s = ACCUM(w);                         \
+        carry = CARRYOUT(w);                  \
+    }
+
+#define MP_SUB_BORROW(a1, a2, s, borrow)       \
+    {                                          \
+        mp_word w;                             \
+        w = ((mp_word)(a1)) - (a2) - (borrow); \
+        s = ACCUM(w);                          \
+        borrow = (w >> MP_DIGIT_BIT) & 1;      \
+    }
+
+#else
+/* NOTE: digit-only variants (no mp_word arithmetic). They compute
+ * s = a1 + a2 + carry and s = a1 - a2 - borrow in mp_digit arithmetic.
+ * carry and borrow are both read and written, and a1 or a2 and s could be
+ * the same variable, so inputs are read before their outputs are stored.
+ * s, carry and borrow must be side-effect-free lvalues. */
+#define MP_ADD_CARRY(a1, a2, s, carry)           \
+    {                                            \
+        mp_digit tmp, sum;                       \
+        tmp = (a1);                              \
+        sum = tmp + (a2);                        \
+        tmp = (sum < tmp); /* detect overflow */ \
+        s = sum += (carry);                      \
+        carry = tmp + (sum < (carry));           \
+    }
+
+#define MP_SUB_BORROW(a1, a2, s, borrow)     \
+    {                                        \
+        mp_digit tmp;                        \
+        tmp = (a1);                          \
+        s = tmp - (a2);                      \
+        tmp = (s > tmp); /* detect borrow */ \
+        if ((borrow) && !s--)                \
+            tmp++;                           \
+        borrow = tmp;                        \
+    }
+#endif
+
+struct GFMethodStr;
+typedef struct GFMethodStr GFMethod;
+struct GFMethodStr {
+    /* MP_YES if the structure was constructed from dynamic memory (as set
+     * by GFMethod_new), as opposed to statically created. */
+    int constructed;
+    /* Irreducible that defines the field. For prime fields, this is the
+     * prime p. For binary polynomial fields, this is the bitstring
+     * representation of the irreducible polynomial. */
+    mp_int irr;
+    /* For prime fields, the value irr_arr[0] is the number of bits in the
+     * field. For binary polynomial fields, the irreducible polynomial
+     * f(t) is stored as its exponents p[] = irr_arr[], so that f(t) is
+     * of the form: f(t) = t^p[0] + t^p[1] + ... + t^p[4] where m = p[0]
+     * > p[1] > ... > p[4] = 0. */
+    unsigned int irr_arr[5];
+    /* Field arithmetic methods. All methods (except field_enc and
+     * field_dec) are assumed to take field-encoded parameters and return
+     * field-encoded values. All methods (except field_enc and field_dec)
+     * are required to be implemented. */
+    mp_err (*field_add)(const mp_int *a, const mp_int *b, mp_int *r,
+                        const GFMethod *meth);
+    mp_err (*field_neg)(const mp_int *a, mp_int *r, const GFMethod *meth);
+    mp_err (*field_sub)(const mp_int *a, const mp_int *b, mp_int *r,
+                        const GFMethod *meth);
+    mp_err (*field_mod)(const mp_int *a, mp_int *r, const GFMethod *meth);
+    mp_err (*field_mul)(const mp_int *a, const mp_int *b, mp_int *r,
+                        const GFMethod *meth);
+    mp_err (*field_sqr)(const mp_int *a, mp_int *r, const GFMethod *meth);
+    mp_err (*field_div)(const mp_int *a, const mp_int *b, mp_int *r,
+                        const GFMethod *meth);
+    mp_err (*field_enc)(const mp_int *a, mp_int *r, const GFMethod *meth);
+    mp_err (*field_dec)(const mp_int *a, mp_int *r, const GFMethod *meth);
+    /* Extra storage for implementation-specific data.  Any memory
+     * allocated to these extra fields will be cleared by extra_free. */
+    void *extra1;
+    void *extra2;
+    void (*extra_free)(GFMethod *meth);
+};
+
+/* Construct generic GFMethods. */
+GFMethod *GFMethod_consGFp(const mp_int *irr);
+GFMethod *GFMethod_consGFp_mont(const mp_int *irr);
+
+/* Free the memory allocated (if any) to a GFMethod object. */
+void GFMethod_free(GFMethod *meth);
+
+struct ECGroupStr {
+    /* MP_YES if the structure was constructed from dynamic memory (as set
+     * by ECGroup_new); when MP_NO, ECGroup_free only releases meth. */
+    int constructed;
+    /* Field definition and arithmetic. */
+    GFMethod *meth;
+    /* Textual representation of curve name, if any (freed by ECGroup_free). */
+    char *text;
+    /* Curve parameters, field-encoded. */
+    mp_int curvea, curveb;
+    /* x and y coordinates of the base point, field-encoded. */
+    mp_int genx, geny;
+    /* Order and cofactor of the base point. */
+    mp_int order;
+    int cofactor;
+    /* Point arithmetic methods. All methods are assumed to take
+     * field-encoded parameters and return field-encoded values. All
+     * methods (except base_point_mul and points_mul) are required to be
+     * implemented. */
+    mp_err (*point_add)(const mp_int *px, const mp_int *py,
+                        const mp_int *qx, const mp_int *qy, mp_int *rx,
+                        mp_int *ry, const ECGroup *group);
+    mp_err (*point_sub)(const mp_int *px, const mp_int *py,
+                        const mp_int *qx, const mp_int *qy, mp_int *rx,
+                        mp_int *ry, const ECGroup *group);
+    mp_err (*point_dbl)(const mp_int *px, const mp_int *py, mp_int *rx,
+                        mp_int *ry, const ECGroup *group);
+    mp_err (*point_mul)(const mp_int *n, const mp_int *px,
+                        const mp_int *py, mp_int *rx, mp_int *ry,
+                        const ECGroup *group);
+    mp_err (*base_point_mul)(const mp_int *n, mp_int *rx, mp_int *ry,
+                             const ECGroup *group);
+    mp_err (*points_mul)(const mp_int *k1, const mp_int *k2,
+                         const mp_int *px, const mp_int *py, mp_int *rx,
+                         mp_int *ry, const ECGroup *group);
+    mp_err (*validate_point)(const mp_int *px, const mp_int *py, const ECGroup *group);
+    /* Extra storage for implementation-specific data.  Any memory
+     * allocated to these extra fields will be cleared by extra_free. */
+    void *extra1;
+    void *extra2;
+    void (*extra_free)(ECGroup *group);
+};
+
+/* Wrapper functions for generic prime field arithmetic. */
+mp_err ec_GFp_add(const mp_int *a, const mp_int *b, mp_int *r,
+                  const GFMethod *meth);
+mp_err ec_GFp_neg(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GFp_sub(const mp_int *a, const mp_int *b, mp_int *r,
+                  const GFMethod *meth);
+
+/* Fixed-length in-line adds and subtracts. The suffix is the length in words. */
+mp_err ec_GFp_add_3(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth);
+mp_err ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth);
+mp_err ec_GFp_add_5(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth);
+mp_err ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth);
+mp_err ec_GFp_sub_3(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth);
+mp_err ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth);
+mp_err ec_GFp_sub_5(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth);
+mp_err ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth);
+
+mp_err ec_GFp_mod(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GFp_mul(const mp_int *a, const mp_int *b, mp_int *r,
+                  const GFMethod *meth);
+mp_err ec_GFp_sqr(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GFp_div(const mp_int *a, const mp_int *b, mp_int *r,
+                  const GFMethod *meth);
+/* Wrapper functions for generic binary polynomial field arithmetic. */
+mp_err ec_GF2m_add(const mp_int *a, const mp_int *b, mp_int *r,
+                   const GFMethod *meth);
+mp_err ec_GF2m_neg(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GF2m_mod(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GF2m_mul(const mp_int *a, const mp_int *b, mp_int *r,
+                   const GFMethod *meth);
+mp_err ec_GF2m_sqr(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GF2m_div(const mp_int *a, const mp_int *b, mp_int *r,
+                   const GFMethod *meth);
+
+/* Montgomery prime field arithmetic. */
+mp_err ec_GFp_mul_mont(const mp_int *a, const mp_int *b, mp_int *r,
+                       const GFMethod *meth);
+mp_err ec_GFp_sqr_mont(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GFp_div_mont(const mp_int *a, const mp_int *b, mp_int *r,
+                       const GFMethod *meth);
+mp_err ec_GFp_enc_mont(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GFp_dec_mont(const mp_int *a, mp_int *r, const GFMethod *meth);
+void ec_GFp_extra_free_mont(GFMethod *meth);
+
+/* point multiplication */
+mp_err ec_pts_mul_basic(const mp_int *k1, const mp_int *k2,
+                        const mp_int *px, const mp_int *py, mp_int *rx,
+                        mp_int *ry, const ECGroup *group);
+mp_err ec_pts_mul_simul_w2(const mp_int *k1, const mp_int *k2,
+                           const mp_int *px, const mp_int *py, mp_int *rx,
+                           mp_int *ry, const ECGroup *group);
+
+/* Computes the windowed non-adjacent-form (NAF) of a scalar. Out should
+ * be an array of signed char's to output to, bitsize should be the number
+ * of bits of out, in is the original scalar, and w is the window size.
+ * NAF is discussed in the paper: D. Hankerson, J. Hernandez and A.
+ * Menezes, "Software implementation of elliptic curve cryptography over
+ * binary fields", Proc. CHES 2000. */
+mp_err ec_compute_wNAF(signed char *out, int bitsize, const mp_int *in,
+                       int w);
+
+/* Optimized field arithmetic (per-curve group setup routines) */
+mp_err ec_group_set_gfp192(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_gfp224(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_gfp256(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_gfp384(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_gfp521(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_gf2m163(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_gf2m193(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_gf2m233(ECGroup *group, ECCurveName name);
+
+/* Optimized point multiplication */
+mp_err ec_group_set_gfp256_32(ECGroup *group, ECCurveName name);
+
+/* Optimized floating-point arithmetic */
+#ifdef ECL_USE_FP
+mp_err ec_group_set_secp160r1_fp(ECGroup *group);
+mp_err ec_group_set_nistp192_fp(ECGroup *group);
+mp_err ec_group_set_nistp224_fp(ECGroup *group);
+#endif
+
+SECStatus ec_Curve25519_mul(PRUint8 *q, const PRUint8 *s, const PRUint8 *p);
+#endif /* __ecl_priv_h_ */
diff --git a/security/nss/lib/freebl/ecl/ecl.c b/security/nss/lib/freebl/ecl/ecl.c
new file mode 100644
index 000000000..3540af781
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl.c
@@ -0,0 +1,301 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+#include "mplogic.h"
+#include "ecl.h"
+#include "ecl-priv.h"
+#include "ecp.h"
+#include <stdlib.h>
+#include <string.h>
+
+/* Allocate a new ECGroup; returns NULL if allocation or mp_init fails. */
+ECGroup *
+ECGroup_new()
+{
+    mp_err res = MP_OKAY;
+    ECGroup *grp = malloc(sizeof *grp);
+    if (!grp) {
+        return NULL;
+    }
+    /* Zero every digit pointer before any mp_init can fail and reach CLEANUP. */
+    MP_DIGITS(&grp->order) = 0;
+    MP_DIGITS(&grp->geny) = 0;
+    MP_DIGITS(&grp->genx) = 0;
+    MP_DIGITS(&grp->curveb) = 0;
+    MP_DIGITS(&grp->curvea) = 0;
+    grp->constructed = MP_YES;
+    grp->meth = NULL;
+    grp->text = NULL;
+    grp->extra1 = NULL;
+    grp->extra2 = NULL;
+    grp->extra_free = NULL;
+    grp->base_point_mul = NULL;
+    grp->points_mul = NULL;
+    grp->validate_point = NULL;
+    MP_CHECKOK(mp_init(&grp->curvea));
+    MP_CHECKOK(mp_init(&grp->curveb));
+    MP_CHECKOK(mp_init(&grp->genx));
+    MP_CHECKOK(mp_init(&grp->geny));
+    MP_CHECKOK(mp_init(&grp->order));
+
+CLEANUP:
+    if (res == MP_OKAY)
+        return grp;
+    ECGroup_free(grp);
+    return NULL;
+}
+
+/* Build a generic ECGroup for an elliptic curve over the prime field
+ * defined by irr. Returns NULL if any step fails. */
+ECGroup *
+ECGroup_consGFp(const mp_int *irr, const mp_int *curvea,
+                const mp_int *curveb, const mp_int *genx,
+                const mp_int *geny, const mp_int *order, int cofactor)
+{
+    mp_err res = MP_OKAY;
+    ECGroup *grp = ECGroup_new();
+
+    if (!grp)
+        return NULL;
+    grp->meth = GFMethod_consGFp(irr);
+    if (!grp->meth) {
+        res = MP_MEM;
+        goto CLEANUP;
+    }
+    /* Copy the curve parameters and the order into the group. */
+    MP_CHECKOK(mp_copy(curvea, &grp->curvea));
+    MP_CHECKOK(mp_copy(curveb, &grp->curveb));
+    MP_CHECKOK(mp_copy(genx, &grp->genx));
+    MP_CHECKOK(mp_copy(geny, &grp->geny));
+    MP_CHECKOK(mp_copy(order, &grp->order));
+    /* Install the generic point routines; base_point_mul is left unset. */
+    grp->validate_point = &ec_GFp_validate_point;
+    grp->points_mul = &ec_GFp_pts_mul_jac;
+    grp->base_point_mul = NULL;
+    grp->point_mul = &ec_GFp_pt_mul_jm_wNAF;
+    grp->point_dbl = &ec_GFp_pt_dbl_aff;
+    grp->point_sub = &ec_GFp_pt_sub_aff;
+    grp->point_add = &ec_GFp_pt_add_aff;
+    grp->cofactor = cofactor;
+
+CLEANUP:
+    if (res == MP_OKAY)
+        return grp;
+    ECGroup_free(grp);
+    return NULL;
+}
+
+/* Build a generic ECGroup for an elliptic curve over a prime field whose
+ * field arithmetic is implemented in Montgomery coordinates. Returns NULL
+ * if any step fails. */
+ECGroup *
+ECGroup_consGFp_mont(const mp_int *irr, const mp_int *curvea,
+                     const mp_int *curveb, const mp_int *genx,
+                     const mp_int *geny, const mp_int *order, int cofactor)
+{
+    mp_err res = MP_OKAY;
+    ECGroup *grp = ECGroup_new();
+
+    if (!grp)
+        return NULL;
+    grp->meth = GFMethod_consGFp_mont(irr);
+    if (!grp->meth) {
+        res = MP_MEM;
+        goto CLEANUP;
+    }
+    /* Coefficients and base point are stored field-encoded; order is copied. */
+    MP_CHECKOK(grp->meth->field_enc(curvea, &grp->curvea, grp->meth));
+    MP_CHECKOK(grp->meth->field_enc(curveb, &grp->curveb, grp->meth));
+    MP_CHECKOK(grp->meth->field_enc(genx, &grp->genx, grp->meth));
+    MP_CHECKOK(grp->meth->field_enc(geny, &grp->geny, grp->meth));
+    MP_CHECKOK(mp_copy(order, &grp->order));
+    /* Install the generic point routines; base_point_mul is left unset. */
+    grp->validate_point = &ec_GFp_validate_point;
+    grp->points_mul = &ec_GFp_pts_mul_jac;
+    grp->base_point_mul = NULL;
+    grp->point_mul = &ec_GFp_pt_mul_jm_wNAF;
+    grp->point_dbl = &ec_GFp_pt_dbl_aff;
+    grp->point_sub = &ec_GFp_pt_sub_aff;
+    grp->point_add = &ec_GFp_pt_add_aff;
+    grp->cofactor = cofactor;
+
+CLEANUP:
+    if (res == MP_OKAY)
+        return grp;
+    ECGroup_free(grp);
+    return NULL;
+}
+
+/* Construct ECGroup from hex parameters and name, if any. Called by
+ * ECGroup_fromHex and ECGroup_fromName. Returns NULL on any failure. */
+ECGroup *
+ecgroup_fromNameAndHex(const ECCurveName name,
+                       const ECCurveParams *params)
+{
+    mp_int irr, curvea, curveb, genx, geny, order;
+    int bits;
+    ECGroup *group = NULL;
+    mp_err res = MP_OKAY;
+
+    /* zero the digit pointers first: CLEANUP clears all six on every path */
+    MP_DIGITS(&irr) = 0;
+    MP_DIGITS(&curvea) = 0;
+    MP_DIGITS(&curveb) = 0;
+    MP_DIGITS(&genx) = 0;
+    MP_DIGITS(&geny) = 0;
+    MP_DIGITS(&order) = 0;
+    MP_CHECKOK(mp_init(&irr));
+    MP_CHECKOK(mp_init(&curvea));
+    MP_CHECKOK(mp_init(&curveb));
+    MP_CHECKOK(mp_init(&genx));
+    MP_CHECKOK(mp_init(&geny));
+    MP_CHECKOK(mp_init(&order));
+    MP_CHECKOK(mp_read_radix(&irr, params->irr, 16));
+    MP_CHECKOK(mp_read_radix(&curvea, params->curvea, 16));
+    MP_CHECKOK(mp_read_radix(&curveb, params->curveb, 16));
+    MP_CHECKOK(mp_read_radix(&genx, params->genx, 16));
+    MP_CHECKOK(mp_read_radix(&geny, params->geny, 16));
+    MP_CHECKOK(mp_read_radix(&order, params->order, 16));
+
+    /* determine number of bits (only sanity-checked; not used further) */
+    bits = mpl_significant_bits(&irr) - 1;
+    if (bits < MP_OKAY) {
+        res = bits;
+        goto CLEANUP;
+    }
+
+    /* determine which optimizations (if any) to use; GFp fields only */
+    if (params->field == ECField_GFp) {
+        switch (name) {
+            case ECCurve_SECG_PRIME_256R1:
+                group =
+                    ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny,
+                                    &order, params->cofactor);
+                if (group == NULL) {
+                    res = MP_UNDEF;
+                    goto CLEANUP;
+                }
+                MP_CHECKOK(ec_group_set_gfp256(group, name));
+                MP_CHECKOK(ec_group_set_gfp256_32(group, name));
+                break;
+            case ECCurve_SECG_PRIME_521R1:
+                group =
+                    ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny,
+                                    &order, params->cofactor);
+                if (group == NULL) {
+                    res = MP_UNDEF;
+                    goto CLEANUP;
+                }
+                MP_CHECKOK(ec_group_set_gfp521(group, name));
+                break;
+            default:
+                /* use generic Montgomery arithmetic */
+                group =
+                    ECGroup_consGFp_mont(&irr, &curvea, &curveb, &genx, &geny,
+                                         &order, params->cofactor);
+                if (group == NULL) {
+                    res = MP_UNDEF;
+                    goto CLEANUP;
+                }
+        }
+    } else { /* any other field type is rejected */
+        res = MP_UNDEF;
+        goto CLEANUP;
+    }
+
+    /* set name, if any; the group gets its own strdup'd copy */
+    if ((group != NULL) && (params->text != NULL)) {
+        group->text = strdup(params->text);
+        if (group->text == NULL) {
+            res = MP_MEM;
+        }
+    }
+
+CLEANUP:
+    mp_clear(&irr);
+    mp_clear(&curvea);
+    mp_clear(&curveb);
+    mp_clear(&genx);
+    mp_clear(&geny);
+    mp_clear(&order);
+    if (res != MP_OKAY) {
+        ECGroup_free(group);
+        return NULL;
+    }
+    return group;
+}
+
+/* Construct ECGroup from hexadecimal parameters, passing ECCurve_noName. */
+ECGroup *
+ECGroup_fromHex(const ECCurveParams *params)
+{
+    return ecgroup_fromNameAndHex(ECCurve_noName, params);
+}
+
+/* Construct ECGroup from named parameters. Returns NULL when no
+ * parameters are available for the name or the group cannot be built. */
+ECGroup *
+ECGroup_fromName(const ECCurveName name)
+{
+    ECGroup *result = NULL;
+    ECCurveParams *curve;
+    int ok;
+
+    curve = EC_GetNamedCurveParams(name);
+    ok = (curve != NULL);
+
+    if (ok) {
+        /* construct actual group */
+        result = ecgroup_fromNameAndHex(name, curve);
+        ok = (result != NULL);
+    }
+
+    /* The temporary parameter copy is released on every path;
+     * EC_FreeCurveParams is a no-op for NULL. */
+    EC_FreeCurveParams(curve);
+    if (!ok) {
+        /* result is always NULL here, so this only mirrors the usual
+         * cleanup shape. */
+        ECGroup_free(result);
+        return NULL;
+    }
+    return result;
+}
+
+/* Validates an EC public key as described in Section 5.2.2 of X9.62.
+ * Returns MP_BADARG if group is NULL or has no validate_point method. */
+mp_err
+ECPoint_validate(const ECGroup *group, const mp_int *px, const mp_int *py)
+{
+    /* The validate_point method is expected to verify that publicValue:
+     * 1) is not the point at infinity, 2) has coordinates in the field,
+     * 3) is on the curve, and 4) times the curve order is infinity. */
+    if (group == NULL || group->validate_point == NULL) {
+        /* ECGroup_new leaves validate_point NULL until a method is set. */
+        return MP_BADARG;
+    }
+    return group->validate_point(px, py, group);
+}
+
+/* Release an ECGroup; a NULL group is ignored. */
+void
+ECGroup_free(ECGroup *group)
+{
+    if (!group)
+        return;
+    /* The field method is released before the constructed check. */
+    GFMethod_free(group->meth);
+    if (group->constructed != MP_NO) {
+        mp_clear(&group->curvea);
+        mp_clear(&group->curveb);
+        mp_clear(&group->genx);
+        mp_clear(&group->geny);
+        mp_clear(&group->order);
+        free(group->text); /* free(NULL) is a no-op */
+        if (group->extra_free)
+            group->extra_free(group);
+        free(group);
+    }
+}
diff --git a/security/nss/lib/freebl/ecl/ecl.h b/security/nss/lib/freebl/ecl/ecl.h
new file mode 100644
index 000000000..ddcbb1f3a
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl.h
@@ -0,0 +1,60 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Although this is not an exported header file, code which uses elliptic
+ * curve point operations will need to include it. */
+
+#ifndef __ecl_h_
+#define __ecl_h_
+
+#include "blapi.h"
+#include "ecl-exp.h"
+#include "mpi.h"
+
+struct ECGroupStr;
+typedef struct ECGroupStr ECGroup;
+
+/* Construct ECGroup from hexadecimal representations of parameters. */
+ECGroup *ECGroup_fromHex(const ECCurveParams *params);
+
+/* Construct ECGroup from named parameters. */
+ECGroup *ECGroup_fromName(const ECCurveName name);
+
+/* Free an allocated ECGroup. */
+void ECGroup_free(ECGroup *group);
+
+/* Construct ECCurveParams from an ECCurveName */
+ECCurveParams *EC_GetNamedCurveParams(const ECCurveName name);
+
+/* Duplicates an ECCurveParams */
+ECCurveParams *ECCurveParams_dup(const ECCurveParams *params);
+
+/* Free an allocated ECCurveParams */
+void EC_FreeCurveParams(ECCurveParams *params);
+
+/* Elliptic curve scalar-point multiplication. Computes Q(x, y) = k * P(x,
+ * y).  If px and py are NULL, then P is assumed to be the generator (base
+ * point) of the group of points on the elliptic curve. Input and output
+ * values are assumed to be NOT field-encoded. */
+mp_err ECPoint_mul(const ECGroup *group, const mp_int *k, const mp_int *px,
+                   const mp_int *py, mp_int *qx, mp_int *qy);
+
+/* Elliptic curve scalar-point multiplication. Computes Q(x, y) = k1 * G +
+ * k2 * P(x, y), where G is the generator (base point) of the group of
+ * points on the elliptic curve. Input and output values are assumed to
+ * be NOT field-encoded. */
+mp_err ECPoints_mul(const ECGroup *group, const mp_int *k1,
+                    const mp_int *k2, const mp_int *px, const mp_int *py,
+                    mp_int *qx, mp_int *qy);
+
+/* Validates an EC public key as described in Section 5.2.2 of X9.62.
+ * Returns MP_YES if the public key is valid, MP_NO if the public key
+ * is invalid, or an error code if the validation could not be
+ * performed. */
+mp_err ECPoint_validate(const ECGroup *group, const mp_int *px, const mp_int *py);
+
+SECStatus ec_Curve25519_pt_mul(SECItem *X, SECItem *k, SECItem *P);
+SECStatus ec_Curve25519_pt_validate(const SECItem *px);
+
+#endif /* __ecl_h_ */
diff --git a/security/nss/lib/freebl/ecl/ecl_curve.c b/security/nss/lib/freebl/ecl/ecl_curve.c
new file mode 100644
index 000000000..cf090cfc3
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl_curve.c
@@ -0,0 +1,93 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecl.h"
+#include "ecl-curve.h"
+#include "ecl-priv.h"
+#include <stdlib.h>
+#include <string.h>
+
+/* Fail fast: on a NULL result set res = 0 and jump to CLEANUP. */
+#define CHECK(func)           \
+    do {                      \
+        if ((func) == NULL) { \
+            res = 0;          \
+            goto CLEANUP;     \
+        }                     \
+    } while (0)
+
+/* Duplicates an ECCurveParams. Every non-NULL string member is copied with
+ * strdup; field, size and cofactor are copied by value. Returns NULL if
+ * params is NULL or any allocation fails; free with EC_FreeCurveParams. */
+ECCurveParams *
+ECCurveParams_dup(const ECCurveParams *params)
+{
+    int res = 1;
+    ECCurveParams *ret = NULL;
+
+    if (params == NULL)
+        return NULL;
+    CHECK(ret = (ECCurveParams *)calloc(1, sizeof(ECCurveParams)));
+    ret->field = params->field;
+    ret->size = params->size;
+    ret->cofactor = params->cofactor;
+    if (params->text != NULL)
+        CHECK(ret->text = strdup(params->text));
+    if (params->irr != NULL)
+        CHECK(ret->irr = strdup(params->irr));
+    if (params->curvea != NULL)
+        CHECK(ret->curvea = strdup(params->curvea));
+    if (params->curveb != NULL)
+        CHECK(ret->curveb = strdup(params->curveb));
+    if (params->genx != NULL)
+        CHECK(ret->genx = strdup(params->genx));
+    if (params->geny != NULL)
+        CHECK(ret->geny = strdup(params->geny));
+    if (params->order != NULL)
+        CHECK(ret->order = strdup(params->order));
+
+CLEANUP:
+    if (res != 1) {
+        EC_FreeCurveParams(ret);
+        return NULL;
+    }
+    return ret;
+}
+
+#undef CHECK
+
+/* Construct ECCurveParams from an ECCurveName (NULL if unavailable). */
+ECCurveParams *
+EC_GetNamedCurveParams(const ECCurveName name)
+{
+    /* Range-check the name before using it to index ecCurve_map. */
+    if ((ECCurve_noName < name) && (name < ECCurve_pastLastCurve) &&
+        (ecCurve_map[name] != NULL)) {
+        return ECCurveParams_dup(ecCurve_map[name]);
+    }
+    return NULL;
+}
+
+/* Free an ECCurveParams object together with every string member it
+ * holds. A NULL params is ignored. */
+void
+EC_FreeCurveParams(ECCurveParams *params)
+{
+    if (!params) {
+        return;
+    }
+
+    /* free() ignores NULL, so members that were never set need no
+     * individual checks. */
+    free(params->text);
+    free(params->irr);
+    free(params->curvea);
+    free(params->curveb);
+    free(params->genx);
+    free(params->geny);
+    free(params->order);
+
+    /* finally the container itself */
+    free(params);
+}
diff --git a/security/nss/lib/freebl/ecl/ecl_gf.c b/security/nss/lib/freebl/ecl/ecl_gf.c
new file mode 100644
index 000000000..81b007705
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl_gf.c
@@ -0,0 +1,958 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+#include "mp_gf2m.h"
+#include "ecl-priv.h"
+#include "mpi-priv.h"
+#include <stdlib.h>
+
+/* Allocate a GFMethod and bring it into a minimal valid state: flagged as
+ * constructed, no extra_free hook, and irr initialized through mp_init().
+ * Returns NULL if either the allocation or mp_init() fails. */
+GFMethod *
+GFMethod_new()
+{
+    GFMethod *gf = (GFMethod *)malloc(sizeof(*gf));
+
+    if (gf == NULL) {
+        return NULL;
+    }
+
+    /* Set every field GFMethod_free() inspects before the first call that
+     * can fail, so the failure path below can hand gf straight to it. */
+    gf->constructed = MP_YES;
+    gf->extra_free = NULL;
+    MP_DIGITS(&gf->irr) = 0;
+
+    if (mp_init(&gf->irr) != MP_OKAY) {
+        GFMethod_free(gf);
+        return NULL;
+    }
+    return gf;
+}
+
+/* Build a GFMethod for generic arithmetic over the prime field whose
+ * modulus is irr.  The modulus is copied into the method, irr_arr[0]
+ * records its bit length (irr_arr[1..4] are zeroed), and the add/sub
+ * routines are picked by the number of digits the copied modulus uses:
+ * 3 to 6 digits get the unrolled variants, anything else the generic ones.
+ * Returns NULL if GFMethod_new() or the copy fails. */
+GFMethod *
+GFMethod_consGFp(const mp_int *irr)
+{
+    GFMethod *gf = GFMethod_new();
+    int idx;
+
+    if (gf == NULL) {
+        return NULL;
+    }
+    if (mp_copy(irr, &gf->irr) != MP_OKAY) {
+        GFMethod_free(gf);
+        return NULL;
+    }
+
+    gf->irr_arr[0] = mpl_significant_bits(irr);
+    for (idx = 1; idx <= 4; idx++) {
+        gf->irr_arr[idx] = 0;
+    }
+
+    /* Start from the generic add/sub and override them when an unrolled
+     * variant exists for this digit count. */
+    gf->field_add = &ec_GFp_add;
+    gf->field_sub = &ec_GFp_sub;
+    switch (MP_USED(&gf->irr)) {
+        /* maybe we need 1 and 2 words here as well? */
+        case 3:
+            gf->field_add = &ec_GFp_add_3;
+            gf->field_sub = &ec_GFp_sub_3;
+            break;
+        case 4:
+            gf->field_add = &ec_GFp_add_4;
+            gf->field_sub = &ec_GFp_sub_4;
+            break;
+        case 5:
+            gf->field_add = &ec_GFp_add_5;
+            gf->field_sub = &ec_GFp_sub_5;
+            break;
+        case 6:
+            gf->field_add = &ec_GFp_add_6;
+            gf->field_sub = &ec_GFp_sub_6;
+            break;
+        default:
+            break;
+    }
+
+    /* The remaining operations do not depend on the modulus size. */
+    gf->field_neg = &ec_GFp_neg;
+    gf->field_mod = &ec_GFp_mod;
+    gf->field_mul = &ec_GFp_mul;
+    gf->field_sqr = &ec_GFp_sqr;
+    gf->field_div = &ec_GFp_div;
+
+    /* No field encoding and no extra state for the generic method. */
+    gf->field_enc = NULL;
+    gf->field_dec = NULL;
+    gf->extra1 = NULL;
+    gf->extra2 = NULL;
+    gf->extra_free = NULL;
+
+    return gf;
+}
+
+/* Destroy a GFMethod.  A NULL pointer is ignored, and so is a method whose
+ * constructed flag is MP_NO.  Otherwise irr is cleared, the extra_free
+ * hook (when set) is invoked on the method, and the object is freed. */
+void
+GFMethod_free(GFMethod *meth)
+{
+    if ((meth == NULL) || (meth->constructed == MP_NO)) {
+        return;
+    }
+
+    mp_clear(&meth->irr);
+    if (meth->extra_free != NULL) {
+        meth->extra_free(meth);
+    }
+    free(meth);
+}
+
+/* Wrapper functions for generic prime field arithmetic. */
+
+/* Adds two field elements: r = a + b, followed by a single subtraction of
+ * meth->irr when the sum is not below it.  Assumes 0 <= a, b < meth->irr.
+ * Returns the error of the failing MPI call, or MP_OKAY. */
+mp_err
+ec_GFp_add(const mp_int *a, const mp_int *b, mp_int *r,
+           const GFMethod *meth)
+{
+    /* PRE: 0 <= a, b < p = meth->irr POST: 0 <= r < p, r = a + b (mod p) */
+    mp_err res = mp_add(a, b, r);
+
+    /* Only look at r when the addition actually produced it. */
+    if ((res == MP_OKAY) && (mp_cmp(r, &meth->irr) >= 0)) {
+        res = mp_sub(r, &meth->irr, r);
+    }
+    return res;
+}
+
+/* Negates a field element: r = meth->irr - a, except that zero stays
+ * zero.  Assumes 0 <= a < meth->irr. */
+mp_err
+ec_GFp_neg(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    /* PRE: 0 <= a < p = meth->irr POST: 0 <= r < p, r = -a (mod p) */
+
+    if (mp_cmp_z(a) != 0) {
+        return mp_sub(&meth->irr, a, r);
+    }
+
+    /* Zero is special-cased so the result is 0 rather than meth->irr. */
+    mp_zero(r);
+    return MP_OKAY;
+}
+
+/* Subtracts two field elements: r = a - b, brought back into the field by
+ * adding meth->irr when the difference is negative.  Assumes that
+ * 0 <= a, b < meth->irr.
+ *
+ * Returns MP_OKAY on success, otherwise the error of the MPI call that
+ * failed.  A failure of the initial mp_sub() other than MP_RANGE is
+ * returned immediately, so that the later fix-up cannot act on an
+ * unusable r and replace the error with a success code. */
+mp_err
+ec_GFp_sub(const mp_int *a, const mp_int *b, mp_int *r,
+           const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+
+    /* PRE: 0 <= a, b < p = meth->irr POST: 0 <= r < p, r = a - b (mod p) */
+    res = mp_sub(a, b, r);
+    if (res == MP_RANGE) {
+        /* Retry with the operands swapped, then negate the result. */
+        MP_CHECKOK(mp_sub(b, a, r));
+        if (mp_cmp_z(r) < 0) {
+            MP_CHECKOK(mp_add(r, &meth->irr, r));
+        }
+        MP_CHECKOK(ec_GFp_neg(r, r, meth));
+    } else if (res != MP_OKAY) {
+        /* Any other error: r holds no valid difference, so stop here. */
+        goto CLEANUP;
+    }
+    if (mp_cmp_z(r) < 0) {
+        MP_CHECKOK(mp_add(r, &meth->irr, r));
+    }
+CLEANUP:
+    return res;
+}
+/*
+ * Inline adds for small curve lengths.
+ */
+/* 3 words.
+ * Unrolled modular addition: r = a + b, reduced by at most one
+ * subtraction of meth->irr.  Only the low MP_USED() digits of each operand
+ * are loaded (the switch cases fall through on purpose); an operand whose
+ * MP_USED() matches no case contributes zero.  The digit sums are formed
+ * with inline assembly when MPI_AMD64_ADD is defined and with the
+ * MP_ADD_CARRY / MP_SUB_BORROW macros otherwise.  r is padded to 3 digits,
+ * forced to MP_ZPOS and clamped.  Returns MP_OKAY or the s_mp_pad() error.
+ * NOTE(review): presumably expects 0 <= a, b < meth->irr and a 3-digit
+ * meth->irr, as GFMethod_consGFp arranges -- confirm against callers.
+ * NOTE(review): the asm outputs are plain "=r" (no early-clobber) even
+ * though %3 is zeroed before the inputs are read -- confirm the register
+ * allocator cannot make them overlap. */
+mp_err
+ec_GFp_add_3(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit a0 = 0, a1 = 0, a2 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0;
+    mp_digit carry;
+
+    switch (MP_USED(a)) {
+        case 3:
+            a2 = MP_DIGIT(a, 2);
+        case 2:
+            a1 = MP_DIGIT(a, 1);
+        case 1:
+            a0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) {
+        case 3:
+            r2 = MP_DIGIT(b, 2);
+        case 2:
+            r1 = MP_DIGIT(b, 1);
+        case 1:
+            r0 = MP_DIGIT(b, 0);
+    }
+
+#ifndef MPI_AMD64_ADD
+    carry = 0;
+    MP_ADD_CARRY(a0, r0, r0, carry);
+    MP_ADD_CARRY(a1, r1, r1, carry);
+    MP_ADD_CARRY(a2, r2, r2, carry);
+#else
+    __asm__(
+        "xorq   %3,%3           \n\t"
+        "addq   %4,%0           \n\t"
+        "adcq   %5,%1           \n\t"
+        "adcq   %6,%2           \n\t"
+        "adcq   $0,%3           \n\t"
+        : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry)
+        : "r"(a0), "r"(a1), "r"(a2),
+          "0"(r0), "1"(r1), "2"(r2)
+        : "%cc");
+#endif
+
+    MP_CHECKOK(s_mp_pad(r, 3));
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 3;
+
+    /* Do quick 'subtract' if we've gone over
+     * (add the 2's complement of the curve field).  "Gone over" means the
+     * addition carried out of the top digit, or r is not less than
+     * meth->irr; the top digits are compared first so that mp_cmp() is
+     * only called when they are equal. */
+    a2 = MP_DIGIT(&meth->irr, 2);
+    if (carry || r2 > a2 ||
+        ((r2 == a2) && mp_cmp(r, &meth->irr) != MP_LT)) {
+        a1 = MP_DIGIT(&meth->irr, 1);
+        a0 = MP_DIGIT(&meth->irr, 0);
+#ifndef MPI_AMD64_ADD
+        carry = 0;
+        MP_SUB_BORROW(r0, a0, r0, carry);
+        MP_SUB_BORROW(r1, a1, r1, carry);
+        MP_SUB_BORROW(r2, a2, r2, carry);
+#else
+        __asm__(
+            "subq   %3,%0           \n\t"
+            "sbbq   %4,%1           \n\t"
+            "sbbq   %5,%2           \n\t"
+            : "=r"(r0), "=r"(r1), "=r"(r2)
+            : "r"(a0), "r"(a1), "r"(a2),
+              "0"(r0), "1"(r1), "2"(r2)
+            : "%cc");
+#endif
+        MP_DIGIT(r, 2) = r2;
+        MP_DIGIT(r, 1) = r1;
+        MP_DIGIT(r, 0) = r0;
+    }
+
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 4 words.
+ * Unrolled modular addition: r = a + b, reduced by at most one
+ * subtraction of meth->irr.  Only the low MP_USED() digits of each operand
+ * are loaded (the switch cases fall through on purpose); an operand whose
+ * MP_USED() matches no case contributes zero.  The digit sums are formed
+ * with inline assembly when MPI_AMD64_ADD is defined and with the
+ * MP_ADD_CARRY / MP_SUB_BORROW macros otherwise.  r is padded to 4 digits,
+ * forced to MP_ZPOS and clamped.  Returns MP_OKAY or the s_mp_pad() error.
+ * NOTE(review): presumably expects 0 <= a, b < meth->irr and a 4-digit
+ * meth->irr, as GFMethod_consGFp arranges -- confirm against callers.
+ * NOTE(review): the asm outputs are plain "=r" (no early-clobber) even
+ * though %4 is zeroed before the inputs are read -- confirm the register
+ * allocator cannot make them overlap. */
+mp_err
+ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
+    mp_digit carry;
+
+    switch (MP_USED(a)) {
+        case 4:
+            a3 = MP_DIGIT(a, 3);
+        case 3:
+            a2 = MP_DIGIT(a, 2);
+        case 2:
+            a1 = MP_DIGIT(a, 1);
+        case 1:
+            a0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) {
+        case 4:
+            r3 = MP_DIGIT(b, 3);
+        case 3:
+            r2 = MP_DIGIT(b, 2);
+        case 2:
+            r1 = MP_DIGIT(b, 1);
+        case 1:
+            r0 = MP_DIGIT(b, 0);
+    }
+
+#ifndef MPI_AMD64_ADD
+    carry = 0;
+    MP_ADD_CARRY(a0, r0, r0, carry);
+    MP_ADD_CARRY(a1, r1, r1, carry);
+    MP_ADD_CARRY(a2, r2, r2, carry);
+    MP_ADD_CARRY(a3, r3, r3, carry);
+#else
+    __asm__(
+        "xorq   %4,%4           \n\t"
+        "addq   %5,%0           \n\t"
+        "adcq   %6,%1           \n\t"
+        "adcq   %7,%2           \n\t"
+        "adcq   %8,%3           \n\t"
+        "adcq   $0,%4           \n\t"
+        : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(carry)
+        : "r"(a0), "r"(a1), "r"(a2), "r"(a3),
+          "0"(r0), "1"(r1), "2"(r2), "3"(r3)
+        : "%cc");
+#endif
+
+    MP_CHECKOK(s_mp_pad(r, 4));
+    MP_DIGIT(r, 3) = r3;
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 4;
+
+    /* Do quick 'subtract' if we've gone over
+     * (add the 2's complement of the curve field).  "Gone over" means the
+     * addition carried out of the top digit, or r is not less than
+     * meth->irr; the top digits are compared first so that mp_cmp() is
+     * only called when they are equal. */
+    a3 = MP_DIGIT(&meth->irr, 3);
+    if (carry || r3 > a3 ||
+        ((r3 == a3) && mp_cmp(r, &meth->irr) != MP_LT)) {
+        a2 = MP_DIGIT(&meth->irr, 2);
+        a1 = MP_DIGIT(&meth->irr, 1);
+        a0 = MP_DIGIT(&meth->irr, 0);
+#ifndef MPI_AMD64_ADD
+        carry = 0;
+        MP_SUB_BORROW(r0, a0, r0, carry);
+        MP_SUB_BORROW(r1, a1, r1, carry);
+        MP_SUB_BORROW(r2, a2, r2, carry);
+        MP_SUB_BORROW(r3, a3, r3, carry);
+#else
+        __asm__(
+            "subq   %4,%0           \n\t"
+            "sbbq   %5,%1           \n\t"
+            "sbbq   %6,%2           \n\t"
+            "sbbq   %7,%3           \n\t"
+            : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
+            : "r"(a0), "r"(a1), "r"(a2), "r"(a3),
+              "0"(r0), "1"(r1), "2"(r2), "3"(r3)
+            : "%cc");
+#endif
+        MP_DIGIT(r, 3) = r3;
+        MP_DIGIT(r, 2) = r2;
+        MP_DIGIT(r, 1) = r1;
+        MP_DIGIT(r, 0) = r0;
+    }
+
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 5 words.
+ * Unrolled modular addition: r = a + b, reduced by at most one
+ * subtraction of meth->irr.  Only the low MP_USED() digits of each operand
+ * are loaded (the switch cases fall through on purpose); an operand whose
+ * MP_USED() matches no case contributes zero.  The digit sums are formed
+ * with the MP_ADD_CARRY / MP_SUB_BORROW macros; this size has no inline
+ * assembly path.  r is padded to 5 digits, forced to MP_ZPOS and clamped.
+ * Returns MP_OKAY or the s_mp_pad() error.
+ * NOTE(review): presumably expects 0 <= a, b < meth->irr and a 5-digit
+ * meth->irr, as GFMethod_consGFp arranges -- confirm against callers. */
+mp_err
+ec_GFp_add_5(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
+    mp_digit carry;
+
+    switch (MP_USED(a)) {
+        case 5:
+            a4 = MP_DIGIT(a, 4);
+        case 4:
+            a3 = MP_DIGIT(a, 3);
+        case 3:
+            a2 = MP_DIGIT(a, 2);
+        case 2:
+            a1 = MP_DIGIT(a, 1);
+        case 1:
+            a0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) {
+        case 5:
+            r4 = MP_DIGIT(b, 4);
+        case 4:
+            r3 = MP_DIGIT(b, 3);
+        case 3:
+            r2 = MP_DIGIT(b, 2);
+        case 2:
+            r1 = MP_DIGIT(b, 1);
+        case 1:
+            r0 = MP_DIGIT(b, 0);
+    }
+
+    carry = 0;
+    MP_ADD_CARRY(a0, r0, r0, carry);
+    MP_ADD_CARRY(a1, r1, r1, carry);
+    MP_ADD_CARRY(a2, r2, r2, carry);
+    MP_ADD_CARRY(a3, r3, r3, carry);
+    MP_ADD_CARRY(a4, r4, r4, carry);
+
+    MP_CHECKOK(s_mp_pad(r, 5));
+    MP_DIGIT(r, 4) = r4;
+    MP_DIGIT(r, 3) = r3;
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 5;
+
+    /* Do quick 'subtract' if we've gone over
+     * (add the 2's complement of the curve field).  "Gone over" means the
+     * addition carried out of the top digit, or r is not less than
+     * meth->irr; the top digits are compared first so that mp_cmp() is
+     * only called when they are equal. */
+    a4 = MP_DIGIT(&meth->irr, 4);
+    if (carry || r4 > a4 ||
+        ((r4 == a4) && mp_cmp(r, &meth->irr) != MP_LT)) {
+        a3 = MP_DIGIT(&meth->irr, 3);
+        a2 = MP_DIGIT(&meth->irr, 2);
+        a1 = MP_DIGIT(&meth->irr, 1);
+        a0 = MP_DIGIT(&meth->irr, 0);
+        carry = 0;
+        MP_SUB_BORROW(r0, a0, r0, carry);
+        MP_SUB_BORROW(r1, a1, r1, carry);
+        MP_SUB_BORROW(r2, a2, r2, carry);
+        MP_SUB_BORROW(r3, a3, r3, carry);
+        MP_SUB_BORROW(r4, a4, r4, carry);
+        MP_DIGIT(r, 4) = r4;
+        MP_DIGIT(r, 3) = r3;
+        MP_DIGIT(r, 2) = r2;
+        MP_DIGIT(r, 1) = r1;
+        MP_DIGIT(r, 0) = r0;
+    }
+
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 6 words.
+ * Unrolled modular addition: r = a + b, reduced by at most one
+ * subtraction of meth->irr.  Only the low MP_USED() digits of each operand
+ * are loaded (the switch cases fall through on purpose); an operand whose
+ * MP_USED() matches no case contributes zero.  The digit sums are formed
+ * with the MP_ADD_CARRY / MP_SUB_BORROW macros; this size has no inline
+ * assembly path.  r is padded to 6 digits, forced to MP_ZPOS and clamped.
+ * Returns MP_OKAY or the s_mp_pad() error.
+ * NOTE(review): presumably expects 0 <= a, b < meth->irr and a 6-digit
+ * meth->irr, as GFMethod_consGFp arranges -- confirm against callers. */
+mp_err
+ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0, a5 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
+    mp_digit carry;
+
+    switch (MP_USED(a)) {
+        case 6:
+            a5 = MP_DIGIT(a, 5);
+        case 5:
+            a4 = MP_DIGIT(a, 4);
+        case 4:
+            a3 = MP_DIGIT(a, 3);
+        case 3:
+            a2 = MP_DIGIT(a, 2);
+        case 2:
+            a1 = MP_DIGIT(a, 1);
+        case 1:
+            a0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) {
+        case 6:
+            r5 = MP_DIGIT(b, 5);
+        case 5:
+            r4 = MP_DIGIT(b, 4);
+        case 4:
+            r3 = MP_DIGIT(b, 3);
+        case 3:
+            r2 = MP_DIGIT(b, 2);
+        case 2:
+            r1 = MP_DIGIT(b, 1);
+        case 1:
+            r0 = MP_DIGIT(b, 0);
+    }
+
+    carry = 0;
+    MP_ADD_CARRY(a0, r0, r0, carry);
+    MP_ADD_CARRY(a1, r1, r1, carry);
+    MP_ADD_CARRY(a2, r2, r2, carry);
+    MP_ADD_CARRY(a3, r3, r3, carry);
+    MP_ADD_CARRY(a4, r4, r4, carry);
+    MP_ADD_CARRY(a5, r5, r5, carry);
+
+    MP_CHECKOK(s_mp_pad(r, 6));
+    MP_DIGIT(r, 5) = r5;
+    MP_DIGIT(r, 4) = r4;
+    MP_DIGIT(r, 3) = r3;
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 6;
+
+    /* Do quick 'subtract' if we've gone over
+     * (add the 2's complement of the curve field).  "Gone over" means the
+     * addition carried out of the top digit, or r is not less than
+     * meth->irr; the top digits are compared first so that mp_cmp() is
+     * only called when they are equal. */
+    a5 = MP_DIGIT(&meth->irr, 5);
+    if (carry || r5 > a5 ||
+        ((r5 == a5) && mp_cmp(r, &meth->irr) != MP_LT)) {
+        a4 = MP_DIGIT(&meth->irr, 4);
+        a3 = MP_DIGIT(&meth->irr, 3);
+        a2 = MP_DIGIT(&meth->irr, 2);
+        a1 = MP_DIGIT(&meth->irr, 1);
+        a0 = MP_DIGIT(&meth->irr, 0);
+        carry = 0;
+        MP_SUB_BORROW(r0, a0, r0, carry);
+        MP_SUB_BORROW(r1, a1, r1, carry);
+        MP_SUB_BORROW(r2, a2, r2, carry);
+        MP_SUB_BORROW(r3, a3, r3, carry);
+        MP_SUB_BORROW(r4, a4, r4, carry);
+        MP_SUB_BORROW(r5, a5, r5, carry);
+        MP_DIGIT(r, 5) = r5;
+        MP_DIGIT(r, 4) = r4;
+        MP_DIGIT(r, 3) = r3;
+        MP_DIGIT(r, 2) = r2;
+        MP_DIGIT(r, 1) = r1;
+        MP_DIGIT(r, 0) = r0;
+    }
+
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/*
+ * The following subtraction functions do in-line subtractions based
+ * on our curve size.
+ *
+ * ... 3 words
+ *
+ * Unrolled modular subtraction: r = a - b, with meth->irr added back once
+ * when the digit-wise subtraction borrows out of the top digit.  Only the
+ * low MP_USED() digits of each operand are loaded (the switch cases fall
+ * through on purpose); an operand whose MP_USED() matches no case
+ * contributes zero.  Inline assembly is used when MPI_AMD64_ADD is
+ * defined, the MP_SUB_BORROW / MP_ADD_CARRY macros otherwise.  r is padded
+ * to 3 digits, forced to MP_ZPOS and clamped.  Returns MP_OKAY or the
+ * s_mp_pad() error.
+ * NOTE(review): presumably expects 0 <= a, b < meth->irr and a 3-digit
+ * meth->irr, as GFMethod_consGFp arranges -- confirm against callers.
+ * NOTE(review): the asm outputs are plain "=r" (no early-clobber) even
+ * though %3 is zeroed before the inputs are read -- confirm the register
+ * allocator cannot make them overlap.
+ */
+mp_err
+ec_GFp_sub_3(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit b0 = 0, b1 = 0, b2 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0;
+    mp_digit borrow;
+
+    switch (MP_USED(a)) {
+        case 3:
+            r2 = MP_DIGIT(a, 2);
+        case 2:
+            r1 = MP_DIGIT(a, 1);
+        case 1:
+            r0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) {
+        case 3:
+            b2 = MP_DIGIT(b, 2);
+        case 2:
+            b1 = MP_DIGIT(b, 1);
+        case 1:
+            b0 = MP_DIGIT(b, 0);
+    }
+
+#ifndef MPI_AMD64_ADD
+    borrow = 0;
+    MP_SUB_BORROW(r0, b0, r0, borrow);
+    MP_SUB_BORROW(r1, b1, r1, borrow);
+    MP_SUB_BORROW(r2, b2, r2, borrow);
+#else
+    __asm__(
+        "xorq   %3,%3           \n\t"
+        "subq   %4,%0           \n\t"
+        "sbbq   %5,%1           \n\t"
+        "sbbq   %6,%2           \n\t"
+        "adcq   $0,%3           \n\t"
+        : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(borrow)
+        : "r"(b0), "r"(b1), "r"(b2),
+          "0"(r0), "1"(r1), "2"(r2)
+        : "%cc");
+#endif
+
+    /* Do quick 'add' if we've gone under 0
+     * (subtract the 2's complement of the curve field).  The carry out of
+     * this addition lands in borrow and is not read again. */
+    if (borrow) {
+        b2 = MP_DIGIT(&meth->irr, 2);
+        b1 = MP_DIGIT(&meth->irr, 1);
+        b0 = MP_DIGIT(&meth->irr, 0);
+#ifndef MPI_AMD64_ADD
+        borrow = 0;
+        MP_ADD_CARRY(b0, r0, r0, borrow);
+        MP_ADD_CARRY(b1, r1, r1, borrow);
+        MP_ADD_CARRY(b2, r2, r2, borrow);
+#else
+        __asm__(
+            "addq   %3,%0           \n\t"
+            "adcq   %4,%1           \n\t"
+            "adcq   %5,%2           \n\t"
+            : "=r"(r0), "=r"(r1), "=r"(r2)
+            : "r"(b0), "r"(b1), "r"(b2),
+              "0"(r0), "1"(r1), "2"(r2)
+            : "%cc");
+#endif
+    }
+
+#ifdef MPI_AMD64_ADD
+    /* compiler fakeout?  This branch only issues an extra s_mp_pad(r, 4)
+     * ahead of the unconditional s_mp_pad(r, 3) below.
+     * NOTE(review): it appears to exist to influence code generation on
+     * the asm path rather than to change the result -- confirm before
+     * touching it. */
+    if ((r2 == b0) && (r1 == b0) && (r0 == b0)) {
+        MP_CHECKOK(s_mp_pad(r, 4));
+    }
+#endif
+    MP_CHECKOK(s_mp_pad(r, 3));
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 3;
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 4 words.
+ * Unrolled modular subtraction: r = a - b, with meth->irr added back once
+ * when the digit-wise subtraction borrows out of the top digit.  Only the
+ * low MP_USED() digits of each operand are loaded (the switch cases fall
+ * through on purpose); an operand whose MP_USED() matches no case
+ * contributes zero.  Inline assembly is used when MPI_AMD64_ADD is
+ * defined, the MP_SUB_BORROW / MP_ADD_CARRY macros otherwise.  r is padded
+ * to 4 digits, forced to MP_ZPOS and clamped.  Returns MP_OKAY or the
+ * s_mp_pad() error.
+ * NOTE(review): presumably expects 0 <= a, b < meth->irr and a 4-digit
+ * meth->irr, as GFMethod_consGFp arranges -- confirm against callers.
+ * NOTE(review): the asm outputs are plain "=r" (no early-clobber) even
+ * though %4 is zeroed before the inputs are read -- confirm the register
+ * allocator cannot make them overlap. */
+mp_err
+ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
+    mp_digit borrow;
+
+    switch (MP_USED(a)) {
+        case 4:
+            r3 = MP_DIGIT(a, 3);
+        case 3:
+            r2 = MP_DIGIT(a, 2);
+        case 2:
+            r1 = MP_DIGIT(a, 1);
+        case 1:
+            r0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) {
+        case 4:
+            b3 = MP_DIGIT(b, 3);
+        case 3:
+            b2 = MP_DIGIT(b, 2);
+        case 2:
+            b1 = MP_DIGIT(b, 1);
+        case 1:
+            b0 = MP_DIGIT(b, 0);
+    }
+
+#ifndef MPI_AMD64_ADD
+    borrow = 0;
+    MP_SUB_BORROW(r0, b0, r0, borrow);
+    MP_SUB_BORROW(r1, b1, r1, borrow);
+    MP_SUB_BORROW(r2, b2, r2, borrow);
+    MP_SUB_BORROW(r3, b3, r3, borrow);
+#else
+    __asm__(
+        "xorq   %4,%4           \n\t"
+        "subq   %5,%0           \n\t"
+        "sbbq   %6,%1           \n\t"
+        "sbbq   %7,%2           \n\t"
+        "sbbq   %8,%3           \n\t"
+        "adcq   $0,%4           \n\t"
+        : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(borrow)
+        : "r"(b0), "r"(b1), "r"(b2), "r"(b3),
+          "0"(r0), "1"(r1), "2"(r2), "3"(r3)
+        : "%cc");
+#endif
+
+    /* Do quick 'add' if we've gone under 0
+     * (subtract the 2's complement of the curve field).  The carry out of
+     * this addition lands in borrow and is not read again. */
+    if (borrow) {
+        b3 = MP_DIGIT(&meth->irr, 3);
+        b2 = MP_DIGIT(&meth->irr, 2);
+        b1 = MP_DIGIT(&meth->irr, 1);
+        b0 = MP_DIGIT(&meth->irr, 0);
+#ifndef MPI_AMD64_ADD
+        borrow = 0;
+        MP_ADD_CARRY(b0, r0, r0, borrow);
+        MP_ADD_CARRY(b1, r1, r1, borrow);
+        MP_ADD_CARRY(b2, r2, r2, borrow);
+        MP_ADD_CARRY(b3, r3, r3, borrow);
+#else
+        __asm__(
+            "addq   %4,%0           \n\t"
+            "adcq   %5,%1           \n\t"
+            "adcq   %6,%2           \n\t"
+            "adcq   %7,%3           \n\t"
+            : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
+            : "r"(b0), "r"(b1), "r"(b2), "r"(b3),
+              "0"(r0), "1"(r1), "2"(r2), "3"(r3)
+            : "%cc");
+#endif
+    }
+#ifdef MPI_AMD64_ADD
+    /* compiler fakeout?  This branch only issues an s_mp_pad(r, 4) that
+     * the unconditional s_mp_pad(r, 4) below repeats; the test reads r3,
+     * r1 and r0 but not r2.
+     * NOTE(review): it appears to exist to influence code generation on
+     * the asm path rather than to change the result -- confirm before
+     * touching it. */
+    if ((r3 == b0) && (r1 == b0) && (r0 == b0)) {
+        MP_CHECKOK(s_mp_pad(r, 4));
+    }
+#endif
+    MP_CHECKOK(s_mp_pad(r, 4));
+    MP_DIGIT(r, 3) = r3;
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 4;
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 5 words.
+ * Unrolled modular subtraction: r = a - b, with meth->irr added back once
+ * when the digit-wise subtraction borrows out of the top digit.  Only the
+ * low MP_USED() digits of each operand are loaded (the switch cases fall
+ * through on purpose); an operand whose MP_USED() matches no case
+ * contributes zero.  The MP_SUB_BORROW / MP_ADD_CARRY macros are used;
+ * this size has no inline assembly path.  r is padded to 5 digits, forced
+ * to MP_ZPOS and clamped.  Returns MP_OKAY or the s_mp_pad() error.
+ * NOTE(review): presumably expects 0 <= a, b < meth->irr and a 5-digit
+ * meth->irr, as GFMethod_consGFp arranges -- confirm against callers. */
+mp_err
+ec_GFp_sub_5(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
+    mp_digit borrow;
+
+    switch (MP_USED(a)) {
+        case 5:
+            r4 = MP_DIGIT(a, 4);
+        case 4:
+            r3 = MP_DIGIT(a, 3);
+        case 3:
+            r2 = MP_DIGIT(a, 2);
+        case 2:
+            r1 = MP_DIGIT(a, 1);
+        case 1:
+            r0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) {
+        case 5:
+            b4 = MP_DIGIT(b, 4);
+        case 4:
+            b3 = MP_DIGIT(b, 3);
+        case 3:
+            b2 = MP_DIGIT(b, 2);
+        case 2:
+            b1 = MP_DIGIT(b, 1);
+        case 1:
+            b0 = MP_DIGIT(b, 0);
+    }
+
+    borrow = 0;
+    MP_SUB_BORROW(r0, b0, r0, borrow);
+    MP_SUB_BORROW(r1, b1, r1, borrow);
+    MP_SUB_BORROW(r2, b2, r2, borrow);
+    MP_SUB_BORROW(r3, b3, r3, borrow);
+    MP_SUB_BORROW(r4, b4, r4, borrow);
+
+    /* Do quick 'add' if we've gone under 0
+     * (subtract the 2's complement of the curve field).  The carry out of
+     * this addition lands in borrow and is not read again. */
+    if (borrow) {
+        b4 = MP_DIGIT(&meth->irr, 4);
+        b3 = MP_DIGIT(&meth->irr, 3);
+        b2 = MP_DIGIT(&meth->irr, 2);
+        b1 = MP_DIGIT(&meth->irr, 1);
+        b0 = MP_DIGIT(&meth->irr, 0);
+        borrow = 0;
+        MP_ADD_CARRY(b0, r0, r0, borrow);
+        MP_ADD_CARRY(b1, r1, r1, borrow);
+        MP_ADD_CARRY(b2, r2, r2, borrow);
+        MP_ADD_CARRY(b3, r3, r3, borrow);
+        MP_ADD_CARRY(b4, r4, r4, borrow);
+    }
+    MP_CHECKOK(s_mp_pad(r, 5));
+    MP_DIGIT(r, 4) = r4;
+    MP_DIGIT(r, 3) = r3;
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 5;
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 6 words.
+ * Unrolled modular subtraction: r = a - b, with meth->irr added back once
+ * when the digit-wise subtraction borrows out of the top digit.  Only the
+ * low MP_USED() digits of each operand are loaded (the switch cases fall
+ * through on purpose); an operand whose MP_USED() matches no case
+ * contributes zero.  The MP_SUB_BORROW / MP_ADD_CARRY macros are used;
+ * this size has no inline assembly path.  r is padded to 6 digits, forced
+ * to MP_ZPOS and clamped.  Returns MP_OKAY or the s_mp_pad() error.
+ * NOTE(review): presumably expects 0 <= a, b < meth->irr and a 6-digit
+ * meth->irr, as GFMethod_consGFp arranges -- confirm against callers. */
+mp_err
+ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0, b5 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
+    mp_digit borrow;
+
+    switch (MP_USED(a)) {
+        case 6:
+            r5 = MP_DIGIT(a, 5);
+        case 5:
+            r4 = MP_DIGIT(a, 4);
+        case 4:
+            r3 = MP_DIGIT(a, 3);
+        case 3:
+            r2 = MP_DIGIT(a, 2);
+        case 2:
+            r1 = MP_DIGIT(a, 1);
+        case 1:
+            r0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) {
+        case 6:
+            b5 = MP_DIGIT(b, 5);
+        case 5:
+            b4 = MP_DIGIT(b, 4);
+        case 4:
+            b3 = MP_DIGIT(b, 3);
+        case 3:
+            b2 = MP_DIGIT(b, 2);
+        case 2:
+            b1 = MP_DIGIT(b, 1);
+        case 1:
+            b0 = MP_DIGIT(b, 0);
+    }
+
+    borrow = 0;
+    MP_SUB_BORROW(r0, b0, r0, borrow);
+    MP_SUB_BORROW(r1, b1, r1, borrow);
+    MP_SUB_BORROW(r2, b2, r2, borrow);
+    MP_SUB_BORROW(r3, b3, r3, borrow);
+    MP_SUB_BORROW(r4, b4, r4, borrow);
+    MP_SUB_BORROW(r5, b5, r5, borrow);
+
+    /* Do quick 'add' if we've gone under 0
+     * (subtract the 2's complement of the curve field).  The carry out of
+     * this addition lands in borrow and is not read again. */
+    if (borrow) {
+        b5 = MP_DIGIT(&meth->irr, 5);
+        b4 = MP_DIGIT(&meth->irr, 4);
+        b3 = MP_DIGIT(&meth->irr, 3);
+        b2 = MP_DIGIT(&meth->irr, 2);
+        b1 = MP_DIGIT(&meth->irr, 1);
+        b0 = MP_DIGIT(&meth->irr, 0);
+        borrow = 0;
+        MP_ADD_CARRY(b0, r0, r0, borrow);
+        MP_ADD_CARRY(b1, r1, r1, borrow);
+        MP_ADD_CARRY(b2, r2, r2, borrow);
+        MP_ADD_CARRY(b3, r3, r3, borrow);
+        MP_ADD_CARRY(b4, r4, r4, borrow);
+        MP_ADD_CARRY(b5, r5, r5, borrow);
+    }
+
+    MP_CHECKOK(s_mp_pad(r, 6));
+    MP_DIGIT(r, 5) = r5;
+    MP_DIGIT(r, 4) = r4;
+    MP_DIGIT(r, 3) = r3;
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 6;
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* Reduces an integer to a field element by delegating to mp_mod() with
+ * meth->irr as the modulus; the result is stored in r. */
+mp_err
+ec_GFp_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    const mp_int *modulus = &meth->irr;
+
+    return mp_mod(a, modulus, r);
+}
+
+/* Multiplies two field elements by delegating to mp_mulmod() with
+ * meth->irr as the modulus; the product is stored in r. */
+mp_err
+ec_GFp_mul(const mp_int *a, const mp_int *b, mp_int *r,
+           const GFMethod *meth)
+{
+    const mp_int *modulus = &meth->irr;
+
+    return mp_mulmod(a, b, modulus, r);
+}
+
+/* Squares a field element by delegating to mp_sqrmod() with meth->irr as
+ * the modulus; the square is stored in r. */
+mp_err
+ec_GFp_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    const mp_int *modulus = &meth->irr;
+
+    return mp_sqrmod(a, modulus, r);
+}
+
+/* Divides two field elements: r = a / b modulo meth->irr.  If a is NULL,
+ * then returns the inverse of b instead.
+ *
+ * Returns MP_OKAY on success, otherwise the error of the MPI call that
+ * failed.  The temporary's digit pointer is zeroed before mp_init(), the
+ * same pre-zero-then-mp_clear() pattern GFMethod_new() relies on, so the
+ * mp_clear() on the cleanup path never looks at an uninitialized
+ * structure. */
+mp_err
+ec_GFp_div(const mp_int *a, const mp_int *b, mp_int *r,
+           const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_int t;
+
+    /* If a is NULL, then return the inverse of b, otherwise return a/b. */
+    if (a == NULL) {
+        return mp_invmod(b, &meth->irr, r);
+    }
+
+    /* MPI doesn't support divmod, so we implement it using invmod and
+     * mulmod. */
+    MP_DIGITS(&t) = 0;
+    MP_CHECKOK(mp_init(&t));
+    MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
+    MP_CHECKOK(mp_mulmod(a, &t, &meth->irr, r));
+CLEANUP:
+    mp_clear(&t);
+    return res;
+}
+
+/* Wrapper functions for generic binary polynomial field arithmetic. */
+
+/* Adds two binary-polynomial field elements by delegating to mp_badd();
+ * the sum is stored in r.  meth is not used by this function. */
+mp_err
+ec_GF2m_add(const mp_int *a, const mp_int *b, mp_int *r,
+            const GFMethod *meth)
+{
+    mp_err res = mp_badd(a, b, r);
+
+    return res;
+}
+
+/* Negates a field element.  In a binary polynomial field an element is
+ * its own negation, so this is a copy of a into r, skipped entirely when
+ * both arguments are the same object.  meth is not used. */
+mp_err
+ec_GF2m_neg(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    if (a != r) {
+        return mp_copy(a, r);
+    }
+    return MP_OKAY;
+}
+
+/* Reduces a binary polynomial to a field element by delegating to
+ * mp_bmod() with the method's irr_arr; the result is stored in r. */
+mp_err
+ec_GF2m_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = mp_bmod(a, meth->irr_arr, r);
+
+    return res;
+}
+
+/* Multiplies two binary-polynomial field elements by delegating to
+ * mp_bmulmod() with the method's irr_arr; the product is stored in r. */
+mp_err
+ec_GF2m_mul(const mp_int *a, const mp_int *b, mp_int *r,
+            const GFMethod *meth)
+{
+    mp_err res = mp_bmulmod(a, b, meth->irr_arr, r);
+
+    return res;
+}
+
+/* Squares a binary-polynomial field element by delegating to
+ * mp_bsqrmod() with the method's irr_arr; the square is stored in r. */
+mp_err
+ec_GF2m_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = mp_bsqrmod(a, meth->irr_arr, r);
+
+    return res;
+}
+
+/* Divides two field elements: r = a / b via mp_bdivmod().  If a is NULL,
+ * then returns the inverse of b instead, computed as 1 / b.
+ *
+ * Returns MP_OKAY on success, otherwise the error of the MPI call that
+ * failed.  The temporary's digit pointer is zeroed before mp_init(), the
+ * same pre-zero-then-mp_clear() pattern GFMethod_new() relies on, so the
+ * mp_clear() on the cleanup path never looks at an uninitialized
+ * structure. */
+mp_err
+ec_GF2m_div(const mp_int *a, const mp_int *b, mp_int *r,
+            const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_int t;
+
+    /* If a is NULL, then return the inverse of b, otherwise return a/b. */
+    if (a != NULL) {
+        return mp_bdivmod(a, b, &meth->irr, meth->irr_arr, r);
+    }
+
+    /* The GF(2^m) portion of MPI doesn't support invmod, so we
+     * compute 1/b. */
+    MP_DIGITS(&t) = 0;
+    MP_CHECKOK(mp_init(&t));
+    MP_CHECKOK(mp_set_int(&t, 1));
+    MP_CHECKOK(mp_bdivmod(&t, b, &meth->irr, meth->irr_arr, r));
+CLEANUP:
+    mp_clear(&t);
+    return res;
+}
diff --git a/security/nss/lib/freebl/ecl/ecl_mult.c b/security/nss/lib/freebl/ecl/ecl_mult.c
new file mode 100644
index 000000000..ffbcbf1d9
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl_mult.c
@@ -0,0 +1,305 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+#include "mplogic.h"
+#include "ecl.h"
+#include "ecl-priv.h"
+#include <stdlib.h>
+
+/* Elliptic curve scalar-point multiplication: R(x, y) = k * P(x, y).
+ * When px or py is NULL, P is taken to be the group's generator (base
+ * point), using group->base_point_mul if the group provides one.  A k
+ * greater than the group order is first reduced modulo the order.  Input
+ * and output values are assumed to be NOT field-encoded: P is passed
+ * through field_enc and the result through field_dec when the group's
+ * method defines them.  Returns MP_OKAY or the first error encountered. */
+mp_err
+ECPoint_mul(const ECGroup *group, const mp_int *k, const mp_int *px,
+            const mp_int *py, mp_int *rx, mp_int *ry)
+{
+    mp_err res = MP_OKAY;
+    mp_int scalar;
+
+    ARGCHK((k != NULL) && (group != NULL), MP_BADARG);
+    MP_DIGITS(&scalar) = 0;
+
+    /* want scalar to be less than or equal to group order */
+    if (mp_cmp(k, &group->order) <= 0) {
+        /* Already small enough: borrow k's digit storage instead of
+         * copying it.  CLEANUP detects the borrow by pointer identity. */
+        MP_DIGITS(&scalar) = MP_DIGITS(k);
+        MP_ALLOC(&scalar) = MP_ALLOC(k);
+        MP_USED(&scalar) = MP_USED(k);
+        MP_SIGN(&scalar) = MP_ZPOS;
+    } else {
+        MP_CHECKOK(mp_init(&scalar));
+        MP_CHECKOK(mp_mod(k, &group->order, &scalar));
+    }
+
+    if ((px != NULL) && (py != NULL)) {
+        if (group->meth->field_enc) {
+            /* Encode P into rx/ry and multiply in place. */
+            MP_CHECKOK(group->meth->field_enc(px, rx, group->meth));
+            MP_CHECKOK(group->meth->field_enc(py, ry, group->meth));
+            MP_CHECKOK(group->point_mul(&scalar, rx, ry, rx, ry, group));
+        } else {
+            MP_CHECKOK(group->point_mul(&scalar, px, py, rx, ry, group));
+        }
+    } else if (group->base_point_mul) {
+        MP_CHECKOK(group->base_point_mul(&scalar, rx, ry, group));
+    } else {
+        MP_CHECKOK(group->point_mul(&scalar, &group->genx, &group->geny,
+                                    rx, ry, group));
+    }
+
+    if (group->meth->field_dec) {
+        MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth));
+        MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth));
+    }
+
+CLEANUP:
+    /* Only release the scalar when it does not alias k's digits. */
+    if (MP_DIGITS(&scalar) != MP_DIGITS(k)) {
+        mp_clear(&scalar);
+    }
+    return res;
+}
+
+/* Elliptic curve two-scalar multiplication: R(x, y) = k1 * G + k2 * P(x,
+ * y), where G is the generator (base point) of the group.  Allows
+ * k1 = NULL or { k2, P } = NULL, in which case the single remaining
+ * product is computed by ECPoint_mul().  Otherwise both products are
+ * computed separately with ECPoint_mul() and combined with
+ * group->point_add, encoding the operands and decoding the sum when the
+ * group's method defines field_enc / field_dec.  Input and output values
+ * are assumed to be NOT field-encoded. */
+mp_err
+ec_pts_mul_basic(const mp_int *k1, const mp_int *k2, const mp_int *px,
+                 const mp_int *py, mp_int *rx, mp_int *ry,
+                 const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int gx, gy;
+
+    ARGCHK(group != NULL, MP_BADARG);
+    ARGCHK((k1 != NULL) || ((k2 != NULL) && (px != NULL) && (py != NULL)), MP_BADARG);
+
+    /* if some arguments are not defined used ECPoint_mul */
+    if (k1 == NULL) {
+        return ECPoint_mul(group, k2, px, py, rx, ry);
+    }
+    if ((k2 == NULL) || (px == NULL) || (py == NULL)) {
+        return ECPoint_mul(group, k1, NULL, NULL, rx, ry);
+    }
+
+    /* Zero the digit pointers first so CLEANUP can always mp_clear(). */
+    MP_DIGITS(&gx) = 0;
+    MP_DIGITS(&gy) = 0;
+    MP_CHECKOK(mp_init(&gx));
+    MP_CHECKOK(mp_init(&gy));
+
+    /* (gx, gy) = k1 * G and (rx, ry) = k2 * P. */
+    MP_CHECKOK(ECPoint_mul(group, k1, NULL, NULL, &gx, &gy));
+    MP_CHECKOK(ECPoint_mul(group, k2, px, py, rx, ry));
+
+    /* ECPoint_mul() returns decoded values; re-encode for point_add. */
+    if (group->meth->field_enc) {
+        MP_CHECKOK(group->meth->field_enc(&gx, &gx, group->meth));
+        MP_CHECKOK(group->meth->field_enc(&gy, &gy, group->meth));
+        MP_CHECKOK(group->meth->field_enc(rx, rx, group->meth));
+        MP_CHECKOK(group->meth->field_enc(ry, ry, group->meth));
+    }
+
+    MP_CHECKOK(group->point_add(&gx, &gy, rx, ry, rx, ry, group));
+
+    if (group->meth->field_dec) {
+        MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth));
+        MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth));
+    }
+
+CLEANUP:
+    mp_clear(&gx);
+    mp_clear(&gy);
+    return res;
+}
+
+/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k1 * G +
+ * k2 * P(x, y), where G is the generator (base point) of the group of
+ * points on the elliptic curve. Allows k1 = NULL or { k2, P } = NULL; both
+ * cases are handed to ECPoint_mul. Input and output values are assumed to
+ * be NOT field-encoded. Uses algorithm 15 (simultaneous multiple point
+ * multiplication, 2-bit windows) from Brown, Hankerson, Lopez, Menezes.
+ * Software Implementation of the NIST Elliptic Curves over Prime Fields. */
+mp_err
+ec_pts_mul_simul_w2(const mp_int *k1, const mp_int *k2, const mp_int *px,
+                    const mp_int *py, mp_int *rx, mp_int *ry,
+                    const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int precomp[4][4][2]; /* precomp[i][j] = (x, y) of i * A + j * B */
+    const mp_int *a, *b;     /* the two scalars, ordered so len(a) >= len(b) */
+    unsigned int i, j;
+    int ai, bi, d; /* current 2-bit windows of a and b; number of windows */
+
+    ARGCHK(group != NULL, MP_BADARG);
+    ARGCHK(!((k1 == NULL) && ((k2 == NULL) || (px == NULL) || (py == NULL))), MP_BADARG);
+
+    /* if some arguments are not defined, fall back to ECPoint_mul */
+    if (k1 == NULL) {
+        return ECPoint_mul(group, k2, px, py, rx, ry);
+    } else if ((k2 == NULL) || (px == NULL) || (py == NULL)) {
+        return ECPoint_mul(group, k1, NULL, NULL, rx, ry);
+    }
+
+    /* initialize precomputation table: null the digit pointers, then allocate */
+    for (i = 0; i < 4; i++) {
+        for (j = 0; j < 4; j++) {
+            MP_DIGITS(&precomp[i][j][0]) = 0;
+            MP_DIGITS(&precomp[i][j][1]) = 0;
+        }
+    }
+    for (i = 0; i < 4; i++) {
+        for (j = 0; j < 4; j++) {
+            MP_CHECKOK(mp_init_size(&precomp[i][j][0],
+                                    ECL_MAX_FIELD_SIZE_DIGITS));
+            MP_CHECKOK(mp_init_size(&precomp[i][j][1],
+                                    ECL_MAX_FIELD_SIZE_DIGITS));
+        }
+    }
+
+    /* fill precomputation table; P is field-encoded on the way in */
+    /* assign {a, b} = {k1, k2} such that len(a) >= len(b) */
+    if (mpl_significant_bits(k1) < mpl_significant_bits(k2)) {
+        a = k2; /* A = P, stored in precomp[1][0] */
+        b = k1; /* B = G, stored in precomp[0][1] */
+        if (group->meth->field_enc) {
+            MP_CHECKOK(group->meth->field_enc(px, &precomp[1][0][0], group->meth));
+            MP_CHECKOK(group->meth->field_enc(py, &precomp[1][0][1], group->meth));
+        } else {
+            MP_CHECKOK(mp_copy(px, &precomp[1][0][0]));
+            MP_CHECKOK(mp_copy(py, &precomp[1][0][1]));
+        }
+        MP_CHECKOK(mp_copy(&group->genx, &precomp[0][1][0]));
+        MP_CHECKOK(mp_copy(&group->geny, &precomp[0][1][1]));
+    } else {
+        a = k1; /* A = G, stored in precomp[1][0] */
+        b = k2; /* B = P, stored in precomp[0][1] */
+        MP_CHECKOK(mp_copy(&group->genx, &precomp[1][0][0]));
+        MP_CHECKOK(mp_copy(&group->geny, &precomp[1][0][1]));
+        if (group->meth->field_enc) {
+            MP_CHECKOK(group->meth->field_enc(px, &precomp[0][1][0], group->meth));
+            MP_CHECKOK(group->meth->field_enc(py, &precomp[0][1][1], group->meth));
+        } else {
+            MP_CHECKOK(mp_copy(px, &precomp[0][1][0]));
+            MP_CHECKOK(mp_copy(py, &precomp[0][1][1]));
+        }
+    }
+    /* precompute [*][0][*]: (0, 0) for infinity, then A, 2A, 3A = A + 2A */
+    mp_zero(&precomp[0][0][0]);
+    mp_zero(&precomp[0][0][1]);
+    MP_CHECKOK(group->point_dbl(&precomp[1][0][0], &precomp[1][0][1],
+                                &precomp[2][0][0], &precomp[2][0][1], group));
+    MP_CHECKOK(group->point_add(&precomp[1][0][0], &precomp[1][0][1],
+                                &precomp[2][0][0], &precomp[2][0][1],
+                                &precomp[3][0][0], &precomp[3][0][1], group));
+    /* precompute [*][1][*]: B + i * A */
+    for (i = 1; i < 4; i++) {
+        MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1],
+                                    &precomp[i][0][0], &precomp[i][0][1],
+                                    &precomp[i][1][0], &precomp[i][1][1], group));
+    }
+    /* precompute [*][2][*]: 2B, then 2B + i * A */
+    MP_CHECKOK(group->point_dbl(&precomp[0][1][0], &precomp[0][1][1],
+                                &precomp[0][2][0], &precomp[0][2][1], group));
+    for (i = 1; i < 4; i++) {
+        MP_CHECKOK(group->point_add(&precomp[0][2][0], &precomp[0][2][1],
+                                    &precomp[i][0][0], &precomp[i][0][1],
+                                    &precomp[i][2][0], &precomp[i][2][1], group));
+    }
+    /* precompute [*][3][*]: 3B = B + 2B, then 3B + i * A */
+    MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1],
+                                &precomp[0][2][0], &precomp[0][2][1],
+                                &precomp[0][3][0], &precomp[0][3][1], group));
+    for (i = 1; i < 4; i++) {
+        MP_CHECKOK(group->point_add(&precomp[0][3][0], &precomp[0][3][1],
+                                    &precomp[i][0][0], &precomp[i][0][1],
+                                    &precomp[i][3][0], &precomp[i][3][1], group));
+    }
+
+    d = (mpl_significant_bits(a) + 1) / 2; /* 2-bit windows needed for a */
+
+    /* R = inf */
+    mp_zero(rx);
+    mp_zero(ry);
+
+    for (i = d; i-- > 0;) { /* windows d - 1 down to 0, most significant first */
+        ai = MP_GET_BIT(a, 2 * i + 1);
+        ai <<= 1;
+        ai |= MP_GET_BIT(a, 2 * i);
+        bi = MP_GET_BIT(b, 2 * i + 1);
+        bi <<= 1;
+        bi |= MP_GET_BIT(b, 2 * i);
+        /* R = 2^2 * R */
+        MP_CHECKOK(group->point_dbl(rx, ry, rx, ry, group));
+        MP_CHECKOK(group->point_dbl(rx, ry, rx, ry, group));
+        /* R = R + (ai * A + bi * B) */
+        MP_CHECKOK(group->point_add(rx, ry, &precomp[ai][bi][0],
+                                    &precomp[ai][bi][1], rx, ry, group));
+    }
+
+    if (group->meth->field_dec) { /* undo the field encoding on the result */
+        MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth));
+        MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth));
+    }
+
+CLEANUP:
+    for (i = 0; i < 4; i++) {
+        for (j = 0; j < 4; j++) {
+            mp_clear(&precomp[i][j][0]);
+            mp_clear(&precomp[i][j][1]);
+        }
+    }
+    return res;
+}
+
+/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k1 * G +
+ * k2 * P(x, y), where G is the generator (base point) of the group of
+ * points on the elliptic curve. Allows k1 = NULL or { k2, P } = NULL.
+ * Input and output values are assumed to be NOT field-encoded.
+ *
+ * A scalar that is >= the group order is first reduced modulo the order
+ * into a local temporary, so the caller's scalars are never modified. The
+ * work is then done by the group's points_mul hook when one is set, and by
+ * ec_pts_mul_simul_w2 otherwise.
+ *
+ *   group   supplies the order and the optional points_mul hook
+ *   k1, k2  scalars for G and P; see above for when they may be NULL
+ *   px, py  coordinates of P
+ *   rx, ry  receive the coordinates of R
+ * Returns the first failing mp_err, else the multiplier's result. */
+mp_err
+ECPoints_mul(const ECGroup *group, const mp_int *k1, const mp_int *k2,
+             const mp_int *px, const mp_int *py, mp_int *rx, mp_int *ry)
+{
+    mp_err res = MP_OKAY;
+    mp_int reduced1, reduced2;
+    const mp_int *scalar1 = k1;
+    const mp_int *scalar2 = k2;
+
+    /* start with null digit pointers: CLEANUP calls mp_clear on both
+     * temporaries whether or not they were ever initialized */
+    MP_DIGITS(&reduced1) = 0;
+    MP_DIGITS(&reduced2) = 0;
+
+    ARGCHK(group != NULL, MP_BADARG);
+
+    /* bring each supplied scalar below the group order */
+    if ((k1 != NULL) && (mp_cmp(k1, &group->order) >= 0)) {
+        MP_CHECKOK(mp_init(&reduced1));
+        MP_CHECKOK(mp_mod(k1, &group->order, &reduced1));
+        scalar1 = &reduced1;
+    }
+    if ((k2 != NULL) && (mp_cmp(k2, &group->order) >= 0)) {
+        MP_CHECKOK(mp_init(&reduced2));
+        MP_CHECKOK(mp_mod(k2, &group->order, &reduced2));
+        scalar2 = &reduced2;
+    }
+
+    /* prefer the group-specific implementation when one is installed */
+    res = group->points_mul
+              ? group->points_mul(scalar1, scalar2, px, py, rx, ry, group)
+              : ec_pts_mul_simul_w2(scalar1, scalar2, px, py, rx, ry, group);
+
+CLEANUP:
+    mp_clear(&reduced1);
+    mp_clear(&reduced2);
+    return res;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp.h b/security/nss/lib/freebl/ecl/ecp.h
new file mode 100644
index 000000000..7e54e4e07
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp.h
@@ -0,0 +1,106 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __ecp_h_
+#define __ecp_h_
+
+#include "ecl-priv.h"
+
+/* Checks if point P(px, py) is at infinity.  Uses affine coordinates. */
+mp_err ec_GFp_pt_is_inf_aff(const mp_int *px, const mp_int *py);
+
+/* Sets P(px, py) to be the point at infinity.  Uses affine coordinates. */
+mp_err ec_GFp_pt_set_inf_aff(mp_int *px, mp_int *py);
+
+/* Computes R = P + Q where R is (rx, ry), P is (px, py) and Q is (qx,
+ * qy). Uses affine coordinates. */
+mp_err ec_GFp_pt_add_aff(const mp_int *px, const mp_int *py,
+                         const mp_int *qx, const mp_int *qy, mp_int *rx,
+                         mp_int *ry, const ECGroup *group);
+
+/* Computes R = P - Q.  Uses affine coordinates. */
+mp_err ec_GFp_pt_sub_aff(const mp_int *px, const mp_int *py,
+                         const mp_int *qx, const mp_int *qy, mp_int *rx,
+                         mp_int *ry, const ECGroup *group);
+
+/* Computes R = 2P.  Uses affine coordinates. */
+mp_err ec_GFp_pt_dbl_aff(const mp_int *px, const mp_int *py, mp_int *rx,
+                         mp_int *ry, const ECGroup *group);
+
+/* Validates a point on a GFp curve. */
+mp_err ec_GFp_validate_point(const mp_int *px, const mp_int *py, const ECGroup *group);
+
+#ifdef ECL_ENABLE_GFP_PT_MUL_AFF
+/* Computes R = nP where R is (rx, ry) and P is (px, py). The elliptic
+ * curve coefficients a and b and the prime p that determines the field
+ * GFp are supplied through group.  Uses affine coordinates. */
+mp_err ec_GFp_pt_mul_aff(const mp_int *n, const mp_int *px,
+                         const mp_int *py, mp_int *rx, mp_int *ry,
+                         const ECGroup *group);
+#endif
+
+/* Converts a point P(px, py) from affine coordinates to Jacobian
+ * projective coordinates R(rx, ry, rz). */
+mp_err ec_GFp_pt_aff2jac(const mp_int *px, const mp_int *py, mp_int *rx,
+                         mp_int *ry, mp_int *rz, const ECGroup *group);
+
+/* Converts a point P(px, py, pz) from Jacobian projective coordinates to
+ * affine coordinates R(rx, ry). */
+mp_err ec_GFp_pt_jac2aff(const mp_int *px, const mp_int *py,
+                         const mp_int *pz, mp_int *rx, mp_int *ry,
+                         const ECGroup *group);
+
+/* Checks if point P(px, py, pz) is at infinity.  Uses Jacobian
+ * coordinates. */
+mp_err ec_GFp_pt_is_inf_jac(const mp_int *px, const mp_int *py,
+                            const mp_int *pz);
+
+/* Sets P(px, py, pz) to be the point at infinity.  Uses Jacobian
+ * coordinates. */
+mp_err ec_GFp_pt_set_inf_jac(mp_int *px, mp_int *py, mp_int *pz);
+
+/* Computes R = P + Q where R is (rx, ry, rz), P is (px, py, pz) and Q is
+ * (qx, qy).  P and R use Jacobian coordinates; Q is given in affine form. */
+mp_err ec_GFp_pt_add_jac_aff(const mp_int *px, const mp_int *py,
+                             const mp_int *pz, const mp_int *qx,
+                             const mp_int *qy, mp_int *rx, mp_int *ry,
+                             mp_int *rz, const ECGroup *group);
+
+/* Computes R = 2P.  Uses Jacobian coordinates. */
+mp_err ec_GFp_pt_dbl_jac(const mp_int *px, const mp_int *py,
+                         const mp_int *pz, mp_int *rx, mp_int *ry,
+                         mp_int *rz, const ECGroup *group);
+
+#ifdef ECL_ENABLE_GFP_PT_MUL_JAC
+/* Computes R = nP where R is (rx, ry) and P is (px, py). The elliptic
+ * curve coefficients a and b and the prime p that determines the field
+ * GFp are supplied through group.  Uses Jacobian coordinates. */
+mp_err ec_GFp_pt_mul_jac(const mp_int *n, const mp_int *px,
+                         const mp_int *py, mp_int *rx, mp_int *ry,
+                         const ECGroup *group);
+#endif
+
+/* Computes R(x, y) = k1 * G + k2 * P(x, y), where G is the generator
+ * (base point) of the group of points on the elliptic curve. Allows k1 =
+ * NULL or { k2, P } = NULL.  Implemented using mixed Jacobian-affine
+ * coordinates. Input and output values are assumed to be NOT
+ * field-encoded and are in affine form. */
+mp_err
+ec_GFp_pts_mul_jac(const mp_int *k1, const mp_int *k2, const mp_int *px,
+                   const mp_int *py, mp_int *rx, mp_int *ry,
+                   const ECGroup *group);
+
+/* Computes R = nP where R is (rx, ry) and P is the base point (px, py).
+ * Elliptic curve points P and R can be identical. Uses mixed
+ * Modified-Jacobian coordinates for doubling and Chudnovsky Jacobian
+ * coordinates for additions. Assumes input is already field-encoded using
+ * field_enc, and returns output that is still field-encoded. Uses 5-bit
+ * window NAF method (algorithm 11) for scalar-point multiplication from
+ * Brown, Hankerson, Lopez, Menezes. Software Implementation of the NIST
+ * Elliptic Curves Over Prime Fields. */
+mp_err
+ec_GFp_pt_mul_jm_wNAF(const mp_int *n, const mp_int *px, const mp_int *py,
+                      mp_int *rx, mp_int *ry, const ECGroup *group);
+
+#endif /* __ecp_h_ */
diff --git a/security/nss/lib/freebl/ecl/ecp_25519.c b/security/nss/lib/freebl/ecl/ecp_25519.c
new file mode 100644
index 000000000..a8d41520e
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_25519.c
@@ -0,0 +1,120 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* curve 25519 https://www.rfc-editor.org/rfc/rfc7748.txt */
+
+#ifdef FREEBL_NO_DEPEND
+#include "../stubs.h"
+#endif
+
+#include "ecl-priv.h"
+#include "ecp.h"
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+#include "secmpi.h"
+#include "secitem.h"
+#include "secport.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+/*
+ * Point validation is not necessary in general, but this rejects a missing
+ * point and any point (px) whose bytes match a known bad value.
+ */
+SECStatus
+ec_Curve25519_pt_validate(const SECItem *px)
+{
+    const PRUint8 *p;
+    size_t i;
+    static const PRUint8 forbiddenValues[12][32] = {
+        { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+        { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+        { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae,
+          0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a,
+          0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd,
+          0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x00 },
+        { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24,
+          0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b,
+          0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86,
+          0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0x57 },
+        { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+        { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+        { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+        { 0xcd, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae,
+          0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a,
+          0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd,
+          0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x80 },
+        { 0x4c, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24,
+          0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b,
+          0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86,
+          0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0xd7 },
+        { 0xd9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+        { 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+        { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+    };
+
+    /* The point must be present and not longer than 32 (it can be smaller). */
+    if (px == NULL || px->data == NULL || px->len > 32) {
+        return SECFailure;
+    }
+    p = px->data;
+    /* only the px->len bytes supplied are compared against each entry */
+
+    for (i = 0; i < PR_ARRAY_SIZE(forbiddenValues); ++i) {
+        if (NSS_SecureMemcmp(p, forbiddenValues[i], px->len) == 0) {
+            return SECFailure;
+        }
+    }
+
+    return SECSuccess;
+}
+
+/*
+ * Scalar multiplication for Curve25519. Returns X = k*P; if P == NULL the
+ * base point is used. k and P must be exactly 32 bytes and X must have room
+ * for 32 bytes, otherwise SECFailure is returned without touching X.
+ */
+SECStatus
+ec_Curve25519_pt_mul(SECItem *X, SECItem *k, SECItem *P)
+{
+    PRUint8 basePoint[32] = { 9 };
+    PRUint8 *px = basePoint;
+    /* Reject missing or mis-sized buffers before anything is read or written. */
+    if (!X || !X->data || X->len < 32 || !k || !k->data || k->len != 32) {
+        return SECFailure;
+    }
+    if (P) {
+        PORT_Assert(P->len == 32);
+        if (!P->data || P->len != 32) {
+            return SECFailure;
+        }
+        px = P->data;
+    }
+    return ec_Curve25519_mul(X->data, k->data, px);
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_256.c b/security/nss/lib/freebl/ecl/ecp_256.c
new file mode 100644
index 000000000..ad4e630c1
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_256.c
@@ -0,0 +1,401 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+
+/* Fast modular reduction for p256 = 2^256 - 2^224 + 2^192 + 2^96 - 1.  a can be r.
+ * Uses algorithm 2.29 from Hankerson, Menezes, Vanstone. Guide to
+ * Elliptic Curve Cryptography. */
+static mp_err
+ec_GFp_nistp256_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_size a_used = MP_USED(a);
+    int a_bits = mpl_significant_bits(a);
+    mp_digit carry;
+
+#ifdef ECL_THIRTY_TWO_BIT
+    mp_digit a8 = 0, a9 = 0, a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0, a15 = 0;
+    mp_digit r0, r1, r2, r3, r4, r5, r6, r7;
+    int r8; /* must be a signed value ! */
+#else
+    mp_digit a4 = 0, a5 = 0, a6 = 0, a7 = 0;
+    mp_digit a4h, a4l, a5h, a5l, a6h, a6l, a7h, a7l;
+    mp_digit r0, r1, r2, r3;
+    int r4; /* must be a signed value ! */
+#endif
+    /* values shorter than 256 bits are copied through as is; values longer
+     * than twice the field size (512 bits) use regular reduction */
+    if (a_bits < 256) {
+        if (a == r)
+            return MP_OKAY;
+        return mp_copy(a, r);
+    }
+    if (a_bits > 512) {
+        MP_CHECKOK(mp_mod(a, &meth->irr, r));
+    } else {
+
+#ifdef ECL_THIRTY_TWO_BIT
+        switch (a_used) { /* load only the upper digits a has; cases fall through */
+            case 16:
+                a15 = MP_DIGIT(a, 15);
+            case 15:
+                a14 = MP_DIGIT(a, 14);
+            case 14:
+                a13 = MP_DIGIT(a, 13);
+            case 13:
+                a12 = MP_DIGIT(a, 12);
+            case 12:
+                a11 = MP_DIGIT(a, 11);
+            case 11:
+                a10 = MP_DIGIT(a, 10);
+            case 10:
+                a9 = MP_DIGIT(a, 9);
+            case 9:
+                a8 = MP_DIGIT(a, 8);
+        }
+
+        r0 = MP_DIGIT(a, 0);
+        r1 = MP_DIGIT(a, 1);
+        r2 = MP_DIGIT(a, 2);
+        r3 = MP_DIGIT(a, 3);
+        r4 = MP_DIGIT(a, 4);
+        r5 = MP_DIGIT(a, 5);
+        r6 = MP_DIGIT(a, 6);
+        r7 = MP_DIGIT(a, 7);
+
+        /* sum 1 */
+        carry = 0;
+        MP_ADD_CARRY(r3, a11, r3, carry);
+        MP_ADD_CARRY(r4, a12, r4, carry);
+        MP_ADD_CARRY(r5, a13, r5, carry);
+        MP_ADD_CARRY(r6, a14, r6, carry);
+        MP_ADD_CARRY(r7, a15, r7, carry);
+        r8 = carry;
+        carry = 0;
+        MP_ADD_CARRY(r3, a11, r3, carry);
+        MP_ADD_CARRY(r4, a12, r4, carry);
+        MP_ADD_CARRY(r5, a13, r5, carry);
+        MP_ADD_CARRY(r6, a14, r6, carry);
+        MP_ADD_CARRY(r7, a15, r7, carry);
+        r8 += carry;
+        carry = 0;
+        /* sum 2 */
+        MP_ADD_CARRY(r3, a12, r3, carry);
+        MP_ADD_CARRY(r4, a13, r4, carry);
+        MP_ADD_CARRY(r5, a14, r5, carry);
+        MP_ADD_CARRY(r6, a15, r6, carry);
+        MP_ADD_CARRY(r7, 0, r7, carry);
+        r8 += carry;
+        carry = 0;
+        /* combine last bottom of sum 3 with second sum 2 */
+        MP_ADD_CARRY(r0, a8, r0, carry);
+        MP_ADD_CARRY(r1, a9, r1, carry);
+        MP_ADD_CARRY(r2, a10, r2, carry);
+        MP_ADD_CARRY(r3, a12, r3, carry);
+        MP_ADD_CARRY(r4, a13, r4, carry);
+        MP_ADD_CARRY(r5, a14, r5, carry);
+        MP_ADD_CARRY(r6, a15, r6, carry);
+        MP_ADD_CARRY(r7, a15, r7, carry); /* from sum 3 */
+        r8 += carry;
+        carry = 0;
+        /* sum 3 (rest of it)*/
+        MP_ADD_CARRY(r6, a14, r6, carry);
+        MP_ADD_CARRY(r7, 0, r7, carry);
+        r8 += carry;
+        carry = 0;
+        /* sum 4 (rest of it)*/
+        MP_ADD_CARRY(r0, a9, r0, carry);
+        MP_ADD_CARRY(r1, a10, r1, carry);
+        MP_ADD_CARRY(r2, a11, r2, carry);
+        MP_ADD_CARRY(r3, a13, r3, carry);
+        MP_ADD_CARRY(r4, a14, r4, carry);
+        MP_ADD_CARRY(r5, a15, r5, carry);
+        MP_ADD_CARRY(r6, a13, r6, carry);
+        MP_ADD_CARRY(r7, a8, r7, carry);
+        r8 += carry;
+        carry = 0;
+        /* diff 5 */
+        MP_SUB_BORROW(r0, a11, r0, carry);
+        MP_SUB_BORROW(r1, a12, r1, carry);
+        MP_SUB_BORROW(r2, a13, r2, carry);
+        MP_SUB_BORROW(r3, 0, r3, carry);
+        MP_SUB_BORROW(r4, 0, r4, carry);
+        MP_SUB_BORROW(r5, 0, r5, carry);
+        MP_SUB_BORROW(r6, a8, r6, carry);
+        MP_SUB_BORROW(r7, a10, r7, carry);
+        r8 -= carry;
+        carry = 0;
+        /* diff 6 */
+        MP_SUB_BORROW(r0, a12, r0, carry);
+        MP_SUB_BORROW(r1, a13, r1, carry);
+        MP_SUB_BORROW(r2, a14, r2, carry);
+        MP_SUB_BORROW(r3, a15, r3, carry);
+        MP_SUB_BORROW(r4, 0, r4, carry);
+        MP_SUB_BORROW(r5, 0, r5, carry);
+        MP_SUB_BORROW(r6, a9, r6, carry);
+        MP_SUB_BORROW(r7, a11, r7, carry);
+        r8 -= carry;
+        carry = 0;
+        /* diff 7 */
+        MP_SUB_BORROW(r0, a13, r0, carry);
+        MP_SUB_BORROW(r1, a14, r1, carry);
+        MP_SUB_BORROW(r2, a15, r2, carry);
+        MP_SUB_BORROW(r3, a8, r3, carry);
+        MP_SUB_BORROW(r4, a9, r4, carry);
+        MP_SUB_BORROW(r5, a10, r5, carry);
+        MP_SUB_BORROW(r6, 0, r6, carry);
+        MP_SUB_BORROW(r7, a12, r7, carry);
+        r8 -= carry;
+        carry = 0;
+        /* diff 8 */
+        MP_SUB_BORROW(r0, a14, r0, carry);
+        MP_SUB_BORROW(r1, a15, r1, carry);
+        MP_SUB_BORROW(r2, 0, r2, carry);
+        MP_SUB_BORROW(r3, a9, r3, carry);
+        MP_SUB_BORROW(r4, a10, r4, carry);
+        MP_SUB_BORROW(r5, a11, r5, carry);
+        MP_SUB_BORROW(r6, 0, r6, carry);
+        MP_SUB_BORROW(r7, a13, r7, carry);
+        r8 -= carry;
+
+        /* reduce the overflows */
+        while (r8 > 0) {
+            mp_digit r8_d = r8;
+            carry = 0;
+            MP_ADD_CARRY(r0, r8_d, r0, carry);
+            MP_ADD_CARRY(r1, 0, r1, carry);
+            MP_ADD_CARRY(r2, 0, r2, carry);
+            MP_ADD_CARRY(r3, 0 - r8_d, r3, carry);
+            MP_ADD_CARRY(r4, MP_DIGIT_MAX, r4, carry);
+            MP_ADD_CARRY(r5, MP_DIGIT_MAX, r5, carry);
+            MP_ADD_CARRY(r6, 0 - (r8_d + 1), r6, carry);
+            MP_ADD_CARRY(r7, (r8_d - 1), r7, carry);
+            r8 = carry;
+        }
+
+        /* reduce the underflows */
+        while (r8 < 0) {
+            mp_digit r8_d = -r8;
+            carry = 0;
+            MP_SUB_BORROW(r0, r8_d, r0, carry);
+            MP_SUB_BORROW(r1, 0, r1, carry);
+            MP_SUB_BORROW(r2, 0, r2, carry);
+            MP_SUB_BORROW(r3, 0 - r8_d, r3, carry);
+            MP_SUB_BORROW(r4, MP_DIGIT_MAX, r4, carry);
+            MP_SUB_BORROW(r5, MP_DIGIT_MAX, r5, carry);
+            MP_SUB_BORROW(r6, 0 - (r8_d + 1), r6, carry);
+            MP_SUB_BORROW(r7, (r8_d - 1), r7, carry);
+            r8 = 0 - carry;
+        }
+        if (a != r) {
+            MP_CHECKOK(s_mp_pad(r, 8));
+        }
+        MP_SIGN(r) = MP_ZPOS;
+        MP_USED(r) = 8;
+
+        MP_DIGIT(r, 7) = r7;
+        MP_DIGIT(r, 6) = r6;
+        MP_DIGIT(r, 5) = r5;
+        MP_DIGIT(r, 4) = r4;
+        MP_DIGIT(r, 3) = r3;
+        MP_DIGIT(r, 2) = r2;
+        MP_DIGIT(r, 1) = r1;
+        MP_DIGIT(r, 0) = r0;
+
+        /* final reduction if necessary, i.e. when r >= p256 */
+        if ((r7 == MP_DIGIT_MAX) &&
+            ((r6 > 1) || ((r6 == 1) &&
+                          (r5 || r4 || r3 ||
+                           ((r2 == MP_DIGIT_MAX) && (r1 == MP_DIGIT_MAX) && (r0 == MP_DIGIT_MAX)))))) {
+            MP_CHECKOK(mp_sub(r, &meth->irr, r));
+        }
+
+        s_mp_clamp(r);
+#else
+        switch (a_used) { /* load only the upper digits a has; cases fall through */
+            case 8:
+                a7 = MP_DIGIT(a, 7);
+            case 7:
+                a6 = MP_DIGIT(a, 6);
+            case 6:
+                a5 = MP_DIGIT(a, 5);
+            case 5:
+                a4 = MP_DIGIT(a, 4);
+        }
+        a7l = a7 << 32;
+        a7h = a7 >> 32;
+        a6l = a6 << 32;
+        a6h = a6 >> 32;
+        a5l = a5 << 32;
+        a5h = a5 >> 32;
+        a4l = a4 << 32;
+        a4h = a4 >> 32;
+        r3 = MP_DIGIT(a, 3);
+        r2 = MP_DIGIT(a, 2);
+        r1 = MP_DIGIT(a, 1);
+        r0 = MP_DIGIT(a, 0);
+
+        /* sum 1 */
+        carry = 0;
+        MP_ADD_CARRY(r1, a5h << 32, r1, carry);
+        MP_ADD_CARRY(r2, a6, r2, carry);
+        MP_ADD_CARRY(r3, a7, r3, carry);
+        r4 = carry;
+        carry = 0;
+        MP_ADD_CARRY(r1, a5h << 32, r1, carry);
+        MP_ADD_CARRY(r2, a6, r2, carry);
+        MP_ADD_CARRY(r3, a7, r3, carry);
+        r4 += carry;
+        /* sum 2 */
+        carry = 0;
+        MP_ADD_CARRY(r1, a6l, r1, carry);
+        MP_ADD_CARRY(r2, a6h | a7l, r2, carry);
+        MP_ADD_CARRY(r3, a7h, r3, carry);
+        r4 += carry;
+        carry = 0;
+        MP_ADD_CARRY(r1, a6l, r1, carry);
+        MP_ADD_CARRY(r2, a6h | a7l, r2, carry);
+        MP_ADD_CARRY(r3, a7h, r3, carry);
+        r4 += carry;
+
+        /* sum 3 */
+        carry = 0;
+        MP_ADD_CARRY(r0, a4, r0, carry);
+        MP_ADD_CARRY(r1, a5l >> 32, r1, carry);
+        MP_ADD_CARRY(r2, 0, r2, carry);
+        MP_ADD_CARRY(r3, a7, r3, carry);
+        r4 += carry;
+        /* sum 4 */
+        carry = 0;
+        MP_ADD_CARRY(r0, a4h | a5l, r0, carry);
+        MP_ADD_CARRY(r1, a5h | (a6h << 32), r1, carry);
+        MP_ADD_CARRY(r2, a7, r2, carry);
+        MP_ADD_CARRY(r3, a6h | a4l, r3, carry);
+        r4 += carry;
+        /* diff 5 */
+        carry = 0;
+        MP_SUB_BORROW(r0, a5h | a6l, r0, carry);
+        MP_SUB_BORROW(r1, a6h, r1, carry);
+        MP_SUB_BORROW(r2, 0, r2, carry);
+        MP_SUB_BORROW(r3, (a4l >> 32) | a5l, r3, carry);
+        r4 -= carry;
+        /* diff 6 */
+        carry = 0;
+        MP_SUB_BORROW(r0, a6, r0, carry);
+        MP_SUB_BORROW(r1, a7, r1, carry);
+        MP_SUB_BORROW(r2, 0, r2, carry);
+        MP_SUB_BORROW(r3, a4h | (a5h << 32), r3, carry);
+        r4 -= carry;
+        /* diff 7 */
+        carry = 0;
+        MP_SUB_BORROW(r0, a6h | a7l, r0, carry);
+        MP_SUB_BORROW(r1, a7h | a4l, r1, carry);
+        MP_SUB_BORROW(r2, a4h | a5l, r2, carry);
+        MP_SUB_BORROW(r3, a6l, r3, carry);
+        r4 -= carry;
+        /* diff 8 */
+        carry = 0;
+        MP_SUB_BORROW(r0, a7, r0, carry);
+        MP_SUB_BORROW(r1, a4h << 32, r1, carry);
+        MP_SUB_BORROW(r2, a5, r2, carry);
+        MP_SUB_BORROW(r3, a6h << 32, r3, carry);
+        r4 -= carry;
+
+        /* reduce the overflows */
+        while (r4 > 0) {
+            mp_digit r4_long = r4;
+            mp_digit r4l = (r4_long << 32);
+            carry = 0;
+            MP_ADD_CARRY(r0, r4_long, r0, carry);
+            MP_ADD_CARRY(r1, 0 - r4l, r1, carry);
+            MP_ADD_CARRY(r2, MP_DIGIT_MAX, r2, carry);
+            MP_ADD_CARRY(r3, r4l - r4_long - 1, r3, carry);
+            r4 = carry;
+        }
+
+        /* reduce the underflows */
+        while (r4 < 0) {
+            mp_digit r4_long = -r4;
+            mp_digit r4l = (r4_long << 32);
+            carry = 0;
+            MP_SUB_BORROW(r0, r4_long, r0, carry);
+            MP_SUB_BORROW(r1, 0 - r4l, r1, carry);
+            MP_SUB_BORROW(r2, MP_DIGIT_MAX, r2, carry);
+            MP_SUB_BORROW(r3, r4l - r4_long - 1, r3, carry);
+            r4 = 0 - carry;
+        }
+
+        if (a != r) {
+            MP_CHECKOK(s_mp_pad(r, 4));
+        }
+        MP_SIGN(r) = MP_ZPOS;
+        MP_USED(r) = 4;
+
+        MP_DIGIT(r, 3) = r3;
+        MP_DIGIT(r, 2) = r2;
+        MP_DIGIT(r, 1) = r1;
+        MP_DIGIT(r, 0) = r0;
+
+        /* final reduction if necessary, i.e. when r >= p256 */
+        if ((r3 > 0xFFFFFFFF00000001ULL) ||
+            ((r3 == 0xFFFFFFFF00000001ULL) &&
+             (r2 || (r1 >> 32) ||
+              (r1 == 0xFFFFFFFFULL && r0 == MP_DIGIT_MAX)))) {
+            /* very rare, just use mp_sub */
+            MP_CHECKOK(mp_sub(r, &meth->irr, r));
+        }
+
+        s_mp_clamp(r);
+#endif
+    }
+
+CLEANUP:
+    return res;
+}
+
+/* Square a and reduce the result modulo p256, leaving it in r.  r may be
+ * the same mp_int as a.  Reduction is done by ec_GFp_nistp256_mod.
+ */
+static mp_err
+ec_GFp_nistp256_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err status = mp_sqr(a, r);
+
+    if (status >= MP_OKAY) {
+        status = ec_GFp_nistp256_mod(r, r, meth);
+    }
+    return status;
+}
+
+/* Multiply a by b and reduce the product modulo p256, leaving it in r.
+ * r may alias a or b, and a may alias b.  Reduction is done by
+ * ec_GFp_nistp256_mod. */
+static mp_err
+ec_GFp_nistp256_mul(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth)
+{
+    mp_err status = mp_mul(a, b, r);
+
+    if (status < MP_OKAY) {
+        return status;
+    }
+    return ec_GFp_nistp256_mod(r, r, meth);
+}
+
+/* Wire the fast p256 field_mod/field_mul/field_sqr into NIST P-256 groups. */
+mp_err
+ec_group_set_gfp256(ECGroup *group, ECCurveName name)
+{
+    if (name != ECCurve_NIST_P256) {
+        return MP_OKAY;
+    }
+    group->meth->field_mod = ec_GFp_nistp256_mod;
+    group->meth->field_mul = ec_GFp_nistp256_mul;
+    group->meth->field_sqr = ec_GFp_nistp256_sqr;
+    return MP_OKAY;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_256_32.c b/security/nss/lib/freebl/ecl/ecp_256_32.c
new file mode 100644
index 000000000..515f6f731
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_256_32.c
@@ -0,0 +1,1535 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* A 32-bit implementation of the NIST P-256 elliptic curve. */
+
+#include <string.h>
+
+#include "prtypes.h"
+#include "mpi.h"
+#include "mpi-priv.h"
+#include "ecp.h"
+
+typedef PRUint8 u8;
+typedef PRUint32 u32;
+typedef PRUint64 u64;
+
+/* Our field elements are represented as nine, unsigned 32-bit words. Freebl's
+ * MPI library calls them digits, but here they are called limbs, which is
+ * GMP's terminology.
+ *
+ * The value of an felem (field element) is:
+ *   x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228)
+ *
+ * That is, each limb is alternately 29 or 28-bits wide in little-endian
+ * order.
+ *
+ * This means that an felem hits 2**257, rather than 2**256 as we would like. A
+ * 28, 29, ... pattern would cause us to hit 2**256, but that causes problems
+ * when multiplying as terms end up one bit short of a limb which would require
+ * much bit-shifting to correct.
+ *
+ * Finally, the values stored in an felem are in Montgomery form. So the value
+ * |y| is stored as (y*R) mod p, where p is the P-256 prime and R is 2**257.
+ */
+typedef u32 limb;           /* one 29- or 28-bit limb held in a 32-bit word */
+#define NLIMBS 9            /* number of limbs in a field element */
+typedef limb felem[NLIMBS]; /* limbs in little-endian order */
+
+static const limb kBottom28Bits = 0xfffffff;  /* mask for a 28-bit limb */
+static const limb kBottom29Bits = 0x1fffffff; /* mask for a 29-bit limb */
+
+/* kOne is the number 1 as an felem. It's 2**257 mod p split up into 29 and
+ * 28-bit words (i.e. 1 in Montgomery form, since R = 2**257).
+ */
+static const felem kOne = {
+    2, 0, 0, 0xffff800,
+    0x1fffffff, 0xfffffff, 0x1fbfffff, 0x1ffffff,
+    0
+};
+static const felem kZero = { 0 }; /* all limbs zero */
+static const felem kP = {         /* limbs of p; see felem_is_zero_vartime */
+    0x1fffffff, 0xfffffff, 0x1fffffff, 0x3ff,
+    0, 0, 0x200000, 0xf000000,
+    0xfffffff
+};
+static const felem k2P = {        /* limbs of 2p; see felem_is_zero_vartime */
+    0x1ffffffe, 0xfffffff, 0x1fffffff, 0x7ff,
+    0, 0, 0x400000, 0xe000000,
+    0x1fffffff
+};
+
+/* kPrecomputed contains precomputed values to aid the calculation of scalar
+ * multiples of the base point, G. It's actually two, equal length, tables
+ * concatenated.
+ *
+ * The first table contains (x,y) felem pairs for the 15 non-zero multiples of
+ * the base point, G, listed below (the all-zero entry 0 is not stored).
+ *
+ *   Index  |  Index (binary) | Value
+ *       0  |           0000  | 0G (all zeros, omitted)
+ *       1  |           0001  | G
+ *       2  |           0010  | 2**64G
+ *       3  |           0011  | 2**64G + G
+ *       4  |           0100  | 2**128G
+ *       5  |           0101  | 2**128G + G
+ *       6  |           0110  | 2**128G + 2**64G
+ *       7  |           0111  | 2**128G + 2**64G + G
+ *       8  |           1000  | 2**192G
+ *       9  |           1001  | 2**192G + G
+ *      10  |           1010  | 2**192G + 2**64G
+ *      11  |           1011  | 2**192G + 2**64G + G
+ *      12  |           1100  | 2**192G + 2**128G
+ *      13  |           1101  | 2**192G + 2**128G + G
+ *      14  |           1110  | 2**192G + 2**128G + 2**64G
+ *      15  |           1111  | 2**192G + 2**128G + 2**64G + G
+ *
+ * The second table follows the same style, but the terms are 2**32G,
+ * 2**96G, 2**160G, 2**224G.
+ *
+ * This is ~2KB of data.
+ */
+static const limb kPrecomputed[NLIMBS * 2 * 15 * 2] = {
+    0x11522878, 0xe730d41, 0xdb60179, 0x4afe2ff, 0x12883add, 0xcaddd88, 0x119e7edc, 0xd4a6eab, 0x3120bee,
+    0x1d2aac15, 0xf25357c, 0x19e45cdd, 0x5c721d0, 0x1992c5a5, 0xa237487, 0x154ba21, 0x14b10bb, 0xae3fe3,
+    0xd41a576, 0x922fc51, 0x234994f, 0x60b60d3, 0x164586ae, 0xce95f18, 0x1fe49073, 0x3fa36cc, 0x5ebcd2c,
+    0xb402f2f, 0x15c70bf, 0x1561925c, 0x5a26704, 0xda91e90, 0xcdc1c7f, 0x1ea12446, 0xe1ade1e, 0xec91f22,
+    0x26f7778, 0x566847e, 0xa0bec9e, 0x234f453, 0x1a31f21a, 0xd85e75c, 0x56c7109, 0xa267a00, 0xb57c050,
+    0x98fb57, 0xaa837cc, 0x60c0792, 0xcfa5e19, 0x61bab9e, 0x589e39b, 0xa324c5, 0x7d6dee7, 0x2976e4b,
+    0x1fc4124a, 0xa8c244b, 0x1ce86762, 0xcd61c7e, 0x1831c8e0, 0x75774e1, 0x1d96a5a9, 0x843a649, 0xc3ab0fa,
+    0x6e2e7d5, 0x7673a2a, 0x178b65e8, 0x4003e9b, 0x1a1f11c2, 0x7816ea, 0xf643e11, 0x58c43df, 0xf423fc2,
+    0x19633ffa, 0x891f2b2, 0x123c231c, 0x46add8c, 0x54700dd, 0x59e2b17, 0x172db40f, 0x83e277d, 0xb0dd609,
+    0xfd1da12, 0x35c6e52, 0x19ede20c, 0xd19e0c0, 0x97d0f40, 0xb015b19, 0x449e3f5, 0xe10c9e, 0x33ab581,
+    0x56a67ab, 0x577734d, 0x1dddc062, 0xc57b10d, 0x149b39d, 0x26a9e7b, 0xc35df9f, 0x48764cd, 0x76dbcca,
+    0xca4b366, 0xe9303ab, 0x1a7480e7, 0x57e9e81, 0x1e13eb50, 0xf466cf3, 0x6f16b20, 0x4ba3173, 0xc168c33,
+    0x15cb5439, 0x6a38e11, 0x73658bd, 0xb29564f, 0x3f6dc5b, 0x53b97e, 0x1322c4c0, 0x65dd7ff, 0x3a1e4f6,
+    0x14e614aa, 0x9246317, 0x1bc83aca, 0xad97eed, 0xd38ce4a, 0xf82b006, 0x341f077, 0xa6add89, 0x4894acd,
+    0x9f162d5, 0xf8410ef, 0x1b266a56, 0xd7f223, 0x3e0cb92, 0xe39b672, 0x6a2901a, 0x69a8556, 0x7e7c0,
+    0x9b7d8d3, 0x309a80, 0x1ad05f7f, 0xc2fb5dd, 0xcbfd41d, 0x9ceb638, 0x1051825c, 0xda0cf5b, 0x812e881,
+    0x6f35669, 0x6a56f2c, 0x1df8d184, 0x345820, 0x1477d477, 0x1645db1, 0xbe80c51, 0xc22be3e, 0xe35e65a,
+    0x1aeb7aa0, 0xc375315, 0xf67bc99, 0x7fdd7b9, 0x191fc1be, 0x61235d, 0x2c184e9, 0x1c5a839, 0x47a1e26,
+    0xb7cb456, 0x93e225d, 0x14f3c6ed, 0xccc1ac9, 0x17fe37f3, 0x4988989, 0x1a90c502, 0x2f32042, 0xa17769b,
+    0xafd8c7c, 0x8191c6e, 0x1dcdb237, 0x16200c0, 0x107b32a1, 0x66c08db, 0x10d06a02, 0x3fc93, 0x5620023,
+    0x16722b27, 0x68b5c59, 0x270fcfc, 0xfad0ecc, 0xe5de1c2, 0xeab466b, 0x2fc513c, 0x407f75c, 0xbaab133,
+    0x9705fe9, 0xb88b8e7, 0x734c993, 0x1e1ff8f, 0x19156970, 0xabd0f00, 0x10469ea7, 0x3293ac0, 0xcdc98aa,
+    0x1d843fd, 0xe14bfe8, 0x15be825f, 0x8b5212, 0xeb3fb67, 0x81cbd29, 0xbc62f16, 0x2b6fcc7, 0xf5a4e29,
+    0x13560b66, 0xc0b6ac2, 0x51ae690, 0xd41e271, 0xf3e9bd4, 0x1d70aab, 0x1029f72, 0x73e1c35, 0xee70fbc,
+    0xad81baf, 0x9ecc49a, 0x86c741e, 0xfe6be30, 0x176752e7, 0x23d416, 0x1f83de85, 0x27de188, 0x66f70b8,
+    0x181cd51f, 0x96b6e4c, 0x188f2335, 0xa5df759, 0x17a77eb6, 0xfeb0e73, 0x154ae914, 0x2f3ec51, 0x3826b59,
+    0xb91f17d, 0x1c72949, 0x1362bf0a, 0xe23fddf, 0xa5614b0, 0xf7d8f, 0x79061, 0x823d9d2, 0x8213f39,
+    0x1128ae0b, 0xd095d05, 0xb85c0c2, 0x1ecb2ef, 0x24ddc84, 0xe35e901, 0x18411a4a, 0xf5ddc3d, 0x3786689,
+    0x52260e8, 0x5ae3564, 0x542b10d, 0x8d93a45, 0x19952aa4, 0x996cc41, 0x1051a729, 0x4be3499, 0x52b23aa,
+    0x109f307e, 0x6f5b6bb, 0x1f84e1e7, 0x77a0cfa, 0x10c4df3f, 0x25a02ea, 0xb048035, 0xe31de66, 0xc6ecaa3,
+    0x28ea335, 0x2886024, 0x1372f020, 0xf55d35, 0x15e4684c, 0xf2a9e17, 0x1a4a7529, 0xcb7beb1, 0xb2a78a1,
+    0x1ab21f1f, 0x6361ccf, 0x6c9179d, 0xb135627, 0x1267b974, 0x4408bad, 0x1cbff658, 0xe3d6511, 0xc7d76f,
+    0x1cc7a69, 0xe7ee31b, 0x54fab4f, 0x2b914f, 0x1ad27a30, 0xcd3579e, 0xc50124c, 0x50daa90, 0xb13f72,
+    0xb06aa75, 0x70f5cc6, 0x1649e5aa, 0x84a5312, 0x329043c, 0x41c4011, 0x13d32411, 0xb04a838, 0xd760d2d,
+    0x1713b532, 0xbaa0c03, 0x84022ab, 0x6bcf5c1, 0x2f45379, 0x18ae070, 0x18c9e11e, 0x20bca9a, 0x66f496b,
+    0x3eef294, 0x67500d2, 0xd7f613c, 0x2dbbeb, 0xb741038, 0xe04133f, 0x1582968d, 0xbe985f7, 0x1acbc1a,
+    0x1a6a939f, 0x33e50f6, 0xd665ed4, 0xb4b7bd6, 0x1e5a3799, 0x6b33847, 0x17fa56ff, 0x65ef930, 0x21dc4a,
+    0x2b37659, 0x450fe17, 0xb357b65, 0xdf5efac, 0x15397bef, 0x9d35a7f, 0x112ac15f, 0x624e62e, 0xa90ae2f,
+    0x107eecd2, 0x1f69bbe, 0x77d6bce, 0x5741394, 0x13c684fc, 0x950c910, 0x725522b, 0xdc78583, 0x40eeabb,
+    0x1fde328a, 0xbd61d96, 0xd28c387, 0x9e77d89, 0x12550c40, 0x759cb7d, 0x367ef34, 0xae2a960, 0x91b8bdc,
+    0x93462a9, 0xf469ef, 0xb2e9aef, 0xd2ca771, 0x54e1f42, 0x7aaa49, 0x6316abb, 0x2413c8e, 0x5425bf9,
+    0x1bed3e3a, 0xf272274, 0x1f5e7326, 0x6416517, 0xea27072, 0x9cedea7, 0x6e7633, 0x7c91952, 0xd806dce,
+    0x8e2a7e1, 0xe421e1a, 0x418c9e1, 0x1dbc890, 0x1b395c36, 0xa1dc175, 0x1dc4ef73, 0x8956f34, 0xe4b5cf2,
+    0x1b0d3a18, 0x3194a36, 0x6c2641f, 0xe44124c, 0xa2f4eaa, 0xa8c25ba, 0xf927ed7, 0x627b614, 0x7371cca,
+    0xba16694, 0x417bc03, 0x7c0a7e3, 0x9c35c19, 0x1168a205, 0x8b6b00d, 0x10e3edc9, 0x9c19bf2, 0x5882229,
+    0x1b2b4162, 0xa5cef1a, 0x1543622b, 0x9bd433e, 0x364e04d, 0x7480792, 0x5c9b5b3, 0xe85ff25, 0x408ef57,
+    0x1814cfa4, 0x121b41b, 0xd248a0f, 0x3b05222, 0x39bb16a, 0xc75966d, 0xa038113, 0xa4a1769, 0x11fbc6c,
+    0x917e50e, 0xeec3da8, 0x169d6eac, 0x10c1699, 0xa416153, 0xf724912, 0x15cd60b7, 0x4acbad9, 0x5efc5fa,
+    0xf150ed7, 0x122b51, 0x1104b40a, 0xcb7f442, 0xfbb28ff, 0x6ac53ca, 0x196142cc, 0x7bf0fa9, 0x957651,
+    0x4e0f215, 0xed439f8, 0x3f46bd5, 0x5ace82f, 0x110916b6, 0x6db078, 0xffd7d57, 0xf2ecaac, 0xca86dec,
+    0x15d6b2da, 0x965ecc9, 0x1c92b4c2, 0x1f3811, 0x1cb080f5, 0x2d8b804, 0x19d1c12d, 0xf20bd46, 0x1951fa7,
+    0xa3656c3, 0x523a425, 0xfcd0692, 0xd44ddc8, 0x131f0f5b, 0xaf80e4a, 0xcd9fc74, 0x99bb618, 0x2db944c,
+    0xa673090, 0x1c210e1, 0x178c8d23, 0x1474383, 0x10b8743d, 0x985a55b, 0x2e74779, 0x576138, 0x9587927,
+    0x133130fa, 0xbe05516, 0x9f4d619, 0xbb62570, 0x99ec591, 0xd9468fe, 0x1d07782d, 0xfc72e0b, 0x701b298,
+    0x1863863b, 0x85954b8, 0x121a0c36, 0x9e7fedf, 0xf64b429, 0x9b9d71e, 0x14e2f5d8, 0xf858d3a, 0x942eea8,
+    0xda5b765, 0x6edafff, 0xa9d18cc, 0xc65e4ba, 0x1c747e86, 0xe4ea915, 0x1981d7a1, 0x8395659, 0x52ed4e2,
+    0x87d43b7, 0x37ab11b, 0x19d292ce, 0xf8d4692, 0x18c3053f, 0x8863e13, 0x4c146c0, 0x6bdf55a, 0x4e4457d,
+    0x16152289, 0xac78ec2, 0x1a59c5a2, 0x2028b97, 0x71c2d01, 0x295851f, 0x404747b, 0x878558d, 0x7d29aa4,
+    0x13d8341f, 0x8daefd7, 0x139c972d, 0x6b7ea75, 0xd4a9dde, 0xff163d8, 0x81d55d7, 0xa5bef68, 0xb7b30d8,
+    0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f51951, 0x9d0c177, 0x1c49a78,
+};
+
+/* Field element operations:
+ */
+
+/* NON_ZERO_TO_ALL_ONES returns:
+ *   0xffffffff for 0 < x <= 2**31
+ *   0 for x == 0 or x > 2**31.
+ * It is branch-free: ((x - 1) >> 31) is 0 only in the first case, and the
+ * final "- 1" then wraps to all ones. x must be a u32 or an equivalent type
+ * such as limb. */
+#define NON_ZERO_TO_ALL_ONES(x) ((((u32)(x)-1) >> 31) - 1)
+
+/* felem_reduce_carry adds a multiple of p in order to cancel |carry|,
+ * which is a term at 2**257. For carry == 1 the amounts added to the limbs
+ * are exactly kOne (2**257 mod p); carry_mask zeroes them when carry == 0.
+ * On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28.
+ * On exit: inout[0,2,...] < 2**30, inout[1,3,...] < 2**29.
+ */
+static void
+felem_reduce_carry(felem inout, limb carry)
+{
+    const u32 carry_mask = NON_ZERO_TO_ALL_ONES(carry);
+
+    inout[0] += carry << 1;
+    inout[3] += 0x10000000 & carry_mask;
+    /* carry < 2**3 thus (carry << 11) < 2**14 and we added 2**28 in the
+     * previous line therefore this doesn't underflow.
+     */
+    inout[3] -= carry << 11;
+    inout[4] += (0x20000000 - 1) & carry_mask;
+    inout[5] += (0x10000000 - 1) & carry_mask;
+    inout[6] += (0x20000000 - 1) & carry_mask;
+    inout[6] -= carry << 22;
+    /* This may underflow if carry is non-zero but, if so, we'll fix it in the
+     * next line.
+     */
+    inout[7] -= 1 & carry_mask;
+    inout[7] += carry << 25;
+}
+
+/* felem_sum computes out = in + in2 and renormalises the limbs.
+ *
+ * On entry, in[i]+in2[i] must not overflow a 32-bit word.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29 */
+static void
+felem_sum(felem out, const felem in, const felem in2)
+{
+    limb acc, carry = 0;
+    unsigned int i;
+
+    /* Limbs 0..7 are processed as (29-bit, 28-bit) pairs. */
+    for (i = 0; i + 1 < NLIMBS; i += 2) {
+        acc = in[i] + in2[i] + carry;
+        carry = acc >> 29;
+        out[i] = acc & kBottom29Bits;
+
+        acc = in[i + 1] + in2[i + 1] + carry;
+        carry = acc >> 28;
+        out[i + 1] = acc & kBottom28Bits;
+    }
+
+    /* The top limb (index 8) is 29 bits wide and has no odd partner. */
+    acc = in[NLIMBS - 1] + in2[NLIMBS - 1] + carry;
+    carry = acc >> 29;
+    out[NLIMBS - 1] = acc & kBottom29Bits;
+
+    felem_reduce_carry(out, carry);
+}
+
+#define two31m3 ((((limb)1) << 31) - (((limb)1) << 3))
+#define two30m2 ((((limb)1) << 30) - (((limb)1) << 2))
+#define two30p13m2 ((((limb)1) << 30) + (((limb)1) << 13) - (((limb)1) << 2))
+#define two31m2 ((((limb)1) << 31) - (((limb)1) << 2))
+#define two31p24m2 ((((limb)1) << 31) + (((limb)1) << 24) - (((limb)1) << 2))
+#define two30m27m2 ((((limb)1) << 30) - (((limb)1) << 27) - (((limb)1) << 2))
+
+/* zero31 is 0 mod p. felem_diff adds it limb by limb so that in[i] - in2[i]
+ * cannot go negative; the macros above spell each limb out in powers of two. */
+static const felem zero31 = {
+    two31m3, two30m2, two31m2, two30p13m2,
+    two31m2, two30m2, two31p24m2, two30m27m2,
+    two31m2
+};
+
+/* felem_diff computes out = in - in2 and renormalises the limbs.
+ *
+ * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
+ *           in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ */
+static void
+felem_diff(felem out, const felem in, const felem in2)
+{
+    limb acc, carry = 0;
+    unsigned int i;
+
+    /* Limbs 0..7 are processed as (29-bit, 28-bit) pairs; zero31[i] is
+     * added to every limb so the subtraction cannot go negative. */
+    for (i = 0; i + 1 < NLIMBS; i += 2) {
+        acc = in[i] - in2[i] + zero31[i] + carry;
+        carry = acc >> 29;
+        out[i] = acc & kBottom29Bits;
+
+        acc = in[i + 1] - in2[i + 1] + zero31[i + 1] + carry;
+        carry = acc >> 28;
+        out[i + 1] = acc & kBottom28Bits;
+    }
+
+    /* The top limb (index 8) is 29 bits wide and has no odd partner to
+     * pair with, so it is handled on its own. */
+    acc = in[NLIMBS - 1] - in2[NLIMBS - 1] + zero31[NLIMBS - 1] + carry;
+    carry = acc >> 29;
+    out[NLIMBS - 1] = acc & kBottom29Bits;
+
+    felem_reduce_carry(out, carry);
+}
+
+/* felem_reduce_degree sets out = tmp/R mod p where tmp contains 64-bit words
+ * with the same 29,28,... bit positions as an felem. tmp is only read.
+ *
+ * The values in felems are in Montgomery form: x*R mod p where R = 2**257.
+ * Since we just multiplied two Montgomery values together, the result is
+ * x*y*R*R mod p. We wish to divide by R in order for the result also to be
+ * in Montgomery form.
+ *
+ * On entry: tmp[i] < 2**64
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29
+ */
+static void
+felem_reduce_degree(felem out, u64 tmp[17])
+{
+    /* The following table may be helpful when reading this code:
+     *
+     * Limb number:   0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10...
+     * Width (bits):  29| 28| 29| 28| 29| 28| 29| 28| 29| 28| 29
+     * Start bit:     0 | 29| 57| 86|114|143|171|200|228|257|285
+     *   (odd phase): 0 | 28| 57| 85|114|142|171|199|228|256|285
+     */
+    limb tmp2[18], carry, x, xMask; /* tmp2: tmp repacked into 29/28-bit limbs */
+    unsigned int i;
+
+    /* tmp contains 64-bit words with the same 29,28,29-bit positions as an
+     * felem. So the top of an element of tmp might overlap with another
+     * element two positions down. The following loop eliminates this
+     * overlap.
+     */
+    tmp2[0] = tmp[0] & kBottom29Bits;
+
+    /* In the following we use "(limb) tmp[x]" and "(limb) (tmp[x]>>32)" to try
+     * and hint to the compiler that it can do a single-word shift by selecting
+     * the right register rather than doing a double-word shift and truncating
+     * afterwards.
+     */
+    tmp2[1] = ((limb)tmp[0]) >> 29;
+    tmp2[1] |= (((limb)(tmp[0] >> 32)) << 3) & kBottom28Bits;
+    tmp2[1] += ((limb)tmp[1]) & kBottom28Bits;
+    carry = tmp2[1] >> 28;
+    tmp2[1] &= kBottom28Bits;
+
+    for (i = 2; i < 17; i++) {
+        tmp2[i] = ((limb)(tmp[i - 2] >> 32)) >> 25;
+        tmp2[i] += ((limb)(tmp[i - 1])) >> 28;
+        tmp2[i] += (((limb)(tmp[i - 1] >> 32)) << 4) & kBottom29Bits;
+        tmp2[i] += ((limb)tmp[i]) & kBottom29Bits;
+        tmp2[i] += carry;
+        carry = tmp2[i] >> 29;
+        tmp2[i] &= kBottom29Bits;
+
+        i++;
+        if (i == 17)
+            break;
+        tmp2[i] = ((limb)(tmp[i - 2] >> 32)) >> 25;
+        tmp2[i] += ((limb)(tmp[i - 1])) >> 29;
+        tmp2[i] += (((limb)(tmp[i - 1] >> 32)) << 3) & kBottom28Bits;
+        tmp2[i] += ((limb)tmp[i]) & kBottom28Bits;
+        tmp2[i] += carry;
+        carry = tmp2[i] >> 28;
+        tmp2[i] &= kBottom28Bits;
+    }
+
+    tmp2[17] = ((limb)(tmp[15] >> 32)) >> 25;
+    tmp2[17] += ((limb)(tmp[16])) >> 29;
+    tmp2[17] += (((limb)(tmp[16] >> 32)) << 3);
+    tmp2[17] += carry;
+
+    /* Montgomery elimination of terms:
+     *
+     * Since R is 2**257, we can divide by R with a bitwise shift if we can
+     * ensure that the right-most 257 bits are all zero. We can make that true
+     * by adding multiples of p without affecting the value.
+     *
+     * So we eliminate limbs from right to left. Since the bottom 29 bits of p
+     * are all ones, then by adding tmp2[0]*p to tmp2 we'll make tmp2[0] == 0.
+     * We can do that for 8 further limbs and then right shift to eliminate the
+     * extra factor of R.
+     */
+    for (i = 0;; i += 2) {
+        tmp2[i + 1] += tmp2[i] >> 29;
+        x = tmp2[i] & kBottom29Bits;
+        xMask = NON_ZERO_TO_ALL_ONES(x);
+        tmp2[i] = 0;
+
+        /* The bounds calculations for this loop are tricky. Each iteration of
+         * the loop eliminates two words by adding values to words to their
+         * right.
+         *
+         * The following table contains the amounts added to each word (as an
+         * offset from the value of i at the top of the loop). The amounts are
+         * accounted for from the first and second half of the loop separately
+         * and are written as, for example, 28 to mean a value <2**28.
+         *
+         * Word:                   3   4   5   6   7   8   9   10
+         * Added in top half:     28  11      29  21  29  28
+         *                                        28  29
+         *                                            29
+         * Added in bottom half:      29  10      28  21  28   28
+         *                                            29
+         *
+         * The value that is currently offset 7 will be offset 5 for the next
+         * iteration and then offset 3 for the iteration after that. Therefore
+         * the total value added will be the values added at 7, 5 and 3.
+         *
+         * The following table accumulates these values. The sums at the bottom
+         * are written as, for example, 29+28, to mean a value < 2**29+2**28.
+         *
+         * Word:                   3   4   5   6   7   8   9  10  11  12  13
+         *                        28  11  10  29  21  29  28  28  28  28  28
+         *                            29  28  11  28  29  28  29  28  29  28
+         *                                    29  28  21  21  29  21  29  21
+         *                                        10  29  28  21  28  21  28
+         *                                        28  29  28  29  28  29  28
+         *                                            11  10  29  10  29  10
+         *                                            29  28  11  28  11
+         *                                                    29      29
+         *                        --------------------------------------------
+         *                                                30+ 31+ 30+ 31+ 30+
+         *                                                28+ 29+ 28+ 29+ 21+
+         *                                                21+ 28+ 21+ 28+ 10
+         *                                                10  21+ 10  21+
+         *                                                    11      11
+         *
+         * So the greatest amount is added to tmp2[10] and tmp2[12]. If
+         * tmp2[10/12] has an initial value of <2**29, then the maximum value
+         * will be < 2**31 + 2**30 + 2**28 + 2**21 + 2**11, which is < 2**32,
+         * as required.
+         */
+        tmp2[i + 3] += (x << 10) & kBottom28Bits;
+        tmp2[i + 4] += (x >> 18);
+
+        tmp2[i + 6] += (x << 21) & kBottom29Bits;
+        tmp2[i + 7] += x >> 8;
+
+        /* At position 200, which is the starting bit position for word 7, we
+         * have a factor of 0xf000000 = 2**28 - 2**24.
+         */
+        tmp2[i + 7] += 0x10000000 & xMask;
+        /* Word 7 is 28 bits wide, so the 2**28 term exactly hits word 8. */
+        tmp2[i + 8] += (x - 1) & xMask;
+        tmp2[i + 7] -= (x << 24) & kBottom28Bits;
+        tmp2[i + 8] -= x >> 4;
+
+        tmp2[i + 8] += 0x20000000 & xMask;
+        tmp2[i + 8] -= x;
+        tmp2[i + 8] += (x << 28) & kBottom29Bits;
+        tmp2[i + 9] += ((x >> 1) - 1) & xMask;
+
+        if (i + 1 == NLIMBS) /* limb 8 was the ninth and last to eliminate */
+            break;
+        tmp2[i + 2] += tmp2[i + 1] >> 28;
+        x = tmp2[i + 1] & kBottom28Bits;
+        xMask = NON_ZERO_TO_ALL_ONES(x);
+        tmp2[i + 1] = 0;
+
+        tmp2[i + 4] += (x << 11) & kBottom29Bits;
+        tmp2[i + 5] += (x >> 18);
+
+        tmp2[i + 7] += (x << 21) & kBottom28Bits;
+        tmp2[i + 8] += x >> 7;
+
+        /* At position 199, which is the starting bit of the 8th word when
+         * dealing with a context starting on an odd word, we have a factor of
+         * 0x1e000000 = 2**29 - 2**25. Since we have not updated i, the 8th
+         * word from i+1 is i+8.
+         */
+        tmp2[i + 8] += 0x20000000 & xMask;
+        tmp2[i + 9] += (x - 1) & xMask;
+        tmp2[i + 8] -= (x << 25) & kBottom29Bits;
+        tmp2[i + 9] -= x >> 4;
+
+        tmp2[i + 9] += 0x10000000 & xMask;
+        tmp2[i + 9] -= x;
+        tmp2[i + 10] += (x - 1) & xMask;
+    }
+
+    /* We merge the right shift with a carry chain. The words above 2**257 have
+     * widths of 28,29,... which we need to correct when copying them down.
+     */
+    carry = 0;
+    for (i = 0; i < 8; i++) {
+        /* The maximum value of tmp2[i + 9] occurs on the first iteration and
+         * is < 2**30+2**29+2**28. Adding 2**29 (from tmp2[i + 10]) is
+         * therefore safe.
+         */
+        out[i] = tmp2[i + 9];
+        out[i] += carry;
+        out[i] += (tmp2[i + 10] << 28) & kBottom29Bits;
+        carry = out[i] >> 29;
+        out[i] &= kBottom29Bits;
+
+        i++;
+        out[i] = tmp2[i + 9] >> 1;
+        out[i] += carry;
+        carry = out[i] >> 28;
+        out[i] &= kBottom28Bits;
+    }
+
+    out[8] = tmp2[17]; /* last word: tmp2 has no element 18 to take a bit from */
+    out[8] += carry;
+    carry = out[8] >> 29;
+    out[8] &= kBottom29Bits;
+
+    felem_reduce_carry(out, carry);
+}
+
+/* felem_square sets out=in*in. Cross terms in[i]*in[j] (i != j) are doubled,
+ * and any product of two odd-numbered limbs is doubled once more because it
+ * lands one bit above the start of limb i+j (e.g. 29 + 29 = 58 versus 57).
+ * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. */
+static void
+felem_square(felem out, const felem in)
+{
+    u64 tmp[17];
+
+    tmp[0] = ((u64)in[0]) * in[0];
+    tmp[1] = ((u64)in[0]) * (in[1] << 1);
+    tmp[2] = ((u64)in[0]) * (in[2] << 1) +
+             ((u64)in[1]) * (in[1] << 1);
+    tmp[3] = ((u64)in[0]) * (in[3] << 1) +
+             ((u64)in[1]) * (in[2] << 1);
+    tmp[4] = ((u64)in[0]) * (in[4] << 1) +
+             ((u64)in[1]) * (in[3] << 2) +
+             ((u64)in[2]) * in[2];
+    tmp[5] = ((u64)in[0]) * (in[5] << 1) +
+             ((u64)in[1]) * (in[4] << 1) +
+             ((u64)in[2]) * (in[3] << 1);
+    tmp[6] = ((u64)in[0]) * (in[6] << 1) +
+             ((u64)in[1]) * (in[5] << 2) +
+             ((u64)in[2]) * (in[4] << 1) +
+             ((u64)in[3]) * (in[3] << 1);
+    tmp[7] = ((u64)in[0]) * (in[7] << 1) +
+             ((u64)in[1]) * (in[6] << 1) +
+             ((u64)in[2]) * (in[5] << 1) +
+             ((u64)in[3]) * (in[4] << 1);
+    /* tmp[8] has the greatest value of 2**61 + 2**60 + 2**61 + 2**60 + 2**60,
+     * which is < 2**64 as required.
+     */
+    tmp[8] = ((u64)in[0]) * (in[8] << 1) +
+             ((u64)in[1]) * (in[7] << 2) +
+             ((u64)in[2]) * (in[6] << 1) +
+             ((u64)in[3]) * (in[5] << 2) +
+             ((u64)in[4]) * in[4];
+    tmp[9] = ((u64)in[1]) * (in[8] << 1) +
+             ((u64)in[2]) * (in[7] << 1) +
+             ((u64)in[3]) * (in[6] << 1) +
+             ((u64)in[4]) * (in[5] << 1);
+    tmp[10] = ((u64)in[2]) * (in[8] << 1) +
+              ((u64)in[3]) * (in[7] << 2) +
+              ((u64)in[4]) * (in[6] << 1) +
+              ((u64)in[5]) * (in[5] << 1);
+    tmp[11] = ((u64)in[3]) * (in[8] << 1) +
+              ((u64)in[4]) * (in[7] << 1) +
+              ((u64)in[5]) * (in[6] << 1);
+    tmp[12] = ((u64)in[4]) * (in[8] << 1) +
+              ((u64)in[5]) * (in[7] << 2) +
+              ((u64)in[6]) * in[6];
+    tmp[13] = ((u64)in[5]) * (in[8] << 1) +
+              ((u64)in[6]) * (in[7] << 1);
+    tmp[14] = ((u64)in[6]) * (in[8] << 1) +
+              ((u64)in[7]) * (in[7] << 1);
+    tmp[15] = ((u64)in[7]) * (in[8] << 1);
+    tmp[16] = ((u64)in[8]) * in[8];
+
+    felem_reduce_degree(out, tmp); /* divide by R and bring back to felem form */
+}
+
+/* felem_mul computes out = in * in2; felem_reduce_degree then divides by R so
+ * the result stays in Montgomery form. A partial product of two odd-numbered
+ * limbs is doubled as it lands one bit above the limb it is summed into.
+ * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
+ *           in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. */
+static void
+felem_mul(felem out, const felem in, const felem in2)
+{
+    u64 wide[17];
+
+    wide[0] = (u64)in[0] * in2[0];
+    wide[1] = (u64)in[0] * in2[1] +
+              (u64)in[1] * in2[0];
+    wide[2] = (u64)in[0] * in2[2] +
+              (u64)in[1] * (in2[1] << 1) +
+              (u64)in[2] * in2[0];
+    wide[3] = (u64)in[0] * in2[3] +
+              (u64)in[1] * in2[2] +
+              (u64)in[2] * in2[1] +
+              (u64)in[3] * in2[0];
+    wide[4] = (u64)in[0] * in2[4] +
+              (u64)in[1] * (in2[3] << 1) +
+              (u64)in[2] * in2[2] +
+              (u64)in[3] * (in2[1] << 1) +
+              (u64)in[4] * in2[0];
+    wide[5] = (u64)in[0] * in2[5] +
+              (u64)in[1] * in2[4] +
+              (u64)in[2] * in2[3] +
+              (u64)in[3] * in2[2] +
+              (u64)in[4] * in2[1] +
+              (u64)in[5] * in2[0];
+    wide[6] = (u64)in[0] * in2[6] +
+              (u64)in[1] * (in2[5] << 1) +
+              (u64)in[2] * in2[4] +
+              (u64)in[3] * (in2[3] << 1) +
+              (u64)in[4] * in2[2] +
+              (u64)in[5] * (in2[1] << 1) +
+              (u64)in[6] * in2[0];
+    wide[7] = (u64)in[0] * in2[7] +
+              (u64)in[1] * in2[6] +
+              (u64)in[2] * in2[5] +
+              (u64)in[3] * in2[4] +
+              (u64)in[4] * in2[3] +
+              (u64)in[5] * in2[2] +
+              (u64)in[6] * in2[1] +
+              (u64)in[7] * in2[0];
+    /* wide[8] sums the most terms and so has the greatest value, but it
+     * still fits in 64 bits. See the bound worked out in felem_square.
+     */
+    wide[8] = (u64)in[0] * in2[8] +
+              (u64)in[1] * (in2[7] << 1) +
+              (u64)in[2] * in2[6] +
+              (u64)in[3] * (in2[5] << 1) +
+              (u64)in[4] * in2[4] +
+              (u64)in[5] * (in2[3] << 1) +
+              (u64)in[6] * in2[2] +
+              (u64)in[7] * (in2[1] << 1) +
+              (u64)in[8] * in2[0];
+    wide[9] = (u64)in[1] * in2[8] +
+              (u64)in[2] * in2[7] +
+              (u64)in[3] * in2[6] +
+              (u64)in[4] * in2[5] +
+              (u64)in[5] * in2[4] +
+              (u64)in[6] * in2[3] +
+              (u64)in[7] * in2[2] +
+              (u64)in[8] * in2[1];
+    wide[10] = (u64)in[2] * in2[8] +
+               (u64)in[3] * (in2[7] << 1) +
+               (u64)in[4] * in2[6] +
+               (u64)in[5] * (in2[5] << 1) +
+               (u64)in[6] * in2[4] +
+               (u64)in[7] * (in2[3] << 1) +
+               (u64)in[8] * in2[2];
+    wide[11] = (u64)in[3] * in2[8] +
+               (u64)in[4] * in2[7] +
+               (u64)in[5] * in2[6] +
+               (u64)in[6] * in2[5] +
+               (u64)in[7] * in2[4] +
+               (u64)in[8] * in2[3];
+    wide[12] = (u64)in[4] * in2[8] +
+               (u64)in[5] * (in2[7] << 1) +
+               (u64)in[6] * in2[6] +
+               (u64)in[7] * (in2[5] << 1) +
+               (u64)in[8] * in2[4];
+    wide[13] = (u64)in[5] * in2[8] +
+               (u64)in[6] * in2[7] +
+               (u64)in[7] * in2[6] +
+               (u64)in[8] * in2[5];
+    wide[14] = (u64)in[6] * in2[8] +
+               (u64)in[7] * (in2[7] << 1) +
+               (u64)in[8] * in2[6];
+    wide[15] = (u64)in[7] * in2[8] +
+               (u64)in[8] * in2[7];
+    wide[16] = (u64)in[8] * in2[8];
+
+    felem_reduce_degree(out, wide);
+}
+
+static void
+felem_assign(felem out, const felem in)
+{
+    memcpy(out, in, NLIMBS * sizeof(limb)); /* copy every limb of in to out */
+}
+
+/* felem_inv calculates |out| = |in|^{-1} by raising |in| to the power p-2.
+ * The trailing comments give the exponent of |in| held after each step.
+ * Based on Fermat's Little Theorem:
+ *   a^p = a (mod p)
+ *   a^{p-1} = 1 (mod p)
+ *   a^{p-2} = a^{-1} (mod p)
+ */
+static void
+felem_inv(felem out, const felem in)
+{
+    felem ftmp, ftmp2;
+    /* e2..e32 hold |in|^{2^I - 1}; e64 holds |in|^{2^64 - 2^32} */
+    felem e2, e4, e8, e16, e32, e64;
+    unsigned int i;
+
+    felem_square(ftmp, in);    /* 2^1 */
+    felem_mul(ftmp, in, ftmp); /* 2^2 - 2^0 */
+    felem_assign(e2, ftmp);
+    felem_square(ftmp, ftmp);  /* 2^3 - 2^1 */
+    felem_square(ftmp, ftmp);  /* 2^4 - 2^2 */
+    felem_mul(ftmp, ftmp, e2); /* 2^4 - 2^0 */
+    felem_assign(e4, ftmp);
+    felem_square(ftmp, ftmp);  /* 2^5 - 2^1 */
+    felem_square(ftmp, ftmp);  /* 2^6 - 2^2 */
+    felem_square(ftmp, ftmp);  /* 2^7 - 2^3 */
+    felem_square(ftmp, ftmp);  /* 2^8 - 2^4 */
+    felem_mul(ftmp, ftmp, e4); /* 2^8 - 2^0 */
+    felem_assign(e8, ftmp);
+    for (i = 0; i < 8; i++) {
+        felem_square(ftmp, ftmp);
+    }                          /* 2^16 - 2^8 */
+    felem_mul(ftmp, ftmp, e8); /* 2^16 - 2^0 */
+    felem_assign(e16, ftmp);
+    for (i = 0; i < 16; i++) {
+        felem_square(ftmp, ftmp);
+    }                           /* 2^32 - 2^16 */
+    felem_mul(ftmp, ftmp, e16); /* 2^32 - 2^0 */
+    felem_assign(e32, ftmp);
+    for (i = 0; i < 32; i++) {
+        felem_square(ftmp, ftmp);
+    } /* 2^64 - 2^32 */
+    felem_assign(e64, ftmp);
+    felem_mul(ftmp, ftmp, in); /* 2^64 - 2^32 + 2^0 */
+    for (i = 0; i < 192; i++) {
+        felem_square(ftmp, ftmp);
+    } /* 2^256 - 2^224 + 2^192 */
+
+    felem_mul(ftmp2, e64, e32); /* 2^64 - 2^0 */
+    for (i = 0; i < 16; i++) {
+        felem_square(ftmp2, ftmp2);
+    }                             /* 2^80 - 2^16 */
+    felem_mul(ftmp2, ftmp2, e16); /* 2^80 - 2^0 */
+    for (i = 0; i < 8; i++) {
+        felem_square(ftmp2, ftmp2);
+    }                            /* 2^88 - 2^8 */
+    felem_mul(ftmp2, ftmp2, e8); /* 2^88 - 2^0 */
+    for (i = 0; i < 4; i++) {
+        felem_square(ftmp2, ftmp2);
+    }                            /* 2^92 - 2^4 */
+    felem_mul(ftmp2, ftmp2, e4); /* 2^92 - 2^0 */
+    felem_square(ftmp2, ftmp2);  /* 2^93 - 2^1 */
+    felem_square(ftmp2, ftmp2);  /* 2^94 - 2^2 */
+    felem_mul(ftmp2, ftmp2, e2); /* 2^94 - 2^0 */
+    felem_square(ftmp2, ftmp2);  /* 2^95 - 2^1 */
+    felem_square(ftmp2, ftmp2);  /* 2^96 - 2^2 */
+    felem_mul(ftmp2, ftmp2, in); /* 2^96 - 3 */
+
+    felem_mul(out, ftmp2, ftmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
+}
+
+/* felem_scalar_3 multiplies out by 3 in place and renormalises the limbs.
+ *
+ * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ */
+static void
+felem_scalar_3(felem out)
+{
+    limb acc, carry = 0;
+    unsigned int i;
+
+    /* Limbs 0..7 are processed as (29-bit, 28-bit) pairs. */
+    for (i = 0; i + 1 < NLIMBS; i += 2) {
+        acc = out[i] * 3 + carry;
+        carry = acc >> 29;
+        out[i] = acc & kBottom29Bits;
+
+        acc = out[i + 1] * 3 + carry;
+        carry = acc >> 28;
+        out[i + 1] = acc & kBottom28Bits;
+    }
+
+    /* The top limb (index 8) is 29 bits wide and has no odd partner. */
+    acc = out[NLIMBS - 1] * 3 + carry;
+    carry = acc >> 29;
+    out[NLIMBS - 1] = acc & kBottom29Bits;
+
+    felem_reduce_carry(out, carry);
+}
+
+/* felem_scalar_4 multiplies out by 4 in place and renormalises the limbs.
+ *
+ * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ */
+static void
+felem_scalar_4(felem out)
+{
+    limb hi, lo, carry = 0;
+    unsigned int i;
+
+    /* hi is what the shift pushes out of the limb; lo is what stays in. */
+    for (i = 0; i + 1 < NLIMBS; i += 2) {
+        hi = out[i] >> 27;
+        lo = ((out[i] << 2) & kBottom29Bits) + carry;
+        carry = hi + (lo >> 29);
+        out[i] = lo & kBottom29Bits;
+
+        hi = out[i + 1] >> 26;
+        lo = ((out[i + 1] << 2) & kBottom28Bits) + carry;
+        carry = hi + (lo >> 28);
+        out[i + 1] = lo & kBottom28Bits;
+    }
+
+    /* The top limb (index 8) is 29 bits wide and has no odd partner. */
+    hi = out[NLIMBS - 1] >> 27;
+    lo = ((out[NLIMBS - 1] << 2) & kBottom29Bits) + carry;
+    carry = hi + (lo >> 29);
+    out[NLIMBS - 1] = lo & kBottom29Bits;
+
+    felem_reduce_carry(out, carry);
+}
+
+/* felem_scalar_8 multiplies out by 8 in place and renormalises the limbs.
+ *
+ * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ */
+static void
+felem_scalar_8(felem out)
+{
+    limb hi, lo, carry = 0;
+    unsigned int i;
+
+    /* hi is what the shift pushes out of the limb; lo is what stays in. */
+    for (i = 0; i + 1 < NLIMBS; i += 2) {
+        hi = out[i] >> 26;
+        lo = ((out[i] << 3) & kBottom29Bits) + carry;
+        carry = hi + (lo >> 29);
+        out[i] = lo & kBottom29Bits;
+
+        hi = out[i + 1] >> 25;
+        lo = ((out[i + 1] << 3) & kBottom28Bits) + carry;
+        carry = hi + (lo >> 28);
+        out[i + 1] = lo & kBottom28Bits;
+    }
+
+    /* The top limb (index 8) is 29 bits wide and has no odd partner. */
+    hi = out[NLIMBS - 1] >> 26;
+    lo = ((out[NLIMBS - 1] << 3) & kBottom29Bits) + carry;
+    carry = hi + (lo >> 29);
+    out[NLIMBS - 1] = lo & kBottom29Bits;
+
+    felem_reduce_carry(out, carry);
+}
+
+/* felem_is_zero_vartime returns 1 iff |in| == 0. Its running time varies
+ * with the value of |in|. */
+static char
+felem_is_zero_vartime(const felem in)
+{
+    limb spill;
+    int idx;
+    limb reduced[NLIMBS];
+    felem_assign(reduced, in);
+
+    /* First, bring reduced to a minimal form: keep propagating carries
+     * until a pass leaves nothing to carry out of the top limb.
+     */
+    for (;;) {
+        spill = 0;
+        for (idx = 0; idx < NLIMBS; idx++) {
+            reduced[idx] += spill;
+            if (idx & 1) {
+                spill = reduced[idx] >> 28;
+                reduced[idx] &= kBottom28Bits;
+            } else {
+                spill = reduced[idx] >> 29;
+                reduced[idx] &= kBottom29Bits;
+            }
+        }
+
+        felem_reduce_carry(reduced, spill);
+        if (!spill)
+            break;
+    }
+
+    /* reduced < 2**257, so the only possible zero values are 0, p and 2p. */
+    if (memcmp(reduced, kZero, sizeof(reduced)) == 0)
+        return 1;
+    if (memcmp(reduced, kP, sizeof(reduced)) == 0)
+        return 1;
+    return memcmp(reduced, k2P, sizeof(reduced)) == 0;
+}
+
+/* Group operations:
+ *
+ * Elements of the elliptic curve group are represented in Jacobian
+ * coordinates: (x, y, z). The Jacobian point (x, y, z) corresponds to the
+ * affine point (x', y') with x' = x/z**2 and y' = y/z**3.
+ */
+
+/* point_double sets {x_out,y_out,z_out} = 2*{x,y,z}. Each input is read for
+ * the last time before any output is written, so the outputs may alias them.
+ * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l
+ */
+static void
+point_double(felem x_out, felem y_out, felem z_out,
+             const felem x, const felem y, const felem z)
+{
+    felem delta, gamma, alpha, beta, tmp, tmp2;
+
+    felem_square(delta, z); /* delta = z^2 */
+    felem_square(gamma, y); /* gamma = y^2 */
+    felem_mul(beta, x, gamma); /* beta = x*gamma */
+
+    felem_sum(tmp, x, delta);
+    felem_diff(tmp2, x, delta);
+    felem_mul(alpha, tmp, tmp2);
+    felem_scalar_3(alpha); /* alpha = 3*(x + delta)*(x - delta) */
+
+    felem_sum(tmp, y, z);
+    felem_square(tmp, tmp);
+    felem_diff(tmp, tmp, gamma);
+    felem_diff(z_out, tmp, delta); /* z_out = (y + z)^2 - gamma - delta */
+
+    felem_scalar_4(beta); /* beta now holds 4*x*gamma */
+    felem_square(x_out, alpha);
+    felem_diff(x_out, x_out, beta);
+    felem_diff(x_out, x_out, beta); /* x_out = alpha^2 - 2*beta */
+
+    felem_diff(tmp, beta, x_out);
+    felem_mul(tmp, alpha, tmp); /* tmp = alpha*(beta - x_out) */
+    felem_square(tmp2, gamma);
+    felem_scalar_8(tmp2); /* tmp2 = 8*gamma^2 */
+    felem_diff(y_out, tmp, tmp2); /* y_out = alpha*(beta - x_out) - 8*gamma^2 */
+}
+
+/* point_add_mixed sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,1}.
+ * (i.e. the second point is affine.) y1 is read again after y_out has been
+ * written, so y_out must not alias y1.
+ * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
+ *
+ * Note that this function does not handle P+P, infinity+P nor P+infinity
+ * correctly.
+ */
+static void
+point_add_mixed(felem x_out, felem y_out, felem z_out,
+                const felem x1, const felem y1, const felem z1,
+                const felem x2, const felem y2)
+{
+    felem z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp;
+
+    felem_square(z1z1, z1); /* z1z1 = z1^2 */
+    felem_sum(tmp, z1, z1); /* tmp = 2*z1 */
+
+    felem_mul(u2, x2, z1z1); /* u2 = x2*z1^2 */
+    felem_mul(z1z1z1, z1, z1z1); /* z1z1z1 = z1^3 */
+    felem_mul(s2, y2, z1z1z1); /* s2 = y2*z1^3 */
+    felem_diff(h, u2, x1); /* h = u2 - x1 */
+    felem_sum(i, h, h);
+    felem_square(i, i); /* i = (2*h)^2 */
+    felem_mul(j, h, i); /* j = h*i */
+    felem_diff(r, s2, y1);
+    felem_sum(r, r, r); /* r = 2*(s2 - y1) */
+    felem_mul(v, x1, i); /* v = x1*i */
+
+    felem_mul(z_out, tmp, h); /* z_out = 2*z1*h */
+    felem_square(rr, r);
+    felem_diff(x_out, rr, j);
+    felem_diff(x_out, x_out, v);
+    felem_diff(x_out, x_out, v); /* x_out = r^2 - j - 2*v */
+
+    felem_diff(tmp, v, x_out);
+    felem_mul(y_out, tmp, r); /* y_out = r*(v - x_out), so far */
+    felem_mul(tmp, y1, j); /* y1 is still needed here, after y_out was written */
+    felem_diff(y_out, y_out, tmp);
+    felem_diff(y_out, y_out, tmp); /* y_out = r*(v - x_out) - 2*y1*j */
+}
+
+/* point_add sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,z2}. All inputs
+ * are read before the first output is written, so outputs may alias inputs.
+ * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
+ *
+ * Note that this function does not handle P+P, infinity+P nor P+infinity
+ * correctly.
+ */
+static void
+point_add(felem x_out, felem y_out, felem z_out,
+          const felem x1, const felem y1, const felem z1,
+          const felem x2, const felem y2, const felem z2)
+{
+    felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp;
+
+    felem_square(z1z1, z1); /* z1z1 = z1^2 */
+    felem_square(z2z2, z2); /* z2z2 = z2^2 */
+    felem_mul(u1, x1, z2z2); /* u1 = x1*z2^2 */
+
+    felem_sum(tmp, z1, z2);
+    felem_square(tmp, tmp);
+    felem_diff(tmp, tmp, z1z1);
+    felem_diff(tmp, tmp, z2z2); /* tmp = (z1 + z2)^2 - z1^2 - z2^2 */
+
+    felem_mul(z2z2z2, z2, z2z2); /* z2z2z2 = z2^3 */
+    felem_mul(s1, y1, z2z2z2); /* s1 = y1*z2^3 */
+
+    felem_mul(u2, x2, z1z1); /* u2 = x2*z1^2 */
+    felem_mul(z1z1z1, z1, z1z1); /* z1z1z1 = z1^3 */
+    felem_mul(s2, y2, z1z1z1); /* s2 = y2*z1^3 */
+    felem_diff(h, u2, u1); /* h = u2 - u1 */
+    felem_sum(i, h, h);
+    felem_square(i, i); /* i = (2*h)^2 */
+    felem_mul(j, h, i); /* j = h*i */
+    felem_diff(r, s2, s1);
+    felem_sum(r, r, r); /* r = 2*(s2 - s1) */
+    felem_mul(v, u1, i); /* v = u1*i */
+
+    felem_mul(z_out, tmp, h); /* z_out = tmp*h */
+    felem_square(rr, r);
+    felem_diff(x_out, rr, j);
+    felem_diff(x_out, x_out, v);
+    felem_diff(x_out, x_out, v); /* x_out = r^2 - j - 2*v */
+
+    felem_diff(tmp, v, x_out);
+    felem_mul(y_out, tmp, r); /* y_out = r*(v - x_out), so far */
+    felem_mul(tmp, s1, j);
+    felem_diff(y_out, y_out, tmp);
+    felem_diff(y_out, y_out, tmp); /* y_out = r*(v - x_out) - 2*s1*j */
+}
+
+/* point_add_or_double_vartime sets {x_out,y_out,z_out} = {x1,y1,z1} +
+ *                                                        {x2,y2,z2}.
+ * It branches on the operands, so its running time depends on them.
+ * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
+ *
+ * This function handles the case where {x1,y1,z1}={x2,y2,z2}.
+ */
+static void
+point_add_or_double_vartime(
+    felem x_out, felem y_out, felem z_out,
+    const felem x1, const felem y1, const felem z1,
+    const felem x2, const felem y2, const felem z2)
+{
+    felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp;
+    char x_equal, y_equal;
+
+    felem_square(z1z1, z1); /* z1z1 = z1^2 */
+    felem_square(z2z2, z2); /* z2z2 = z2^2 */
+    felem_mul(u1, x1, z2z2); /* u1 = x1*z2^2 */
+
+    felem_sum(tmp, z1, z2);
+    felem_square(tmp, tmp);
+    felem_diff(tmp, tmp, z1z1);
+    felem_diff(tmp, tmp, z2z2); /* tmp = (z1 + z2)^2 - z1^2 - z2^2 */
+
+    felem_mul(z2z2z2, z2, z2z2); /* z2z2z2 = z2^3 */
+    felem_mul(s1, y1, z2z2z2); /* s1 = y1*z2^3 */
+
+    felem_mul(u2, x2, z1z1); /* u2 = x2*z1^2 */
+    felem_mul(z1z1z1, z1, z1z1); /* z1z1z1 = z1^3 */
+    felem_mul(s2, y2, z1z1z1); /* s2 = y2*z1^3 */
+    felem_diff(h, u2, u1); /* h = u2 - u1 */
+    x_equal = felem_is_zero_vartime(h); /* h == 0: same affine x */
+    felem_sum(i, h, h);
+    felem_square(i, i); /* i = (2*h)^2 */
+    felem_mul(j, h, i); /* j = h*i */
+    felem_diff(r, s2, s1);
+    y_equal = felem_is_zero_vartime(r); /* s2 - s1 == 0: same affine y */
+    if (x_equal && y_equal) {
+        /* Same point: no output has been written yet, so the inputs are intact. */
+        point_double(x_out, y_out, z_out, x1, y1, z1);
+        return;
+    }
+    felem_sum(r, r, r); /* r = 2*(s2 - s1) */
+    felem_mul(v, u1, i); /* v = u1*i */
+
+    felem_mul(z_out, tmp, h); /* z_out = tmp*h */
+    felem_square(rr, r);
+    felem_diff(x_out, rr, j);
+    felem_diff(x_out, x_out, v);
+    felem_diff(x_out, x_out, v); /* x_out = r^2 - j - 2*v */
+
+    felem_diff(tmp, v, x_out);
+    felem_mul(y_out, tmp, r); /* y_out = r*(v - x_out), so far */
+    felem_mul(tmp, s1, j);
+    felem_diff(y_out, y_out, tmp);
+    felem_diff(y_out, y_out, tmp); /* y_out = r*(v - x_out) - 2*s1*j */
+}
+
+/* copy_conditional sets out=in when mask is 0xffffffff and leaves out
+ * unchanged when mask is 0, using the same operations in both cases.
+ *
+ * On entry: mask is either 0 or 0xffffffff.
+ */
+static void
+copy_conditional(felem out, const felem in, limb mask)
+{
+    int k = NLIMBS;
+
+    while (k-- > 0) {
+        out[k] ^= (in[k] ^ out[k]) & mask;
+    }
+}
+
+/* select_affine_point sets {out_x,out_y} to the index'th entry of table,
+ * reading every entry whatever index is. On entry: index < 16; table holds
+ * entries 1..15 as consecutive x,y limb arrays. index 0 yields {0,0}.
+ */
+static void
+select_affine_point(felem out_x, felem out_y,
+                    const limb *table, limb index)
+{
+    limb entry, k;
+
+    memset(out_x, 0, sizeof(felem));
+    memset(out_y, 0, sizeof(felem));
+
+    for (entry = 1; entry < 16; entry++, table += 2 * NLIMBS) {
+        /* Fold the four low bits of entry^index onto bit 0, then turn
+         * "equal" into all-ones and "different" into zero. */
+        limb pick = entry ^ index;
+        pick |= pick >> 2;
+        pick |= pick >> 1;
+        pick = (pick & 1) - 1;
+        for (k = 0; k < NLIMBS; k++) {
+            out_x[k] |= table[k] & pick;
+            out_y[k] |= table[NLIMBS + k] & pick;
+        }
+    }
+}
+
+/* select_jacobian_point sets {out_x,out_y,out_z} to the index'th entry of
+ * table, reading entries 1..15 whatever index is.
+ *
+ * On entry: index < 16. table holds 16 entries, each made of consecutive
+ * x, y and z limb arrays; entry 0 must be zero and is never read, so
+ * index 0 yields an all-zero result.
+ */
+static void
+select_jacobian_point(felem out_x, felem out_y, felem out_z,
+                      const limb *table, limb index)
+{
+    limb entry, k;
+    /* Entry 0 is implicitly all zero and the outputs start out zeroed, so
+     * the scan begins at entry 1.
+     */
+    const limb *row = table + 3 * NLIMBS;
+
+    memset(out_x, 0, sizeof(felem));
+    memset(out_y, 0, sizeof(felem));
+    memset(out_z, 0, sizeof(felem));
+
+    for (entry = 1; entry < 16; entry++, row += 3 * NLIMBS) {
+        /* Fold the four low bits of entry^index onto bit 0, then turn
+         * "equal" into all-ones and "different" into zero. */
+        limb pick = entry ^ index;
+        pick |= pick >> 2;
+        pick |= pick >> 1;
+        pick = (pick & 1) - 1;
+        for (k = 0; k < NLIMBS; k++) {
+            out_x[k] |= row[k] & pick;
+            out_y[k] |= row[NLIMBS + k] & pick;
+            out_z[k] |= row[2 * NLIMBS + k] & pick;
+        }
+    }
+}
+
+/* get_bit returns bit number |bit| of scalar; bit 0 is the LSB of scalar[0]. */
+static char
+get_bit(const u8 scalar[32], int bit)
+{
+    return (char)(1 & (scalar[bit >> 3] >> (bit & 7)));
+}
+
+/* scalar_base_mult sets {nx,ny,nz} = scalar*G (Jacobian; all-zero output means
+ * the point at infinity) where scalar is a little-endian number. Note that the
+ * value of scalar must be less than the order of the group.
+ */
+static void
+scalar_base_mult(felem nx, felem ny, felem nz, const u8 scalar[32])
+{
+    int i, j;
+    limb n_is_infinity_mask = -1, p_is_noninfinite_mask, mask; /* all-ones while n is still infinity */
+    u32 table_offset;
+
+    felem px, py;
+    felem tx, ty, tz;
+
+    memset(nx, 0, sizeof(felem));
+    memset(ny, 0, sizeof(felem));
+    memset(nz, 0, sizeof(felem));
+
+    /* Pass i adds the table entry indexed by scalar bits 31-i, 95-i, 159-i,
+     * 223-i, then (from the second table) bits 63-i, 127-i, 191-i, 255-i.
+     */
+    for (i = 0; i < 32; i++) {
+        if (i) {
+            point_double(nx, ny, nz, nx, ny, nz);
+        }
+        table_offset = 0;
+        for (j = 0; j <= 32; j += 32) {
+            char bit0 = get_bit(scalar, 31 - i + j);
+            char bit1 = get_bit(scalar, 95 - i + j);
+            char bit2 = get_bit(scalar, 159 - i + j);
+            char bit3 = get_bit(scalar, 223 - i + j);
+            limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3);
+
+            select_affine_point(px, py, kPrecomputed + table_offset, index);
+            table_offset += 30 * NLIMBS; /* 15 entries of x,y: step to the second table */
+
+            /* Since scalar is less than the order of the group, we know that
+             * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle
+             * below.
+             */
+            point_add_mixed(tx, ty, tz, nx, ny, nz, px, py);
+            /* The result of point_add_mixed is incorrect if {nx,ny,nz} is zero
+             * (a.k.a.  the point at infinity). We handle that situation by
+             * copying the point from the table.
+             */
+            copy_conditional(nx, px, n_is_infinity_mask);
+            copy_conditional(ny, py, n_is_infinity_mask);
+            copy_conditional(nz, kOne, n_is_infinity_mask);
+
+            /* Equally, the result is also wrong if the point from the table is
+             * zero, which happens when the index is zero. We handle that by
+             * only copying from {tx,ty,tz} to {nx,ny,nz} if index != 0.
+             */
+            p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);
+            mask = p_is_noninfinite_mask & ~n_is_infinity_mask; /* both operands were finite */
+            copy_conditional(nx, tx, mask);
+            copy_conditional(ny, ty, mask);
+            copy_conditional(nz, tz, mask);
+            /* If p was not zero, then n is now non-zero. */
+            n_is_infinity_mask &= ~p_is_noninfinite_mask;
+        }
+    }
+}
+
+/* point_to_affine maps the Jacobian point {nx,ny,nz} to affine coordinates:
+ * x_out = nx/nz**2 and y_out = ny/nz**3. If the input is the point at
+ * infinity then it returns (0, 0) in constant time. */
+static void
+point_to_affine(felem x_out, felem y_out,
+                const felem nx, const felem ny, const felem nz)
+{
+    felem zi, zi2, zi3;
+    felem_inv(zi, nz);
+    felem_square(zi2, zi);
+    felem_mul(zi3, zi, zi2);
+    felem_mul(x_out, nx, zi2);
+    felem_mul(y_out, ny, zi3);
+}
+
+/* scalar_mult sets {nx,ny,nz} = scalar*{x,y}; scalar is little-endian. */
+static void
+scalar_mult(felem nx, felem ny, felem nz,
+            const felem x, const felem y, const u8 scalar[32])
+{
+    int i;
+    felem px, py, pz, tx, ty, tz;
+    felem precomp[16][3];
+    limb n_is_infinity_mask, index, p_is_noninfinite_mask, mask;
+
+    /* We precompute 0,1,2,... times {x,y}. */
+    memset(precomp, 0, sizeof(felem) * 3); /* entry 0: all zero (infinity) */
+    memcpy(&precomp[1][0], x, sizeof(felem));
+    memcpy(&precomp[1][1], y, sizeof(felem));
+    memcpy(&precomp[1][2], kOne, sizeof(felem)); /* entry 1: {x,y} with z = kOne */
+
+    for (i = 2; i < 16; i += 2) {
+        point_double(precomp[i][0], precomp[i][1], precomp[i][2], /* entry i = 2 * entry i/2 */
+                     precomp[i / 2][0], precomp[i / 2][1], precomp[i / 2][2]);
+
+        point_add_mixed(precomp[i + 1][0], precomp[i + 1][1], precomp[i + 1][2], /* entry i+1 = entry i + {x,y} */
+                        precomp[i][0], precomp[i][1], precomp[i][2], x, y);
+    }
+
+    memset(nx, 0, sizeof(felem));
+    memset(ny, 0, sizeof(felem));
+    memset(nz, 0, sizeof(felem));
+    n_is_infinity_mask = -1; /* all-ones while n is still infinity */
+
+    /* We add in a window of four bits each iteration and do this 64 times. */
+    for (i = 0; i < 64; i++) {
+        if (i) {
+            point_double(nx, ny, nz, nx, ny, nz);
+            point_double(nx, ny, nz, nx, ny, nz);
+            point_double(nx, ny, nz, nx, ny, nz);
+            point_double(nx, ny, nz, nx, ny, nz);
+        }
+
+        index = scalar[31 - i / 2]; /* most significant byte first */
+        if ((i & 1) == 1) {
+            index &= 15; /* odd i: low nibble */
+        } else {
+            index >>= 4; /* even i: high nibble */
+        }
+
+        /* See the comments in scalar_base_mult about handling infinities. */
+        select_jacobian_point(px, py, pz, precomp[0][0], index);
+        point_add(tx, ty, tz, nx, ny, nz, px, py, pz);
+        copy_conditional(nx, px, n_is_infinity_mask);
+        copy_conditional(ny, py, n_is_infinity_mask);
+        copy_conditional(nz, pz, n_is_infinity_mask);
+
+        p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);
+        mask = p_is_noninfinite_mask & ~n_is_infinity_mask; /* both operands were finite */
+        copy_conditional(nx, tx, mask);
+        copy_conditional(ny, ty, mask);
+        copy_conditional(nz, tz, mask);
+        n_is_infinity_mask &= ~p_is_noninfinite_mask;
+    }
+}
+
+/* Interface with Freebl: */
+
+/* BYTESWAP_MP_DIGIT_TO_LE swaps the bytes of a mp_digit to little-endian
+ * order; it is only defined on big-endian builds. The fallback BYTESWAP32/64
+ * macros evaluate x several times, so x must be free of side effects. */
+#ifdef IS_BIG_ENDIAN
+#ifdef __APPLE__
+#include <libkern/OSByteOrder.h>
+#define BYTESWAP32(x) OSSwapInt32(x)
+#define BYTESWAP64(x) OSSwapInt64(x)
+#else /* !__APPLE__: portable shift-and-mask byte reversal */
+#define BYTESWAP32(x) \
+    (((x) >> 24) | (((x) >> 8) & 0xff00) | (((x)&0xff00) << 8) | ((x) << 24))
+#define BYTESWAP64(x)                                       \
+    (((x) >> 56) | (((x) >> 40) & 0xff00) |                 \
+     (((x) >> 24) & 0xff0000) | (((x) >> 8) & 0xff000000) | \
+     (((x)&0xff000000) << 8) | (((x)&0xff0000) << 24) |     \
+     (((x)&0xff00) << 40) | ((x) << 56))
+#endif /* __APPLE__ */
+
+#ifdef MP_USE_UINT_DIGIT /* selects the 32-bit swap for mp_digit */
+#define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP32(x)
+#else /* otherwise the 64-bit swap */
+#define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP64(x)
+#endif /* MP_USE_UINT_DIGIT */
+#endif /* IS_BIG_ENDIAN */
+
+#ifdef MP_USE_UINT_DIGIT /* eight digits in this layout */
+static const mp_digit kRInvDigits[8] = { /* digits of kRInv; presumably 1/R mod p, least significant first - TODO confirm */
+    0x80000000, 1, 0xffffffff, 0,
+    0x80000001, 0xfffffffe, 1, 0x7fffffff
+};
+#else /* four digits in this layout, same 256-bit value */
+static const mp_digit kRInvDigits[4] = {
+    PR_UINT64(0x180000000), 0xffffffff,
+    PR_UINT64(0xfffffffe80000001), PR_UINT64(0x7fffffff00000001)
+};
+#endif /* MP_USE_UINT_DIGIT */
+#define MP_DIGITS_IN_256_BITS (32 / sizeof(mp_digit)) /* mp_digits per 32 bytes */
+static const mp_int kRInv = { /* read-only mp_int view of kRInvDigits; from_montgomery multiplies by it */
+    MP_ZPOS,
+    MP_DIGITS_IN_256_BITS,
+    MP_DIGITS_IN_256_BITS,
+    (mp_digit *)kRInvDigits /* the cast drops const; the digits must never be written */
+};
+
+static const limb kTwo28 = 0x10000000; /* 2**28: radix of an odd-indexed limb */
+static const limb kTwo29 = 0x20000000; /* 2**29: radix of an even-indexed limb */
+
+/* to_montgomery sets out = R*in, reduced by the group's field_mod, where
+ * R = 2**257. Returns MP_OKAY or the first MPI error encountered.
+ */
+static mp_err
+to_montgomery(felem out, const mp_int *in, const ECGroup *group)
+{
+    /* MPI offers no bit-shift primitive. To shift |in| left by 257 bits we
+     * copy its digits 256 bits up inside a wider value and then double that
+     * value once.
+     */
+    mp_int shifted;
+    mp_err res;
+    int limb_idx;
+
+    MP_CHECKOK(mp_init(&shifted));
+    MP_CHECKOK(s_mp_pad(&shifted, MP_USED(in) + MP_DIGITS_IN_256_BITS));
+    memcpy(&MP_DIGIT(&shifted, MP_DIGITS_IN_256_BITS),
+           MP_DIGITS(in),
+           MP_USED(in) * sizeof(mp_digit));
+    MP_CHECKOK(mp_mul_2(&shifted, &shifted));
+    MP_CHECKOK(group->meth->field_mod(&shifted, &shifted, group->meth));
+
+    /* Peel limbs off the low end: 29 bits for even limbs, 28 for odd ones. */
+    for (limb_idx = 0; limb_idx < NLIMBS; limb_idx++) {
+        const limb keep = (limb_idx & 1) ? kBottom28Bits : kBottom29Bits;
+        const limb radix = (limb_idx & 1) ? kTwo28 : kTwo29;
+
+        out[limb_idx] = MP_DIGIT(&shifted, 0) & keep;
+        MP_CHECKOK(mp_div_d(&shifted, radix, &shifted, NULL));
+    }
+
+CLEANUP:
+    mp_clear(&shifted);
+    return res;
+}
+
+/* from_montgomery sets out = in/R (mod p). Returns MP_OKAY, or the first MPI
+ * error encountered. */
+static mp_err
+from_montgomery(mp_int *out, const felem in, const ECGroup *group)
+{
+    mp_int result, tmp;
+    mp_err res;
+    int i;
+
+    /* Null the digit pointers first so that CLEANUP can safely mp_clear()
+     * both values even when the first mp_init() fails. */
+    MP_DIGITS(&result) = 0;
+    MP_DIGITS(&tmp) = 0;
+    MP_CHECKOK(mp_init(&result));
+    MP_CHECKOK(mp_init(&tmp));
+
+    /* Horner evaluation from the top limb down; limbs alternate 29/28 bits. */
+    MP_CHECKOK(mp_add_d(&tmp, in[NLIMBS - 1], &result));
+    for (i = NLIMBS - 2; i >= 0; i--) {
+        MP_CHECKOK(mp_mul_d(&result, (i & 1) ? kTwo28 : kTwo29, &tmp));
+        MP_CHECKOK(mp_add_d(&tmp, in[i], &result));
+    }
+    MP_CHECKOK(mp_mul(&result, &kRInv, out));
+    MP_CHECKOK(group->meth->field_mod(out, out, group->meth));
+
+CLEANUP:
+    mp_clear(&result);
+    mp_clear(&tmp);
+    return res;
+}
+
+/* scalar_from_mp_int sets out_scalar=n (little-endian), where n < the group
+ * order, so |n| fits in 32 bytes. The digit count is clamped regardless, so an
+ * oversized |n| can never overrun the buffers. Unavoidable timing side-channel:
+ * a small |n| may be one or more words short, and then less data is copied. */
+static void
+scalar_from_mp_int(u8 out_scalar[32], const mp_int *n)
+{
+    mp_size used = MP_USED(n) < MP_DIGITS_IN_256_BITS ? MP_USED(n)
+                                                      : MP_DIGITS_IN_256_BITS;
+    memset(out_scalar, 0, 32);
+#ifdef IS_LITTLE_ENDIAN
+    memcpy(out_scalar, MP_DIGITS(n), used * sizeof(mp_digit));
+#else
+    {
+        mp_size i;
+        mp_digit swapped[MP_DIGITS_IN_256_BITS];
+        for (i = 0; i < used; i++) {
+            swapped[i] = BYTESWAP_MP_DIGIT_TO_LE(MP_DIGIT(n, i));
+        }
+        memcpy(out_scalar, swapped, used * sizeof(mp_digit));
+    }
+#endif
+}
+
+/* ec_GFp_nistp256_base_point_mul computes {out_x,out_y} = n*G for the
+ * curve's base point G. The scalar n must be below the group order.
+ */
+static mp_err
+ec_GFp_nistp256_base_point_mul(const mp_int *n,
+                               mp_int *out_x, mp_int *out_y,
+                               const ECGroup *group)
+{
+    mp_err res;
+    u8 k[32];
+    felem jx, jy, jz;
+    felem ax, ay;
+
+    /* FIXME(agl): test that n < order. */
+    scalar_from_mp_int(k, n);
+    scalar_base_mult(jx, jy, jz, k);
+    point_to_affine(ax, ay, jx, jy, jz);
+    MP_CHECKOK(from_montgomery(out_x, ax, group));
+    MP_CHECKOK(from_montgomery(out_y, ay, group));
+
+CLEANUP:
+    return res;
+}
+
+/* ec_GFp_nistp256_point_mul computes {out_x,out_y} = n*{in_x,in_y}. The
+ * scalar n must be below the group order. */
+static mp_err
+ec_GFp_nistp256_point_mul(const mp_int *n,
+                          const mp_int *in_x, const mp_int *in_y,
+                          mp_int *out_x, mp_int *out_y,
+                          const ECGroup *group)
+{
+    mp_err res;
+    u8 k[32];
+    felem base_x, base_y;
+    felem jx, jy, jz;
+    felem ax, ay;
+
+    scalar_from_mp_int(k, n);
+    MP_CHECKOK(to_montgomery(base_x, in_x, group));
+    MP_CHECKOK(to_montgomery(base_y, in_y, group));
+
+    scalar_mult(jx, jy, jz, base_x, base_y, k);
+    point_to_affine(ax, ay, jx, jy, jz);
+    MP_CHECKOK(from_montgomery(out_x, ax, group));
+    MP_CHECKOK(from_montgomery(out_y, ay, group));
+
+CLEANUP:
+    return res;
+}
+
+/* ec_GFp_nistp256_points_mul_vartime sets {out_x,out_y} = n1*G +
+ * n2*{in_x,in_y}, where n1 and n2 are < the order of the group. A NULL n2
+ * selects a plain base-point multiplication and a NULL n1 a plain
+ * arbitrary-point multiplication.
+ *
+ * As indicated by the name, this function operates in variable time. This
+ * is safe because it's used for signature validation which doesn't deal
+ * with secrets.
+ */
+static mp_err
+ec_GFp_nistp256_points_mul_vartime(
+    const mp_int *n1, const mp_int *n2,
+    const mp_int *in_x, const mp_int *in_y,
+    mp_int *out_x, mp_int *out_y,
+    const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    u8 k1[32], k2[32];
+    felem gx, gy, gz; /* n1*G, and later the combined result */
+    felem qx, qy, qz; /* n2*{in_x,in_y} */
+    felem ax, ay, base_x, base_y;
+    int n1_is_zero, n2_is_zero;
+
+    if (n2 == NULL) {
+        return ec_GFp_nistp256_base_point_mul(n1, out_x, out_y, group);
+    }
+    if (n1 == NULL) {
+        return ec_GFp_nistp256_point_mul(n2, in_x, in_y, out_x, out_y, group);
+    }
+
+    n1_is_zero = (mp_cmp_z(n1) == 0);
+    n2_is_zero = (mp_cmp_z(n2) == 0);
+    /* If both scalars are zero, then the result is the point at infinity. */
+    if (n1_is_zero && n2_is_zero) {
+        mp_zero(out_x);
+        mp_zero(out_y);
+        return res;
+    }
+
+    scalar_from_mp_int(k1, n1);
+    scalar_from_mp_int(k2, n2);
+
+    MP_CHECKOK(to_montgomery(base_x, in_x, group));
+    MP_CHECKOK(to_montgomery(base_y, in_y, group));
+    scalar_base_mult(gx, gy, gz, k1);
+    scalar_mult(qx, qy, qz, base_x, base_y, k2);
+
+    /* n2 == 0: {gx,gy,gz} already is the answer. n1 == 0: the answer is just
+     * {qx,qy,qz}. Otherwise add, allowing for the two points being equal. */
+    if (!n2_is_zero && n1_is_zero) {
+        memcpy(gx, qx, sizeof(qx));
+        memcpy(gy, qy, sizeof(qy));
+        memcpy(gz, qz, sizeof(qz));
+    } else if (!n2_is_zero) {
+        point_add_or_double_vartime(gx, gy, gz, gx, gy, gz, qx, qy, qz);
+    }
+
+    point_to_affine(ax, ay, gx, gy, gz);
+    MP_CHECKOK(from_montgomery(out_x, ax, group));
+    MP_CHECKOK(from_montgomery(out_y, ay, group));
+
+CLEANUP:
+    return res;
+}
+
+/* Wire in fast point multiplication; only NIST P-256 is recognised here. */
+mp_err
+ec_group_set_gfp256_32(ECGroup *group, ECCurveName name)
+{
+    if (name != ECCurve_NIST_P256)
+        return MP_OKAY; /* other curves: leave |group| untouched */
+    group->points_mul = &ec_GFp_nistp256_points_mul_vartime;
+    group->point_mul = &ec_GFp_nistp256_point_mul;
+    group->base_point_mul = &ec_GFp_nistp256_base_point_mul;
+    return MP_OKAY;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_384.c b/security/nss/lib/freebl/ecl/ecp_384.c
new file mode 100644
index 000000000..702fd976e
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_384.c
@@ -0,0 +1,258 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+
+/* Fast reduction r = a mod p384, p384 = 2^384 - 2^128 - 2^96 + 2^32 - 1; a can
+ * be r. Uses algorithm 2.30 from Hankerson, Menezes, Vanstone, Guide to
+ * Elliptic Curve Cryptography; other sizes of a fall back to mp_mod. */
+static mp_err
+ec_GFp_nistp384_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    int a_bits = mpl_significant_bits(a);
+    int i;
+
+    /* m[0..9] are stack mp_ints wrapping s[0..9], exactly the size we need */
+    mp_int m[10];
+
+#ifdef ECL_THIRTY_TWO_BIT /* each 384-bit term takes 12 digits */
+    mp_digit s[10][12];
+    for (i = 0; i < 10; i++) {
+        MP_SIGN(&m[i]) = MP_ZPOS;
+        MP_ALLOC(&m[i]) = 12;
+        MP_USED(&m[i]) = 12;
+        MP_DIGITS(&m[i]) = s[i];
+    }
+#else /* each 384-bit term takes 6 digits */
+    mp_digit s[10][6];
+    for (i = 0; i < 10; i++) {
+        MP_SIGN(&m[i]) = MP_ZPOS;
+        MP_ALLOC(&m[i]) = 6;
+        MP_USED(&m[i]) = 6;
+        MP_DIGITS(&m[i]) = s[i];
+    }
+#endif
+
+#ifdef ECL_THIRTY_TWO_BIT
+    /* for inputs wider than twice the field size (768 bits), or inputs that
+     * do not reach into the top 32-bit word, use regular reduction */
+    if ((a_bits > 768) || (a_bits <= 736)) {
+        MP_CHECKOK(mp_mod(a, &meth->irr, r));
+    } else {
+        for (i = 0; i < 12; i++) { /* m[0]: low 384 bits of a */
+            s[0][i] = MP_DIGIT(a, i);
+        }
+        s[1][0] = 0; /* m[1]: words 21-23 of a at word 4; added twice below */
+        s[1][1] = 0;
+        s[1][2] = 0;
+        s[1][3] = 0;
+        s[1][4] = MP_DIGIT(a, 21);
+        s[1][5] = MP_DIGIT(a, 22);
+        s[1][6] = MP_DIGIT(a, 23);
+        s[1][7] = 0;
+        s[1][8] = 0;
+        s[1][9] = 0;
+        s[1][10] = 0;
+        s[1][11] = 0;
+        for (i = 0; i < 12; i++) { /* m[2]: high 384 bits of a */
+            s[2][i] = MP_DIGIT(a, i + 12);
+        }
+        s[3][0] = MP_DIGIT(a, 21); /* m[3]: high half rotated up by three words */
+        s[3][1] = MP_DIGIT(a, 22);
+        s[3][2] = MP_DIGIT(a, 23);
+        for (i = 3; i < 12; i++) {
+            s[3][i] = MP_DIGIT(a, i + 9);
+        }
+        s[4][0] = 0; /* m[4]: word 23 at word 1, word 20 at word 3, words 12-19 at word 4 */
+        s[4][1] = MP_DIGIT(a, 23);
+        s[4][2] = 0;
+        s[4][3] = MP_DIGIT(a, 20);
+        for (i = 4; i < 12; i++) {
+            s[4][i] = MP_DIGIT(a, i + 8);
+        }
+        s[5][0] = 0; /* m[5]: words 20-23 at word 4 */
+        s[5][1] = 0;
+        s[5][2] = 0;
+        s[5][3] = 0;
+        s[5][4] = MP_DIGIT(a, 20);
+        s[5][5] = MP_DIGIT(a, 21);
+        s[5][6] = MP_DIGIT(a, 22);
+        s[5][7] = MP_DIGIT(a, 23);
+        s[5][8] = 0;
+        s[5][9] = 0;
+        s[5][10] = 0;
+        s[5][11] = 0;
+        s[6][0] = MP_DIGIT(a, 20); /* m[6]: word 20 at word 0, words 21-23 at word 3 */
+        s[6][1] = 0;
+        s[6][2] = 0;
+        s[6][3] = MP_DIGIT(a, 21);
+        s[6][4] = MP_DIGIT(a, 22);
+        s[6][5] = MP_DIGIT(a, 23);
+        s[6][6] = 0;
+        s[6][7] = 0;
+        s[6][8] = 0;
+        s[6][9] = 0;
+        s[6][10] = 0;
+        s[6][11] = 0;
+        s[7][0] = MP_DIGIT(a, 23); /* m[7] (subtracted): high half rotated up by one word */
+        for (i = 1; i < 12; i++) {
+            s[7][i] = MP_DIGIT(a, i + 11);
+        }
+        s[8][0] = 0; /* m[8] (subtracted): words 20-23 at word 1 */
+        s[8][1] = MP_DIGIT(a, 20);
+        s[8][2] = MP_DIGIT(a, 21);
+        s[8][3] = MP_DIGIT(a, 22);
+        s[8][4] = MP_DIGIT(a, 23);
+        s[8][5] = 0;
+        s[8][6] = 0;
+        s[8][7] = 0;
+        s[8][8] = 0;
+        s[8][9] = 0;
+        s[8][10] = 0;
+        s[8][11] = 0;
+        s[9][0] = 0; /* m[9] (subtracted): word 23 at words 3 and 4 */
+        s[9][1] = 0;
+        s[9][2] = 0;
+        s[9][3] = MP_DIGIT(a, 23);
+        s[9][4] = MP_DIGIT(a, 23);
+        s[9][5] = 0;
+        s[9][6] = 0;
+        s[9][7] = 0;
+        s[9][8] = 0;
+        s[9][9] = 0;
+        s[9][10] = 0;
+        s[9][11] = 0;
+
+        MP_CHECKOK(mp_add(&m[0], &m[1], r)); /* every term was copied out of a above, so a == r is safe */
+        MP_CHECKOK(mp_add(r, &m[1], r)); /* m[1] counts twice */
+        MP_CHECKOK(mp_add(r, &m[2], r));
+        MP_CHECKOK(mp_add(r, &m[3], r));
+        MP_CHECKOK(mp_add(r, &m[4], r));
+        MP_CHECKOK(mp_add(r, &m[5], r));
+        MP_CHECKOK(mp_add(r, &m[6], r));
+        MP_CHECKOK(mp_sub(r, &m[7], r));
+        MP_CHECKOK(mp_sub(r, &m[8], r));
+        MP_CHECKOK(mp_submod(r, &m[9], &meth->irr, r)); /* last subtraction also reduces mod p384 */
+        s_mp_clamp(r);
+    }
+#else
+    /* for inputs wider than twice the field size (768 bits), or inputs of at
+     * most 736 bits, use regular reduction */
+    if ((a_bits > 768) || (a_bits <= 736)) {
+        MP_CHECKOK(mp_mod(a, &meth->irr, r));
+    } else {
+        for (i = 0; i < 6; i++) { /* m[0]: low 384 bits of a */
+            s[0][i] = MP_DIGIT(a, i);
+        }
+        s[1][0] = 0; /* m[1]: same term as the 32-bit branch, packed two words per digit */
+        s[1][1] = 0;
+        s[1][2] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32);
+        s[1][3] = MP_DIGIT(a, 11) >> 32;
+        s[1][4] = 0;
+        s[1][5] = 0;
+        for (i = 0; i < 6; i++) { /* m[2]: high 384 bits of a */
+            s[2][i] = MP_DIGIT(a, i + 6);
+        }
+        s[3][0] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32); /* m[3] */
+        s[3][1] = (MP_DIGIT(a, 11) >> 32) | (MP_DIGIT(a, 6) << 32);
+        for (i = 2; i < 6; i++) {
+            s[3][i] = (MP_DIGIT(a, i + 4) >> 32) | (MP_DIGIT(a, i + 5) << 32);
+        }
+        s[4][0] = (MP_DIGIT(a, 11) >> 32) << 32; /* m[4] */
+        s[4][1] = MP_DIGIT(a, 10) << 32;
+        for (i = 2; i < 6; i++) {
+            s[4][i] = MP_DIGIT(a, i + 4);
+        }
+        s[5][0] = 0; /* m[5] */
+        s[5][1] = 0;
+        s[5][2] = MP_DIGIT(a, 10);
+        s[5][3] = MP_DIGIT(a, 11);
+        s[5][4] = 0;
+        s[5][5] = 0;
+        s[6][0] = (MP_DIGIT(a, 10) << 32) >> 32; /* m[6]; keeps only the low 32 bits */
+        s[6][1] = (MP_DIGIT(a, 10) >> 32) << 32;
+        s[6][2] = MP_DIGIT(a, 11);
+        s[6][3] = 0;
+        s[6][4] = 0;
+        s[6][5] = 0;
+        s[7][0] = (MP_DIGIT(a, 11) >> 32) | (MP_DIGIT(a, 6) << 32); /* m[7] (subtracted) */
+        for (i = 1; i < 6; i++) {
+            s[7][i] = (MP_DIGIT(a, i + 5) >> 32) | (MP_DIGIT(a, i + 6) << 32);
+        }
+        s[8][0] = MP_DIGIT(a, 10) << 32; /* m[8] (subtracted) */
+        s[8][1] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32);
+        s[8][2] = MP_DIGIT(a, 11) >> 32;
+        s[8][3] = 0;
+        s[8][4] = 0;
+        s[8][5] = 0;
+        s[9][0] = 0; /* m[9] (subtracted) */
+        s[9][1] = (MP_DIGIT(a, 11) >> 32) << 32;
+        s[9][2] = MP_DIGIT(a, 11) >> 32;
+        s[9][3] = 0;
+        s[9][4] = 0;
+        s[9][5] = 0;
+
+        MP_CHECKOK(mp_add(&m[0], &m[1], r)); /* every term was copied out of a above, so a == r is safe */
+        MP_CHECKOK(mp_add(r, &m[1], r)); /* m[1] counts twice */
+        MP_CHECKOK(mp_add(r, &m[2], r));
+        MP_CHECKOK(mp_add(r, &m[3], r));
+        MP_CHECKOK(mp_add(r, &m[4], r));
+        MP_CHECKOK(mp_add(r, &m[5], r));
+        MP_CHECKOK(mp_add(r, &m[6], r));
+        MP_CHECKOK(mp_sub(r, &m[7], r));
+        MP_CHECKOK(mp_sub(r, &m[8], r));
+        MP_CHECKOK(mp_submod(r, &m[9], &meth->irr, r)); /* last subtraction also reduces mod p384 */
+        s_mp_clamp(r);
+    }
+#endif
+
+CLEANUP:
+    return res;
+}
+
+/* Field squaring for P-384: r = a^2 reduced with ec_GFp_nistp384_mod.
+ * r may be the same mp_int as a. Returns MP_OKAY, or the error from
+ * whichever step failed. */
+static mp_err
+ec_GFp_nistp384_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = mp_sqr(a, r);
+
+    if (res >= MP_OKAY) {
+        res = ec_GFp_nistp384_mod(r, r, meth);
+    }
+    return res;
+}
+
+/* Field multiplication for P-384: r = a*b reduced with
+ * ec_GFp_nistp384_mod. r may be a or b, and a may be b. Returns MP_OKAY,
+ * or the error from whichever step failed. */
+static mp_err
+ec_GFp_nistp384_mul(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth)
+{
+    mp_err res = mp_mul(a, b, r);
+
+    if (res >= MP_OKAY) {
+        res = ec_GFp_nistp384_mod(r, r, meth);
+    }
+    return res;
+}
+
+/* Wire in fast field arithmetic for NIST P-384; other curves are untouched.
+ * Always returns MP_OKAY. */
+mp_err
+ec_group_set_gfp384(ECGroup *group, ECCurveName name)
+{
+    if (name != ECCurve_NIST_P384)
+        return MP_OKAY;
+    group->meth->field_sqr = &ec_GFp_nistp384_sqr;
+    group->meth->field_mul = &ec_GFp_nistp384_mul;
+    group->meth->field_mod = &ec_GFp_nistp384_mod;
+    return MP_OKAY;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_521.c b/security/nss/lib/freebl/ecl/ecp_521.c
new file mode 100644
index 000000000..6ca0dbb11
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_521.c
@@ -0,0 +1,137 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+
+#define ECP521_DIGITS ECL_CURVE_DIGITS(521)
+
+/* Fast modular reduction for p521 = 2^521 - 1: r = a mod p521.  a can be
+ * r.  Uses algorithm 2.31 from Hankerson, Menezes, Vanstone. Guide to
+ * Elliptic Curve Cryptography. */
+static mp_err
+ec_GFp_nistp521_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    int a_bits = mpl_significant_bits(a);
+    unsigned int i;
+
+    /* m1 is a stack-backed mp_int (its digits are s1) of exactly the size we need */
+    mp_int m1;
+
+    mp_digit s1[ECP521_DIGITS] = { 0 };
+
+    MP_SIGN(&m1) = MP_ZPOS;
+    MP_ALLOC(&m1) = ECP521_DIGITS;
+    MP_USED(&m1) = ECP521_DIGITS;
+    MP_DIGITS(&m1) = s1;
+
+    if (a_bits < 521) { /* fewer bits than the modulus: pass a through unchanged */
+        if (a == r)
+            return MP_OKAY;
+        return mp_copy(a, r);
+    }
+    /* for values wider than twice the field size, use regular reduction;
+     * otherwise add the shifted-down high part of a to its low part */
+    if (a_bits > (521 * 2)) {
+        MP_CHECKOK(mp_mod(a, &meth->irr, r));
+    } else {
+#define FIRST_DIGIT (ECP521_DIGITS - 1)
+        for (i = FIRST_DIGIT; i < MP_USED(a) - 1; i++) { /* s1 = digits of a from FIRST_DIGIT up, shifted right 9 bits */
+            s1[i - FIRST_DIGIT] = (MP_DIGIT(a, i) >> 9) | (MP_DIGIT(a, 1 + i) << (MP_DIGIT_BIT - 9));
+        }
+        s1[i - FIRST_DIGIT] = MP_DIGIT(a, i) >> 9; /* top digit has no higher neighbour to borrow bits from */
+
+        if (a != r) { /* r = low ECP521_DIGITS digits of a */
+            MP_CHECKOK(s_mp_pad(r, ECP521_DIGITS));
+            for (i = 0; i < ECP521_DIGITS; i++) {
+                MP_DIGIT(r, i) = MP_DIGIT(a, i);
+            }
+        }
+        MP_USED(r) = ECP521_DIGITS;
+        MP_DIGIT(r, FIRST_DIGIT) &= 0x1FF; /* keep only the low 9 bits of the top digit */
+
+        MP_CHECKOK(s_mp_add(r, &m1)); /* r = low part + high part */
+        if (MP_DIGIT(r, FIRST_DIGIT) & 0x200) { /* the sum carried into bit 9 of the top digit */
+            MP_CHECKOK(s_mp_add_d(r, 1));
+            MP_DIGIT(r, FIRST_DIGIT) &= 0x1FF;
+        } else if (s_mp_cmp(r, &meth->irr) == 0) { /* r equals the modulus itself, i.e. zero */
+            mp_zero(r);
+        }
+        s_mp_clamp(r);
+    }
+
+CLEANUP:
+    return res;
+}
+
+/* Field squaring for NIST P-521: r = a^2 mod p521.  r may be the same
+ * mp_int as a.  The raw square is brought back into the field with the
+ * optimized p521 reduction. */
+static mp_err
+ec_GFp_nistp521_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = mp_sqr(a, r); /* r = a * a, not yet reduced */
+
+    if (res >= MP_OKAY) {
+        res = ec_GFp_nistp521_mod(r, r, meth);
+    }
+    return res;
+}
+
+/* Field multiplication for NIST P-521: r = (a * b) mod p521.  r may be
+ * the same mp_int as a or b, and a may be the same as b.  The raw
+ * product is reduced with the optimized p521 reduction. */
+static mp_err
+ec_GFp_nistp521_mul(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth)
+{
+    mp_err res = mp_mul(a, b, r); /* r = a * b, not yet reduced */
+
+    if (res >= MP_OKAY) {
+        res = ec_GFp_nistp521_mod(r, r, meth);
+    }
+    return res;
+}
+
+/* Divides two field elements: r = a / b mod p521.  If a is NULL, stores
+ * the inverse of b in r instead.  Returns the status of the MPI calls. */
+static mp_err
+ec_GFp_nistp521_div(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_int t;
+
+    if (a == NULL) {
+        return mp_invmod(b, &meth->irr, r);
+    }
+    /* Pre-null t's digit pointer, as the other ECL routines do, so the
+     * mp_clear at CLEANUP is safe on the mp_init failure path. */
+    MP_DIGITS(&t) = 0;
+    /* MPI doesn't support divmod: compute t = 1/b, then r = a * t mod p521. */
+    MP_CHECKOK(mp_init(&t));
+    MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
+    MP_CHECKOK(mp_mul(a, &t, r));
+    MP_CHECKOK(ec_GFp_nistp521_mod(r, r, meth));
+CLEANUP:
+    mp_clear(&t);
+    return res;
+}
+
+/* Install the fast P-521 field routines when name is ECCurve_NIST_P521. */
+mp_err
+ec_group_set_gfp521(ECGroup *group, ECCurveName name)
+{
+    if (name != ECCurve_NIST_P521) {
+        return MP_OKAY;
+    }
+    group->meth->field_mod = ec_GFp_nistp521_mod;
+    group->meth->field_mul = ec_GFp_nistp521_mul;
+    group->meth->field_sqr = ec_GFp_nistp521_sqr;
+    group->meth->field_div = ec_GFp_nistp521_div;
+    return MP_OKAY;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_aff.c b/security/nss/lib/freebl/ecl/ecp_aff.c
new file mode 100644
index 000000000..47fb27326
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_aff.c
@@ -0,0 +1,308 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "mplogic.h"
+#include <stdlib.h>
+
+/* Affine point-at-infinity test.  Infinity is represented by the pair
+ * (0, 0), so the answer is MP_YES only when both coordinates are zero
+ * and MP_NO otherwise. */
+mp_err
+ec_GFp_pt_is_inf_aff(const mp_int *px, const mp_int *py)
+{
+    if ((mp_cmp_z(px) != 0) || (mp_cmp_z(py) != 0)) {
+        return MP_NO;
+    }
+    return MP_YES;
+}
+
+/* Sets P(px, py) to the affine point at infinity by zeroing both coordinates. */
+mp_err
+ec_GFp_pt_set_inf_aff(mp_int *px, mp_int *py)
+{
+    mp_zero(px);
+    mp_zero(py);
+    return MP_OKAY; /* unconditional: nothing above reports a status */
+}
+
+/* Computes R = P + Q based on IEEE P1363 A.10.1. Elliptic curve points P,
+ * Q, and R can all be identical (results are built in temporaries and
+ * copied out last). Uses affine coordinates. Assumes input is already
+ * field-encoded using field_enc, and returns output still field-encoded. */
+mp_err
+ec_GFp_pt_add_aff(const mp_int *px, const mp_int *py, const mp_int *qx,
+                  const mp_int *qy, mp_int *rx, mp_int *ry,
+                  const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int lambda, temp, tempx, tempy;
+
+    MP_DIGITS(&lambda) = 0;
+    MP_DIGITS(&temp) = 0;
+    MP_DIGITS(&tempx) = 0;
+    MP_DIGITS(&tempy) = 0;
+    MP_CHECKOK(mp_init(&lambda));
+    MP_CHECKOK(mp_init(&temp));
+    MP_CHECKOK(mp_init(&tempx));
+    MP_CHECKOK(mp_init(&tempy));
+    /* if P = inf, then R = Q */
+    if (ec_GFp_pt_is_inf_aff(px, py) == MP_YES) {
+        MP_CHECKOK(mp_copy(qx, rx));
+        MP_CHECKOK(mp_copy(qy, ry));
+        res = MP_OKAY;
+        goto CLEANUP;
+    }
+    /* if Q = inf, then R = P */
+    if (ec_GFp_pt_is_inf_aff(qx, qy) == MP_YES) {
+        MP_CHECKOK(mp_copy(px, rx));
+        MP_CHECKOK(mp_copy(py, ry));
+        res = MP_OKAY;
+        goto CLEANUP;
+    }
+    /* if px != qx, then lambda = (py-qy) / (px-qx) */
+    if (mp_cmp(px, qx) != 0) {
+        MP_CHECKOK(group->meth->field_sub(py, qy, &tempy, group->meth));
+        MP_CHECKOK(group->meth->field_sub(px, qx, &tempx, group->meth));
+        MP_CHECKOK(group->meth->field_div(&tempy, &tempx, &lambda, group->meth));
+    } else {
+        /* if py != qy or qy = 0, then R = inf */
+        if (((mp_cmp(py, qy) != 0)) || (mp_cmp_z(qy) == 0)) {
+            mp_zero(rx);
+            mp_zero(ry);
+            res = MP_OKAY;
+            goto CLEANUP;
+        }
+        /* P == Q (doubling): lambda = (3qx^2+a) / (2qy) */
+        MP_CHECKOK(group->meth->field_sqr(qx, &tempx, group->meth));
+        MP_CHECKOK(mp_set_int(&temp, 3));
+        if (group->meth->field_enc) {
+            MP_CHECKOK(group->meth->field_enc(&temp, &temp, group->meth));
+        }
+        MP_CHECKOK(group->meth->field_mul(&tempx, &temp, &tempx, group->meth));
+        MP_CHECKOK(group->meth->field_add(&tempx, &group->curvea, &tempx, group->meth));
+        MP_CHECKOK(mp_set_int(&temp, 2));
+        if (group->meth->field_enc) {
+            MP_CHECKOK(group->meth->field_enc(&temp, &temp, group->meth));
+        }
+        MP_CHECKOK(group->meth->field_mul(qy, &temp, &tempy, group->meth));
+        MP_CHECKOK(group->meth->field_div(&tempx, &tempy, &lambda, group->meth));
+    }
+    /* rx = lambda^2 - px - qx */
+    MP_CHECKOK(group->meth->field_sqr(&lambda, &tempx, group->meth));
+    MP_CHECKOK(group->meth->field_sub(&tempx, px, &tempx, group->meth));
+    MP_CHECKOK(group->meth->field_sub(&tempx, qx, &tempx, group->meth));
+    /* ry = (qx - rx) * lambda - qy */
+    MP_CHECKOK(group->meth->field_sub(qx, &tempx, &tempy, group->meth));
+    MP_CHECKOK(group->meth->field_mul(&tempy, &lambda, &tempy, group->meth));
+    MP_CHECKOK(group->meth->field_sub(&tempy, qy, &tempy, group->meth));
+    MP_CHECKOK(mp_copy(&tempx, rx));
+    MP_CHECKOK(mp_copy(&tempy, ry));
+
+CLEANUP:
+    mp_clear(&lambda);
+    mp_clear(&temp);
+    mp_clear(&tempx);
+    mp_clear(&tempy);
+    return res;
+}
+
+/* Computes R = P - Q as P + (-Q): Q's y-coordinate is negated in the
+ * field and the pair is handed to the group's point_add.  P, Q, and R
+ * can all be identical.  Uses affine coordinates; input is assumed to be
+ * field-encoded already and the output stays field-encoded. */
+mp_err
+ec_GFp_pt_sub_aff(const mp_int *px, const mp_int *py, const mp_int *qx,
+                  const mp_int *qy, mp_int *rx, mp_int *ry,
+                  const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int neg_qy;
+
+    MP_DIGITS(&neg_qy) = 0;
+    MP_CHECKOK(mp_init(&neg_qy));
+    /* -Q = (qx, -qy) */
+    MP_CHECKOK(group->meth->field_neg(qy, &neg_qy, group->meth));
+    MP_CHECKOK(group->point_add(px, py, qx, &neg_qy, rx, ry, group));
+CLEANUP:
+    mp_clear(&neg_qy);
+    return res;
+}
+
+/* Computes R = 2P by calling ec_GFp_pt_add_aff with Q = P. Elliptic curve
+ * points P and R can be identical. Uses affine coordinates. Assumes input
+ * is field-encoded using field_enc; output is still field-encoded. */
+mp_err
+ec_GFp_pt_dbl_aff(const mp_int *px, const mp_int *py, mp_int *rx,
+                  mp_int *ry, const ECGroup *group)
+{
+    return ec_GFp_pt_add_aff(px, py, px, py, rx, ry, group);
+}
+
+/* by default, this routine is unused and thus doesn't need to be compiled */
+#ifdef ECL_ENABLE_GFP_PT_MUL_AFF
+/* Computes R = nP based on IEEE P1363 A.10.3. Elliptic curve points P and
+ * R can be identical. Uses affine coordinates. Assumes input is already
+ * field-encoded using field_enc, and returns output that is still
+ * field-encoded. n = 0 gives inf; n < 0 is computed as (-n) * (-P). */
+mp_err
+ec_GFp_pt_mul_aff(const mp_int *n, const mp_int *px, const mp_int *py,
+                  mp_int *rx, mp_int *ry, const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int k, k3, qx, qy, sx, sy;
+    int b1, b3, i, l;
+
+    MP_DIGITS(&k) = 0;
+    MP_DIGITS(&k3) = 0;
+    MP_DIGITS(&qx) = 0;
+    MP_DIGITS(&qy) = 0;
+    MP_DIGITS(&sx) = 0;
+    MP_DIGITS(&sy) = 0;
+    MP_CHECKOK(mp_init(&k));
+    MP_CHECKOK(mp_init(&k3));
+    MP_CHECKOK(mp_init(&qx));
+    MP_CHECKOK(mp_init(&qy));
+    MP_CHECKOK(mp_init(&sx));
+    MP_CHECKOK(mp_init(&sy));
+
+    /* if n = 0 then r = inf */
+    if (mp_cmp_z(n) == 0) {
+        mp_zero(rx);
+        mp_zero(ry);
+        res = MP_OKAY;
+        goto CLEANUP;
+    }
+    /* Q = P, k = n */
+    MP_CHECKOK(mp_copy(px, &qx));
+    MP_CHECKOK(mp_copy(py, &qy));
+    MP_CHECKOK(mp_copy(n, &k));
+    /* if n < 0 then Q = -Q, k = -k */
+    if (mp_cmp_z(n) < 0) {
+        MP_CHECKOK(group->meth->field_neg(&qy, &qy, group->meth));
+        MP_CHECKOK(mp_neg(&k, &k));
+    }
+#ifdef ECL_DEBUG /* basic double and add method */
+    l = mpl_significant_bits(&k) - 1; /* index of the high order bit of k */
+    MP_CHECKOK(mp_copy(&qx, &sx));
+    MP_CHECKOK(mp_copy(&qy, &sy));
+    for (i = l - 1; i >= 0; i--) {
+        /* S = 2S */
+        MP_CHECKOK(group->point_dbl(&sx, &sy, &sx, &sy, group));
+        /* if k_i = 1, then S = S + Q */
+        if (mpl_get_bit(&k, i) != 0) {
+            MP_CHECKOK(group->point_add(&sx, &sy, &qx, &qy, &sx, &sy, group));
+        }
+    }
+#else /* double and add/subtract method from \
+               * standard */
+    /* k3 = 3 * k */
+    MP_CHECKOK(mp_set_int(&k3, 3));
+    MP_CHECKOK(mp_mul(&k, &k3, &k3));
+    /* S = Q */
+    MP_CHECKOK(mp_copy(&qx, &sx));
+    MP_CHECKOK(mp_copy(&qy, &sy));
+    /* l = index of high order bit in binary representation of 3*k */
+    l = mpl_significant_bits(&k3) - 1;
+    /* for i = l-1 downto 1 */
+    for (i = l - 1; i >= 1; i--) {
+        /* S = 2S */
+        MP_CHECKOK(group->point_dbl(&sx, &sy, &sx, &sy, group));
+        b3 = MP_GET_BIT(&k3, i);
+        b1 = MP_GET_BIT(&k, i);
+        /* if k3_i = 1 and k_i = 0, then S = S + Q */
+        if ((b3 == 1) && (b1 == 0)) {
+            MP_CHECKOK(group->point_add(&sx, &sy, &qx, &qy, &sx, &sy, group));
+            /* if k3_i = 0 and k_i = 1, then S = S - Q */
+        } else if ((b3 == 0) && (b1 == 1)) {
+            MP_CHECKOK(group->point_sub(&sx, &sy, &qx, &qy, &sx, &sy, group));
+        }
+    }
+#endif
+    /* output S */
+    MP_CHECKOK(mp_copy(&sx, rx));
+    MP_CHECKOK(mp_copy(&sy, ry));
+
+CLEANUP:
+    mp_clear(&k);
+    mp_clear(&k3);
+    mp_clear(&qx);
+    mp_clear(&qy);
+    mp_clear(&sx);
+    mp_clear(&sy);
+    return res;
+}
+#endif
+
+/* Validates a point on a GFp curve.  px and py are taken un-encoded and
+ * are field-encoded here when the method has field_enc.  Returns MP_YES
+ * if all four checks pass, MP_NO if one fails, else a failing call's error. */
+mp_err
+ec_GFp_validate_point(const mp_int *px, const mp_int *py, const ECGroup *group)
+{
+    mp_err res = MP_NO;
+    mp_int accl, accr, tmp, pxt, pyt;
+
+    MP_DIGITS(&accl) = 0;
+    MP_DIGITS(&accr) = 0;
+    MP_DIGITS(&tmp) = 0;
+    MP_DIGITS(&pxt) = 0;
+    MP_DIGITS(&pyt) = 0;
+    MP_CHECKOK(mp_init(&accl));
+    MP_CHECKOK(mp_init(&accr));
+    MP_CHECKOK(mp_init(&tmp));
+    MP_CHECKOK(mp_init(&pxt));
+    MP_CHECKOK(mp_init(&pyt));
+
+    /* 1: Verify that publicValue is not the point at infinity */
+    if (ec_GFp_pt_is_inf_aff(px, py) == MP_YES) {
+        res = MP_NO;
+        goto CLEANUP;
+    }
+    /* 2: Verify that the coordinates of publicValue are elements of the
+     *    field, i.e. non-negative and below the field modulus. */
+    if ((MP_SIGN(px) == MP_NEG) || (mp_cmp(px, &group->meth->irr) >= 0) ||
+        (MP_SIGN(py) == MP_NEG) || (mp_cmp(py, &group->meth->irr) >= 0)) {
+        res = MP_NO;
+        goto CLEANUP;
+    }
+    /* 3: Verify that publicValue is on the curve. */
+    if (group->meth->field_enc) {
+        MP_CHECKOK(group->meth->field_enc(px, &pxt, group->meth));
+        MP_CHECKOK(group->meth->field_enc(py, &pyt, group->meth));
+    } else {
+        MP_CHECKOK(mp_copy(px, &pxt));
+        MP_CHECKOK(mp_copy(py, &pyt));
+    }
+    /* left-hand side: y^2  */
+    MP_CHECKOK(group->meth->field_sqr(&pyt, &accl, group->meth));
+    /* right-hand side: x^3 + a*x + b = (x^2 + a)*x + b by Horner's rule */
+    MP_CHECKOK(group->meth->field_sqr(&pxt, &tmp, group->meth));
+    MP_CHECKOK(group->meth->field_add(&tmp, &group->curvea, &tmp, group->meth));
+    MP_CHECKOK(group->meth->field_mul(&tmp, &pxt, &accr, group->meth));
+    MP_CHECKOK(group->meth->field_add(&accr, &group->curveb, &accr, group->meth));
+    /* check LHS - RHS == 0 */
+    MP_CHECKOK(group->meth->field_sub(&accl, &accr, &accr, group->meth));
+    if (mp_cmp_z(&accr) != 0) {
+        res = MP_NO;
+        goto CLEANUP;
+    }
+    /* 4: Verify that the order of the curve times the publicValue is the
+     *    point at infinity (uses the original, un-encoded px and py). */
+    MP_CHECKOK(ECPoint_mul(group, &group->order, px, py, &pxt, &pyt));
+    if (ec_GFp_pt_is_inf_aff(&pxt, &pyt) != MP_YES) {
+        res = MP_NO;
+        goto CLEANUP;
+    }
+
+    res = MP_YES;
+
+CLEANUP:
+    mp_clear(&accl);
+    mp_clear(&accr);
+    mp_clear(&tmp);
+    mp_clear(&pxt);
+    mp_clear(&pyt);
+    return res;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_jac.c b/security/nss/lib/freebl/ecl/ecp_jac.c
new file mode 100644
index 000000000..535e75903
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_jac.c
@@ -0,0 +1,513 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "mplogic.h"
+#include <stdlib.h>
+#ifdef ECL_DEBUG
+#include <assert.h>
+#endif
+
+/* Lifts the affine point P(px, py) to Jacobian projective coordinates
+ * R(rx, ry, rz): infinity maps to Jacobian infinity, any other point to
+ * (px, py, 1) with the 1 field-encoded when the method has field_enc.
+ * Input is assumed field-encoded; output is still field-encoded. */
+mp_err
+ec_GFp_pt_aff2jac(const mp_int *px, const mp_int *py, mp_int *rx,
+                  mp_int *ry, mp_int *rz, const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+
+    if (ec_GFp_pt_is_inf_aff(px, py) == MP_YES) {
+        MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz));
+        goto CLEANUP;
+    }
+    MP_CHECKOK(mp_copy(px, rx));
+    MP_CHECKOK(mp_copy(py, ry));
+    MP_CHECKOK(mp_set_int(rz, 1));
+    if (group->meth->field_enc != NULL) {
+        MP_CHECKOK(group->meth->field_enc(rz, rz, group->meth));
+    }
+CLEANUP:
+    return res;
+}
+
+/* Converts a point P(px, py, pz) from Jacobian projective coordinates to
+ * affine coordinates R(rx, ry) = (px / pz^2, py / pz^3).  P and R can
+ * share x and y coordinates.  Assumes input is already field-encoded
+ * using field_enc, and returns output that is still field-encoded. */
+mp_err
+ec_GFp_pt_jac2aff(const mp_int *px, const mp_int *py, const mp_int *pz,
+                  mp_int *rx, mp_int *ry, const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int zinv, zinv2, zinv3;
+
+    MP_DIGITS(&zinv) = 0;
+    MP_DIGITS(&zinv2) = 0;
+    MP_DIGITS(&zinv3) = 0;
+    MP_CHECKOK(mp_init(&zinv));
+    MP_CHECKOK(mp_init(&zinv2));
+    MP_CHECKOK(mp_init(&zinv3));
+
+    /* Jacobian infinity maps to the affine point at infinity */
+    if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) {
+        MP_CHECKOK(ec_GFp_pt_set_inf_aff(rx, ry));
+        goto CLEANUP;
+    }
+
+    /* pz compares equal to 1: x and y are copied through unchanged */
+    if (mp_cmp_d(pz, 1) == 0) {
+        MP_CHECKOK(mp_copy(px, rx));
+        MP_CHECKOK(mp_copy(py, ry));
+        goto CLEANUP;
+    }
+    MP_CHECKOK(group->meth->field_div(NULL, pz, &zinv, group->meth));
+    MP_CHECKOK(group->meth->field_sqr(&zinv, &zinv2, group->meth));
+    MP_CHECKOK(group->meth->field_mul(&zinv, &zinv2, &zinv3, group->meth));
+    MP_CHECKOK(group->meth->field_mul(px, &zinv2, rx, group->meth));
+    MP_CHECKOK(group->meth->field_mul(py, &zinv3, ry, group->meth));
+
+CLEANUP:
+    mp_clear(&zinv);
+    mp_clear(&zinv2);
+    mp_clear(&zinv3);
+    return res;
+}
+
+/* Checks if point P(px, py, pz) is at infinity in Jacobian coordinates by
+ * testing pz alone; returns the raw mp_cmp_z(pz) result (px, py unused). */
+mp_err
+ec_GFp_pt_is_inf_jac(const mp_int *px, const mp_int *py, const mp_int *pz)
+{
+    return mp_cmp_z(pz);
+}
+
+/* Sets P(px, py, pz) to be the point at infinity in Jacobian coordinates
+ * by zeroing pz; px and py are left untouched. */
+mp_err
+ec_GFp_pt_set_inf_jac(mp_int *px, mp_int *py, mp_int *pz)
+{
+    mp_zero(pz);
+    return MP_OKAY;
+}
+
+/* Computes R = P + Q where R is (rx, ry, rz), P is (px, py, pz) and Q is
+ * (qx, qy, 1).  Elliptic curve points P, Q, and R can all be identical.
+ * Uses mixed Jacobian-affine coordinates. Assumes input is already
+ * field-encoded using field_enc, and returns output that is still
+ * field-encoded. Uses equation (2) from Brown, Hankerson, Lopez, and
+ * Menezes. Software Implementation of the NIST Elliptic Curves Over Prime
+ * Fields. */
+mp_err
+ec_GFp_pt_add_jac_aff(const mp_int *px, const mp_int *py, const mp_int *pz,
+                      const mp_int *qx, const mp_int *qy, mp_int *rx,
+                      mp_int *ry, mp_int *rz, const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int A, B, C, D, C2, C3;
+
+    MP_DIGITS(&A) = 0;
+    MP_DIGITS(&B) = 0;
+    MP_DIGITS(&C) = 0;
+    MP_DIGITS(&D) = 0;
+    MP_DIGITS(&C2) = 0;
+    MP_DIGITS(&C3) = 0;
+    MP_CHECKOK(mp_init(&A));
+    MP_CHECKOK(mp_init(&B));
+    MP_CHECKOK(mp_init(&C));
+    MP_CHECKOK(mp_init(&D));
+    MP_CHECKOK(mp_init(&C2));
+    MP_CHECKOK(mp_init(&C3));
+
+    /* If either P or Q is the point at infinity, then return the other
+     * point */
+    if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) {
+        MP_CHECKOK(ec_GFp_pt_aff2jac(qx, qy, rx, ry, rz, group));
+        goto CLEANUP;
+    }
+    if (ec_GFp_pt_is_inf_aff(qx, qy) == MP_YES) {
+        MP_CHECKOK(mp_copy(px, rx));
+        MP_CHECKOK(mp_copy(py, ry));
+        MP_CHECKOK(mp_copy(pz, rz));
+        goto CLEANUP;
+    }
+
+    /* A = qx * pz^2, B = qy * pz^3 */
+    MP_CHECKOK(group->meth->field_sqr(pz, &A, group->meth));
+    MP_CHECKOK(group->meth->field_mul(&A, pz, &B, group->meth));
+    MP_CHECKOK(group->meth->field_mul(&A, qx, &A, group->meth));
+    MP_CHECKOK(group->meth->field_mul(&B, qy, &B, group->meth));
+
+    /* C = A - px, D = B - py */
+    MP_CHECKOK(group->meth->field_sub(&A, px, &C, group->meth));
+    MP_CHECKOK(group->meth->field_sub(&B, py, &D, group->meth));
+
+    if (mp_cmp_z(&C) == 0) {
+        /* P == Q or P == -Q */
+        if (mp_cmp_z(&D) == 0) {
+            /* P == Q */
+            /* It is cheaper to double (qx, qy, 1) than (px, py, pz). */
+            MP_DIGIT(&D, 0) = 1; /* D is zero here; reuse it as the pz = 1 argument of the doubling */
+            MP_CHECKOK(ec_GFp_pt_dbl_jac(qx, qy, &D, rx, ry, rz, group));
+        } else {
+            /* P == -Q */
+            MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz));
+        }
+        goto CLEANUP;
+    }
+
+    /* C2 = C^2, C3 = C^3 */
+    MP_CHECKOK(group->meth->field_sqr(&C, &C2, group->meth));
+    MP_CHECKOK(group->meth->field_mul(&C, &C2, &C3, group->meth));
+
+    /* rz = pz * C */
+    MP_CHECKOK(group->meth->field_mul(pz, &C, rz, group->meth));
+
+    /* C = px * C^2 (the original C is no longer needed from here on) */
+    MP_CHECKOK(group->meth->field_mul(px, &C2, &C, group->meth));
+    /* A = D^2 */
+    MP_CHECKOK(group->meth->field_sqr(&D, &A, group->meth));
+
+    /* rx = D^2 - (C^3 + 2 * (px * C^2)) */
+    MP_CHECKOK(group->meth->field_add(&C, &C, rx, group->meth));
+    MP_CHECKOK(group->meth->field_add(&C3, rx, rx, group->meth));
+    MP_CHECKOK(group->meth->field_sub(&A, rx, rx, group->meth));
+
+    /* C3 = py * C^3 */
+    MP_CHECKOK(group->meth->field_mul(py, &C3, &C3, group->meth));
+
+    /* ry = D * (px * C^2 - rx) - py * C^3 */
+    MP_CHECKOK(group->meth->field_sub(&C, rx, ry, group->meth));
+    MP_CHECKOK(group->meth->field_mul(&D, ry, ry, group->meth));
+    MP_CHECKOK(group->meth->field_sub(ry, &C3, ry, group->meth));
+
+CLEANUP:
+    mp_clear(&A);
+    mp_clear(&B);
+    mp_clear(&C);
+    mp_clear(&D);
+    mp_clear(&C2);
+    mp_clear(&C3);
+    return res;
+}
+
+/* Computes R = 2P.  Elliptic curve points P and R can be identical.  Uses
+ * Jacobian coordinates.
+ *
+ * Assumes input is already field-encoded using field_enc, and returns
+ * output that is still field-encoded.
+ *
+ * This routine implements Point Doubling in the Jacobian Projective
+ * space as described in the paper "Efficient elliptic curve exponentiation
+ * using mixed coordinates", by H. Cohen, A Miyaji, T. Ono.
+ */
+mp_err
+ec_GFp_pt_dbl_jac(const mp_int *px, const mp_int *py, const mp_int *pz,
+                  mp_int *rx, mp_int *ry, mp_int *rz, const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int t0, t1, M, S;
+
+    MP_DIGITS(&t0) = 0;
+    MP_DIGITS(&t1) = 0;
+    MP_DIGITS(&M) = 0;
+    MP_DIGITS(&S) = 0;
+    MP_CHECKOK(mp_init(&t0));
+    MP_CHECKOK(mp_init(&t1));
+    MP_CHECKOK(mp_init(&M));
+    MP_CHECKOK(mp_init(&S));
+
+    /* P == inf or P == -P (py == 0): the double is the point at infinity */
+    if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES || mp_cmp_z(py) == 0) {
+        MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz));
+        goto CLEANUP;
+    }
+
+    if (mp_cmp_d(pz, 1) == 0) {
+        /* M = 3 * px^2 + a */
+        MP_CHECKOK(group->meth->field_sqr(px, &t0, group->meth));
+        MP_CHECKOK(group->meth->field_add(&t0, &t0, &M, group->meth));
+        MP_CHECKOK(group->meth->field_add(&t0, &M, &t0, group->meth));
+        MP_CHECKOK(group->meth->field_add(&t0, &group->curvea, &M, group->meth));
+    } else if (MP_SIGN(&group->curvea) == MP_NEG && /* curvea is the single-digit value -3 */
+               MP_USED(&group->curvea) == 1 &&
+               MP_DIGIT(&group->curvea, 0) == 3) {
+        /* M = 3 * (px + pz^2) * (px - pz^2) */
+        MP_CHECKOK(group->meth->field_sqr(pz, &M, group->meth));
+        MP_CHECKOK(group->meth->field_add(px, &M, &t0, group->meth));
+        MP_CHECKOK(group->meth->field_sub(px, &M, &t1, group->meth));
+        MP_CHECKOK(group->meth->field_mul(&t0, &t1, &M, group->meth));
+        MP_CHECKOK(group->meth->field_add(&M, &M, &t0, group->meth));
+        MP_CHECKOK(group->meth->field_add(&t0, &M, &M, group->meth));
+    } else {
+        /* M = 3 * (px^2) + a * (pz^4) */
+        MP_CHECKOK(group->meth->field_sqr(px, &t0, group->meth));
+        MP_CHECKOK(group->meth->field_add(&t0, &t0, &M, group->meth));
+        MP_CHECKOK(group->meth->field_add(&t0, &M, &t0, group->meth));
+        MP_CHECKOK(group->meth->field_sqr(pz, &M, group->meth));
+        MP_CHECKOK(group->meth->field_sqr(&M, &M, group->meth));
+        MP_CHECKOK(group->meth->field_mul(&M, &group->curvea, &M, group->meth));
+        MP_CHECKOK(group->meth->field_add(&M, &t0, &M, group->meth));
+    }
+
+    /* rz = 2 * py * pz */
+    /* t0 = 4 * py^2 */
+    if (mp_cmp_d(pz, 1) == 0) {
+        MP_CHECKOK(group->meth->field_add(py, py, rz, group->meth));
+        MP_CHECKOK(group->meth->field_sqr(rz, &t0, group->meth));
+    } else {
+        MP_CHECKOK(group->meth->field_add(py, py, &t0, group->meth));
+        MP_CHECKOK(group->meth->field_mul(&t0, pz, rz, group->meth));
+        MP_CHECKOK(group->meth->field_sqr(&t0, &t0, group->meth));
+    }
+
+    /* S = 4 * px * py^2 = px * (2 * py)^2 */
+    MP_CHECKOK(group->meth->field_mul(px, &t0, &S, group->meth));
+
+    /* rx = M^2 - 2 * S */
+    MP_CHECKOK(group->meth->field_add(&S, &S, &t1, group->meth));
+    MP_CHECKOK(group->meth->field_sqr(&M, rx, group->meth));
+    MP_CHECKOK(group->meth->field_sub(rx, &t1, rx, group->meth));
+
+    /* ry = M * (S - rx) - 8 * py^4 */
+    MP_CHECKOK(group->meth->field_sqr(&t0, &t1, group->meth)); /* t1 = (4 * py^2)^2 = 16 * py^4 */
+    if (mp_isodd(&t1)) { /* add the modulus first so the halving below is exact */
+        MP_CHECKOK(mp_add(&t1, &group->meth->irr, &t1));
+    }
+    MP_CHECKOK(mp_div_2(&t1, &t1)); /* t1 = 8 * py^4 in the field */
+    MP_CHECKOK(group->meth->field_sub(&S, rx, &S, group->meth));
+    MP_CHECKOK(group->meth->field_mul(&M, &S, &M, group->meth));
+    MP_CHECKOK(group->meth->field_sub(&M, &t1, ry, group->meth));
+
+CLEANUP:
+    mp_clear(&t0);
+    mp_clear(&t1);
+    mp_clear(&M);
+    mp_clear(&S);
+    return res;
+}
+
+/* by default, this routine is unused and thus doesn't need to be compiled */
+#ifdef ECL_ENABLE_GFP_PT_MUL_JAC
+/* Computes R = nP where R is (rx, ry) and P is (px, py), both affine.
+ * Curve parameters are taken from group.  Elliptic curve points P and R
+ * can be identical.  The accumulator is kept in Jacobian coordinates and
+ * table entries are added in affine form.  Assumes input is
+ * already field-encoded using field_enc, and returns output that is still
+ * field-encoded. Uses 4-bit window method. */
+mp_err
+ec_GFp_pt_mul_jac(const mp_int *n, const mp_int *px, const mp_int *py,
+                  mp_int *rx, mp_int *ry, const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int precomp[16][2], rz;
+    int i, ni, d;
+
+    MP_DIGITS(&rz) = 0;
+    for (i = 0; i < 16; i++) {
+        MP_DIGITS(&precomp[i][0]) = 0;
+        MP_DIGITS(&precomp[i][1]) = 0;
+    }
+
+    ARGCHK(group != NULL, MP_BADARG);
+    ARGCHK((n != NULL) && (px != NULL) && (py != NULL), MP_BADARG);
+
+    /* initialize precomputation table */
+    for (i = 0; i < 16; i++) {
+        MP_CHECKOK(mp_init(&precomp[i][0]));
+        MP_CHECKOK(mp_init(&precomp[i][1]));
+    }
+
+    /* fill precomputation table: precomp[i] = i * P as an affine (x, y) pair */
+    mp_zero(&precomp[0][0]);
+    mp_zero(&precomp[0][1]);
+    MP_CHECKOK(mp_copy(px, &precomp[1][0]));
+    MP_CHECKOK(mp_copy(py, &precomp[1][1]));
+    for (i = 2; i < 16; i++) {
+        MP_CHECKOK(group->point_add(&precomp[1][0], &precomp[1][1],
+                                    &precomp[i - 1][0], &precomp[i - 1][1],
+                                    &precomp[i][0], &precomp[i][1], group));
+    }
+
+    d = (mpl_significant_bits(n) + 3) / 4; /* number of 4-bit windows in n */
+
+    /* R = inf */
+    MP_CHECKOK(mp_init(&rz));
+    MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, &rz));
+
+    for (i = d - 1; i >= 0; i--) {
+        /* compute window ni */
+        ni = MP_GET_BIT(n, 4 * i + 3);
+        ni <<= 1;
+        ni |= MP_GET_BIT(n, 4 * i + 2);
+        ni <<= 1;
+        ni |= MP_GET_BIT(n, 4 * i + 1);
+        ni <<= 1;
+        ni |= MP_GET_BIT(n, 4 * i);
+        /* R = 2^4 * R */
+        MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group));
+        MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group));
+        MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group));
+        MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group));
+        /* R = R + (ni * P) */
+        MP_CHECKOK(ec_GFp_pt_add_jac_aff(rx, ry, &rz, &precomp[ni][0], &precomp[ni][1], rx, ry,
+                                         &rz, group));
+    }
+
+    /* convert result R to affine coordinates */
+    MP_CHECKOK(ec_GFp_pt_jac2aff(rx, ry, &rz, rx, ry, group));
+
+CLEANUP:
+    mp_clear(&rz);
+    for (i = 0; i < 16; i++) {
+        mp_clear(&precomp[i][0]);
+        mp_clear(&precomp[i][1]);
+    }
+    return res;
+}
+#endif
+
+/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k1 * G +
+ * k2 * P(x, y), where G is the generator (base point) of the group of
+ * points on the elliptic curve. Allows k1 = NULL or { k2, P } = NULL.
+ * Uses mixed Jacobian-affine coordinates. Input and output values are
+ * assumed to be NOT field-encoded. Uses algorithm 15 (simultaneous
+ * multiple point multiplication) from Brown, Hankerson, Lopez, Menezes.
+ * Software Implementation of the NIST Elliptic Curves over Prime Fields. */
+mp_err
+ec_GFp_pts_mul_jac(const mp_int *k1, const mp_int *k2, const mp_int *px,
+                   const mp_int *py, mp_int *rx, mp_int *ry,
+                   const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int precomp[4][4][2];
+    mp_int rz;
+    const mp_int *a, *b;
+    unsigned int i, j;
+    int ai, bi, d;
+
+    for (i = 0; i < 4; i++) {
+        for (j = 0; j < 4; j++) {
+            MP_DIGITS(&precomp[i][j][0]) = 0;
+            MP_DIGITS(&precomp[i][j][1]) = 0;
+        }
+    }
+    MP_DIGITS(&rz) = 0;
+
+    ARGCHK(group != NULL, MP_BADARG);
+    ARGCHK(!((k1 == NULL) && ((k2 == NULL) || (px == NULL) || (py == NULL))), MP_BADARG);
+
+    /* if some arguments are not defined, use ECPoint_mul */
+    if (k1 == NULL) {
+        return ECPoint_mul(group, k2, px, py, rx, ry);
+    } else if ((k2 == NULL) || (px == NULL) || (py == NULL)) {
+        return ECPoint_mul(group, k1, NULL, NULL, rx, ry);
+    }
+
+    /* initialize precomputation table */
+    for (i = 0; i < 4; i++) {
+        for (j = 0; j < 4; j++) {
+            MP_CHECKOK(mp_init(&precomp[i][j][0]));
+            MP_CHECKOK(mp_init(&precomp[i][j][1]));
+        }
+    }
+
+    /* fill precomputation table: precomp[i][j] = i * A + j * B, where A is the point paired with scalar a and B with b */
+    /* assign {k1, k2} = {a, b} such that len(a) >= len(b) */
+    if (mpl_significant_bits(k1) < mpl_significant_bits(k2)) {
+        a = k2;
+        b = k1;
+        if (group->meth->field_enc) {
+            MP_CHECKOK(group->meth->field_enc(px, &precomp[1][0][0], group->meth));
+            MP_CHECKOK(group->meth->field_enc(py, &precomp[1][0][1], group->meth));
+        } else {
+            MP_CHECKOK(mp_copy(px, &precomp[1][0][0]));
+            MP_CHECKOK(mp_copy(py, &precomp[1][0][1]));
+        }
+        MP_CHECKOK(mp_copy(&group->genx, &precomp[0][1][0])); /* NOTE(review): copied without field_enc; presumably genx/geny are stored field-encoded -- confirm */
+        MP_CHECKOK(mp_copy(&group->geny, &precomp[0][1][1]));
+    } else {
+        a = k1;
+        b = k2;
+        MP_CHECKOK(mp_copy(&group->genx, &precomp[1][0][0]));
+        MP_CHECKOK(mp_copy(&group->geny, &precomp[1][0][1]));
+        if (group->meth->field_enc) {
+            MP_CHECKOK(group->meth->field_enc(px, &precomp[0][1][0], group->meth));
+            MP_CHECKOK(group->meth->field_enc(py, &precomp[0][1][1], group->meth));
+        } else {
+            MP_CHECKOK(mp_copy(px, &precomp[0][1][0]));
+            MP_CHECKOK(mp_copy(py, &precomp[0][1][1]));
+        }
+    }
+    /* precompute [*][0][*]: 0, A, 2A, 3A */
+    mp_zero(&precomp[0][0][0]);
+    mp_zero(&precomp[0][0][1]);
+    MP_CHECKOK(group->point_dbl(&precomp[1][0][0], &precomp[1][0][1],
+                                &precomp[2][0][0], &precomp[2][0][1], group));
+    MP_CHECKOK(group->point_add(&precomp[1][0][0], &precomp[1][0][1],
+                                &precomp[2][0][0], &precomp[2][0][1],
+                                &precomp[3][0][0], &precomp[3][0][1], group));
+    /* precompute [*][1][*]: i * A + B */
+    for (i = 1; i < 4; i++) {
+        MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1],
+                                    &precomp[i][0][0], &precomp[i][0][1],
+                                    &precomp[i][1][0], &precomp[i][1][1], group));
+    }
+    /* precompute [*][2][*]: i * A + 2B */
+    MP_CHECKOK(group->point_dbl(&precomp[0][1][0], &precomp[0][1][1],
+                                &precomp[0][2][0], &precomp[0][2][1], group));
+    for (i = 1; i < 4; i++) {
+        MP_CHECKOK(group->point_add(&precomp[0][2][0], &precomp[0][2][1],
+                                    &precomp[i][0][0], &precomp[i][0][1],
+                                    &precomp[i][2][0], &precomp[i][2][1], group));
+    }
+    /* precompute [*][3][*]: i * A + 3B */
+    MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1],
+                                &precomp[0][2][0], &precomp[0][2][1],
+                                &precomp[0][3][0], &precomp[0][3][1], group));
+    for (i = 1; i < 4; i++) {
+        MP_CHECKOK(group->point_add(&precomp[0][3][0], &precomp[0][3][1],
+                                    &precomp[i][0][0], &precomp[i][0][1],
+                                    &precomp[i][3][0], &precomp[i][3][1], group));
+    }
+
+    d = (mpl_significant_bits(a) + 1) / 2; /* number of 2-bit windows in the longer scalar */
+
+    /* R = inf */
+    MP_CHECKOK(mp_init(&rz));
+    MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, &rz));
+
+    for (i = d; i-- > 0;) { /* windows from most to least significant; i is unsigned, so test before decrementing */
+        ai = MP_GET_BIT(a, 2 * i + 1);
+        ai <<= 1;
+        ai |= MP_GET_BIT(a, 2 * i);
+        bi = MP_GET_BIT(b, 2 * i + 1);
+        bi <<= 1;
+        bi |= MP_GET_BIT(b, 2 * i);
+        /* R = 2^2 * R */
+        MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group));
+        MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group));
+        /* R = R + (ai * A + bi * B) */
+        MP_CHECKOK(ec_GFp_pt_add_jac_aff(rx, ry, &rz, &precomp[ai][bi][0], &precomp[ai][bi][1],
+                                         rx, ry, &rz, group));
+    }
+
+    MP_CHECKOK(ec_GFp_pt_jac2aff(rx, ry, &rz, rx, ry, group));
+
+    if (group->meth->field_dec) { /* hand the result back un-encoded */
+        MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth));
+        MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth));
+    }
+
+CLEANUP:
+    mp_clear(&rz);
+    for (i = 0; i < 4; i++) {
+        for (j = 0; j < 4; j++) {
+            mp_clear(&precomp[i][j][0]);
+            mp_clear(&precomp[i][j][1]);
+        }
+    }
+    return res;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_jm.c b/security/nss/lib/freebl/ecl/ecp_jm.c
new file mode 100644
index 000000000..a1106cea8
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_jm.c
@@ -0,0 +1,283 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "ecl-priv.h"
+#include "mplogic.h"
+#include <stdlib.h>
+
+#define MAX_SCRATCH 6
+
+/* Point doubling, R = 2P, in Modified Jacobian coordinates, where the
+ * value a*z^4 is carried with each point (paz4 in, raz4 out). P and R can
+ * be identical. scratch must provide at least four initialized mp_ints.
+ *
+ * Assumes input is already field-encoded using field_enc, and returns
+ * output that is still field-encoded.
+ */
+static mp_err
+ec_GFp_pt_dbl_jm(const mp_int *px, const mp_int *py, const mp_int *pz,
+                 const mp_int *paz4, mp_int *rx, mp_int *ry, mp_int *rz,
+                 mp_int *raz4, mp_int scratch[], const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    const GFMethod *meth = group->meth;
+    /* temporaries, carved out of the caller's scratch array */
+    mp_int *tmp = &scratch[0];
+    mp_int *eightY4 = &scratch[1];
+    mp_int *slope = &scratch[2];
+    mp_int *fourXY2 = &scratch[3];
+
+#if MAX_SCRATCH < 4
+#error "Scratch array defined too small "
+#endif
+
+    /* Doubling the point at infinity gives the point at infinity */
+    if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) {
+        /* Mark R as the point at infinity; raz4 is not written on this
+         * path */
+        MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz));
+        goto CLEANUP;
+    }
+
+    /* slope = 3 * px^2 + a * pz^4 */
+    MP_CHECKOK(meth->field_sqr(px, tmp, meth));
+    MP_CHECKOK(meth->field_add(tmp, tmp, slope, meth));
+    MP_CHECKOK(meth->field_add(tmp, slope, tmp, meth));
+    MP_CHECKOK(meth->field_add(tmp, paz4, slope, meth));
+
+    /* rz = 2 * py * pz (fourXY2 is only borrowed as a temporary here) */
+    MP_CHECKOK(meth->field_mul(py, pz, fourXY2, meth));
+    MP_CHECKOK(meth->field_add(fourXY2, fourXY2, rz, meth));
+
+    /* tmp = 2 * py^2, eightY4 = 8 * py^4 */
+    MP_CHECKOK(meth->field_sqr(py, tmp, meth));
+    MP_CHECKOK(meth->field_add(tmp, tmp, tmp, meth));
+    MP_CHECKOK(meth->field_sqr(tmp, eightY4, meth));
+    MP_CHECKOK(meth->field_add(eightY4, eightY4, eightY4, meth));
+
+    /* fourXY2 = 4 * px * py^2 = 2 * px * tmp */
+    MP_CHECKOK(meth->field_mul(px, tmp, fourXY2, meth));
+    MP_CHECKOK(meth->field_add(fourXY2, fourXY2, fourXY2, meth));
+
+    /* rx = slope^2 - 2 * fourXY2 */
+    MP_CHECKOK(meth->field_sqr(slope, rx, meth));
+    MP_CHECKOK(meth->field_sub(rx, fourXY2, rx, meth));
+    MP_CHECKOK(meth->field_sub(rx, fourXY2, rx, meth));
+
+    /* ry = slope * (fourXY2 - rx) - eightY4 */
+    MP_CHECKOK(meth->field_sub(fourXY2, rx, fourXY2, meth));
+    MP_CHECKOK(meth->field_mul(fourXY2, slope, ry, meth));
+    MP_CHECKOK(meth->field_sub(ry, eightY4, ry, meth));
+
+    /* raz4 = 2 * eightY4 * paz4 */
+    MP_CHECKOK(meth->field_mul(paz4, eightY4, raz4, meth));
+    MP_CHECKOK(meth->field_add(raz4, raz4, raz4, meth));
+
+CLEANUP:
+    return res;
+}
+
+/* Mixed addition R = P + Q, with P = (px, py, pz) and R = (rx, ry, rz) in
+ * Modified Jacobian coordinates (paz4 and raz4 carry a*z^4) and Q =
+ * (qx, qy) affine. Elliptic curve points P, Q, and R can all be
+ * identical. scratch must provide six mp_ints. Assumes input is already
+ * field-encoded using field_enc; output is still field-encoded. */
+static mp_err
+ec_GFp_pt_add_jm_aff(const mp_int *px, const mp_int *py, const mp_int *pz,
+                     const mp_int *paz4, const mp_int *qx,
+                     const mp_int *qy, mp_int *rx, mp_int *ry, mp_int *rz,
+                     mp_int *raz4, mp_int scratch[], const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    const GFMethod *meth = group->meth;
+    /* temporaries, carved out of the caller's scratch array */
+    mp_int *u2 = &scratch[0];
+    mp_int *s2 = &scratch[1];
+    mp_int *h = &scratch[2];
+    mp_int *rdiff = &scratch[3];
+    mp_int *h2 = &scratch[4];
+    mp_int *h3 = &scratch[5];
+
+#if MAX_SCRATCH < 6
+#error "Scratch array defined too small "
+#endif
+
+    /* When one operand is the point at infinity the sum is simply the
+     * other operand */
+    if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) {
+        MP_CHECKOK(ec_GFp_pt_aff2jac(qx, qy, rx, ry, rz, group));
+        MP_CHECKOK(meth->field_sqr(rz, raz4, meth));
+        MP_CHECKOK(meth->field_sqr(raz4, raz4, meth));
+        MP_CHECKOK(meth->field_mul(raz4, &group->curvea, raz4, meth));
+        goto CLEANUP;
+    }
+    if (ec_GFp_pt_is_inf_aff(qx, qy) == MP_YES) {
+        MP_CHECKOK(mp_copy(px, rx));
+        MP_CHECKOK(mp_copy(py, ry));
+        MP_CHECKOK(mp_copy(pz, rz));
+        MP_CHECKOK(mp_copy(paz4, raz4));
+        goto CLEANUP;
+    }
+
+    /* u2 = qx * pz^2, s2 = qy * pz^3 */
+    MP_CHECKOK(meth->field_sqr(pz, u2, meth));
+    MP_CHECKOK(meth->field_mul(u2, pz, s2, meth));
+    MP_CHECKOK(meth->field_mul(u2, qx, u2, meth));
+    MP_CHECKOK(meth->field_mul(s2, qy, s2, meth));
+
+    /* h = u2 - px, rdiff = s2 - py */
+    MP_CHECKOK(meth->field_sub(u2, px, h, meth));
+    MP_CHECKOK(meth->field_sub(s2, py, rdiff, meth));
+
+    /* h2 = h^2, h3 = h^3 */
+    MP_CHECKOK(meth->field_sqr(h, h2, meth));
+    MP_CHECKOK(meth->field_mul(h, h2, h3, meth));
+
+    /* rz = pz * h */
+    MP_CHECKOK(meth->field_mul(pz, h, rz, meth));
+
+    /* from here on h holds px * h^2 */
+    MP_CHECKOK(meth->field_mul(px, h2, h, meth));
+    /* and u2 holds rdiff^2 */
+    MP_CHECKOK(meth->field_sqr(rdiff, u2, meth));
+
+    /* rx = rdiff^2 - (h^3 + 2 * (px * h^2)) */
+    MP_CHECKOK(meth->field_add(h, h, rx, meth));
+    MP_CHECKOK(meth->field_add(h3, rx, rx, meth));
+    MP_CHECKOK(meth->field_sub(u2, rx, rx, meth));
+
+    /* h3 = py * h^3 */
+    MP_CHECKOK(meth->field_mul(py, h3, h3, meth));
+
+    /* ry = rdiff * (px * h^2 - rx) - py * h^3 */
+    MP_CHECKOK(meth->field_sub(h, rx, ry, meth));
+    MP_CHECKOK(meth->field_mul(rdiff, ry, ry, meth));
+    MP_CHECKOK(meth->field_sub(ry, h3, ry, meth));
+
+    /* raz4 = a * rz^4 */
+    MP_CHECKOK(meth->field_sqr(rz, raz4, meth));
+    MP_CHECKOK(meth->field_sqr(raz4, raz4, meth));
+    MP_CHECKOK(meth->field_mul(raz4, &group->curvea, raz4, meth));
+CLEANUP:
+    return res;
+}
+
+/* Computes R = nP where R is (rx, ry) and P is (px, py). Elliptic curve
+ * points P and R can be identical. Uses the 5-bit window NAF method
+ * (algorithm 11) from Brown, Hankerson, Lopez, Menezes. Software
+ * Implementation of the NIST Elliptic Curves Over Prime Fields, with
+ * Modified Jacobian doublings and mixed Modified Jacobian-affine
+ * additions. Assumes input is already field-encoded using field_enc, and
+ * returns output that is still field-encoded. Returns MP_OKAY, or the
+ * error code of the first step that failed (MP_MEM for the NAF buffer). */
+mp_err
+ec_GFp_pt_mul_jm_wNAF(const mp_int *n, const mp_int *px, const mp_int *py,
+                      mp_int *rx, mp_int *ry, const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int precomp[16][2], rz, tpx, tpy;
+    mp_int raz4;
+    mp_int scratch[MAX_SCRATCH];
+    signed char *naf = NULL;
+    int i, orderBitSize;
+
+    MP_DIGITS(&rz) = 0;
+    MP_DIGITS(&raz4) = 0;
+    MP_DIGITS(&tpx) = 0;
+    MP_DIGITS(&tpy) = 0;
+    for (i = 0; i < 16; i++) {
+        MP_DIGITS(&precomp[i][0]) = 0;
+        MP_DIGITS(&precomp[i][1]) = 0;
+    }
+    for (i = 0; i < MAX_SCRATCH; i++) {
+        MP_DIGITS(&scratch[i]) = 0;
+    }
+
+    ARGCHK(group != NULL, MP_BADARG);
+    ARGCHK((n != NULL) && (px != NULL) && (py != NULL), MP_BADARG);
+    ARGCHK((rx != NULL) && (ry != NULL), MP_BADARG);
+
+    /* initialize precomputation table */
+    MP_CHECKOK(mp_init(&tpx));
+    MP_CHECKOK(mp_init(&tpy));
+    MP_CHECKOK(mp_init(&rz));
+    MP_CHECKOK(mp_init(&raz4));
+
+    for (i = 0; i < 16; i++) {
+        MP_CHECKOK(mp_init(&precomp[i][0]));
+        MP_CHECKOK(mp_init(&precomp[i][1]));
+    }
+    for (i = 0; i < MAX_SCRATCH; i++) {
+        MP_CHECKOK(mp_init(&scratch[i]));
+    }
+
+    /* Set precomp[8] = P */
+    MP_CHECKOK(mp_copy(px, &precomp[8][0]));
+    MP_CHECKOK(mp_copy(py, &precomp[8][1]));
+
+    /* Set (tpx, tpy) = 2P */
+    MP_CHECKOK(group->point_dbl(&precomp[8][0], &precomp[8][1], &tpx, &tpy,
+                                group));
+
+    /* Set 3P, 5P, ..., 15P */
+    for (i = 8; i < 15; i++) {
+        MP_CHECKOK(group->point_add(&precomp[i][0], &precomp[i][1], &tpx, &tpy,
+                                    &precomp[i + 1][0], &precomp[i + 1][1],
+                                    group));
+    }
+
+    /* Set -15P, -13P, ..., -P */
+    for (i = 0; i < 8; i++) {
+        MP_CHECKOK(mp_copy(&precomp[15 - i][0], &precomp[i][0]));
+        MP_CHECKOK(group->meth->field_neg(&precomp[15 - i][1], &precomp[i][1],
+                                          group->meth));
+    }
+
+    /* R = inf */
+    MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, &rz));
+
+    orderBitSize = mpl_significant_bits(&group->order);
+
+    /* Allocate memory for NAF */
+    naf = (signed char *)malloc(sizeof(signed char) * (orderBitSize + 1));
+    if (naf == NULL) {
+        res = MP_MEM;
+        goto CLEANUP;
+    }
+
+    /* Compute 5NAF */
+    ec_compute_wNAF(naf, orderBitSize, n, 5);
+
+    /* wNAF method; a failing step must stop the loop and be reported */
+    for (i = orderBitSize; i >= 0; i--) {
+        /* R = 2R */
+        MP_CHECKOK(ec_GFp_pt_dbl_jm(rx, ry, &rz, &raz4, rx, ry, &rz,
+                                    &raz4, scratch, group));
+        if (naf[i] != 0) {
+            MP_CHECKOK(ec_GFp_pt_add_jm_aff(rx, ry, &rz, &raz4,
+                                            &precomp[(naf[i] + 15) / 2][0],
+                                            &precomp[(naf[i] + 15) / 2][1], rx, ry,
+                                            &rz, &raz4, scratch, group));
+        }
+    }
+
+    /* convert result S to affine coordinates */
+    MP_CHECKOK(ec_GFp_pt_jac2aff(rx, ry, &rz, rx, ry, group));
+
+CLEANUP:
+    for (i = 0; i < MAX_SCRATCH; i++) {
+        mp_clear(&scratch[i]);
+    }
+    for (i = 0; i < 16; i++) {
+        mp_clear(&precomp[i][0]);
+        mp_clear(&precomp[i][1]);
+    }
+    mp_clear(&tpx);
+    mp_clear(&tpy);
+    mp_clear(&rz);
+    mp_clear(&raz4);
+    free(naf);
+    return res;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_mont.c b/security/nss/lib/freebl/ecl/ecp_mont.c
new file mode 100644
index 000000000..779685b4d
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_mont.c
@@ -0,0 +1,154 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Uses Montgomery reduction for field arithmetic.  See mpi/mpmontg.c for
+ * code implementation. */
+
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+#include "ecl-priv.h"
+#include "ecp.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+/* Construct a GFMethod for arithmetic over the prime field with
+ * irreducible irr, starting from the generic GFMethod_consGFp result and
+ * replacing its mul, sqr, div, enc and dec operations with the Montgomery
+ * versions. Returns NULL if either allocation step fails. */
+GFMethod *
+GFMethod_consGFp_mont(const mp_int *irr)
+{
+    GFMethod *meth = GFMethod_consGFp(irr);
+    mp_mont_modulus *mmm = NULL;
+
+    if (meth == NULL) {
+        return NULL;
+    }
+
+    mmm = (mp_mont_modulus *)malloc(sizeof(mp_mont_modulus));
+    if (mmm == NULL) {
+        /* no Montgomery state is attached yet; release the base method */
+        GFMethod_free(meth);
+        return NULL;
+    }
+
+    /* Montgomery state: a struct copy of the modulus held by meth, plus
+     * n0prime derived from the lowest digit of that modulus */
+    mmm->N = meth->irr;
+    mmm->n0prime = 0 - s_mp_invmod_radix(MP_DIGIT(&meth->irr, 0));
+
+    /* install the Montgomery operations and hand mmm over to the method */
+    meth->field_mul = &ec_GFp_mul_mont;
+    meth->field_sqr = &ec_GFp_sqr_mont;
+    meth->field_div = &ec_GFp_div_mont;
+    meth->field_enc = &ec_GFp_enc_mont;
+    meth->field_dec = &ec_GFp_dec_mont;
+    meth->extra1 = mmm;
+    meth->extra2 = NULL;
+    meth->extra_free = &ec_GFp_extra_free_mont;
+
+    return meth;
+}
+
+/* Wrapper functions for generic prime field arithmetic. */
+
+/* Field multiplication of a and b using Montgomery reduction; result in r. */
+mp_err
+ec_GFp_mul_mont(const mp_int *a, const mp_int *b, mp_int *r,
+                const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+#ifdef MP_MONT_USE_MP_MUL
+    /* if MP_MONT_USE_MP_MUL is defined, then the function s_mp_mul_mont
+     * is not implemented and we have to use mp_mul and s_mp_redc directly */
+    MP_CHECKOK(mp_mul(a, b, r));
+    MP_CHECKOK(s_mp_redc(r, (mp_mont_modulus *)meth->extra1));
+CLEANUP:
+    return res;
+#else
+    mp_int s;
+
+    MP_DIGITS(&s) = 0; /* lets CLEANUP clear s even if it is never used */
+    /* s_mp_mul_mont doesn't allow source and destination to be the same */
+    if ((a == r) || (b == r)) {
+        MP_CHECKOK(mp_init(&s));
+        MP_CHECKOK(s_mp_mul_mont(a, b, &s, (mp_mont_modulus *)meth->extra1));
+        MP_CHECKOK(mp_copy(&s, r));
+    } else {
+        MP_CHECKOK(s_mp_mul_mont(a, b, r, (mp_mont_modulus *)meth->extra1));
+    }
+CLEANUP:
+    mp_clear(&s); /* on every path: s used to leak when mul or copy failed */
+    return res;
+#endif
+}
+
+/* Field squaring using Montgomery reduction: multiplies a by itself. */
+mp_err
+ec_GFp_sqr_mont(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    return ec_GFp_mul_mont(a, a, r, meth);
+}
+
+/* Field division using Montgomery reduction. */
+mp_err
+ec_GFp_div_mont(const mp_int *a, const mp_int *b, mp_int *r,
+                const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    int encodings = (a == NULL) ? 2 : 1;
+
+    /* Operands are Montgomery-encoded: A = aZ, B = bZ. The plain quotient
+     * A/B loses the factor Z, so one encoding pass yields (a/b)Z. With
+     * a == NULL the result is meant to be Binv = (1/B)(Z^2) = (1/b)Z, so
+     * that B # Binv = Z; that takes a second encoding pass. */
+    MP_CHECKOK(ec_GFp_div(a, b, r, meth));
+    while (encodings-- > 0) {
+        MP_CHECKOK(ec_GFp_enc_mont(r, r, meth));
+    }
+CLEANUP:
+    return res;
+}
+
+/* Encode the field element a in Montgomery form and store it in r. See
+ * s_mp_to_mont in mpi/mpmontg.c */
+mp_err
+ec_GFp_enc_mont(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_mont_modulus *mmm = (mp_mont_modulus *)meth->extra1;
+
+    /* r = a, shifted left by MP_USED(N) places via s_mp_lshd, reduced mod N */
+    MP_CHECKOK(mp_copy(a, r));
+    MP_CHECKOK(s_mp_lshd(r, MP_USED(&mmm->N)));
+    MP_CHECKOK(mp_mod(r, &mmm->N, r));
+CLEANUP:
+    return res;
+}
+
+/* Decode the field element a from Montgomery form into r. */
+mp_err
+ec_GFp_dec_mont(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    /* work in r; the copy is skipped when a and r are the same object */
+    mp_err res = (a == r) ? MP_OKAY : mp_copy(a, r);
+
+    /* a negative status from the copy is returned without reducing */
+    if (res >= MP_OKAY) {
+        res = s_mp_redc(r, (mp_mont_modulus *)meth->extra1);
+    }
+    return res;
+}
+
+/* Releases the Montgomery modulus state stored in meth->extra1 and
+ * resets the pointer to NULL. */
+void
+ec_GFp_extra_free_mont(GFMethod *meth)
+{
+    /* free(NULL) is a no-op, so an unset extra1 needs no special case
+     * here */
+    free(meth->extra1);
+    meth->extra1 = NULL;
+}
diff --git a/security/nss/lib/freebl/ecl/tests/ec_naft.c b/security/nss/lib/freebl/ecl/tests/ec_naft.c
new file mode 100644
index 000000000..61ef15c36
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/tests/ec_naft.c
@@ -0,0 +1,121 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+#include "mplogic.h"
+#include "ecl.h"
+#include "ecp.h"
+#include "ecl-priv.h"
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/* Returns 2^e as an integer. This is meant to be used for small powers of
+ * two. */
+int ec_twoTo(int e);
+
+/* Number of bits of scalar to test */
+#define BITSIZE 160
+
+/* Time k repetitions of operation op (user CPU time); silent if the time is 0. */
+#define M_TimeOperation(op, k)                                                        \
+    {                                                                                 \
+        double dStart, dNow, dUserTime;                                               \
+        struct rusage ru;                                                             \
+        int i;                                                                        \
+        getrusage(RUSAGE_SELF, &ru);                                                  \
+        dStart = (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec * 0.000001; \
+        for (i = 0; i < k; i++) {                                                     \
+            {                                                                         \
+                op;                                                                   \
+            }                                                                         \
+        };                                                                            \
+        getrusage(RUSAGE_SELF, &ru);                                                  \
+        dNow = (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec * 0.000001;   \
+        dUserTime = dNow - dStart;                                                    \
+        if (dUserTime)                                                                \
+            printf("    %-45s\n      k: %6i, t: %6.2f sec\n", #op, k, dUserTime);     \
+    }
+
+/* Tests wNAF computation; returns MP_OKAY only if every check passes.
+ * Non-adjacent-form is discussed in the paper: D. Hankerson, J. Hernandez
+ * and A. Menezes, "Software implementation of elliptic curve cryptography
+ * over binary fields", Proc. CHES 2000. */
+mp_err
+main(void)
+{
+    signed char naf[BITSIZE + 1];
+    ECGroup *group = NULL;
+    mp_int k;
+    mp_int *scalar;
+    int i, count, passed = 0;
+    mp_err res = MP_OKAY;
+    int w = 5;
+    char s[1000];
+
+    MP_DIGITS(&k) = 0; /* lets CLEANUP clear k even before mp_init */
+    /* Get a 160 bit scalar to compute wNAF from */
+    group = ECGroup_fromName(ECCurve_SECG_PRIME_160R1);
+    if (group == NULL) {
+        printf("Error:  could not construct group.\n");
+        goto CLEANUP;
+    }
+    scalar = &group->genx;
+    /* Compute wNAF representation of scalar */
+    ec_compute_wNAF(naf, BITSIZE, scalar, w);
+
+    /* Verify correctness of representation */
+    MP_CHECKOK(mp_init(&k)); /* init k to 0 */
+    for (i = BITSIZE; i >= 0; i--) {
+        MP_CHECKOK(mp_add(&k, &k, &k));
+        if (naf[i] >= 0) { /* digits in mp_???_d are unsigned */
+            MP_CHECKOK(mp_add_d(&k, naf[i], &k));
+        } else {
+            MP_CHECKOK(mp_sub_d(&k, -naf[i], &k));
+        }
+    }
+    if (mp_cmp(&k, scalar) != 0) {
+        printf("Error:  incorrect NAF value.\n");
+        MP_CHECKOK(mp_toradix(&k, s, 16));
+        printf("NAF value   %s\n", s);
+        MP_CHECKOK(mp_toradix(scalar, s, 16));
+        printf("original value   %s\n", s);
+        goto CLEANUP;
+    }
+
+    /* Verify digits of representation are valid */
+    for (i = 0; i <= BITSIZE; i++) {
+        if (naf[i] % 2 == 0 && naf[i] != 0) {
+            printf("Error:  Even non-zero digit found.\n");
+            goto CLEANUP;
+        }
+        if (naf[i] < -(ec_twoTo(w - 1)) || naf[i] >= ec_twoTo(w - 1)) {
+            printf("Error:  Magnitude of naf digit too large.\n");
+            goto CLEANUP;
+        }
+    }
+
+    /* Verify sparsity of representation */
+    count = w - 1;
+    for (i = 0; i <= BITSIZE; i++) {
+        if (naf[i] != 0) {
+            if (count < w - 1) {
+                printf("Error:  Sparsity failed.\n");
+                goto CLEANUP;
+            }
+            count = 0;
+        } else
+            count++;
+    }
+    /* Check timing */
+    M_TimeOperation(ec_compute_wNAF(naf, BITSIZE, scalar, w), 10000);
+    passed = 1; /* reached only when no check jumped to CLEANUP */
+    printf("Test passed.\n");
+CLEANUP:
+    mp_clear(&k);
+    ECGroup_free(group);
+    return passed ? MP_OKAY : MP_NO;
+}
diff --git a/security/nss/lib/freebl/ecl/tests/ecp_test.c b/security/nss/lib/freebl/ecl/tests/ecp_test.c
new file mode 100644
index 000000000..dcec4d747
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/tests/ecp_test.c
@@ -0,0 +1,409 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpprime.h"
+#include "ecl.h"
+#include "ecl-curve.h"
+#include "ecp.h"
+#include <stdio.h>
+#include <strings.h>
+#include <assert.h>
+
+#include <time.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/* Time k repetitions of operation op (user CPU time); silent if the time is 0. */
+#define M_TimeOperation(op, k)                                                        \
+    {                                                                                 \
+        double dStart, dNow, dUserTime;                                               \
+        struct rusage ru;                                                             \
+        int i;                                                                        \
+        getrusage(RUSAGE_SELF, &ru);                                                  \
+        dStart = (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec * 0.000001; \
+        for (i = 0; i < k; i++) {                                                     \
+            {                                                                         \
+                op;                                                                   \
+            }                                                                         \
+        };                                                                            \
+        getrusage(RUSAGE_SELF, &ru);                                                  \
+        dNow = (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec * 0.000001;   \
+        dUserTime = dNow - dStart;                                                    \
+        if (dUserTime)                                                                \
+            printf("    %-45s k: %6i, t: %6.2f sec\n", #op, k, dUserTime);            \
+    }
+
+/* Test curve using generic field arithmetic (uses params, group, res, CLEANUP). */
+#define ECTEST_GENERIC_GFP(name_c, name)                             \
+    printf("Testing %s using generic implementation...\n", name_c);  \
+    params = EC_GetNamedCurveParams(name);                           \
+    if (params == NULL) {                                            \
+        printf("  Error: could not construct params.\n");            \
+        res = MP_NO;                                                 \
+        goto CLEANUP;                                                \
+    }                                                                \
+    ECGroup_free(group);                                             \
+    group = ECGroup_fromHex(params);                                 \
+    if (group == NULL) {                                             \
+        printf("  Error: could not construct group.\n");             \
+        res = MP_NO;                                                 \
+        goto CLEANUP;                                                \
+    }                                                                \
+    MP_CHECKOK(ectest_curve_GFp(group, ectestPrint, ectestTime, 1)); \
+    printf("... okay.\n");
+
+/* Test curve using specific field arithmetic; a NULL group only warns and skips. */
+#define ECTEST_NAMED_GFP(name_c, name)                                   \
+    printf("Testing %s using specific implementation...\n", name_c);     \
+    ECGroup_free(group);                                                 \
+    group = ECGroup_fromName(name);                                      \
+    if (group == NULL) {                                                 \
+        printf("  Warning: could not construct group.\n");               \
+        printf("... failed; continuing with remaining tests.\n");        \
+    } else {                                                             \
+        MP_CHECKOK(ectest_curve_GFp(group, ectestPrint, ectestTime, 0)); \
+        printf("... okay.\n");                                           \
+    }
+
+/* Performs basic tests of elliptic curve cryptography over prime fields.
+ * If a test fails, prints an error message and returns a non-zero error
+ * code; otherwise returns MP_OKAY. ectestPrint prints each computed point,
+ * ectestTime runs the timings and generic picks which ones are run. */
+int
+ectest_curve_GFp(ECGroup *group, int ectestPrint, int ectestTime,
+                 int generic)
+{
+    mp_int one, order_1, gx, gy, rx, ry, n;
+    int size;
+    mp_err res = MP_OKAY;
+    char s[1000];
+
+    /* null the digit pointers so CLEANUP can clear values never initialized */
+    MP_DIGITS(&one) = MP_DIGITS(&order_1) = MP_DIGITS(&gx) = MP_DIGITS(&gy) = 0;
+    MP_DIGITS(&rx) = MP_DIGITS(&ry) = MP_DIGITS(&n) = 0;
+    /* initialize values */
+    MP_CHECKOK(mp_init(&one));
+    MP_CHECKOK(mp_init(&order_1));
+    MP_CHECKOK(mp_init(&gx));
+    MP_CHECKOK(mp_init(&gy));
+    MP_CHECKOK(mp_init(&rx));
+    MP_CHECKOK(mp_init(&ry));
+    MP_CHECKOK(mp_init(&n));
+
+    MP_CHECKOK(mp_set_int(&one, 1));
+    MP_CHECKOK(mp_sub(&group->order, &one, &order_1));
+
+    /* decode base point (plain copy when the method has no field_dec) */
+    if (group->meth->field_dec) {
+        MP_CHECKOK(group->meth->field_dec(&group->genx, &gx, group->meth));
+        MP_CHECKOK(group->meth->field_dec(&group->geny, &gy, group->meth));
+    } else {
+        MP_CHECKOK(mp_copy(&group->genx, &gx));
+        MP_CHECKOK(mp_copy(&group->geny, &gy));
+    }
+    if (ectestPrint) {
+        /* output base point */
+        printf("  base point P:\n");
+        MP_CHECKOK(mp_toradix(&gx, s, 16));
+        printf("    %s\n", s);
+        MP_CHECKOK(mp_toradix(&gy, s, 16));
+        printf("    %s\n", s);
+        if (group->meth->field_enc) {
+            printf("  base point P (encoded):\n");
+            MP_CHECKOK(mp_toradix(&group->genx, s, 16));
+            printf("    %s\n", s);
+            MP_CHECKOK(mp_toradix(&group->geny, s, 16));
+            printf("    %s\n", s);
+        }
+    }
+
+#ifdef ECL_ENABLE_GFP_PT_MUL_AFF
+    /* (order - 1) * P must be the negative of the base point */
+    MP_CHECKOK(ec_GFp_pt_mul_aff(&order_1, &group->genx, &group->geny, &rx, &ry, group));
+    if (ectestPrint) {
+        printf("  (order-1)*P (affine):\n");
+        MP_CHECKOK(mp_toradix(&rx, s, 16));
+        printf("    %s\n", s);
+        MP_CHECKOK(mp_toradix(&ry, s, 16));
+        printf("    %s\n", s);
+    }
+    MP_CHECKOK(group->meth->field_neg(&ry, &ry, group->meth));
+    if ((mp_cmp(&rx, &group->genx) != 0) || (mp_cmp(&ry, &group->geny) != 0)) {
+        printf("  Error: invalid result (expected (- base point)).\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+#endif
+
+#ifdef ECL_ENABLE_GFP_PT_MUL_JAC
+    /* (order - 1) * P must be the negative of the base point */
+    MP_CHECKOK(ec_GFp_pt_mul_jac(&order_1, &group->genx, &group->geny, &rx, &ry, group));
+    if (ectestPrint) {
+        printf("  (order-1)*P (jacobian):\n");
+        MP_CHECKOK(mp_toradix(&rx, s, 16));
+        printf("    %s\n", s);
+        MP_CHECKOK(mp_toradix(&ry, s, 16));
+        printf("    %s\n", s);
+    }
+    MP_CHECKOK(group->meth->field_neg(&ry, &ry, group->meth));
+    if ((mp_cmp(&rx, &group->genx) != 0) || (mp_cmp(&ry, &group->geny) != 0)) {
+        printf("  Error: invalid result (expected (- base point)).\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+#endif
+
+    /* (order - 1) * P must be the negative of the base point */
+    MP_CHECKOK(ECPoint_mul(group, &order_1, NULL, NULL, &rx, &ry));
+    if (ectestPrint) {
+        printf("  (order-1)*P (ECPoint_mul):\n");
+        MP_CHECKOK(mp_toradix(&rx, s, 16));
+        printf("    %s\n", s);
+        MP_CHECKOK(mp_toradix(&ry, s, 16));
+        printf("    %s\n", s);
+    }
+    MP_CHECKOK(mp_submod(&group->meth->irr, &ry, &group->meth->irr, &ry));
+    if ((mp_cmp(&rx, &gx) != 0) || (mp_cmp(&ry, &gy) != 0)) {
+        printf("  Error: invalid result (expected (- base point)).\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+
+    /* same check, passing the base point explicitly as (gx, gy) */
+    MP_CHECKOK(ECPoint_mul(group, &order_1, &gx, &gy, &rx, &ry));
+    if (ectestPrint) {
+        printf("  (order-1)*P (ECPoint_mul):\n");
+        MP_CHECKOK(mp_toradix(&rx, s, 16));
+        printf("    %s\n", s);
+        MP_CHECKOK(mp_toradix(&ry, s, 16));
+        printf("    %s\n", s);
+    }
+    MP_CHECKOK(mp_submod(&group->meth->irr, &ry, &group->meth->irr, &ry));
+    if ((mp_cmp(&rx, &gx) != 0) || (mp_cmp(&ry, &gy) != 0)) {
+        printf("  Error: invalid result (expected (- base point)).\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+
+#ifdef ECL_ENABLE_GFP_PT_MUL_AFF
+    /* multiply base point by order and check for point at infinity */
+    MP_CHECKOK(ec_GFp_pt_mul_aff(&group->order, &group->genx, &group->geny, &rx, &ry,
+                                 group));
+    if (ectestPrint) {
+        printf("  (order)*P (affine):\n");
+        MP_CHECKOK(mp_toradix(&rx, s, 16));
+        printf("    %s\n", s);
+        MP_CHECKOK(mp_toradix(&ry, s, 16));
+        printf("    %s\n", s);
+    }
+    if (ec_GFp_pt_is_inf_aff(&rx, &ry) != MP_YES) {
+        printf("  Error: invalid result (expected point at infinity).\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+#endif
+
+#ifdef ECL_ENABLE_GFP_PT_MUL_JAC
+    /* multiply base point by order and check for point at infinity */
+    MP_CHECKOK(ec_GFp_pt_mul_jac(&group->order, &group->genx, &group->geny, &rx, &ry,
+                                 group));
+    if (ectestPrint) {
+        printf("  (order)*P (jacobian):\n");
+        MP_CHECKOK(mp_toradix(&rx, s, 16));
+        printf("    %s\n", s);
+        MP_CHECKOK(mp_toradix(&ry, s, 16));
+        printf("    %s\n", s);
+    }
+    if (ec_GFp_pt_is_inf_aff(&rx, &ry) != MP_YES) {
+        printf("  Error: invalid result (expected point at infinity).\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+#endif
+
+    /* multiply base point by order and check for point at infinity */
+    MP_CHECKOK(ECPoint_mul(group, &group->order, NULL, NULL, &rx, &ry));
+    if (ectestPrint) {
+        printf("  (order)*P (ECPoint_mul):\n");
+        MP_CHECKOK(mp_toradix(&rx, s, 16));
+        printf("    %s\n", s);
+        MP_CHECKOK(mp_toradix(&ry, s, 16));
+        printf("    %s\n", s);
+    }
+    if (ec_GFp_pt_is_inf_aff(&rx, &ry) != MP_YES) {
+        printf("  Error: invalid result (expected point at infinity).\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+
+    /* multiply base point by order and check for point at infinity */
+    MP_CHECKOK(ECPoint_mul(group, &group->order, &gx, &gy, &rx, &ry));
+    if (ectestPrint) {
+        printf("  (order)*P (ECPoint_mul):\n");
+        MP_CHECKOK(mp_toradix(&rx, s, 16));
+        printf("    %s\n", s);
+        MP_CHECKOK(mp_toradix(&ry, s, 16));
+        printf("    %s\n", s);
+    }
+    if (ec_GFp_pt_is_inf_aff(&rx, &ry) != MP_YES) {
+        printf("  Error: invalid result (expected point at infinity).\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+
+    /* check that (order-1)P + (order-1)P + P == (order-1)P */
+    MP_CHECKOK(ECPoints_mul(group, &order_1, &order_1, &gx, &gy, &rx, &ry));
+    MP_CHECKOK(ECPoints_mul(group, &one, &one, &rx, &ry, &rx, &ry));
+    if (ectestPrint) {
+        printf("  (order-1)*P + (order-1)*P + P == (order-1)*P (ECPoints_mul):\n");
+        MP_CHECKOK(mp_toradix(&rx, s, 16));
+        printf("    %s\n", s);
+        MP_CHECKOK(mp_toradix(&ry, s, 16));
+        printf("    %s\n", s);
+    }
+    MP_CHECKOK(mp_submod(&group->meth->irr, &ry, &group->meth->irr, &ry));
+    if ((mp_cmp(&rx, &gx) != 0) || (mp_cmp(&ry, &gy) != 0)) {
+        printf("  Error: invalid result (expected (- base point)).\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+
+    /* test validate_point function */
+    if (ECPoint_validate(group, &gx, &gy) != MP_YES) {
+        printf("  Error: validate point on base point failed.\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+    MP_CHECKOK(mp_add_d(&gy, 1, &ry));
+    if (ECPoint_validate(group, &gx, &ry) != MP_NO) {
+        printf("  Error: validate point on invalid point passed.\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+
+    if (ectestTime) {
+        /* compute random scalar */
+        size = mpl_significant_bits(&group->meth->irr);
+        if (size < MP_OKAY) {
+            res = (mp_err)size; /* report the failure instead of a stale res */
+            goto CLEANUP;
+        }
+        MP_CHECKOK(mpp_random_size(&n, (size + ECL_BITS - 1) / ECL_BITS));
+        MP_CHECKOK(group->meth->field_mod(&n, &n, group->meth));
+        /* timed test */
+        if (generic) {
+#ifdef ECL_ENABLE_GFP_PT_MUL_AFF
+            M_TimeOperation(MP_CHECKOK(ec_GFp_pt_mul_aff(&n, &group->genx, &group->geny, &rx, &ry,
+                                                         group)),
+                            100);
+#endif
+            M_TimeOperation(MP_CHECKOK(ECPoint_mul(group, &n, NULL, NULL, &rx, &ry)),
+                            100);
+            M_TimeOperation(MP_CHECKOK(ECPoints_mul(group, &n, &n, &gx, &gy, &rx, &ry)), 100);
+        } else {
+            M_TimeOperation(MP_CHECKOK(ECPoint_mul(group, &n, NULL, NULL, &rx, &ry)),
+                            100);
+            M_TimeOperation(MP_CHECKOK(ECPoint_mul(group, &n, &gx, &gy, &rx, &ry)),
+                            100);
+            M_TimeOperation(MP_CHECKOK(ECPoints_mul(group, &n, &n, &gx, &gy, &rx, &ry)), 100);
+        }
+    }
+
+CLEANUP:
+    mp_clear(&one);
+    mp_clear(&order_1);
+    mp_clear(&gx);
+    mp_clear(&gy);
+    mp_clear(&rx);
+    mp_clear(&ry);
+    mp_clear(&n);
+    if (res != MP_OKAY) {
+        printf("  Error: exiting with error value %i\n", res);
+    }
+    return res;
+}
+
+/* Prints help information: the usage line and one line per option. */
+void
+printUsage(void)
+{
+    printf("Usage: ecp_test [--print] [--time]\n"
+           "    --print     Print out results of each point arithmetic test.\n"
+           "    --time      Benchmark point operations and print results.\n");
+}
+
+/* Performs tests of elliptic curve cryptography over prime fields. If
+ * tests fail, it prints an error message, aborts, and returns an error
+ * code. Otherwise (or after printing usage for a bad option), returns 0. */
+int
+main(int argc, char **argv)
+{
+
+    int ectestTime = 0;
+    int ectestPrint = 0;
+    int i;
+    ECGroup *group = NULL;
+    ECCurveParams *params = NULL;
+    mp_err res = MP_OKAY; /* read at CLEANUP; never left indeterminate */
+
+    /* read command-line arguments; each flag is accepted with 0-2 dashes */
+    for (i = 1; i < argc; i++) {
+        if ((strcasecmp(argv[i], "time") == 0) || (strcasecmp(argv[i], "-time") == 0) || (strcasecmp(argv[i], "--time") == 0)) {
+            ectestTime = 1;
+        } else if ((strcasecmp(argv[i], "print") == 0) || (strcasecmp(argv[i], "-print") == 0) || (strcasecmp(argv[i], "--print") == 0)) {
+            ectestPrint = 1;
+        } else {
+            printUsage();
+            return 0;
+        }
+    }
+
+    /* generic arithmetic tests */
+    ECTEST_GENERIC_GFP("SECP-160R1", ECCurve_SECG_PRIME_160R1);
+
+    /* specific arithmetic tests */
+    ECTEST_NAMED_GFP("NIST-P192", ECCurve_NIST_P192);
+    ECTEST_NAMED_GFP("NIST-P224", ECCurve_NIST_P224);
+    ECTEST_NAMED_GFP("NIST-P256", ECCurve_NIST_P256);
+    ECTEST_NAMED_GFP("NIST-P384", ECCurve_NIST_P384);
+    ECTEST_NAMED_GFP("NIST-P521", ECCurve_NIST_P521);
+    ECTEST_NAMED_GFP("ANSI X9.62 PRIME192v1", ECCurve_X9_62_PRIME_192V1);
+    ECTEST_NAMED_GFP("ANSI X9.62 PRIME192v2", ECCurve_X9_62_PRIME_192V2);
+    ECTEST_NAMED_GFP("ANSI X9.62 PRIME192v3", ECCurve_X9_62_PRIME_192V3);
+    ECTEST_NAMED_GFP("ANSI X9.62 PRIME239v1", ECCurve_X9_62_PRIME_239V1);
+    ECTEST_NAMED_GFP("ANSI X9.62 PRIME239v2", ECCurve_X9_62_PRIME_239V2);
+    ECTEST_NAMED_GFP("ANSI X9.62 PRIME239v3", ECCurve_X9_62_PRIME_239V3);
+    ECTEST_NAMED_GFP("ANSI X9.62 PRIME256v1", ECCurve_X9_62_PRIME_256V1);
+    ECTEST_NAMED_GFP("SECP-112R1", ECCurve_SECG_PRIME_112R1);
+    ECTEST_NAMED_GFP("SECP-112R2", ECCurve_SECG_PRIME_112R2);
+    ECTEST_NAMED_GFP("SECP-128R1", ECCurve_SECG_PRIME_128R1);
+    ECTEST_NAMED_GFP("SECP-128R2", ECCurve_SECG_PRIME_128R2);
+    ECTEST_NAMED_GFP("SECP-160K1", ECCurve_SECG_PRIME_160K1);
+    ECTEST_NAMED_GFP("SECP-160R1", ECCurve_SECG_PRIME_160R1);
+    ECTEST_NAMED_GFP("SECP-160R2", ECCurve_SECG_PRIME_160R2);
+    ECTEST_NAMED_GFP("SECP-192K1", ECCurve_SECG_PRIME_192K1);
+    ECTEST_NAMED_GFP("SECP-192R1", ECCurve_SECG_PRIME_192R1);
+    ECTEST_NAMED_GFP("SECP-224K1", ECCurve_SECG_PRIME_224K1);
+    ECTEST_NAMED_GFP("SECP-224R1", ECCurve_SECG_PRIME_224R1);
+    ECTEST_NAMED_GFP("SECP-256K1", ECCurve_SECG_PRIME_256K1);
+    ECTEST_NAMED_GFP("SECP-256R1", ECCurve_SECG_PRIME_256R1);
+    ECTEST_NAMED_GFP("SECP-384R1", ECCurve_SECG_PRIME_384R1);
+    ECTEST_NAMED_GFP("SECP-521R1", ECCurve_SECG_PRIME_521R1);
+    ECTEST_NAMED_GFP("WTLS-6 (112)", ECCurve_WTLS_6);
+    ECTEST_NAMED_GFP("WTLS-7 (160)", ECCurve_WTLS_7);
+    ECTEST_NAMED_GFP("WTLS-8 (112)", ECCurve_WTLS_8);
+    ECTEST_NAMED_GFP("WTLS-9 (160)", ECCurve_WTLS_9);
+    ECTEST_NAMED_GFP("WTLS-12 (224)", ECCurve_WTLS_12);
+    ECTEST_NAMED_GFP("Curve25519", ECCurve25519);
+
+CLEANUP:
+    EC_FreeCurveParams(params);
+    ECGroup_free(group);
+    if (res != MP_OKAY) {
+        printf("Error: exiting with error value %i\n", res);
+    }
+    return res;
+}
diff --git a/security/nss/lib/freebl/ecl/uint128.c b/security/nss/lib/freebl/ecl/uint128.c
new file mode 100644
index 000000000..22cbd023c
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/uint128.c
@@ -0,0 +1,87 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "uint128.h"
+
+/* helper functions */
+uint64_t
+mask51(uint128_t x)
+{
+    return MASK51 & x.lo; /* low 51 bits of x; x.hi is never read */
+}
+
+uint64_t
+mask_lower(uint128_t x)
+{
+    return x.lo; /* low 64 bits of x; x.hi is discarded */
+}
+
+uint128_t
+mask51full(uint128_t x)
+{
+    uint128_t low51 = { MASK51 & x.lo, 0 }; /* { lo, hi }: high word is zero */
+    return low51;
+}
+
+uint128_t
+init128x(uint64_t x)
+{
+    uint128_t widened = { x, 0 }; /* { lo, hi }: zero-extend x to 128 bits */
+    return widened;
+}
+
+/* arithmetic */
+
+uint128_t
+add128(uint128_t a, uint128_t b)
+{
+    uint128_t sum;
+    sum.lo = a.lo + b.lo;                   /* wraps modulo 2^64 */
+    sum.hi = a.hi + b.hi + (sum.lo < a.lo); /* carry: low word wrapped */
+    return sum;
+}
+
+/* out = 19 * a, assembled as 16*a + 2*a + a (all arithmetic modulo 2^128) */
+uint128_t
+mul12819(uint128_t a)
+{
+    uint128_t times16 = lshift128(a, 4);
+    uint128_t times2 = add128(a, a);
+    uint128_t times18 = add128(times16, times2);
+    uint128_t times19 = add128(times18, a);
+    return times19;
+}
+
+/* 64x64 -> 128-bit multiply built from four 32x32 -> 64-bit partial products */
+uint128_t
+mul6464(uint64_t a, uint64_t b)
+{
+    const uint64_t al = (uint32_t)a, ah = a >> 32, bl = (uint32_t)b, bh = b >> 32;
+    uint64_t ll = al * bl;                    /* low half x low half */
+    uint64_t mid1 = ah * bl + (ll >> 32);     /* sum fits in 64 bits */
+    uint64_t mid2 = bh * al + (uint32_t)mid1; /* sum fits in 64 bits */
+    /* { lo, hi }: the 64-bit shift drops mid2's high half, which goes to hi */
+    uint128_t prod = { (mid2 << 32) | (uint32_t)ll, ah * bh + (mid2 >> 32) + (mid1 >> 32) };
+    return prod;
+}
+
+/* x >> n for 0 <= n < 64; n == 0 is special-cased since x.hi << 64 is UB */
+uint128_t
+rshift128(uint128_t x, uint8_t n)
+{
+    uint128_t ret;
+    ret.lo = (n == 0) ? x.lo : ((x.lo >> n) | (x.hi << (64 - n)));
+    ret.hi = x.hi >> n;
+    return ret;
+}
+
+/* x << n for 0 <= n < 64; n == 0 is special-cased since x.lo >> 64 is UB */
+uint128_t
+lshift128(uint128_t x, uint8_t n)
+{
+    uint128_t ret;
+    ret.hi = (n == 0) ? x.hi : ((x.hi << n) | (x.lo >> (64 - n)));
+    ret.lo = x.lo << n;
+    return ret;
+}
diff --git a/security/nss/lib/freebl/ecl/uint128.h b/security/nss/lib/freebl/ecl/uint128.h
new file mode 100644
index 000000000..a3a71e6e7
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/uint128.h
@@ -0,0 +1,35 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdint.h>
+
+#define MASK51 0x7ffffffffffffULL
+
+#ifdef HAVE_INT128_SUPPORT /* native type: every helper is a fully parenthesized expression */
+typedef unsigned __int128 uint128_t;
+#define add128(a, b) ((a) + (b))
+#define mul6464(a, b) ((uint128_t)(a) * (uint128_t)(b))
+#define mul12819(a) ((uint128_t)(a) * 19)
+#define rshift128(x, n) ((x) >> (n))
+#define lshift128(x, n) ((x) << (n))
+#define mask51(x) ((x) & MASK51)
+#define mask_lower(x) ((uint64_t)(x))
+#define mask51full(x) ((x) & MASK51)
+#define init128x(x) ((uint128_t)(x))
+#else /* uint128_t for Windows and 32 bit intel systems */
+struct uint128_t_str {
+    uint64_t lo; /* bits 0..63 */
+    uint64_t hi; /* bits 64..127 */
+};
+typedef struct uint128_t_str uint128_t;
+uint128_t add128(uint128_t a, uint128_t b);  /* a + b */
+uint128_t mul6464(uint64_t a, uint64_t b);   /* full 128-bit product a * b */
+uint128_t mul12819(uint128_t a);             /* 19 * a */
+uint128_t rshift128(uint128_t x, uint8_t n); /* x >> n, requires n < 64 */
+uint128_t lshift128(uint128_t x, uint8_t n); /* x << n, requires n < 64 */
+uint64_t mask51(uint128_t x);                /* low 51 bits as a 64-bit value */
+uint64_t mask_lower(uint128_t x);            /* low 64 bits */
+uint128_t mask51full(uint128_t x);           /* low 51 bits as a 128-bit value */
+uint128_t init128x(uint64_t x);              /* zero-extend x */
+#endif
diff --git a/security/nss/lib/freebl/exports.gyp b/security/nss/lib/freebl/exports.gyp
new file mode 100644
index 000000000..ef81685b0
--- /dev/null
+++ b/security/nss/lib/freebl/exports.gyp
@@ -0,0 +1,48 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+{
+  'includes': [
+    '../../coreconf/config.gypi'
+  ],
+  'targets': [
+    {
+      'target_name': 'lib_freebl_exports',
+      'type': 'none',
+      'copies': [
+        {
+          'files': [
+            'blapit.h',
+            'ecl/ecl-exp.h',
+            'shsign.h'
+          ],
+          'conditions': [
+            [ 'OS=="linux"', {
+              'files': [
+                'nsslowhash.h',
+              ],
+            }],
+          ],
+          'destination': '<(nss_public_dist_dir)/<(module)'
+        },
+        {
+          'files': [
+            'alghmac.h',
+            'blapi.h',
+            'chacha20poly1305.h',
+            'ec.h',
+            'ecl/ecl-curve.h',
+            'ecl/ecl.h',
+            'hmacct.h',
+            'secmpi.h',
+            'secrng.h'
+          ],
+          'destination': '<(nss_private_dist_dir)/<(module)'
+        }
+      ]
+    }
+  ],
+  'variables': {
+    'module': 'nss'
+  }
+}
diff --git a/security/nss/lib/freebl/fipsfreebl.c b/security/nss/lib/freebl/fipsfreebl.c
new file mode 100644
index 000000000..b3ae6865b
--- /dev/null
+++ b/security/nss/lib/freebl/fipsfreebl.c
@@ -0,0 +1,1715 @@
+/*
+ * PKCS #11 FIPS Power-Up Self Test.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/* $Id: fipstest.c,v 1.31 2012/06/28 17:55:06 rrelyea%redhat.com Exp $ */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "seccomon.h" /* Required for RSA and DSA. */
+#include "secerr.h"
+#include "prtypes.h"
+
+#ifdef NSS_ENABLE_ECC
+#include "ec.h" /* Required for ECDSA */
+#endif
+
+/*
+ * Different platforms have different ways of calling an initial entry point
+ * when the dll/.so is loaded. Most platforms support either a posix pragma
+ * or the GCC attribute. Some platforms support a pre-defined name, and some
+ * platforms have a link line way of invoking this function.
+ */
+
+/* The pragma: names bl_startup_tests as the init routine when USE_INIT_PRAGMA is defined */
+#if defined(USE_INIT_PRAGMA)
+#pragma init(bl_startup_tests)
+#endif
+
+/* GCC Attribute: marks the function as a constructor unless NSS_NO_INIT_SUPPORT is defined */
+#if defined(__GNUC__) && !defined(NSS_NO_INIT_SUPPORT)
+#define INIT_FUNCTION __attribute__((constructor))
+#else /* expands to nothing: the function must then be invoked by another mechanism */
+#define INIT_FUNCTION
+#endif
+
+static void INIT_FUNCTION bl_startup_tests(void); /* returns no status to its caller */
+
+/* Windows pre-defined entry */
+#if defined(XP_WIN) && !defined(NSS_NO_INIT_SUPPORT)
+#include <windows.h>
+
+BOOL WINAPI DllMain(
+    HINSTANCE hinstDLL, // handle to DLL module
+    DWORD fdwReason,    // reason for calling function
+    LPVOID lpReserved)  // reserved
+{
+    // Windows pre-defined entry point.  The loader passes the reason for
+    // the call in fdwReason; hinstDLL and lpReserved are not consulted
+    // by this implementation.
+
+    // DLL_PROCESS_ATTACH arrives once for each new process; that is the
+    // moment to run the start-up tests.
+    if (fdwReason == DLL_PROCESS_ATTACH) {
+        bl_startup_tests();
+    }
+
+    // Nothing is done for the other notifications, nor for any value
+    // this code does not name:
+    //   DLL_THREAD_ATTACH  - would be thread-specific initialization
+    //   DLL_THREAD_DETACH  - would be thread-specific cleanup
+    //   DLL_PROCESS_DETACH - would be any necessary cleanup
+
+    // Returning FALSE for DLL_PROCESS_ATTACH would fail the DLL load.
+    // bl_startup_tests() is declared void, so there is no status to
+    // pass on and TRUE is returned for every notification.
+    return TRUE; // Successful DLL_PROCESS_ATTACH.
+}
+#endif
+
+/* insert other platform dependent init entry points here, or modify
+ * the linker line */
+
+/* FIPS preprocessor directives for RC2-ECB and RC2-CBC.        */
+#define FIPS_RC2_KEY_LENGTH 5     /*  40-bits */
+#define FIPS_RC2_ENCRYPT_LENGTH 8 /*  64-bits */
+#define FIPS_RC2_DECRYPT_LENGTH 8 /*  64-bits */
+
+/* FIPS preprocessor directives for RC4.                        */
+#define FIPS_RC4_KEY_LENGTH 5     /*  40-bits */
+#define FIPS_RC4_ENCRYPT_LENGTH 8 /*  64-bits */
+#define FIPS_RC4_DECRYPT_LENGTH 8 /*  64-bits */
+
+/* FIPS preprocessor directives for DES-ECB and DES-CBC.        */
+#define FIPS_DES_ENCRYPT_LENGTH 8 /*  64-bits */
+#define FIPS_DES_DECRYPT_LENGTH 8 /*  64-bits */
+
+/* FIPS preprocessor directives for DES3-CBC and DES3-ECB.      */
+#define FIPS_DES3_ENCRYPT_LENGTH 8 /*  64-bits */
+#define FIPS_DES3_DECRYPT_LENGTH 8 /*  64-bits */
+
+/* FIPS preprocessor directives for AES-ECB and AES-CBC.        */
+#define FIPS_AES_BLOCK_SIZE 16     /* 128-bits */
+#define FIPS_AES_ENCRYPT_LENGTH 16 /* 128-bits */
+#define FIPS_AES_DECRYPT_LENGTH 16 /* 128-bits */
+#define FIPS_AES_128_KEY_SIZE 16   /* 128-bits */
+#define FIPS_AES_192_KEY_SIZE 24   /* 192-bits */
+#define FIPS_AES_256_KEY_SIZE 32   /* 256-bits */
+
+/* FIPS preprocessor directives for message digests             */
+#define FIPS_KNOWN_HASH_MESSAGE_LENGTH 64 /* 512-bits */
+
+/* FIPS preprocessor directives for RSA.                         */
+#define FIPS_RSA_TYPE siBuffer
+#define FIPS_RSA_PUBLIC_EXPONENT_LENGTH 3    /*   24-bits */
+#define FIPS_RSA_PRIVATE_VERSION_LENGTH 1    /*    8-bits */
+#define FIPS_RSA_MESSAGE_LENGTH 256          /* 2048-bits */
+#define FIPS_RSA_COEFFICIENT_LENGTH 128      /* 1024-bits */
+#define FIPS_RSA_PRIME0_LENGTH 128           /* 1024-bits */
+#define FIPS_RSA_PRIME1_LENGTH 128           /* 1024-bits */
+#define FIPS_RSA_EXPONENT0_LENGTH 128        /* 1024-bits */
+#define FIPS_RSA_EXPONENT1_LENGTH 128        /* 1024-bits */
+#define FIPS_RSA_PRIVATE_EXPONENT_LENGTH 256 /* 2048-bits */
+#define FIPS_RSA_ENCRYPT_LENGTH 256          /* 2048-bits */
+#define FIPS_RSA_DECRYPT_LENGTH 256          /* 2048-bits */
+#define FIPS_RSA_SIGNATURE_LENGTH 256        /* 2048-bits */
+#define FIPS_RSA_MODULUS_LENGTH 256          /* 2048-bits */
+
+/* FIPS preprocessor directives for DSA.                        */
+#define FIPS_DSA_TYPE siBuffer
+#define FIPS_DSA_DIGEST_LENGTH 20    /*  160-bits */
+#define FIPS_DSA_SUBPRIME_LENGTH 20  /*  160-bits */
+#define FIPS_DSA_SIGNATURE_LENGTH 40 /*  320-bits */
+#define FIPS_DSA_PRIME_LENGTH 128    /* 1024-bits */
+#define FIPS_DSA_BASE_LENGTH 128     /* 1024-bits */
+
+/* FIPS preprocessor directives for RNG.                        */
+#define FIPS_RNG_XKEY_LENGTH 32 /* 256-bits */
+
+static SECStatus
+freebl_fips_DES3_PowerUpSelfTest(void) /* known-answer tests: ECB and CBC, encrypt and decrypt */
+{
+    /* DES3 Known Key (24 characters = 192 bits of key material, not 56). */
+    static const PRUint8 des3_known_key[] = { "ANSI Triple-DES Key Data" };
+
+    /* DES3-CBC Known Initialization Vector (64-bits). */
+    static const PRUint8 des3_cbc_known_initialization_vector[] = { "Security" };
+
+    /* DES3 Known Plaintext (64-bits); the same text is used for ECB and CBC. */
+    static const PRUint8 des3_ecb_known_plaintext[] = { "Netscape" };
+    static const PRUint8 des3_cbc_known_plaintext[] = { "Netscape" };
+
+    /* DES3 Known Ciphertext (64-bits), one expected block per mode. */
+    static const PRUint8 des3_ecb_known_ciphertext[] = {
+        0x55, 0x8e, 0xad, 0x3c, 0xee, 0x49, 0x69, 0xbe
+    };
+    static const PRUint8 des3_cbc_known_ciphertext[] = {
+        0x43, 0xdc, 0x6a, 0xc1, 0xaf, 0xa6, 0x32, 0xf5
+    };
+
+    /* DES3 variables: output buffers and state reused by all four tests. */
+    PRUint8 des3_computed_ciphertext[FIPS_DES3_ENCRYPT_LENGTH];
+    PRUint8 des3_computed_plaintext[FIPS_DES3_DECRYPT_LENGTH];
+    DESContext *des3_context;
+    unsigned int des3_bytes_encrypted;
+    unsigned int des3_bytes_decrypted;
+    SECStatus des3_status;
+
+    /*******************************************************/
+    /* DES3-ECB Single-Round Known Answer Encryption Test. */
+    /*******************************************************/
+
+    des3_context = DES_CreateContext(des3_known_key, NULL,
+                                     NSS_DES_EDE3, PR_TRUE);
+
+    if (des3_context == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return (SECFailure);
+    }
+
+    des3_status = DES_Encrypt(des3_context, des3_computed_ciphertext,
+                              &des3_bytes_encrypted, FIPS_DES3_ENCRYPT_LENGTH,
+                              des3_ecb_known_plaintext,
+                              FIPS_DES3_DECRYPT_LENGTH); /* input length; both lengths are 8 */
+
+    DES_DestroyContext(des3_context, PR_TRUE); /* destroyed before the check, so no return path leaks it */
+
+    if ((des3_status != SECSuccess) ||
+        (des3_bytes_encrypted != FIPS_DES3_ENCRYPT_LENGTH) ||
+        (PORT_Memcmp(des3_computed_ciphertext, des3_ecb_known_ciphertext,
+                     FIPS_DES3_ENCRYPT_LENGTH) != 0)) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return (SECFailure);
+    }
+
+    /*******************************************************/
+    /* DES3-ECB Single-Round Known Answer Decryption Test. */
+    /*******************************************************/
+
+    des3_context = DES_CreateContext(des3_known_key, NULL,
+                                     NSS_DES_EDE3, PR_FALSE); /* presumably PR_FALSE selects decryption - confirm */
+
+    if (des3_context == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return (SECFailure);
+    }
+
+    des3_status = DES_Decrypt(des3_context, des3_computed_plaintext,
+                              &des3_bytes_decrypted, FIPS_DES3_DECRYPT_LENGTH,
+                              des3_ecb_known_ciphertext,
+                              FIPS_DES3_ENCRYPT_LENGTH); /* input length; both lengths are 8 */
+
+    DES_DestroyContext(des3_context, PR_TRUE);
+
+    if ((des3_status != SECSuccess) ||
+        (des3_bytes_decrypted != FIPS_DES3_DECRYPT_LENGTH) ||
+        (PORT_Memcmp(des3_computed_plaintext, des3_ecb_known_plaintext,
+                     FIPS_DES3_DECRYPT_LENGTH) != 0)) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return (SECFailure);
+    }
+
+    /*******************************************************/
+    /* DES3-CBC Single-Round Known Answer Encryption Test. */
+    /*******************************************************/
+
+    des3_context = DES_CreateContext(des3_known_key,
+                                     des3_cbc_known_initialization_vector,
+                                     NSS_DES_EDE3_CBC, PR_TRUE);
+
+    if (des3_context == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return (SECFailure);
+    }
+
+    des3_status = DES_Encrypt(des3_context, des3_computed_ciphertext,
+                              &des3_bytes_encrypted, FIPS_DES3_ENCRYPT_LENGTH,
+                              des3_cbc_known_plaintext,
+                              FIPS_DES3_DECRYPT_LENGTH); /* input length; both lengths are 8 */
+
+    DES_DestroyContext(des3_context, PR_TRUE);
+
+    if ((des3_status != SECSuccess) ||
+        (des3_bytes_encrypted != FIPS_DES3_ENCRYPT_LENGTH) ||
+        (PORT_Memcmp(des3_computed_ciphertext, des3_cbc_known_ciphertext,
+                     FIPS_DES3_ENCRYPT_LENGTH) != 0)) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return (SECFailure);
+    }
+
+    /*******************************************************/
+    /* DES3-CBC Single-Round Known Answer Decryption Test. */
+    /*******************************************************/
+
+    des3_context = DES_CreateContext(des3_known_key,
+                                     des3_cbc_known_initialization_vector,
+                                     NSS_DES_EDE3_CBC, PR_FALSE);
+
+    if (des3_context == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return (SECFailure);
+    }
+
+    des3_status = DES_Decrypt(des3_context, des3_computed_plaintext,
+                              &des3_bytes_decrypted, FIPS_DES3_DECRYPT_LENGTH,
+                              des3_cbc_known_ciphertext,
+                              FIPS_DES3_ENCRYPT_LENGTH); /* input length; both lengths are 8 */
+
+    DES_DestroyContext(des3_context, PR_TRUE);
+
+    if ((des3_status != SECSuccess) ||
+        (des3_bytes_decrypted != FIPS_DES3_DECRYPT_LENGTH) ||
+        (PORT_Memcmp(des3_computed_plaintext, des3_cbc_known_plaintext,
+                     FIPS_DES3_DECRYPT_LENGTH) != 0)) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return (SECFailure);
+    }
+
+    return (SECSuccess); /* all four known-answer tests matched */
+}
+
+/* AES ECB/CBC known-answer self-test; aes_key_size is the key length in bytes (16, 24, or 32). */
+static SECStatus
+freebl_fips_AES_PowerUpSelfTest(int aes_key_size)
+{
+    /* AES Known Key (32 characters; presumably only the first aes_key_size bytes are consumed). */
+    static const PRUint8 aes_known_key[] =
+        { "AES-128 RIJNDAELLEADNJIR 821-SEA" };
+
+    /* AES-CBC Known Initialization Vector (128-bits). */
+    static const PRUint8 aes_cbc_known_initialization_vector[] =
+        { "SecurityytiruceS" };
+
+    /* AES Known Plaintext (128-bits). (blocksize is 128-bits) */
+    static const PRUint8 aes_known_plaintext[] = { "NetscapeepacsteN" };
+
+    /* AES Known Ciphertext (128-bit key). */
+    static const PRUint8 aes_ecb128_known_ciphertext[] = {
+        0x3c, 0xa5, 0x96, 0xf3, 0x34, 0x6a, 0x96, 0xc1,
+        0x03, 0x88, 0x16, 0x7b, 0x20, 0xbf, 0x35, 0x47
+    };
+
+    static const PRUint8 aes_cbc128_known_ciphertext[] = {
+        0xcf, 0x15, 0x1d, 0x4f, 0x96, 0xe4, 0x4f, 0x63,
+        0x15, 0x54, 0x14, 0x1d, 0x4e, 0xd8, 0xd5, 0xea
+    };
+
+    /* AES Known Ciphertext (192-bit key). */
+    static const PRUint8 aes_ecb192_known_ciphertext[] = {
+        0xa0, 0x18, 0x62, 0xed, 0x88, 0x19, 0xcb, 0x62,
+        0x88, 0x1d, 0x4d, 0xfe, 0x84, 0x02, 0x89, 0x0e
+    };
+
+    static const PRUint8 aes_cbc192_known_ciphertext[] = {
+        0x83, 0xf7, 0xa4, 0x76, 0xd1, 0x6f, 0x07, 0xbe,
+        0x07, 0xbc, 0x43, 0x2f, 0x6d, 0xad, 0x29, 0xe1
+    };
+
+    /* AES Known Ciphertext (256-bit key). */
+    static const PRUint8 aes_ecb256_known_ciphertext[] = {
+        0xdb, 0xa6, 0x52, 0x01, 0x8a, 0x70, 0xae, 0x66,
+        0x3a, 0x99, 0xd8, 0x95, 0x7f, 0xfb, 0x01, 0x67
+    };
+
+    static const PRUint8 aes_cbc256_known_ciphertext[] = {
+        0x37, 0xea, 0x07, 0x06, 0x31, 0x1c, 0x59, 0x27,
+        0xc5, 0xc5, 0x68, 0x71, 0x6e, 0x34, 0x40, 0x16
+    };
+
+    const PRUint8 *aes_ecb_known_ciphertext = /* expected ECB block for this key size */
+        (aes_key_size == FIPS_AES_128_KEY_SIZE) ? aes_ecb128_known_ciphertext : (aes_key_size == FIPS_AES_192_KEY_SIZE) ? aes_ecb192_known_ciphertext : aes_ecb256_known_ciphertext;
+
+    const PRUint8 *aes_cbc_known_ciphertext = /* expected CBC block for this key size */
+        (aes_key_size == FIPS_AES_128_KEY_SIZE) ? aes_cbc128_known_ciphertext : (aes_key_size == FIPS_AES_192_KEY_SIZE) ? aes_cbc192_known_ciphertext : aes_cbc256_known_ciphertext;
+
+    /* AES variables: output buffers and state reused by all four tests. */
+    PRUint8 aes_computed_ciphertext[FIPS_AES_ENCRYPT_LENGTH];
+    PRUint8 aes_computed_plaintext[FIPS_AES_DECRYPT_LENGTH];
+    AESContext *aes_context;
+    unsigned int aes_bytes_encrypted;
+    unsigned int aes_bytes_decrypted;
+    SECStatus aes_status;
+
+    /* Reject any size but 16, 24, or 32 bytes; the selections above fall back to the 256-bit tables until here. */
+    if ((aes_key_size != FIPS_AES_128_KEY_SIZE) &&
+        (aes_key_size != FIPS_AES_192_KEY_SIZE) &&
+        (aes_key_size != FIPS_AES_256_KEY_SIZE)) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return (SECFailure);
+    }
+
+    /******************************************************/
+    /* AES-ECB Single-Round Known Answer Encryption Test: */
+    /******************************************************/
+
+    aes_context = AES_CreateContext(aes_known_key, NULL, NSS_AES, PR_TRUE,
+                                    aes_key_size, FIPS_AES_BLOCK_SIZE);
+
+    if (aes_context == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return (SECFailure);
+    }
+
+    aes_status = AES_Encrypt(aes_context, aes_computed_ciphertext,
+                             &aes_bytes_encrypted, FIPS_AES_ENCRYPT_LENGTH,
+                             aes_known_plaintext,
+                             FIPS_AES_DECRYPT_LENGTH); /* input length; both lengths are 16 */
+
+    AES_DestroyContext(aes_context, PR_TRUE); /* destroyed before the check, so no return path leaks it */
+
+    if ((aes_status != SECSuccess) ||
+        (aes_bytes_encrypted != FIPS_AES_ENCRYPT_LENGTH) ||
+        (PORT_Memcmp(aes_computed_ciphertext, aes_ecb_known_ciphertext,
+                     FIPS_AES_ENCRYPT_LENGTH) != 0)) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return (SECFailure);
+    }
+
+    /******************************************************/
+    /* AES-ECB Single-Round Known Answer Decryption Test: */
+    /******************************************************/
+
+    aes_context = AES_CreateContext(aes_known_key, NULL, NSS_AES, PR_FALSE,
+                                    aes_key_size, FIPS_AES_BLOCK_SIZE);
+
+    if (aes_context == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return (SECFailure);
+    }
+
+    aes_status = AES_Decrypt(aes_context, aes_computed_plaintext,
+                             &aes_bytes_decrypted, FIPS_AES_DECRYPT_LENGTH,
+                             aes_ecb_known_ciphertext,
+                             FIPS_AES_ENCRYPT_LENGTH); /* input length; both lengths are 16 */
+
+    AES_DestroyContext(aes_context, PR_TRUE);
+
+    if ((aes_status != SECSuccess) ||
+        (aes_bytes_decrypted != FIPS_AES_DECRYPT_LENGTH) ||
+        (PORT_Memcmp(aes_computed_plaintext, aes_known_plaintext,
+                     FIPS_AES_DECRYPT_LENGTH) != 0)) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return (SECFailure);
+    }
+
+    /******************************************************/
+    /* AES-CBC Single-Round Known Answer Encryption Test. */
+    /******************************************************/
+
+    aes_context = AES_CreateContext(aes_known_key,
+                                    aes_cbc_known_initialization_vector,
+                                    NSS_AES_CBC, PR_TRUE, aes_key_size,
+                                    FIPS_AES_BLOCK_SIZE);
+
+    if (aes_context == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return (SECFailure);
+    }
+
+    aes_status = AES_Encrypt(aes_context, aes_computed_ciphertext,
+                             &aes_bytes_encrypted, FIPS_AES_ENCRYPT_LENGTH,
+                             aes_known_plaintext,
+                             FIPS_AES_DECRYPT_LENGTH); /* input length; both lengths are 16 */
+
+    AES_DestroyContext(aes_context, PR_TRUE);
+
+    if ((aes_status != SECSuccess) ||
+        (aes_bytes_encrypted != FIPS_AES_ENCRYPT_LENGTH) ||
+        (PORT_Memcmp(aes_computed_ciphertext, aes_cbc_known_ciphertext,
+                     FIPS_AES_ENCRYPT_LENGTH) != 0)) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return (SECFailure);
+    }
+
+    /******************************************************/
+    /* AES-CBC Single-Round Known Answer Decryption Test. */
+    /******************************************************/
+
+    aes_context = AES_CreateContext(aes_known_key,
+                                    aes_cbc_known_initialization_vector,
+                                    NSS_AES_CBC, PR_FALSE, aes_key_size,
+                                    FIPS_AES_BLOCK_SIZE);
+
+    if (aes_context == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return (SECFailure);
+    }
+
+    aes_status = AES_Decrypt(aes_context, aes_computed_plaintext,
+                             &aes_bytes_decrypted, FIPS_AES_DECRYPT_LENGTH,
+                             aes_cbc_known_ciphertext,
+                             FIPS_AES_ENCRYPT_LENGTH); /* input length; both lengths are 16 */
+
+    AES_DestroyContext(aes_context, PR_TRUE);
+
+    if ((aes_status != SECSuccess) ||
+        (aes_bytes_decrypted != FIPS_AES_DECRYPT_LENGTH) ||
+        (PORT_Memcmp(aes_computed_plaintext, aes_known_plaintext,
+                     FIPS_AES_DECRYPT_LENGTH) != 0)) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return (SECFailure);
+    }
+
+    return (SECSuccess); /* all four known-answer tests matched */
+}
+
+/* Known Hash Message (64 characters = 512-bits, plus the literal's NUL).  Used for all hashes (incl. SHA-N [N>1]). */
+static const PRUint8 known_hash_message[] = {
+    "The test message for the MD2, MD5, and SHA-1 hashing algorithms."
+};
+
+/* Single Round HMAC SHA-X test: MACs message under secret_key with hashAlg,
+ * storing up to the hash object's length bytes in hmac_computed.  Returns
+ * SECFailure if setup fails, otherwise whatever HMAC_Finish reports. */
+static SECStatus
+freebl_fips_HMAC(unsigned char *hmac_computed,
+                 const PRUint8 *secret_key,
+                 unsigned int secret_key_length,
+                 const PRUint8 *message,
+                 unsigned int message_length,
+                 HASH_HashType hashAlg)
+{
+    /* Look up the raw hash object selected by hashAlg. */
+    SECHashObject *hash = (SECHashObject *)HASH_GetRawHashObject(hashAlg);
+    HMACContext *ctx;
+    unsigned int digest_len = 0;
+    SECStatus rv;
+
+    if (hash == NULL) {
+        return SECFailure;
+    }
+
+    /* PR_TRUE for in FIPS mode */
+    ctx = HMAC_Create(hash, secret_key, secret_key_length, PR_TRUE);
+    if (ctx == NULL) {
+        return SECFailure;
+    }
+
+    /* One-shot MAC: the whole message goes in through a single update. */
+    HMAC_Begin(ctx);
+    HMAC_Update(ctx, message, message_length);
+    rv = HMAC_Finish(ctx, hmac_computed, &digest_len, hash->length);
+
+    /* The context is destroyed whether or not HMAC_Finish succeeded. */
+    HMAC_Destroy(ctx, PR_TRUE);
+
+    return rv;
+}
+
+static SECStatus
+freebl_fips_HMAC_PowerUpSelfTest(void)
+{
+    static const PRUint8 HMAC_known_secret_key[] = {
+        "Firefox and ThunderBird are awesome!"
+    };
+
+    static const PRUint8 HMAC_known_secret_key_length = sizeof HMAC_known_secret_key;
+
+    /* known SHA1 hmac (20 bytes) */
+    static const PRUint8 known_SHA1_hmac[] = {
+        0xd5, 0x85, 0xf6, 0x5b, 0x39, 0xfa, 0xb9, 0x05,
+        0x3b, 0x57, 0x1d, 0x61, 0xe7, 0xb8, 0x84, 0x1e,
+        0x5d, 0x0e, 0x1e, 0x11
+    };
+
+    /* known SHA224 hmac (28 bytes) */
+    static const PRUint8 known_SHA224_hmac[] = {
+        0x1c, 0xc3, 0x06, 0x8e, 0xce, 0x37, 0x68, 0xfb,
+        0x1a, 0x82, 0x4a, 0xbe, 0x2b, 0x00, 0x51, 0xf8,
+        0x9d, 0xb6, 0xe0, 0x90, 0x0d, 0x00, 0xc9, 0x64,
+        0x9a, 0xb8, 0x98, 0x4e
+    };
+
+    /* known SHA256 hmac (32 bytes) */
+    static const PRUint8 known_SHA256_hmac[] = {
+        0x05, 0x75, 0x9a, 0x9e, 0x70, 0x5e, 0xe7, 0x44,
+        0xe2, 0x46, 0x4b, 0x92, 0x22, 0x14, 0x22, 0xe0,
+        0x1b, 0x92, 0x8a, 0x0c, 0xfe, 0xf5, 0x49, 0xe9,
+        0xa7, 0x1b, 0x56, 0x7d, 0x1d, 0x29, 0x40, 0x48
+    };
+
+    /* known SHA384 hmac (48 bytes) */
+    static const PRUint8 known_SHA384_hmac[] = {
+        0xcd, 0x56, 0x14, 0xec, 0x05, 0x53, 0x06, 0x2b,
+        0x7e, 0x9c, 0x8a, 0x18, 0x5e, 0xea, 0xf3, 0x91,
+        0x33, 0xfb, 0x64, 0xf6, 0xe3, 0x9f, 0x89, 0x0b,
+        0xaf, 0xbe, 0x83, 0x4d, 0x3f, 0x3c, 0x43, 0x4d,
+        0x4a, 0x0c, 0x56, 0x98, 0xf8, 0xca, 0xb4, 0xaa,
+        0x9a, 0xf4, 0x0a, 0xaf, 0x4f, 0x69, 0xca, 0x87
+    };
+
+    /* known SHA512 hmac (64 bytes) */
+    static const PRUint8 known_SHA512_hmac[] = {
+        0xf6, 0x0e, 0x97, 0x12, 0x00, 0x67, 0x6e, 0xb9,
+        0x0c, 0xb2, 0x63, 0xf0, 0x60, 0xac, 0x75, 0x62,
+        0x70, 0x95, 0x2a, 0x52, 0x22, 0xee, 0xdd, 0xd2,
+        0x71, 0xb1, 0xe8, 0x26, 0x33, 0xd3, 0x13, 0x27,
+        0xcb, 0xff, 0x44, 0xef, 0x87, 0x97, 0x16, 0xfb,
+        0xd3, 0x0b, 0x48, 0xbe, 0x12, 0x4e, 0xda, 0xb1,
+        0x89, 0x90, 0xfb, 0x06, 0x0c, 0xbe, 0xe5, 0xc4,
+        0xff, 0x24, 0x37, 0x3d, 0xc7, 0xe4, 0xe4, 0x37
+    };
+
+    SECStatus hmac_status;
+    PRUint8 hmac_computed[HASH_LENGTH_MAX];
+
+    /***************************************************/
+    /* HMAC SHA-1 Single-Round Known Answer HMAC Test. */
+    /***************************************************/
+
+    hmac_status = freebl_fips_HMAC(hmac_computed,
+                                   HMAC_known_secret_key,
+                                   HMAC_known_secret_key_length,
+                                   known_hash_message,
+                                   FIPS_KNOWN_HASH_MESSAGE_LENGTH,
+                                   HASH_AlgSHA1);
+
+    if ((hmac_status != SECSuccess) ||
+        (PORT_Memcmp(hmac_computed, known_SHA1_hmac,
+                     SHA1_LENGTH) != 0)) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return (SECFailure);
+    }
+
+    /***************************************************/
+    /* HMAC SHA-224 Single-Round Known Answer Test.    */
+    /***************************************************/
+
+    hmac_status = freebl_fips_HMAC(hmac_computed,
+                                   HMAC_known_secret_key,
+                                   HMAC_known_secret_key_length,
+                                   known_hash_message,
+                                   FIPS_KNOWN_HASH_MESSAGE_LENGTH,
+                                   HASH_AlgSHA224);
+
+    if ((hmac_status != SECSuccess) ||
+        (PORT_Memcmp(hmac_computed, known_SHA224_hmac,
+                     SHA224_LENGTH) != 0)) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return (SECFailure);
+    }
+
+    /***************************************************/
+    /* HMAC SHA-256 Single-Round Known Answer Test.    */
+    /***************************************************/
+
+    hmac_status = freebl_fips_HMAC(hmac_computed,
+                                   HMAC_known_secret_key,
+                                   HMAC_known_secret_key_length,
+                                   known_hash_message,
+                                   FIPS_KNOWN_HASH_MESSAGE_LENGTH,
+                                   HASH_AlgSHA256);
+
+    if ((hmac_status != SECSuccess) ||
+        (PORT_Memcmp(hmac_computed, known_SHA256_hmac,
+                     SHA256_LENGTH) != 0)) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return (SECFailure);
+    }
+
+    /***************************************************/
+    /* HMAC SHA-384 Single-Round Known Answer Test.    */
+    /***************************************************/
+
+    hmac_status = freebl_fips_HMAC(hmac_computed,
+                                   HMAC_known_secret_key,
+                                   HMAC_known_secret_key_length,
+                                   known_hash_message,
+                                   FIPS_KNOWN_HASH_MESSAGE_LENGTH,
+                                   HASH_AlgSHA384);
+
+    if ((hmac_status != SECSuccess) ||
+        (PORT_Memcmp(hmac_computed, known_SHA384_hmac,
+                     SHA384_LENGTH) != 0)) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return (SECFailure);
+    }
+
+    /***************************************************/
+    /* HMAC SHA-512 Single-Round Known Answer Test.    */
+    /***************************************************/
+
+    hmac_status = freebl_fips_HMAC(hmac_computed,
+                                   HMAC_known_secret_key,
+                                   HMAC_known_secret_key_length,
+                                   known_hash_message,
+                                   FIPS_KNOWN_HASH_MESSAGE_LENGTH,
+                                   HASH_AlgSHA512);
+
+    if ((hmac_status != SECSuccess) ||
+        (PORT_Memcmp(hmac_computed, known_SHA512_hmac,
+                     SHA512_LENGTH) != 0)) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return (SECFailure);
+    }
+
+    return (SECSuccess);
+}
+
+/*
+ * Power-up known-answer test for SHA-1, SHA-224, SHA-256, SHA-384 and
+ * SHA-512: hashes known_hash_message with each algorithm and compares the
+ * result to a stored digest.  Returns SECSuccess if all match; otherwise
+ * sets SEC_ERROR_LIBRARY_FAILURE and returns SECFailure.
+ */
+static SECStatus
+freebl_fips_SHA_PowerUpSelfTest(void)
+{
+    /* Expected SHA-1 digest of the known message (160 bits). */
+    static const PRUint8 sha1_known_digest[] = {
+        0x0a, 0x6d, 0x07, 0xba, 0x1e, 0xbd, 0x8a, 0x1b,
+        0x72, 0xf6, 0xc7, 0x22, 0xf1, 0x27, 0x9f, 0xf0,
+        0xe0, 0x68, 0x47, 0x7a
+    };
+
+    /* Expected SHA-224 digest of the known message (224 bits). */
+    static const PRUint8 sha224_known_digest[] = {
+        0x89, 0x5e, 0x7f, 0xfd, 0x0e, 0xd8, 0x35, 0x6f,
+        0x64, 0x6d, 0xf2, 0xde, 0x5e, 0xed, 0xa6, 0x7f,
+        0x29, 0xd1, 0x12, 0x73, 0x42, 0x84, 0x95, 0x4f,
+        0x8e, 0x08, 0xe5, 0xcb
+    };
+
+    /* Expected SHA-256 digest of the known message (256 bits). */
+    static const PRUint8 sha256_known_digest[] = {
+        0x38, 0xa9, 0xc1, 0xf0, 0x35, 0xf6, 0x5d, 0x61,
+        0x11, 0xd4, 0x0b, 0xdc, 0xce, 0x35, 0x14, 0x8d,
+        0xf2, 0xdd, 0xaf, 0xaf, 0xcf, 0xb7, 0x87, 0xe9,
+        0x96, 0xa5, 0xd2, 0x83, 0x62, 0x46, 0x56, 0x79
+    };
+
+    /* Expected SHA-384 digest of the known message (384 bits). */
+    static const PRUint8 sha384_known_digest[] = {
+        0x11, 0xfe, 0x1c, 0x00, 0x89, 0x48, 0xde, 0xb3,
+        0x99, 0xee, 0x1c, 0x18, 0xb4, 0x10, 0xfb, 0xfe,
+        0xe3, 0xa8, 0x2c, 0xf3, 0x04, 0xb0, 0x2f, 0xc8,
+        0xa3, 0xc4, 0x5e, 0xea, 0x7e, 0x60, 0x48, 0x7b,
+        0xce, 0x2c, 0x62, 0xf7, 0xbc, 0xa7, 0xe8, 0xa3,
+        0xcf, 0x24, 0xce, 0x9c, 0xe2, 0x8b, 0x09, 0x72
+    };
+
+    /* Expected SHA-512 digest of the known message (512 bits). */
+    static const PRUint8 sha512_known_digest[] = {
+        0xc8, 0xb3, 0x27, 0xf9, 0x0b, 0x24, 0xc8, 0xbf,
+        0x4c, 0xba, 0x33, 0x54, 0xf2, 0x31, 0xbf, 0xdb,
+        0xab, 0xfd, 0xb3, 0x15, 0xd7, 0xfa, 0x48, 0x99,
+        0x07, 0x60, 0x0f, 0x57, 0x41, 0x1a, 0xdd, 0x28,
+        0x12, 0x55, 0x25, 0xac, 0xba, 0x3a, 0x99, 0x12,
+        0x2c, 0x7a, 0x8f, 0x75, 0x3a, 0xe1, 0x06, 0x6f,
+        0x30, 0x31, 0xc9, 0x33, 0xc6, 0x1b, 0x90, 0x1a,
+        0x6c, 0x98, 0x9a, 0x87, 0xd0, 0xb2, 0xf8, 0x07
+    };
+
+    /* Scratch digest buffer (HASH_LENGTH_MAX bytes) and the call status. */
+    PRUint8 digest[HASH_LENGTH_MAX];
+    SECStatus rv;
+
+    /* SHA-1 single-round known-answer hashing test. */
+    rv = SHA1_HashBuf(digest, known_hash_message,
+                      FIPS_KNOWN_HASH_MESSAGE_LENGTH);
+
+    if (rv != SECSuccess) {
+        goto sha_loser;
+    }
+
+    if (PORT_Memcmp(digest, sha1_known_digest, SHA1_LENGTH) != 0) {
+        goto sha_loser;
+    }
+
+    /* SHA-224 single-round known-answer hashing test. */
+    rv = SHA224_HashBuf(digest, known_hash_message,
+                        FIPS_KNOWN_HASH_MESSAGE_LENGTH);
+
+    if (rv != SECSuccess) {
+        goto sha_loser;
+    }
+
+    if (PORT_Memcmp(digest, sha224_known_digest, SHA224_LENGTH) != 0) {
+        goto sha_loser;
+    }
+
+    /* SHA-256 single-round known-answer hashing test. */
+    rv = SHA256_HashBuf(digest, known_hash_message,
+                        FIPS_KNOWN_HASH_MESSAGE_LENGTH);
+
+    if (rv != SECSuccess) {
+        goto sha_loser;
+    }
+
+    if (PORT_Memcmp(digest, sha256_known_digest, SHA256_LENGTH) != 0) {
+        goto sha_loser;
+    }
+
+    /* SHA-384 single-round known-answer hashing test. */
+    rv = SHA384_HashBuf(digest, known_hash_message,
+                        FIPS_KNOWN_HASH_MESSAGE_LENGTH);
+
+    if (rv != SECSuccess) {
+        goto sha_loser;
+    }
+
+    if (PORT_Memcmp(digest, sha384_known_digest, SHA384_LENGTH) != 0) {
+        goto sha_loser;
+    }
+
+    /* SHA-512 single-round known-answer hashing test. */
+    rv = SHA512_HashBuf(digest, known_hash_message,
+                        FIPS_KNOWN_HASH_MESSAGE_LENGTH);
+
+    if (rv != SECSuccess) {
+        goto sha_loser;
+    }
+
+    if (PORT_Memcmp(digest, sha512_known_digest, SHA512_LENGTH) != 0) {
+        goto sha_loser;
+    }
+
+    return SECSuccess;
+
+sha_loser:
+    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    return SECFailure;
+}
+
+/*
+ * Power-up known-answer test for RSA with a fixed 2048-bit key: checks
+ * that RSA_PublicKeyOp maps the known plaintext to the known ciphertext
+ * and that RSA_PrivateKeyOp maps it back.  Returns SECSuccess on success;
+ * otherwise sets SEC_ERROR_LIBRARY_FAILURE and returns SECFailure.
+ */
+static SECStatus
+freebl_fips_RSA_PowerUpSelfTest(void)
+{
+    /* RSA Known Modulus used in both Public/Private Key Values (2048-bits). */
+    static const PRUint8 rsa_modulus[FIPS_RSA_MODULUS_LENGTH] = {
+        0xb8, 0x15, 0x00, 0x33, 0xda, 0x0c, 0x9d, 0xa5,
+        0x14, 0x8c, 0xde, 0x1f, 0x23, 0x07, 0x54, 0xe2,
+        0xc6, 0xb9, 0x51, 0x04, 0xc9, 0x65, 0x24, 0x6e,
+        0x0a, 0x46, 0x34, 0x5c, 0x37, 0x86, 0x6b, 0x88,
+        0x24, 0x27, 0xac, 0xa5, 0x02, 0x79, 0xfb, 0xed,
+        0x75, 0xc5, 0x3f, 0x6e, 0xdf, 0x05, 0x5f, 0x0f,
+        0x20, 0x70, 0xa0, 0x5b, 0x85, 0xdb, 0xac, 0xb9,
+        0x5f, 0x02, 0xc2, 0x64, 0x1e, 0x84, 0x5b, 0x3e,
+        0xad, 0xbf, 0xf6, 0x2e, 0x51, 0xd6, 0xad, 0xf7,
+        0xa7, 0x86, 0x75, 0x86, 0xec, 0xa7, 0xe1, 0xf7,
+        0x08, 0xbf, 0xdc, 0x56, 0xb1, 0x3b, 0xca, 0xd8,
+        0xfc, 0x51, 0xdf, 0x9a, 0x2a, 0x37, 0x06, 0xf2,
+        0xd1, 0x6b, 0x9a, 0x5e, 0x2a, 0xe5, 0x20, 0x57,
+        0x35, 0x9f, 0x1f, 0x98, 0xcf, 0x40, 0xc7, 0xd6,
+        0x98, 0xdb, 0xde, 0xf5, 0x64, 0x53, 0xf7, 0x9d,
+        0x45, 0xf3, 0xd6, 0x78, 0xb9, 0xe3, 0xa3, 0x20,
+        0xcd, 0x79, 0x43, 0x35, 0xef, 0xd7, 0xfb, 0xb9,
+        0x80, 0x88, 0x27, 0x2f, 0x63, 0xa8, 0x67, 0x3d,
+        0x4a, 0xfa, 0x06, 0xc6, 0xd2, 0x86, 0x0b, 0xa7,
+        0x28, 0xfd, 0xe0, 0x1e, 0x93, 0x4b, 0x17, 0x2e,
+        0xb0, 0x11, 0x6f, 0xc6, 0x2b, 0x98, 0x0f, 0x15,
+        0xe3, 0x87, 0x16, 0x7a, 0x7c, 0x67, 0x3e, 0x12,
+        0x2b, 0xf8, 0xbe, 0x48, 0xc1, 0x97, 0x47, 0xf4,
+        0x1f, 0x81, 0x80, 0x12, 0x28, 0xe4, 0x7b, 0x1e,
+        0xb7, 0x00, 0xa4, 0xde, 0xaa, 0xfb, 0x0f, 0x77,
+        0x84, 0xa3, 0xd6, 0xb2, 0x03, 0x48, 0xdd, 0x53,
+        0x8b, 0x46, 0x41, 0x28, 0x52, 0xc4, 0x53, 0xf0,
+        0x1c, 0x95, 0xd9, 0x36, 0xe0, 0x0f, 0x26, 0x46,
+        0x9c, 0x61, 0x0e, 0x80, 0xca, 0x86, 0xaf, 0x39,
+        0x95, 0xe5, 0x60, 0x43, 0x61, 0x3e, 0x2b, 0xb4,
+        0xe8, 0xbd, 0x8d, 0x77, 0x62, 0xf5, 0x32, 0x43,
+        0x2f, 0x4b, 0x65, 0x82, 0x14, 0xdd, 0x29, 0x5b
+    };
+
+    /* RSA Known Public Key Values (24-bits). */
+    static const PRUint8 rsa_public_exponent[FIPS_RSA_PUBLIC_EXPONENT_LENGTH] = { 0x01, 0x00, 0x01 };
+    /* RSA Known Private Key Values (version                 is    8-bits), */
+    /*                              (private exponent        is 2048-bits), */
+    /*                              (private prime0          is 1024-bits), */
+    /*                              (private prime1          is 1024-bits), */
+    /*                              (private prime exponent0 is 1024-bits), */
+    /*                              (private prime exponent1 is 1024-bits), */
+    /*                          and (private coefficient     is 1024-bits). */
+    static const PRUint8 rsa_version[] = { 0x00 };
+
+    static const PRUint8 rsa_private_exponent[FIPS_RSA_PRIVATE_EXPONENT_LENGTH] = {
+        0x29, 0x08, 0x05, 0x53, 0x89, 0x76, 0xe6, 0x6c,
+        0xb5, 0x77, 0xf0, 0xca, 0xdf, 0xf3, 0xf2, 0x67,
+        0xda, 0x03, 0xd4, 0x9b, 0x4c, 0x88, 0xce, 0xe5,
+        0xf8, 0x44, 0x4d, 0xc7, 0x80, 0x58, 0xe5, 0xff,
+        0x22, 0x8f, 0xf5, 0x5b, 0x92, 0x81, 0xbe, 0x35,
+        0xdf, 0xda, 0x67, 0x99, 0x3e, 0xfc, 0xe3, 0x83,
+        0x6b, 0xa7, 0xaf, 0x16, 0xb7, 0x6f, 0x8f, 0xc0,
+        0x81, 0xfd, 0x0b, 0x77, 0x65, 0x95, 0xfb, 0x00,
+        0xad, 0x99, 0xec, 0x35, 0xc6, 0xe8, 0x23, 0x3e,
+        0xe0, 0x88, 0x88, 0x09, 0xdb, 0x16, 0x50, 0xb7,
+        0xcf, 0xab, 0x74, 0x61, 0x9e, 0x7f, 0xc5, 0x67,
+        0x38, 0x56, 0xc7, 0x90, 0x85, 0x78, 0x5e, 0x84,
+        0x21, 0x49, 0xea, 0xce, 0xb2, 0xa0, 0xff, 0xe4,
+        0x70, 0x7f, 0x57, 0x7b, 0xa8, 0x36, 0xb8, 0x54,
+        0x8d, 0x1d, 0xf5, 0x44, 0x9d, 0x68, 0x59, 0xf9,
+        0x24, 0x6e, 0x85, 0x8f, 0xc3, 0x5f, 0x8a, 0x2c,
+        0x94, 0xb7, 0xbc, 0x0e, 0xa5, 0xef, 0x93, 0x06,
+        0x38, 0xcd, 0x07, 0x0c, 0xae, 0xb8, 0x44, 0x1a,
+        0xd8, 0xe7, 0xf5, 0x9a, 0x1e, 0x9c, 0x18, 0xc7,
+        0x6a, 0xc2, 0x7f, 0x28, 0x01, 0x4f, 0xb4, 0xb8,
+        0x90, 0x97, 0x5a, 0x43, 0x38, 0xad, 0xe8, 0x95,
+        0x68, 0x83, 0x1a, 0x1b, 0x10, 0x07, 0xe6, 0x02,
+        0x52, 0x1f, 0xbf, 0x76, 0x6b, 0x46, 0xd6, 0xfb,
+        0xc3, 0xbe, 0xb5, 0xac, 0x52, 0x53, 0x01, 0x1c,
+        0xf3, 0xc5, 0xeb, 0x64, 0xf2, 0x1e, 0xc4, 0x38,
+        0xe9, 0xaa, 0xd9, 0xc3, 0x72, 0x51, 0xa5, 0x44,
+        0x58, 0x69, 0x0b, 0x1b, 0x98, 0x7f, 0xf2, 0x23,
+        0xff, 0xeb, 0xf0, 0x75, 0x24, 0xcf, 0xc5, 0x1e,
+        0xb8, 0x6a, 0xc5, 0x2f, 0x4f, 0x23, 0x50, 0x7d,
+        0x15, 0x9d, 0x19, 0x7a, 0x0b, 0x82, 0xe0, 0x21,
+        0x5b, 0x5f, 0x9d, 0x50, 0x2b, 0x83, 0xe4, 0x48,
+        0xcc, 0x39, 0xe5, 0xfb, 0x13, 0x7b, 0x6f, 0x81
+    };
+
+    static const PRUint8 rsa_prime0[FIPS_RSA_PRIME0_LENGTH] = {
+        0xe4, 0xbf, 0x21, 0x62, 0x9b, 0xa9, 0x77, 0x40,
+        0x8d, 0x2a, 0xce, 0xa1, 0x67, 0x5a, 0x4c, 0x96,
+        0x45, 0x98, 0x67, 0xbd, 0x75, 0x22, 0x33, 0x6f,
+        0xe6, 0xcb, 0x77, 0xde, 0x9e, 0x97, 0x7d, 0x96,
+        0x8c, 0x5e, 0x5d, 0x34, 0xfb, 0x27, 0xfc, 0x6d,
+        0x74, 0xdb, 0x9d, 0x2e, 0x6d, 0xf6, 0xea, 0xfc,
+        0xce, 0x9e, 0xda, 0xa7, 0x25, 0xa2, 0xf4, 0x58,
+        0x6d, 0x0a, 0x3f, 0x01, 0xc2, 0xb4, 0xab, 0x38,
+        0xc1, 0x14, 0x85, 0xb6, 0xfa, 0x94, 0xc3, 0x85,
+        0xf9, 0x3c, 0x2e, 0x96, 0x56, 0x01, 0xe7, 0xd6,
+        0x14, 0x71, 0x4f, 0xfb, 0x4c, 0x85, 0x52, 0xc4,
+        0x61, 0x1e, 0xa5, 0x1e, 0x96, 0x13, 0x0d, 0x8f,
+        0x66, 0xae, 0xa0, 0xcd, 0x7d, 0x25, 0x66, 0x19,
+        0x15, 0xc2, 0xcf, 0xc3, 0x12, 0x3c, 0xe8, 0xa4,
+        0x52, 0x4c, 0xcb, 0x28, 0x3c, 0xc4, 0xbf, 0x95,
+        0x33, 0xe3, 0x81, 0xea, 0x0c, 0x6c, 0xa2, 0x05
+    };
+    static const PRUint8 rsa_prime1[FIPS_RSA_PRIME1_LENGTH] = {
+        0xce, 0x03, 0x94, 0xf4, 0xa9, 0x2c, 0x1e, 0x06,
+        0xe7, 0x40, 0x30, 0x01, 0xf7, 0xbb, 0x68, 0x8c,
+        0x27, 0xd2, 0x15, 0xe3, 0x28, 0x49, 0x5b, 0xa8,
+        0xc1, 0x9a, 0x42, 0x7e, 0x31, 0xf9, 0x08, 0x34,
+        0x81, 0xa2, 0x0f, 0x04, 0x61, 0x34, 0xe3, 0x36,
+        0x92, 0xb1, 0x09, 0x2b, 0xe9, 0xef, 0x84, 0x88,
+        0xbe, 0x9c, 0x98, 0x60, 0xa6, 0x60, 0x84, 0xe9,
+        0x75, 0x6f, 0xcc, 0x81, 0xd1, 0x96, 0xef, 0xdd,
+        0x2e, 0xca, 0xc4, 0xf5, 0x42, 0xfb, 0x13, 0x2b,
+        0x57, 0xbf, 0x14, 0x5e, 0xc2, 0x7f, 0x77, 0x35,
+        0x29, 0xc4, 0xe5, 0xe0, 0xf9, 0x6d, 0x15, 0x4a,
+        0x42, 0x56, 0x1c, 0x3e, 0x0c, 0xc5, 0xce, 0x70,
+        0x08, 0x63, 0x1e, 0x73, 0xdb, 0x7e, 0x74, 0x05,
+        0x32, 0x01, 0xc6, 0x36, 0x32, 0x75, 0x6b, 0xed,
+        0x9d, 0xfe, 0x7c, 0x7e, 0xa9, 0x57, 0xb4, 0xe9,
+        0x22, 0xe4, 0xe7, 0xfe, 0x36, 0x07, 0x9b, 0xdf
+    };
+    static const PRUint8 rsa_exponent0[FIPS_RSA_EXPONENT0_LENGTH] = {
+        0x04, 0x5a, 0x3a, 0xa9, 0x64, 0xaa, 0xd9, 0xd1,
+        0x09, 0x9e, 0x99, 0xe5, 0xea, 0x50, 0x86, 0x8a,
+        0x89, 0x72, 0x77, 0xee, 0xdb, 0xee, 0xb5, 0xa9,
+        0xd8, 0x6b, 0x60, 0xb1, 0x84, 0xb4, 0xff, 0x37,
+        0xc1, 0x1d, 0xfe, 0x8a, 0x06, 0x89, 0x61, 0x3d,
+        0x37, 0xef, 0x01, 0xd3, 0xa3, 0x56, 0x02, 0x6c,
+        0xa3, 0x05, 0xd4, 0xc5, 0x3f, 0x6b, 0x15, 0x59,
+        0x25, 0x61, 0xff, 0x86, 0xea, 0x0c, 0x84, 0x01,
+        0x85, 0x72, 0xfd, 0x84, 0x58, 0xca, 0x41, 0xda,
+        0x27, 0xbe, 0xe4, 0x68, 0x09, 0xe4, 0xe9, 0x63,
+        0x62, 0x6a, 0x31, 0x8a, 0x67, 0x8f, 0x55, 0xde,
+        0xd4, 0xb6, 0x3f, 0x90, 0x10, 0x6c, 0xf6, 0x62,
+        0x17, 0x23, 0x15, 0x7e, 0x33, 0x76, 0x65, 0xb5,
+        0xee, 0x7b, 0x11, 0x76, 0xf5, 0xbe, 0xe0, 0xf2,
+        0x57, 0x7a, 0x8c, 0x97, 0x0c, 0x68, 0xf5, 0xf8,
+        0x41, 0xcf, 0x7f, 0x66, 0x53, 0xac, 0x31, 0x7d
+    };
+    static const PRUint8 rsa_exponent1[FIPS_RSA_EXPONENT1_LENGTH] = {
+        0x93, 0x54, 0x14, 0x6e, 0x73, 0x9d, 0x4d, 0x4b,
+        0xfa, 0x8c, 0xf8, 0xc8, 0x2f, 0x76, 0x22, 0xea,
+        0x38, 0x80, 0x11, 0x8f, 0x05, 0xfc, 0x90, 0x44,
+        0x3b, 0x50, 0x2a, 0x45, 0x3d, 0x4f, 0xaf, 0x02,
+        0x7d, 0xc2, 0x7b, 0xa2, 0xd2, 0x31, 0x94, 0x5c,
+        0x2e, 0xc3, 0xd4, 0x9f, 0x47, 0x09, 0x37, 0x6a,
+        0xe3, 0x85, 0xf1, 0xa3, 0x0c, 0xd8, 0xf1, 0xb4,
+        0x53, 0x7b, 0xc4, 0x71, 0x02, 0x86, 0x42, 0xbb,
+        0x96, 0xff, 0x03, 0xa3, 0xb2, 0x67, 0x03, 0xea,
+        0x77, 0x31, 0xfb, 0x4b, 0x59, 0x24, 0xf7, 0x07,
+        0x59, 0xfb, 0xa9, 0xba, 0x1e, 0x26, 0x58, 0x97,
+        0x66, 0xa1, 0x56, 0x49, 0x39, 0xb1, 0x2c, 0x55,
+        0x0a, 0x6a, 0x78, 0x18, 0xba, 0xdb, 0xcf, 0xf4,
+        0xf7, 0x32, 0x35, 0xa2, 0x04, 0xab, 0xdc, 0xa7,
+        0x6d, 0xd9, 0xd5, 0x06, 0x6f, 0xec, 0x7d, 0x40,
+        0x4c, 0xe8, 0x0e, 0xd0, 0xc9, 0xaa, 0xdf, 0x59
+    };
+    static const PRUint8 rsa_coefficient[FIPS_RSA_COEFFICIENT_LENGTH] = {
+        0x17, 0xd7, 0xf5, 0x0a, 0xf0, 0x68, 0x97, 0x96,
+        0xc4, 0x29, 0x18, 0x77, 0x9a, 0x1f, 0xe3, 0xf3,
+        0x12, 0x13, 0x0f, 0x7e, 0x7b, 0xb9, 0xc1, 0x91,
+        0xf9, 0xc7, 0x08, 0x56, 0x5c, 0xa4, 0xbc, 0x83,
+        0x71, 0xf9, 0x78, 0xd9, 0x2b, 0xec, 0xfe, 0x6b,
+        0xdc, 0x2f, 0x63, 0xc9, 0xcd, 0x50, 0x14, 0x5b,
+        0xd3, 0x6e, 0x85, 0x4d, 0x0c, 0xa2, 0x0b, 0xa0,
+        0x09, 0xb6, 0xca, 0x34, 0x9c, 0xc2, 0xc1, 0x4a,
+        0xb0, 0xbc, 0x45, 0x93, 0xa5, 0x7e, 0x99, 0xb5,
+        0xbd, 0xe4, 0x69, 0x29, 0x08, 0x28, 0xd2, 0xcd,
+        0xab, 0x24, 0x78, 0x48, 0x41, 0x26, 0x0b, 0x37,
+        0xa3, 0x43, 0xd1, 0x95, 0x1a, 0xd6, 0xee, 0x22,
+        0x1c, 0x00, 0x0b, 0xc2, 0xb7, 0xa4, 0xa3, 0x21,
+        0xa9, 0xcd, 0xe4, 0x69, 0xd3, 0x45, 0x02, 0xb1,
+        0xb7, 0x3a, 0xbf, 0x51, 0x35, 0x1b, 0x78, 0xc2,
+        0xcf, 0x0c, 0x0d, 0x60, 0x09, 0xa9, 0x44, 0x02
+    };
+
+    /* RSA Known Plaintext Message (2048-bits: eight 32-byte strings). */
+    static const PRUint8 rsa_known_plaintext_msg[FIPS_RSA_MESSAGE_LENGTH] = {
+        "Known plaintext message utilized"
+        "for RSA Encryption &  Decryption"
+        "blocks SHA256, SHA384  and      "
+        "SHA512 RSA Signature KAT tests. "
+        "Known plaintext message utilized"
+        "for RSA Encryption &  Decryption"
+        "blocks SHA256, SHA384  and      "
+        "SHA512 RSA Signature KAT  tests."
+    };
+
+    /* RSA Known Ciphertext (2048-bits). */
+    static const PRUint8 rsa_known_ciphertext[] = {
+        0x04, 0x12, 0x46, 0xe3, 0x6a, 0xee, 0xde, 0xdd,
+        0x49, 0xa1, 0xd9, 0x83, 0xf7, 0x35, 0xf9, 0x70,
+        0x88, 0x03, 0x2d, 0x01, 0x8b, 0xd1, 0xbf, 0xdb,
+        0xe5, 0x1c, 0x85, 0xbe, 0xb5, 0x0b, 0x48, 0x45,
+        0x7a, 0xf0, 0xa0, 0xe3, 0xa2, 0xbb, 0x4b, 0xf6,
+        0x27, 0xd0, 0x1b, 0x12, 0xe3, 0x77, 0x52, 0x34,
+        0x9e, 0x8e, 0x03, 0xd2, 0xf8, 0x79, 0x6e, 0x39,
+        0x79, 0x53, 0x3c, 0x44, 0x14, 0x94, 0xbb, 0x8d,
+        0xaa, 0x14, 0x44, 0xa0, 0x7b, 0xa5, 0x8c, 0x93,
+        0x5f, 0x99, 0xa4, 0xa3, 0x6e, 0x7a, 0x38, 0x40,
+        0x78, 0xfa, 0x36, 0x91, 0x5e, 0x9a, 0x9c, 0xba,
+        0x1e, 0xd4, 0xf9, 0xda, 0x4b, 0x0f, 0xa8, 0xa3,
+        0x1c, 0xf3, 0x3a, 0xd1, 0xa5, 0xb4, 0x51, 0x16,
+        0xed, 0x4b, 0xcf, 0xec, 0x93, 0x7b, 0x90, 0x21,
+        0xbc, 0x3a, 0xf4, 0x0b, 0xd1, 0x3a, 0x2b, 0xba,
+        0xa6, 0x7d, 0x5b, 0x53, 0xd8, 0x64, 0xf9, 0x29,
+        0x7b, 0x7f, 0x77, 0x3e, 0x51, 0x4c, 0x9a, 0x94,
+        0xd2, 0x4b, 0x4a, 0x8d, 0x61, 0x74, 0x97, 0xae,
+        0x53, 0x6a, 0xf4, 0x90, 0xc2, 0x2c, 0x49, 0xe2,
+        0xfa, 0xeb, 0x91, 0xc5, 0xe5, 0x83, 0x13, 0xc9,
+        0x44, 0x4b, 0x95, 0x2c, 0x57, 0x70, 0x15, 0x5c,
+        0x64, 0x8d, 0x1a, 0xfd, 0x2a, 0xc7, 0xb2, 0x9c,
+        0x5c, 0x99, 0xd3, 0x4a, 0xfd, 0xdd, 0xf6, 0x82,
+        0x87, 0x8c, 0x5a, 0xc4, 0xa8, 0x0d, 0x2a, 0xef,
+        0xc3, 0xa2, 0x7e, 0x8e, 0x67, 0x9f, 0x6f, 0x63,
+        0xdb, 0xbb, 0x1d, 0x31, 0xc4, 0xbb, 0xbc, 0x13,
+        0x3f, 0x54, 0xc6, 0xf6, 0xc5, 0x28, 0x32, 0xab,
+        0x96, 0x42, 0x10, 0x36, 0x40, 0x92, 0xbb, 0x57,
+        0x55, 0x38, 0xf5, 0x43, 0x7e, 0x43, 0xc4, 0x65,
+        0x47, 0x64, 0xaa, 0x0f, 0x4c, 0xe9, 0x49, 0x16,
+        0xec, 0x6a, 0x50, 0xfd, 0x14, 0x49, 0xca, 0xdb,
+        0x44, 0x54, 0xca, 0xbe, 0xa3, 0x0e, 0x5f, 0xef
+    };
+
+    static const RSAPublicKey bl_public_key = {
+        NULL,
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_modulus,
+          FIPS_RSA_MODULUS_LENGTH },
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_public_exponent,
+          FIPS_RSA_PUBLIC_EXPONENT_LENGTH }
+    };
+    static const RSAPrivateKey bl_private_key = {
+        NULL,
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_version,
+          FIPS_RSA_PRIVATE_VERSION_LENGTH },
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_modulus,
+          FIPS_RSA_MODULUS_LENGTH },
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_public_exponent,
+          FIPS_RSA_PUBLIC_EXPONENT_LENGTH },
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_private_exponent,
+          FIPS_RSA_PRIVATE_EXPONENT_LENGTH },
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_prime0,
+          FIPS_RSA_PRIME0_LENGTH },
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_prime1,
+          FIPS_RSA_PRIME1_LENGTH },
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_exponent0,
+          FIPS_RSA_EXPONENT0_LENGTH },
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_exponent1,
+          FIPS_RSA_EXPONENT1_LENGTH },
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_coefficient,
+          FIPS_RSA_COEFFICIENT_LENGTH }
+    };
+
+    /* Working copies of the constant keys, passed by address below. */
+    RSAPublicKey pub_key = bl_public_key;
+    RSAPrivateKey priv_key = bl_private_key;
+
+    PRUint8 ciphertext[FIPS_RSA_ENCRYPT_LENGTH];
+    PRUint8 plaintext[FIPS_RSA_DECRYPT_LENGTH];
+    SECStatus rv;
+
+    /*
+     * Known-answer encryption: RSA_PublicKeyOp on the known plaintext
+     * must reproduce the known ciphertext.
+     */
+    rv = RSA_PublicKeyOp(&pub_key, ciphertext, rsa_known_plaintext_msg);
+    if (rv != SECSuccess) {
+        goto rsa_loser;
+    }
+    if (PORT_Memcmp(ciphertext, rsa_known_ciphertext,
+                    FIPS_RSA_ENCRYPT_LENGTH) != 0) {
+        goto rsa_loser;
+    }
+
+    /*
+     * Known-answer decryption: RSA_PrivateKeyOp on the known ciphertext
+     * must recover the known plaintext.
+     */
+    rv = RSA_PrivateKeyOp(&priv_key, plaintext, rsa_known_ciphertext);
+    if (rv != SECSuccess) {
+        goto rsa_loser;
+    }
+    if (PORT_Memcmp(plaintext, rsa_known_plaintext_msg,
+                    FIPS_RSA_DECRYPT_LENGTH) != 0) {
+        goto rsa_loser;
+    }
+
+    return SECSuccess;
+
+rsa_loser:
+    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    return SECFailure;
+}
+
+#ifdef NSS_ENABLE_ECC
+
+/*
+ * Runs an ECDSA known-answer test on the curve described by ecparams:
+ * derives a key pair from a fixed seed, signs the SHA-1 digest of a fixed
+ * message, compares the result with knownSignature (knownSignatureLen
+ * bytes) and then verifies it.  Returns SECSuccess if every step succeeds;
+ * otherwise sets SEC_ERROR_LIBRARY_FAILURE and returns SECFailure.
+ */
+static SECStatus
+freebl_fips_ECDSA_Test(ECParams *ecparams,
+                       const PRUint8 *knownSignature,
+                       unsigned int knownSignatureLen)
+{
+    /* Fixed seed: used to derive the key pair and again when signing. */
+    static const PRUint8 ecdsa_Known_Seed[] = {
+        0x6a, 0x9b, 0xf6, 0xf7, 0xce, 0xed, 0x79, 0x11,
+        0xf0, 0xc7, 0xc8, 0x9a, 0xa5, 0xd1, 0x57, 0xb1,
+        0x7b, 0x5a, 0x3b, 0x76, 0x4e, 0x7b, 0x7c, 0xbc,
+        0xf2, 0x76, 0x1c, 0x1c, 0x7f, 0xc5, 0x53, 0x2f
+    };
+
+    static const PRUint8 msg[] = {
+        "Firefox and ThunderBird are awesome!"
+    };
+
+    unsigned char hash[SHA1_LENGTH]; /* SHA-1 digest of msg (160 bits) */
+    unsigned char sig_buf[2 * MAX_ECKEY_LEN];
+    SECItem signature, digest;
+    ECPrivateKey *priv_key = NULL;
+    ECPublicKey pub_key;
+    SECStatus rv = SECSuccess;
+
+    /* Create the EC key pair.  The seed supplies the private value and
+     * the public value is the scalar point multiplication of that value
+     * with the curve's base point.  On failure priv_key is never
+     * dereferenced: return directly, bypassing the cleanup at loser.
+     */
+    rv = EC_NewKeyFromSeed(ecparams, &priv_key,
+                           ecdsa_Known_Seed,
+                           sizeof(ecdsa_Known_Seed));
+    if (rv != SECSuccess) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    /* Build the public key from the private key's parameters and point. */
+    pub_key.ecParams = priv_key->ecParams;
+    pub_key.publicValue = priv_key->publicValue;
+
+    /* Validate the public value through the public-key copy... */
+    rv = EC_ValidatePublicKey(&pub_key.ecParams,
+                              &pub_key.publicValue);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+
+    /* ...and again through the fields held in the private key. */
+    rv = EC_ValidatePublicKey(&priv_key->ecParams,
+                              &priv_key->publicValue);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+
+    /* Known-answer signature test.  sizeof(msg) also counts the string's
+     * terminating NUL, so that byte is part of the hashed input.
+     */
+    rv = SHA1_HashBuf(hash, msg, sizeof(msg));
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+    digest.type = siBuffer;
+    digest.data = hash;
+    digest.len = SHA1_LENGTH;
+
+    memset(sig_buf, 0, sizeof(sig_buf));
+    signature.type = siBuffer;
+    signature.data = sig_buf;
+    signature.len = sizeof(sig_buf);
+
+    rv = ECDSA_SignDigestWithSeed(priv_key, &signature, &digest,
+                                  ecdsa_Known_Seed, sizeof(ecdsa_Known_Seed));
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+
+    if (signature.len != knownSignatureLen ||
+        PORT_Memcmp(signature.data, knownSignature, knownSignatureLen) != 0) {
+        rv = SECFailure;
+        goto loser;
+    }
+
+    /* Known-answer verification test on the signature just produced. */
+    rv = ECDSA_VerifyDigest(&pub_key, &signature, &digest);
+
+loser:
+    /* Free the private key's arena. */
+    PORT_FreeArena(priv_key->ecParams.arena, PR_FALSE);
+
+    if (rv == SECSuccess) {
+        return SECSuccess;
+    }
+    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    return SECFailure;
+}
+
+/* ECDSA power-up self-test: runs freebl_fips_ECDSA_Test on NIST P-256 with
+ * a stored known signature and returns SECSuccess or SECFailure. */
+static SECStatus
+freebl_fips_ECDSA_PowerUpSelfTest(void)
+{
+    /* ECDSA Known curve nistp256 == ECCCurve_X9_62_PRIME_256V1 params */
+    static const unsigned char p256_prime[] = {
+        0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+    };
+    static const unsigned char p256_a[] = {
+        0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFC
+    };
+    static const unsigned char p256_b[] = {
+        0x5A, 0xC6, 0x35, 0xD8, 0xAA, 0x3A, 0x93, 0xE7, 0xB3, 0xEB, 0xBD, 0x55, 0x76,
+        0x98, 0x86, 0xBC, 0x65, 0x1D, 0x06, 0xB0, 0xCC, 0x53, 0xB0, 0xF6, 0x3B, 0xCE,
+        0x3C, 0x3E, 0x27, 0xD2, 0x60, 0x4B
+    };
+    static const unsigned char p256_base[] = {
+        0x04,
+        0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8, 0xBC, 0xE6, 0xE5, 0x63,
+        0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D, 0x81, 0x2D, 0xEB, 0x33, 0xA0, 0xF4, 0xA1,
+        0x39, 0x45, 0xD8, 0x98, 0xC2, 0x96,
+        0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F, 0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C,
+        0x0F, 0x9E, 0x16, 0x2B, 0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, 0xCB, 0xB6,
+        0x40, 0x68, 0x37, 0xBF, 0x51, 0xF5
+    };
+    static const unsigned char p256_order[] = {
+        0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+        0xFF, 0xFF, 0xFF, 0xBC, 0xE6, 0xFA, 0xAD, 0xA7, 0x17, 0x9E, 0x84, 0xF3, 0xB9,
+        0xCA, 0xC2, 0xFC, 0x63, 0x25, 0x51
+    };
+    static const unsigned char p256_encoding[] = {
+        0x06, 0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x03, 0x01, 0x07
+    };
+    static const ECParams ecdsa_known_P256_Params = {
+        NULL, ec_params_named,                                               /* arena, type */
+                                                                             /* fieldID */
+        { 256, ec_field_GFp,                                                 /* size and type */
+          { { siBuffer, (unsigned char *)p256_prime, sizeof(p256_prime) } }, /* u.prime */
+          0,
+          0,
+          0 },
+        /* curve */
+        { /* a = curvea b = curveb */
+          /* curve.a */
+          { siBuffer, (unsigned char *)p256_a, sizeof(p256_a) },
+          /* curve.b */
+          { siBuffer, (unsigned char *)p256_b, sizeof(p256_b) },
+          /* curve.seed */
+          { siBuffer, NULL, 0 } },
+        /* base  = 04xy*/
+        { siBuffer, (unsigned char *)p256_base, sizeof(p256_base) },
+        /* order */
+        { siBuffer, (unsigned char *)p256_order, sizeof(p256_order) },
+        1, /* cofactor */
+        /* DEREncoding */
+        { siBuffer, (unsigned char *)p256_encoding, sizeof(p256_encoding) },
+        ECCurve_X9_62_PRIME_256V1,
+        /* curveOID */
+        { siBuffer, (unsigned char *)(p256_encoding) + 2, sizeof(p256_encoding) - 2 },
+    };
+
+    static const PRUint8 ecdsa_known_P256_signature[] = {
+        0x07, 0xb1, 0xcb, 0x57, 0x20, 0xa7, 0x10, 0xd6,
+        0x9d, 0x37, 0x4b, 0x1c, 0xdc, 0x35, 0x90, 0xff,
+        0x1a, 0x2d, 0x98, 0x95, 0x1b, 0x2f, 0xeb, 0x7f,
+        0xbb, 0x81, 0xca, 0xc0, 0x69, 0x75, 0xea, 0xc5,
+        0x59, 0x6a, 0x62, 0x49, 0x3d, 0x50, 0xc9, 0xe1,
+        0x27, 0x3b, 0xff, 0x9b, 0x13, 0x66, 0x67, 0xdd,
+        0x7d, 0xd1, 0x0d, 0x2d, 0x7c, 0x44, 0x04, 0x1b,
+        0x16, 0x21, 0x12, 0xc5, 0xcb, 0xbd, 0x9e, 0x75
+    };
+
+    ECParams ecparams;
+    SECStatus rv;
+
+    /* ECDSA GF(p) prime field curve test, run on a mutable copy of the params */
+    ecparams = ecdsa_known_P256_Params;
+    rv = freebl_fips_ECDSA_Test(&ecparams,
+                                ecdsa_known_P256_signature,
+                                sizeof ecdsa_known_P256_signature);
+    if (rv != SECSuccess) {
+        return (SECFailure);
+    }
+
+    return (SECSuccess);
+}
+
+#endif /* NSS_ENABLE_ECC */
+
+static SECStatus
+freebl_fips_DSA_PowerUpSelfTest(void)
+{
+    /* DSA known-answer test: sign a fixed digest, compare, then verify it. */
+    static const PRUint8 dsa_P[] = { /* known P, 1024 bits */
+        0x80, 0xb0, 0xd1, 0x9d, 0x6e, 0xa4, 0xf3, 0x28,
+        0x9f, 0x24, 0xa9, 0x8a, 0x49, 0xd0, 0x0c, 0x63,
+        0xe8, 0x59, 0x04, 0xf9, 0x89, 0x4a, 0x5e, 0xc0,
+        0x6d, 0xd2, 0x67, 0x6b, 0x37, 0x81, 0x83, 0x0c,
+        0xfe, 0x3a, 0x8a, 0xfd, 0xa0, 0x3b, 0x08, 0x91,
+        0x1c, 0xcb, 0xb5, 0x63, 0xb0, 0x1c, 0x70, 0xd0,
+        0xae, 0xe1, 0x60, 0x2e, 0x12, 0xeb, 0x54, 0xc7,
+        0xcf, 0xc6, 0xcc, 0xae, 0x97, 0x52, 0x32, 0x63,
+        0xd3, 0xeb, 0x55, 0xea, 0x2f, 0x4c, 0xd5, 0xd7,
+        0x3f, 0xda, 0xec, 0x49, 0x27, 0x0b, 0x14, 0x56,
+        0xc5, 0x09, 0xbe, 0x4d, 0x09, 0x15, 0x75, 0x2b,
+        0xa3, 0x42, 0x0d, 0x03, 0x71, 0xdf, 0x0f, 0xf4,
+        0x0e, 0xe9, 0x0c, 0x46, 0x93, 0x3d, 0x3f, 0xa6,
+        0x6c, 0xdb, 0xca, 0xe5, 0xac, 0x96, 0xc8, 0x64,
+        0x5c, 0xec, 0x4b, 0x35, 0x65, 0xfc, 0xfb, 0x5a,
+        0x1b, 0x04, 0x1b, 0xa1, 0x0e, 0xfd, 0x88, 0x15
+    };
+
+    static const PRUint8 dsa_Q[] = { /* known Q, 160 bits */
+        0xad, 0x22, 0x59, 0xdf, 0xe5, 0xec, 0x4c, 0x6e,
+        0xf9, 0x43, 0xf0, 0x4b, 0x2d, 0x50, 0x51, 0xc6,
+        0x91, 0x99, 0x8b, 0xcf
+    };
+
+    static const PRUint8 dsa_G[] = { /* known G, 1024 bits */
+        0x78, 0x6e, 0xa9, 0xd8, 0xcd, 0x4a, 0x85, 0xa4,
+        0x45, 0xb6, 0x6e, 0x5d, 0x21, 0x50, 0x61, 0xf6,
+        0x5f, 0xdf, 0x5c, 0x7a, 0xde, 0x0d, 0x19, 0xd3,
+        0xc1, 0x3b, 0x14, 0xcc, 0x8e, 0xed, 0xdb, 0x17,
+        0xb6, 0xca, 0xba, 0x86, 0xa9, 0xea, 0x51, 0x2d,
+        0xc1, 0xa9, 0x16, 0xda, 0xf8, 0x7b, 0x59, 0x8a,
+        0xdf, 0xcb, 0xa4, 0x67, 0x00, 0x44, 0xea, 0x24,
+        0x73, 0xe5, 0xcb, 0x4b, 0xaf, 0x2a, 0x31, 0x25,
+        0x22, 0x28, 0x3f, 0x16, 0x10, 0x82, 0xf7, 0xeb,
+        0x94, 0x0d, 0xdd, 0x09, 0x22, 0x14, 0x08, 0x79,
+        0xba, 0x11, 0x0b, 0xf1, 0xff, 0x2d, 0x67, 0xac,
+        0xeb, 0xb6, 0x55, 0x51, 0x69, 0x97, 0xa7, 0x25,
+        0x6b, 0x9c, 0xa0, 0x9b, 0xd5, 0x08, 0x9b, 0x27,
+        0x42, 0x1c, 0x7a, 0x69, 0x57, 0xe6, 0x2e, 0xed,
+        0xa9, 0x5b, 0x25, 0xe8, 0x1f, 0xd2, 0xed, 0x1f,
+        0xdf, 0xe7, 0x80, 0x17, 0xba, 0x0d, 0x4d, 0x38
+    };
+
+    /* DSA Known Random Values (known random key block       is 160-bits)  */
+    /*                     and (known random signature block is 160-bits). */
+    static const PRUint8 dsa_known_random_key_block[] = {
+        "Mozilla Rules World!"
+    };
+    static const PRUint8 dsa_known_random_signature_block[] = {
+        "Random DSA Signature"
+    };
+
+    /* DSA Known Digest (160-bits) */
+    static const PRUint8 dsa_known_digest[] = { "DSA Signature Digest" };
+
+    /* DSA Known Signature (320-bits). */
+    static const PRUint8 dsa_known_signature[] = {
+        0x25, 0x7c, 0x3a, 0x79, 0x32, 0x45, 0xb7, 0x32,
+        0x70, 0xca, 0x62, 0x63, 0x2b, 0xf6, 0x29, 0x2c,
+        0x22, 0x2a, 0x03, 0xce, 0x48, 0x15, 0x11, 0x72,
+        0x7b, 0x7e, 0xf5, 0x7a, 0xf3, 0x10, 0x3b, 0xde,
+        0x34, 0xc1, 0x9e, 0xd7, 0x27, 0x9e, 0x77, 0x38
+    };
+
+    /* DSA variables. */
+    DSAPrivateKey *dsa_private_key;
+    SECStatus dsa_status;
+    SECItem dsa_signature_item;
+    SECItem dsa_digest_item;
+    DSAPublicKey dsa_public_key;
+    PRUint8 dsa_computed_signature[FIPS_DSA_SIGNATURE_LENGTH];
+    static const PQGParams dsa_pqg = {
+        NULL,
+        { FIPS_DSA_TYPE, (unsigned char *)dsa_P, FIPS_DSA_PRIME_LENGTH },
+        { FIPS_DSA_TYPE, (unsigned char *)dsa_Q, FIPS_DSA_SUBPRIME_LENGTH },
+        { FIPS_DSA_TYPE, (unsigned char *)dsa_G, FIPS_DSA_BASE_LENGTH }
+    };
+
+    /*******************************************/
+    /* Generate a DSA public/private key pair. */
+    /*******************************************/
+
+    /* Build the key pair from the fixed PQG values and the known key block. */
+    dsa_status = DSA_NewKeyFromSeed(&dsa_pqg, dsa_known_random_key_block,
+                                    &dsa_private_key);
+
+    if (dsa_status != SECSuccess) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY); /* reported for any failure here */
+        return (SECFailure);
+    }
+
+    /* construct public key from private key. */
+    dsa_public_key.params = dsa_private_key->params;
+    dsa_public_key.publicValue = dsa_private_key->publicValue;
+
+    /*************************************************/
+    /* DSA Single-Round Known Answer Signature Test. */
+    /*************************************************/
+
+    dsa_signature_item.data = dsa_computed_signature;
+    dsa_signature_item.len = sizeof dsa_computed_signature;
+
+    dsa_digest_item.data = (unsigned char *)dsa_known_digest;
+    dsa_digest_item.len = SHA1_LENGTH;
+
+    /* Perform DSA signature process. */
+    dsa_status = DSA_SignDigestWithSeed(dsa_private_key,
+                                        &dsa_signature_item,
+                                        &dsa_digest_item,
+                                        dsa_known_random_signature_block);
+
+    if ((dsa_status != SECSuccess) ||
+        (dsa_signature_item.len != FIPS_DSA_SIGNATURE_LENGTH) ||
+        (PORT_Memcmp(dsa_computed_signature, dsa_known_signature,
+                     FIPS_DSA_SIGNATURE_LENGTH) != 0)) {
+        dsa_status = SECFailure;
+    } else {
+
+        /****************************************************/
+        /* DSA Single-Round Known Answer Verification Test. */
+        /****************************************************/
+
+        /* Perform DSA verification process. */
+        dsa_status = DSA_VerifyDigest(&dsa_public_key,
+                                      &dsa_signature_item,
+                                      &dsa_digest_item);
+    }
+
+    PORT_FreeArena(dsa_private_key->params.arena, PR_TRUE);
+    /* Don't free public key, it uses same arena as private key */
+
+    /* Fail if either the known-answer signature or the verification failed. */
+    if (dsa_status != SECSuccess) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    return (SECSuccess);
+}
+
+static SECStatus
+freebl_fips_RNG_PowerUpSelfTest(void)
+{
+    static const PRUint8 Q[] = {
+        0x85, 0x89, 0x9c, 0x77, 0xa3, 0x79, 0xff, 0x1a,
+        0x86, 0x6f, 0x2f, 0x3e, 0x2e, 0xf9, 0x8c, 0x9c,
+        0x9d, 0xef, 0xeb, 0xed
+    };
+    static const PRUint8 GENX[] = {
+        0x65, 0x48, 0xe3, 0xca, 0xac, 0x64, 0x2d, 0xf7,
+        0x7b, 0xd3, 0x4e, 0x79, 0xc9, 0x7d, 0xa6, 0xa8,
+        0xa2, 0xc2, 0x1f, 0x8f, 0xe9, 0xb9, 0xd3, 0xa1,
+        0x3f, 0xf7, 0x0c, 0xcd, 0xa6, 0xca, 0xbf, 0xce,
+        0x84, 0x0e, 0xb6, 0xf1, 0x0d, 0xbe, 0xa9, 0xa3
+    };
+    static const PRUint8 rng_known_DSAX[] = {
+        0x7a, 0x86, 0xf1, 0x7f, 0xbd, 0x4e, 0x6e, 0xd9,
+        0x0a, 0x26, 0x21, 0xd0, 0x19, 0xcb, 0x86, 0x73,
+        0x10, 0x1f, 0x60, 0xd7
+    };
+
+    PRUint8 computed_x[FIPS_DSA_SUBPRIME_LENGTH];
+
+    /*
+     * Step 1: the SP 800-90 health tests have to pass before anything
+     * else is attempted.
+     */
+    if (PRNGTEST_RunHealthTests() != SECSuccess) {
+        goto failed;
+    }
+
+    /*
+     * Step 2: RNG integrity check. Generate DSAX for the fixed GENX and Q
+     * inputs and require it to match rng_known_DSAX byte for byte.
+     */
+    if (FIPS186Change_ReduceModQForDSA(GENX, Q, computed_x) != SECSuccess) {
+        goto failed;
+    }
+    if (PORT_Memcmp(computed_x, rng_known_DSAX,
+                    FIPS_DSA_SUBPRIME_LENGTH) != 0) {
+        goto failed;
+    }
+    return SECSuccess;
+
+failed:
+    /* every failure above is reported the same way */
+    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    return SECFailure;
+}
+
+static SECStatus
+freebl_fipsSoftwareIntegrityTest(const char *libname)
+{
+    /*
+     * Software integrity test: make sure that the check file signatures
+     * for the library named by libname are OK. A false result from
+     * BLAPI_VerifySelf() is reported as SECFailure.
+     */
+    PRBool verified = BLAPI_VerifySelf(libname);
+    return verified ? SECSuccess : SECFailure;
+}
+
+#define DO_FREEBL 1
+#define DO_REST 2
+
+static SECStatus
+freebl_fipsPowerUpSelfTest(unsigned int tests)
+{
+    SECStatus status;
+
+    /*
+     * DO_FREEBL selects the hash and RNG tests used by stand alone freebl.
+     * The first test that fails ends the run and its status is returned.
+     */
+    if ((tests & DO_FREEBL) != 0) {
+        /* SHA-X power-up self test(s) */
+        status = freebl_fips_SHA_PowerUpSelfTest();
+        if (status != SECSuccess) {
+            return status;
+        }
+
+        /* RNG power-up self test(s) */
+        status = freebl_fips_RNG_PowerUpSelfTest();
+        if (status != SECSuccess) {
+            return status;
+        }
+    }
+
+    /*
+     * DO_REST selects the rest of the algorithms, the ones that are not
+     * accessed through stand alone freebl.
+     */
+    if ((tests & DO_REST) != 0) {
+        /* DES3 power-up self test(s) */
+        status = freebl_fips_DES3_PowerUpSelfTest();
+        if (status != SECSuccess) {
+            return status;
+        }
+
+        /* AES power-up self test(s), 128-bit key */
+        status = freebl_fips_AES_PowerUpSelfTest(FIPS_AES_128_KEY_SIZE);
+        if (status != SECSuccess) {
+            return status;
+        }
+
+        /* AES power-up self test(s), 192-bit key */
+        status = freebl_fips_AES_PowerUpSelfTest(FIPS_AES_192_KEY_SIZE);
+        if (status != SECSuccess) {
+            return status;
+        }
+
+        /* AES power-up self test(s), 256-bit key */
+        status = freebl_fips_AES_PowerUpSelfTest(FIPS_AES_256_KEY_SIZE);
+        if (status != SECSuccess) {
+            return status;
+        }
+
+        /* HMAC SHA-X power-up self test(s) */
+        status = freebl_fips_HMAC_PowerUpSelfTest();
+        if (status != SECSuccess) {
+            return status;
+        }
+
+        /* RSA power-up self test(s).
+         * NOTE: RSA can only be tested in full freebl. It requires access to
+         * the locking primitives. */
+        status = freebl_fips_RSA_PowerUpSelfTest();
+        if (status != SECSuccess) {
+            return status;
+        }
+
+        /* DSA power-up self test(s) */
+        status = freebl_fips_DSA_PowerUpSelfTest();
+        if (status != SECSuccess) {
+            return status;
+        }
+
+#ifdef NSS_ENABLE_ECC
+        /* ECDSA power-up self test(s) */
+        status = freebl_fips_ECDSA_PowerUpSelfTest();
+        if (status != SECSuccess) {
+            return status;
+        }
+#endif
+    }
+    /* every selected test passed */
+    return SECSuccess;
+}
+
+/*
+ * state variables. NOTE: freebl has two uses: a standalone use which
+ * provides limited access to the hash functions through the NSSLOWHASH_
+ * interface and a joint use from softoken, using the function pointer
+ * table. The standalone use can operate without nspr or nss-util, while
+ * the joint use requires both to be loaded. Certain functions (like RSA)
+ * need locking from NSPR, for instance.
+ *
+ * At load time, we need to handle the two uses separately. If nspr and
+ * nss-util  are loaded, then we can run all the selftests, but if nspr and
+ * nss-util are not loaded, then we can't run all the selftests, and we need
+ * to prevent the softoken function pointer table from operating until the
+ * libraries are loaded and we try to use them.
+ */
+static PRBool self_tests_freebl_ran = PR_FALSE;
+static PRBool self_tests_ran = PR_FALSE;
+static PRBool self_tests_freebl_success = PR_FALSE;
+static PRBool self_tests_success = PR_FALSE;
+#if defined(DEBUG)
+static PRBool fips_mode_available = PR_FALSE;
+#endif
+
+/*
+ * accessors for freebl
+ */
+PRBool
+BL_POSTRan(PRBool freebl_only)
+{
+    /*
+     * The freebl subset is run by the on-load tests; if it never ran,
+     * there is something wrong with those tests.
+     */
+    if (!self_tests_freebl_ran) {
+        return PR_FALSE;
+    }
+
+    /*
+     * Nothing more to do when all the self tests have already run, or when
+     * the caller only cares about the freebl subset.
+     */
+    if (self_tests_ran || freebl_only) {
+        return PR_TRUE;
+    }
+
+    /*
+     * freebl was loaded without the rest of the support libraries, and now
+     * more than standalone freebl is wanted. Run the remaining tests; if
+     * they fail, self_tests_success is left unset (access stays disabled).
+     */
+    self_tests_ran = PR_TRUE;
+    BL_Init();     /* required by RSA */
+    RNG_RNGInit(); /* required by RSA */
+    if (freebl_fipsPowerUpSelfTest(DO_REST) == SECSuccess) {
+        self_tests_success = PR_TRUE;
+    }
+    return PR_TRUE;
+}
+
+#include "blname.c"
+
+/*
+ * Runs the power-on self tests. This function is called at dll load time;
+ * the code that makes this happen is platform specific and defined above.
+ */
+static void
+bl_startup_tests(void)
+{
+    PRBool freebl_only = PR_FALSE;
+    unsigned int tests = DO_FREEBL;
+
+    PORT_Assert(self_tests_freebl_ran == PR_FALSE);
+    PORT_Assert(self_tests_success == PR_FALSE);
+    PORT_Assert(fips_mode_available == PR_FALSE);
+
+    /* Mark the run as started; the success flags stay clear until the end. */
+    self_tests_freebl_ran = PR_TRUE;
+    self_tests_success = PR_FALSE;
+    self_tests_freebl_success = PR_FALSE;
+
+#ifdef FREEBL_NO_DEPEND
+    /* If the stubs cannot be set up, only the freebl subset is run. */
+    if (FREEBL_InitStubs() != SECSuccess) {
+        freebl_only = PR_TRUE;
+    }
+#endif
+
+    self_tests_freebl_ran = PR_TRUE;
+
+    if (!freebl_only) {
+        self_tests_ran = PR_TRUE; /* the full set is being run */
+        BL_Init();                /* needs to be called before RSA can be used */
+        RNG_RNGInit();
+        tests |= DO_REST;
+    }
+
+    /* The power-up self tests are always run. */
+    if (freebl_fipsPowerUpSelfTest(tests) != SECSuccess) {
+        return;
+    }
+
+    /* Then check the integrity of the library named by getLibName(). */
+    if (freebl_fipsSoftwareIntegrityTest(getLibName()) != SECSuccess) {
+        return;
+    }
+
+    /* Everything passed: allow what was tested to function from now on. */
+    self_tests_freebl_success = PR_TRUE;
+    if (!freebl_only) {
+        self_tests_success = PR_TRUE;
+    }
+}
+
+/*
+ * Called from the freebl init entry points that control access to all other
+ * freebl functions, so freebl cannot operate if the power-on self tests
+ * failed. Succeeds if the general self tests passed, or if freebl_only is
+ * true and the freebl self tests passed; any other case is SECFailure.
+ */
+SECStatus
+BL_FIPSEntryOK(PRBool freebl_only)
+{
+#ifdef NSS_NO_INIT_SUPPORT
+    /* Only for platforms that can't handle one of the INIT schemes: run the
+     * startup tests on first entry so they can keep functioning, though they
+     * don't meet the strict NIST requirements. Without NSS_NO_INIT_SUPPORT or
+     * working init support, freebl always fails the checks below. */
+    if (!self_tests_freebl_ran) {
+        bl_startup_tests();
+    }
+#endif
+    /* if the general self tests succeeded, we're done */
+    if (self_tests_success) {
+        return SECSuccess;
+    }
+    /* standalone freebl can initialize (logical AND: freebl_only is treated
+     * as a truth value, not as a bit mask) */
+    if (freebl_only && self_tests_freebl_success) {
+        return SECSuccess;
+    }
+    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    return SECFailure;
+}
diff --git a/security/nss/lib/freebl/freebl.def b/security/nss/lib/freebl/freebl.def
new file mode 100644
index 000000000..164c843fd
--- /dev/null
+++ b/security/nss/lib/freebl/freebl.def
@@ -0,0 +1,26 @@
+;+#
+;+# This Source Code Form is subject to the terms of the Mozilla Public
+;+# License, v. 2.0. If a copy of the MPL was not distributed with this
+;+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+;+#
+;+# OK, this file is meant to support SUN, LINUX, AIX and WINDOWS
+;+#   1. For all unix platforms, the string ";-"  means "remove this line"
+;+#   2. For all unix platforms, the string " DATA " will be removed from any 
+;+#	line on which it occurs.
+;+#   3. Lines containing ";+" will have ";+" removed on SUN and LINUX.
+;+#      On AIX, lines containing ";+" will be removed.  
+;+#   4. For all unix platforms, the string ";;" will have the ";;" removed.
+;+#   5. For all unix platforms, after the above processing has taken place,
+;+#    all characters after the first ";" on the line will be removed.  
+;+#    And for AIX, the first ";" will also be removed.
+;+#  This file is passed directly to windows. Since ';' is a comment, all UNIX
+;+#   directives are hidden behind ";", ";+", and ";-"
+;+
+;+NSSprivate_3.11 {               # NSS 3.11 release
+;+    global:
+LIBRARY freebl3 ;-
+EXPORTS	;-
+FREEBL_GetVector;
+;+    local:
+;+       *;
+;+};
diff --git a/security/nss/lib/freebl/freebl.gyp b/security/nss/lib/freebl/freebl.gyp
new file mode 100644
index 000000000..f5ae232ec
--- /dev/null
+++ b/security/nss/lib/freebl/freebl.gyp
@@ -0,0 +1,408 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+{
+  'includes': [
+    '../../coreconf/config.gypi'
+  ],
+  'targets': [
+    {
+      'target_name': 'intel-gcm-wrap_c_lib',
+      'type': 'static_library',
+      'sources': [
+        'intel-gcm-wrap.c'
+      ],
+      'dependencies': [
+        '<(DEPTH)/exports.gyp:nss_exports'
+      ],
+      'cflags': [
+        '-mssse3'
+      ],
+      'cflags_mozilla': [
+        '-mssse3'
+      ]
+    },
+    {
+      'target_name': 'freebl',
+      'type': 'static_library',
+      'sources': [
+        'loader.c'
+      ],
+      'dependencies': [
+        '<(DEPTH)/exports.gyp:nss_exports'
+      ]
+    },
+    {
+      'target_name': '<(freebl_name)',
+      'type': 'shared_library',
+      'sources': [
+        'aeskeywrap.c',
+        'alg2268.c',
+        'alghmac.c',
+        'arcfive.c',
+        'arcfour.c',
+        'camellia.c',
+        'chacha20poly1305.c',
+        'ctr.c',
+        'cts.c',
+        'des.c',
+        'desblapi.c',
+        'dh.c',
+        'drbg.c',
+        'dsa.c',
+        'ec.c',
+        'ecdecode.c',
+        'ecl/ec_naf.c',
+        'ecl/ecl.c',
+        'ecl/ecl_curve.c',
+        'ecl/ecl_gf.c',
+        'ecl/ecl_mult.c',
+        'ecl/ecp_25519.c',
+        'ecl/ecp_256.c',
+        'ecl/ecp_256_32.c',
+        'ecl/ecp_384.c',
+        'ecl/ecp_521.c',
+        'ecl/ecp_aff.c',
+        'ecl/ecp_jac.c',
+        'ecl/ecp_jm.c',
+        'ecl/ecp_mont.c',
+        'fipsfreebl.c',
+        'freeblver.c',
+        'gcm.c',
+        'hmacct.c',
+        'jpake.c',
+        'ldvector.c',
+        'md2.c',
+        'md5.c',
+        'mpi/mp_gf2m.c',
+        'mpi/mpcpucache.c',
+        'mpi/mpi.c',
+        'mpi/mplogic.c',
+        'mpi/mpmontg.c',
+        'mpi/mpprime.c',
+        'pqg.c',
+        'rawhash.c',
+        'rijndael.c',
+        'rsa.c',
+        'rsapkcs.c',
+        'seed.c',
+        'sha512.c',
+        'sha_fast.c',
+        'shvfy.c',
+        'sysrand.c',
+        'tlsprfalg.c'
+      ],
+      'conditions': [
+        [ 'OS=="linux"', {
+          'sources': [
+            'nsslowhash.c',
+            'stubs.c',
+          ],
+          'conditions': [
+            [ 'test_build==1', {
+              'dependencies': [
+                '<(DEPTH)/lib/util/util.gyp:nssutil3',
+              ],
+            }],
+            [ 'target_arch=="x64"', {
+              'sources': [
+                'arcfour-amd64-gas.s',
+                'intel-aes.s',
+                'intel-gcm.s',
+                'mpi/mpi_amd64.c',
+                'mpi/mpi_amd64_gas.s',
+                'mpi/mp_comba.c',
+              ],
+              'dependencies': [
+                'intel-gcm-wrap_c_lib',
+              ],
+              'conditions': [
+                [ 'cc_is_clang==1', {
+                  'cflags': [
+                    '-no-integrated-as',
+                  ],
+                  'cflags_mozilla': [
+                    '-no-integrated-as',
+                  ],
+                  'asflags_mozilla': [
+                    '-no-integrated-as',
+                  ],
+                }],
+              ],
+            }],
+            [ 'target_arch=="ia32"', {
+              'sources': [
+                'mpi/mpi_x86.s',
+              ],
+            }],
+            [ 'target_arch=="arm"', {
+              'sources': [
+                'mpi/mpi_arm.c',
+              ],
+            }],
+          ],
+        }, {
+          # not Linux
+          'conditions': [
+            [ 'moz_fold_libs==0', {
+              'dependencies': [
+                '../util/util.gyp:nssutil3',
+              ],
+            }, {
+              'libraries': [
+                '<(moz_folded_library_name)',
+              ],
+            }],
+          ],
+        }],
+        [ 'OS=="win"', {
+          'sources': [
+            #TODO: building with mingw should not need this.
+            'ecl/uint128.c',
+            #TODO: clang-cl needs -mssse3 here
+            'intel-gcm-wrap.c',
+          ],
+          'libraries': [
+            'advapi32.lib',
+          ],
+          'conditions': [
+            [ 'target_arch=="x64"', {
+              'sources': [
+                'arcfour-amd64-masm.asm',
+                'mpi/mpi_amd64.c',
+                'mpi/mpi_amd64_masm.asm',
+                'mpi/mp_comba_amd64_masm.asm',
+                'intel-aes-x64-masm.asm',
+                'intel-gcm-x64-masm.asm',
+              ],
+            }, {
+              # not x64
+              'sources': [
+                'mpi/mpi_x86_asm.c',
+                'intel-aes-x86-masm.asm',
+                'intel-gcm-x86-masm.asm',
+              ],
+            }],
+          ],
+        }],
+        ['target_arch=="ia32" or target_arch=="x64"', {
+          'sources': [
+            # All intel architectures get the 64 bit version
+            'ecl/curve25519_64.c',
+          ],
+        }, {
+          'sources': [
+            # All non intel architectures get the generic 32 bit implementation (slow!)
+            'ecl/curve25519_32.c',
+          ],
+        }],
+        #TODO uint128.c
+        [ 'disable_chachapoly==0', {
+          'conditions': [
+            [ 'OS!="win" and target_arch=="x64"', {
+              'sources': [
+                'chacha20_vec.c',
+                'poly1305-donna-x64-sse2-incremental-source.c',
+              ],
+            }, {
+              # Windows, or not x64
+              'sources': [
+                'chacha20.c',
+                'poly1305.c',
+              ],
+            }],
+          ],
+        }],
+        [ 'fuzz==1', {
+          'sources': [
+            'det_rng.c',
+          ],
+          'defines': [
+            'UNSAFE_FUZZER_MODE',
+          ],
+        }],
+        [ 'test_build==1', {
+          'defines': [
+            'CT_VERIF',
+          ],
+        }],
+        [ 'OS=="mac"', {
+          'conditions': [
+            [ 'target_arch=="ia32"', {
+              'sources': [
+                'mpi/mpi_sse2.s',
+              ],
+              'defines': [
+                'MP_USE_UINT_DIGIT',
+                'MP_ASSEMBLY_MULTIPLY',
+                'MP_ASSEMBLY_SQUARE',
+                'MP_ASSEMBLY_DIV_2DX1D',
+              ],
+            }],
+          ],
+        }],
+      ],
+      'dependencies': [
+        '<(DEPTH)/exports.gyp:nss_exports',
+      ],
+      'variables': {
+       'conditions': [
+         [ 'OS=="linux"', {
+           'mapfile': 'freebl_hash_vector.def',
+         }, {
+           'mapfile': 'freebl.def',
+         }],
+       ]
+      },
+      'ldflags': [
+        '-Wl,-Bsymbolic'
+      ]
+    },
+  ],
+  'conditions': [
+    [ 'OS=="linux"', {
+      # stub build
+      'targets': [
+        {
+          'target_name': 'freebl3',
+          'type': 'shared_library',
+          'defines': [
+            'FREEBL_NO_DEPEND',
+          ],
+          'sources': [
+            'lowhash_vector.c'
+          ],
+          'dependencies': [
+            '<(DEPTH)/exports.gyp:nss_exports'
+          ],
+          'variables': {
+            'mapfile': 'freebl_hash.def'
+          }
+        },
+      ],
+    }],
+  ],
+  'target_defaults': {
+    'include_dirs': [
+      'mpi',
+      'ecl'
+    ],
+    'defines': [
+      'SHLIB_SUFFIX=\"<(dll_suffix)\"',
+      'SHLIB_PREFIX=\"<(dll_prefix)\"',
+      'SHLIB_VERSION=\"3\"',
+      'SOFTOKEN_SHLIB_VERSION=\"3\"',
+      'RIJNDAEL_INCLUDE_TABLES',
+      'MP_API_COMPATIBLE'
+    ],
+    'conditions': [
+      [ 'OS=="win" and target_arch=="ia32"', {
+        'msvs_settings': {
+          'VCCLCompilerTool': {
+            #TODO: -Ox optimize flags
+            'PreprocessorDefinitions': [
+              'NSS_X86_OR_X64',
+              'NSS_X86',
+              'MP_ASSEMBLY_MULTIPLY',
+              'MP_ASSEMBLY_SQUARE',
+              'MP_ASSEMBLY_DIV_2DX1D',
+              'MP_USE_UINT_DIGIT',
+              'MP_NO_MP_WORD',
+              'USE_HW_AES',
+              'INTEL_GCM',
+            ],
+          },
+        },
+      }],
+      [ 'OS=="win" and target_arch=="x64"', {
+        'msvs_settings': {
+          'VCCLCompilerTool': {
+            #TODO: -Ox optimize flags
+            'PreprocessorDefinitions': [
+              'NSS_USE_64',
+              'NSS_X86_OR_X64',
+              'NSS_X64',
+              'MP_IS_LITTLE_ENDIAN',
+              'NSS_BEVAND_ARCFOUR',
+              'MPI_AMD64',
+              'MP_ASSEMBLY_MULTIPLY',
+              'NSS_USE_COMBA',
+              'USE_HW_AES',
+              'INTEL_GCM',
+            ],
+          },
+        },
+      }],
+      [ 'OS!="win"', {
+        'conditions': [
+          [ 'target_arch=="x64"', {
+            'defines': [
+              'NSS_USE_64',
+              'NSS_X86_OR_X64',
+              'NSS_X64',
+              # The Makefile does version-tests on GCC, but we're not doing that here.
+              'HAVE_INT128_SUPPORT',
+            ],
+          }, {
+            'sources': [
+              'ecl/uint128.c',
+            ],
+          }],
+          [ 'target_arch=="ia32"', {
+            'defines': [
+              'NSS_X86_OR_X64',
+              'NSS_X86',
+            ],
+          }],
+        ],
+      }],
+      [ 'OS=="linux"', {
+        'defines': [
+          'FREEBL_LOWHASH',
+        ],
+        'conditions': [
+          [ 'test_build==0', {
+            'defines': [
+              'FREEBL_NO_DEPEND',
+            ],
+          }],
+          [ 'target_arch=="x64"', {
+            'defines': [
+              'MP_IS_LITTLE_ENDIAN',
+              'NSS_BEVAND_ARCFOUR',
+              'MPI_AMD64',
+              'MP_ASSEMBLY_MULTIPLY',
+              'NSS_USE_COMBA',
+            ],
+          }],
+          [ 'target_arch=="x64" and use_msan==0', {
+            'defines': [
+              'USE_HW_AES',
+              'INTEL_GCM',
+            ],
+          }],
+          [ 'target_arch=="ia32"', {
+            'defines': [
+              'MP_IS_LITTLE_ENDIAN',
+              'MP_ASSEMBLY_MULTIPLY',
+              'MP_ASSEMBLY_SQUARE',
+              'MP_ASSEMBLY_DIV_2DX1D',
+              'MP_USE_UINT_DIGIT',
+            ],
+          }],
+          [ 'target_arch=="arm"', {
+            'defines': [
+              'MP_ASSEMBLY_MULTIPLY',
+              'MP_ASSEMBLY_SQUARE',
+              'MP_USE_UINT_DIGIT',
+              'SHA_NO_LONG_LONG',
+            ],
+          }],
+        ],
+      }],
+    ],
+  },
+  'variables': {
+    'module': 'nss',
+  }
+}
diff --git a/security/nss/lib/freebl/freebl.rc b/security/nss/lib/freebl/freebl.rc
new file mode 100644
index 000000000..444ae5d03
--- /dev/null
+++ b/security/nss/lib/freebl/freebl.rc
@@ -0,0 +1,68 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "softkver.h"
+#include <winver.h>
+
+#define MY_LIBNAME "freebl"
+#define MY_FILEDESCRIPTION "NSS freebl Library"
+
+#define STRINGIZE(x) #x
+#define STRINGIZE2(x) STRINGIZE(x)
+#define SOFTOKEN_VMAJOR_STR STRINGIZE2(SOFTOKEN_VMAJOR)
+
+#ifdef _DEBUG
+#define MY_DEBUG_STR " (debug)"
+#define MY_FILEFLAGS_1 VS_FF_DEBUG
+#else
+#define MY_DEBUG_STR ""
+#define MY_FILEFLAGS_1 0x0L
+#endif
+#if SOFTOKEN_BETA
+#define MY_FILEFLAGS_2 MY_FILEFLAGS_1|VS_FF_PRERELEASE
+#else
+#define MY_FILEFLAGS_2 MY_FILEFLAGS_1
+#endif
+
+#ifdef WINNT
+#define MY_FILEOS VOS_NT_WINDOWS32
+#else
+#define MY_FILEOS VOS__WINDOWS32
+#endif
+
+#define MY_INTERNAL_NAME MY_LIBNAME SOFTOKEN_VMAJOR_STR
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version-information resource
+//
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION SOFTOKEN_VMAJOR,SOFTOKEN_VMINOR,SOFTOKEN_VPATCH,SOFTOKEN_VBUILD
+ PRODUCTVERSION SOFTOKEN_VMAJOR,SOFTOKEN_VMINOR,SOFTOKEN_VPATCH,SOFTOKEN_VBUILD
+ FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
+ FILEFLAGS MY_FILEFLAGS_2
+ FILEOS MY_FILEOS
+ FILETYPE VFT_DLL
+ FILESUBTYPE 0x0L // not used
+
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "040904B0" // Lang=US English, CharSet=Unicode
+        BEGIN
+            VALUE "CompanyName", "Mozilla Foundation\0"
+            VALUE "FileDescription", MY_FILEDESCRIPTION MY_DEBUG_STR "\0"
+            VALUE "FileVersion", SOFTOKEN_VERSION "\0"
+            VALUE "InternalName", MY_INTERNAL_NAME "\0"
+            VALUE "OriginalFilename", MY_INTERNAL_NAME ".dll\0"
+            VALUE "ProductName", "Network Security Services\0"
+            VALUE "ProductVersion", SOFTOKEN_VERSION "\0"
+        END
+    END
+    BLOCK "VarFileInfo"
+    BEGIN
+        VALUE "Translation", 0x409, 1200
+    END
+END
diff --git a/security/nss/lib/freebl/freebl_hash.def b/security/nss/lib/freebl/freebl_hash.def
new file mode 100644
index 000000000..9fd27367e
--- /dev/null
+++ b/security/nss/lib/freebl/freebl_hash.def
@@ -0,0 +1,39 @@
+;+#
+;+# This Source Code Form is subject to the terms of the Mozilla Public
+;+# License, v. 2.0. If a copy of the MPL was not distributed with this
+;+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+;+#
+;+# OK, this file is meant to support SUN, LINUX, AIX and WINDOWS
+;+#   1. For all unix platforms, the string ";-"  means "remove this line"
+;+#   2. For all unix platforms, the string " DATA " will be removed from any 
+;+#	line on which it occurs.
+;+#   3. Lines containing ";+" will have ";+" removed on SUN and LINUX.
+;+#      On AIX, lines containing ";+" will be removed.  
+;+#   4. For all unix platforms, the string ";;" will have the ";;" removed.
+;+#   5. For all unix platforms, after the above processing has taken place,
+;+#    all characters after the first ";" on the line will be removed.  
+;+#    And for AIX, the first ";" will also be removed.
+;+#  This file is passed directly to windows. Since ';' is a comment, all UNIX
+;+#   directives are hidden behind ";", ";+", and ";-"
+;+
+;+NSSprivate_3.11 {               # NSS 3.11 release
+;+    global:
+LIBRARY freebl3 ;-
+EXPORTS	;-
+FREEBL_GetVector;
+;+    local:
+;+       *;
+;+};
+;+NSSRAWHASH_3.12.3 {             # NSS 3.12.3 release
+;+    global:
+NSSLOW_Init;
+NSSLOW_Shutdown;
+NSSLOWHASH_Length;
+NSSLOWHASH_Begin;
+NSSLOWHASH_Destroy;
+NSSLOWHASH_End;
+NSSLOWHASH_NewContext;
+NSSLOWHASH_Update;
+;+    local:
+;+       *;
+;+};
diff --git a/security/nss/lib/freebl/freebl_hash_vector.def b/security/nss/lib/freebl/freebl_hash_vector.def
new file mode 100644
index 000000000..9d7d07d54
--- /dev/null
+++ b/security/nss/lib/freebl/freebl_hash_vector.def
@@ -0,0 +1,34 @@
+;+#
+;+# This Source Code Form is subject to the terms of the Mozilla Public
+;+# License, v. 2.0. If a copy of the MPL was not distributed with this
+;+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+;+#
+;+# OK, this file is meant to support SUN, LINUX, AIX and WINDOWS
+;+#   1. For all unix platforms, the string ";-"  means "remove this line"
+;+#   2. For all unix platforms, the string " DATA " will be removed from any 
+;+#	line on which it occurs.
+;+#   3. Lines containing ";+" will have ";+" removed on SUN and LINUX.
+;+#      On AIX, lines containing ";+" will be removed.  
+;+#   4. For all unix platforms, the string ";;" will have the ";;" removed.
+;+#   5. For all unix platforms, after the above processing has taken place,
+;+#    all characters after the first ";" on the line will be removed.  
+;+#    And for AIX, the first ";" will also be removed.
+;+#  This file is passed directly to windows. Since ';' is a comment, all UNIX
+;+#   directives are hidden behind ";", ";+", and ";-"
+;+
+;+NSSprivate_3.11 {               # NSS 3.11 release
+;+    global:
+LIBRARY freebl3 ;-
+EXPORTS	;-
+FREEBL_GetVector;
+;+    local:
+;+       *;
+;+};
+;+NSSprivate_3.16 {               # NSS 3.16 release
+;+    global:
+LIBRARY freebl3 ;-
+EXPORTS	;-
+NSSLOW_GetVector;
+;+    local:
+;+       *;
+;+};
diff --git a/security/nss/lib/freebl/freeblver.c b/security/nss/lib/freebl/freeblver.c
new file mode 100644
index 000000000..9136f0b0b
--- /dev/null
+++ b/security/nss/lib/freebl/freeblver.c
@@ -0,0 +1,18 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Library identity and versioning */
+
+#include "softkver.h"
+
+#if defined(DEBUG)
+#define _DEBUG_STRING " (debug)"
+#else
+#define _DEBUG_STRING ""
+#endif
+
+/*
+ * Version string: "Version: NSS " SOFTOKEN_VERSION, plus " (debug)" if DEBUG
+ */
+const char __nss_freebl_version[] = "Version: NSS " SOFTOKEN_VERSION _DEBUG_STRING;
diff --git a/security/nss/lib/freebl/gcm.c b/security/nss/lib/freebl/gcm.c
new file mode 100644
index 000000000..22121001b
--- /dev/null
+++ b/security/nss/lib/freebl/gcm.c
@@ -0,0 +1,860 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+#include "blapii.h"
+#include "blapit.h"
+#include "gcm.h"
+#include "ctr.h"
+#include "secerr.h"
+#include "prtypes.h"
+#include "pkcs11t.h"
+
+#include <limits.h>
+
+/**************************************************************************
+ *          First implement the Galois hash function of GCM (gcmHash)     *
+ **************************************************************************/
+#define GCM_HASH_LEN_LEN 8 /* gcm hash defines lengths to be 64 bits */
+
+typedef struct gcmHashContextStr gcmHashContext;
+/* forward declarations of the gcmHash functions implemented in this file */
+static SECStatus gcmHash_InitContext(gcmHashContext *ghash,
+                                     const unsigned char *H,
+                                     unsigned int blocksize);
+static void gcmHash_DestroyContext(gcmHashContext *ghash, PRBool freeit);
+static SECStatus gcmHash_Update(gcmHashContext *ghash,
+                                const unsigned char *buf, unsigned int len,
+                                unsigned int blocksize);
+static SECStatus gcmHash_Sync(gcmHashContext *ghash, unsigned int blocksize);
+static SECStatus gcmHash_Final(gcmHashContext *ghash, unsigned char *outbuf,
+                               unsigned int *outlen, unsigned int maxout,
+                               unsigned int blocksize);
+static SECStatus gcmHash_Reset(gcmHashContext *ghash,
+                               const unsigned char *AAD,
+                               unsigned int AADLen, unsigned int blocksize);
+
+/* compile time defines to select how the GF2 multiply is calculated.
+ * There are currently 2 algorithms implemented here: MPI and ALGORITHM_1.
+ *
+ * MPI uses the GF2m implemented in mpi to support GF2 ECC.
+ * ALGORITHM_1 is the Algorithm 1 in both NIST SP 800-38D and
+ * "The Galois/Counter Mode of Operation (GCM)", McGrew & Viega.
+ */
+#if !defined(GCM_USE_ALGORITHM_1) && !defined(GCM_USE_MPI)
+/* Default to MPI: it is about 5x faster with the same or less complexity.
+ * It's possible to use tables to speed things up even more. */
+#define GCM_USE_MPI 1
+#endif
+
+/* GCM defines the bit string to be LSB first, which is exactly opposite
+ * everyone else, including hardware. gcm_byte_rev[b] is the byte b with its
+ * 8 bits in reverse order (e.g. 0x01 -> 0x80); used to convert to/from GCM. */
+static const unsigned char gcm_byte_rev[256] = {
+    0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
+    0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
+    0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
+    0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
+    0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
+    0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
+    0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
+    0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
+    0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
+    0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
+    0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
+    0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
+    0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
+    0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
+    0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
+    0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
+    0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
+    0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
+    0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
+    0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
+    0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
+    0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
+    0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
+    0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
+    0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
+    0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
+    0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
+    0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
+    0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
+    0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
+    0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
+    0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff
+};
+
+#ifdef GCM_TRACE
+#include <stdio.h>
+/* GCM_TRACE_X needs 'blocksize' in the caller's scope; GCM_TRACE_BLOCK needs 'i' */
+#define GCM_TRACE_X(ghash, label)         \
+    {                                     \
+        unsigned char _X[MAX_BLOCK_SIZE]; \
+        int i;                            \
+        gcm_getX(ghash, _X, blocksize);   \
+        printf(label, (ghash)->m);        \
+        for (i = 0; i < blocksize; i++)   \
+            printf("%02x", _X[i]);        \
+        printf("\n");                     \
+    }
+#define GCM_TRACE_BLOCK(label, buf, blocksize) \
+    {                                          \
+        printf(label);                         \
+        for (i = 0; i < blocksize; i++)        \
+            printf("%02x", buf[i]);            \
+        printf("\n");                          \
+    }
+#else /* without GCM_TRACE both macros expand to nothing */
+#define GCM_TRACE_X(ghash, label)
+#define GCM_TRACE_BLOCK(label, buf, blocksize)
+#endif
+
+#ifdef GCM_USE_MPI
+
+#ifdef GCM_USE_ALGORITHM_1
+#error "Only define one of GCM_USE_MPI, GCM_USE_ALGORITHM_1"
+#endif
+/* use the MPI functions to calculate Xn = (Xn-1^C_i)*H mod poly */
+#include "mpi.h"
+#include "secmpi.h"
+#include "mplogic.h"
+#include "mp_gf2m.h"
+
+/* state needed to handle GCM Hash function */
+struct gcmHashContextStr {
+    mp_int H; /* hash subkey, read in after bit reversal by gcm_reverse */
+    mp_int X; /* running hash value */
+    mp_int C_i; /* scratch: the current input block, then X ^ block */
+    const unsigned int *poly; /* reduction polynomial handed to mp_bmulmod */
+    unsigned char buffer[MAX_BLOCK_SIZE]; /* partial block waiting for more input */
+    unsigned int bufLen; /* number of bytes held in buffer */
+    int m; /* blocks hashed since the last gcm_zeroX; only read by GCM_TRACE_X */
+    unsigned char counterBuf[2 * GCM_HASH_LEN_LEN]; /* two saved bit lengths, see gcmHash_Sync */
+    PRUint64 cLen; /* bits passed to gcmHash_Update since the last gcmHash_Sync */
+};
+
+/* f = x^128 + x^7 + x^2 + x + 1 */
+static const unsigned int poly_128[] = { 128, 7, 2, 1, 0 };
+
+/* GCM orders bits backwards from everything else: reverse byte order and bit order */
+static void
+gcm_reverse(unsigned char *target, const unsigned char *src,
+            unsigned int blocksize)
+{
+    unsigned int dst = blocksize;
+    while (dst-- > 0) {
+        target[dst] = gcm_byte_rev[*src++];
+    }
+}
+
+/* Initialize a gcmHashContext from the blocksize-byte hash subkey H. Fails, with
+ * ghash cleared, on an MPI error or a blocksize that has no polynomial defined. */
+static SECStatus
+gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H,
+                    unsigned int blocksize)
+{
+    SECStatus rv = SECFailure;
+    mp_err err = MP_OKAY;
+    unsigned char H_rev[MAX_BLOCK_SIZE];
+
+    MP_DIGITS(&ghash->H) = 0;
+    MP_DIGITS(&ghash->X) = 0;
+    MP_DIGITS(&ghash->C_i) = 0;
+    CHECK_MPI_OK(mp_init(&ghash->H));
+    CHECK_MPI_OK(mp_init(&ghash->X));
+    CHECK_MPI_OK(mp_init(&ghash->C_i));
+    mp_zero(&ghash->X);
+    gcm_reverse(H_rev, H, blocksize);
+    CHECK_MPI_OK(mp_read_unsigned_octets(&ghash->H, H_rev, blocksize));
+    /* Each blocksize has its own irreducible polynomial; only 16 (=128 bits) is defined */
+    if (blocksize != 16) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        goto cleanup;
+    }
+    ghash->poly = poly_128;
+    ghash->cLen = 0;
+    ghash->bufLen = 0;
+    ghash->m = 0;
+    PORT_Memset(ghash->counterBuf, 0, sizeof(ghash->counterBuf));
+    rv = SECSuccess;
+cleanup:
+    /* H_rev is a copy of the hash subkey; don't leave it on the stack */
+    PORT_Memset(H_rev, 0, sizeof(H_rev));
+    if (rv != SECSuccess) {
+        gcmHash_DestroyContext(ghash, PR_FALSE);
+    }
+    return rv;
+}
+
+/* Destroy a HashContext. The whole context, including the mp_int digits, is
+ * zeroed so this function is idempotent if called with freeit == PR_FALSE. */
+static void
+gcmHash_DestroyContext(gcmHashContext *ghash, PRBool freeit)
+{
+    mp_clear(&ghash->C_i);
+    mp_clear(&ghash->X);
+    mp_clear(&ghash->H);
+    PORT_Memset(ghash, 0, sizeof(*ghash));
+    if (freeit != PR_FALSE) {
+        PORT_Free(ghash);
+    }
+}
+
+/* Serialize the hash value X into T as exactly blocksize bytes, zero padded at
+ * the front and converted back to GCM bit order by gcm_reverse. */
+static SECStatus
+gcm_getX(gcmHashContext *ghash, unsigned char *T, unsigned int blocksize)
+{
+    unsigned char padded[MAX_BLOCK_SIZE];
+    unsigned int pad;
+    int len = mp_unsigned_octet_size(&ghash->X);
+
+    if (len <= 0) {
+        goto failure;
+    }
+    PORT_Assert((unsigned int)len <= blocksize);
+    if ((unsigned int)len > blocksize) {
+        goto failure;
+    }
+
+    /* X may need fewer than blocksize octets; left-pad it with zeros */
+    pad = blocksize - (unsigned int)len;
+    if (pad != 0) {
+        PORT_Memset(padded, 0, pad);
+    }
+
+    if (mp_to_unsigned_octets(&ghash->X, padded + pad, len) < 0) {
+        goto failure;
+    }
+    gcm_reverse(T, padded, blocksize);
+    return SECSuccess;
+
+failure:
+    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    return SECFailure;
+}
+
+/* Hash count blocks of blocksize bytes from buf: for each block C_i,
+ * X = (X ^ C_i) * H mod poly. An MPI error is mapped with MP_TO_SEC_ERROR. */
+static SECStatus
+gcm_HashMult(gcmHashContext *ghash, const unsigned char *buf,
+             unsigned int count, unsigned int blocksize)
+{
+    unsigned char reversed[MAX_BLOCK_SIZE];
+    unsigned int remaining;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECFailure;
+
+    for (remaining = count; remaining != 0; remaining--, buf += blocksize) {
+        ghash->m++;
+        gcm_reverse(reversed, buf, blocksize);
+        CHECK_MPI_OK(mp_read_unsigned_octets(&ghash->C_i, reversed, blocksize));
+        CHECK_MPI_OK(mp_badd(&ghash->X, &ghash->C_i, &ghash->C_i));
+        /*
+         * This multiply is the place to look when speeding up GCM.
+         *
+         * 1) H is a constant in this multiply. H * (0 - 255) could be
+         * precomputed at init time, leaving blocksize xors of table lookups.
+         *
+         * 2) poly is a constant for each blocksize. The modulo reduction
+         * could be calculated by a series of adds and shifts.
+         *
+         * For now we are after functionality, so use mpi's builtin bmulmod.
+         */
+        CHECK_MPI_OK(mp_bmulmod(&ghash->C_i, &ghash->H,
+                                ghash->poly, &ghash->X));
+        GCM_TRACE_X(ghash, "X%d = ")
+    }
+    rv = SECSuccess;
+cleanup:
+    PORT_Memset(reversed, 0, sizeof(reversed));
+    if (rv != SECSuccess) {
+        MP_TO_SEC_ERROR(err);
+    }
+    return rv;
+}
+
+static void
+gcm_zeroX(gcmHashContext *ghash)
+{
+    ghash->m = 0; /* restart the block count printed by GCM_TRACE_X */
+    mp_zero(&ghash->X);
+}
+
+#endif
+
+#ifdef GCM_USE_ALGORITHM_1
+/* use algorithm 1 of McGrew & Viega "The Galois/Counter Mode of Operation" */
+
+#define GCM_ARRAY_SIZE (MAX_BLOCK_SIZE / sizeof(unsigned long))
+/* state needed to handle GCM Hash function (ALGORITHM_1 variant) */
+struct gcmHashContextStr {
+    unsigned long H[GCM_ARRAY_SIZE]; /* hash subkey, as packed by gcm_bytes_to_longs */
+    unsigned long X[GCM_ARRAY_SIZE]; /* running hash value */
+    unsigned long R; /* reduction constant xored into word 0 on carry out */
+    unsigned char buffer[MAX_BLOCK_SIZE]; /* partial block waiting for more input */
+    unsigned int bufLen; /* number of bytes held in buffer */
+    int m; /* blocks hashed since the last gcm_zeroX; only read by GCM_TRACE_X */
+    unsigned char counterBuf[2 * GCM_HASH_LEN_LEN]; /* two saved bit lengths, see gcmHash_Sync */
+    PRUint64 cLen; /* bits passed to gcmHash_Update since the last gcmHash_Sync */
+};
+
+/* Pack bytes into longs (lowest offset = least significant), bit-reversing each */
+static void
+gcm_bytes_to_longs(unsigned long *l, const unsigned char *c, unsigned int len)
+{
+    unsigned int words = len / sizeof(unsigned long);
+    unsigned int w, b;
+
+    PORT_Assert(len % sizeof(unsigned long) == 0);
+    for (w = 0; w < words; w++, c += sizeof(unsigned long)) {
+        unsigned long acc = 0;
+        for (b = sizeof(unsigned long); b-- > 0;) {
+            acc = (acc << PR_BITS_PER_BYTE) | gcm_byte_rev[c[b]];
+        }
+        l[w] = acc;
+    }
+}
+
+/* Inverse of gcm_bytes_to_longs: unpack longs to bytes, bit-reversing each */
+static void
+gcm_longs_to_bytes(const unsigned long *l, unsigned char *c, unsigned int len)
+{
+    unsigned int words = len / sizeof(unsigned long);
+    unsigned int w, b;
+
+    PORT_Assert(len % sizeof(unsigned long) == 0);
+    for (w = 0; w < words; w++) {
+        unsigned long rest = l[w];
+        for (b = 0; b < sizeof(unsigned long); b++) {
+            *c++ = gcm_byte_rev[rest & 0xff];
+            rest >>= PR_BITS_PER_BYTE;
+        }
+    }
+}
+
+/* Initialize a gcmHashContext from the hash subkey H (blocksize bytes).
+ * Only blocksize 16 has a reduction polynomial defined; any other value
+ * fails with SEC_ERROR_INVALID_ARGS. */
+static SECStatus
+gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H,
+                    unsigned int blocksize)
+{
+    PORT_Memset(ghash->H, 0, sizeof(ghash->H));
+    PORT_Memset(ghash->X, 0, sizeof(ghash->X));
+    gcm_bytes_to_longs(ghash->H, H, blocksize);
+
+    /* select the irreducible polynomial for this blocksize */
+    if (blocksize != 16) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* 128 bits: x^7 + x^2 + x + 1 */
+    ghash->R = (unsigned long)0x87;
+
+    /* start with an empty buffer and zeroed length counters */
+    ghash->m = 0;
+    ghash->bufLen = 0;
+    ghash->cLen = 0;
+    PORT_Memset(ghash->counterBuf, 0, sizeof(ghash->counterBuf));
+    return SECSuccess;
+}
+
+/* Destroy a HashContext. The context is simply zeroed, so this function
+ * is idempotent if called with freeit == PR_FALSE. */
+static void
+gcmHash_DestroyContext(gcmHashContext *ghash, PRBool freeit)
+{
+    PORT_Memset(ghash, 0, sizeof(*ghash));
+    if (freeit != PR_FALSE) {
+        PORT_Free(ghash);
+    }
+}
+
+/* Shift t (count words, t[0] least significant) left one bit; return the carry out */
+static unsigned long
+gcm_shift_one(unsigned long *t, unsigned int count)
+{
+    unsigned long *end = t + count;
+    unsigned long carry = 0;
+    for (; t != end; t++) {
+        unsigned long out = *t >> ((sizeof(unsigned long) * PR_BITS_PER_BYTE) - 1);
+        *t = (*t << 1) | carry;
+        carry = out;
+    }
+    return carry;
+}
+
+static SECStatus
+gcm_getX(gcmHashContext *ghash, unsigned char *T, unsigned int blocksize)
+{
+    gcm_longs_to_bytes(ghash->X, T, blocksize); /* X as blocksize bytes, back in GCM bit order */
+    return SECSuccess; /* this variant has no failure path */
+}
+
+#define GCM_XOR(t, s, len)    \
+    for (l = 0; l < len; l++) \
+    t[l] ^= s[l] /* 'l' must be declared by the function using the macro */
+/* Hash count blocks from buf: X = (X ^ C_i) * H, by shift-and-xor over H's bits */
+static SECStatus
+gcm_HashMult(gcmHashContext *ghash, const unsigned char *buf,
+             unsigned int count, unsigned int blocksize)
+{
+    unsigned long C_i[GCM_ARRAY_SIZE];
+    unsigned int arraysize = blocksize / sizeof(unsigned long);
+    unsigned int blk, word, bit, l;
+
+    for (blk = 0; blk < count; blk++, buf += blocksize) {
+        ghash->m++;
+        gcm_bytes_to_longs(C_i, buf, blocksize);
+        GCM_XOR(C_i, ghash->X, arraysize);
+        /* multiply X = C_i * H */
+        PORT_Memset(ghash->X, 0, sizeof(ghash->X));
+        for (word = 0; word < arraysize; word++) {
+            unsigned long hbits = ghash->H[word];
+            for (bit = sizeof(unsigned long) * PR_BITS_PER_BYTE; bit != 0; bit--) {
+                if (hbits & 1) {
+                    GCM_XOR(ghash->X, C_i, arraysize);
+                }
+                if (gcm_shift_one(C_i, arraysize)) {
+                    C_i[0] ^= ghash->R;
+                }
+                hbits >>= 1;
+            }
+        }
+        GCM_TRACE_X(ghash, "X%d = ")
+    }
+    PORT_Memset(C_i, 0, sizeof(C_i));
+    return SECSuccess;
+}
+
+static void
+gcm_zeroX(gcmHashContext *ghash)
+{
+    ghash->m = 0; /* restart the block count printed by GCM_TRACE_X */
+    PORT_Memset(ghash->X, 0, sizeof(ghash->X));
+}
+#endif
+
+/*
+ * implement GCM GHASH using the freebl GHASH function. The gcm_HashMult
+ * function always takes blocksize lengths of data. gcmHash_Update will
+ * format the data properly, and counts the bits it is given in ghash->cLen.
+ */
+static SECStatus
+gcmHash_Update(gcmHashContext *ghash, const unsigned char *buf,
+               unsigned int len, unsigned int blocksize)
+{
+    unsigned int blocks;
+    SECStatus rv;
+
+    /* widen len first: len * PR_BITS_PER_BYTE can wrap in unsigned int */
+    ghash->cLen += ((PRUint64)len * PR_BITS_PER_BYTE);
+    /* first deal with the current buffer of data. Try to fill it out so
+     * we can hash it */
+    if (ghash->bufLen) {
+        unsigned int needed = PR_MIN(len, blocksize - ghash->bufLen);
+        if (needed != 0) {
+            PORT_Memcpy(ghash->buffer + ghash->bufLen, buf, needed);
+        }
+        buf += needed;
+        len -= needed;
+        ghash->bufLen += needed;
+        if (len == 0) {
+            /* didn't add enough to hash the data, nothing more to do */
+            return SECSuccess;
+        }
+        PORT_Assert(ghash->bufLen == blocksize);
+        /* hash the buffer and clear it */
+        rv = gcm_HashMult(ghash, ghash->buffer, 1, blocksize);
+        PORT_Memset(ghash->buffer, 0, blocksize);
+        ghash->bufLen = 0;
+        if (rv != SECSuccess) {
+            return SECFailure;
+        }
+    }
+    /* now hash any full blocks remaining in the data stream */
+    blocks = len / blocksize;
+    if (blocks) {
+        rv = gcm_HashMult(ghash, buf, blocks, blocksize);
+        if (rv != SECSuccess) {
+            return SECFailure;
+        }
+        buf += blocks * blocksize;
+        len -= blocks * blocksize;
+    }
+
+    /* save any remainder in the buffer to be hashed with the next call */
+    if (len != 0) {
+        PORT_Memcpy(ghash->buffer, buf, len);
+        ghash->bufLen = len;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Flush any buffered partial block, zero padded, through the GHASH engine
+ * and save the bit length hashed since the last sync for gcmHash_Final.
+ */
+static SECStatus
+gcmHash_Sync(gcmHashContext *ghash, unsigned int blocksize)
+{
+    unsigned char *prevLen = ghash->counterBuf;
+    unsigned char *curLen = ghash->counterBuf + GCM_HASH_LEN_LEN;
+    PRUint64 bits = ghash->cLen;
+    SECStatus rv;
+    int i;
+
+    /* the length saved by the previous sync moves to the first half */
+    PORT_Memcpy(prevLen, curLen, GCM_HASH_LEN_LEN);
+    /* the current length goes in the second half, most significant byte first */
+    for (i = GCM_HASH_LEN_LEN - 1; i >= 0; i--) {
+        curLen[i] = bits & 0xff;
+        bits >>= PR_BITS_PER_BYTE;
+    }
+    ghash->cLen = 0;
+
+    if (ghash->bufLen == 0) {
+        return SECSuccess;
+    }
+    /* now zero fill the buffer and hash the last block */
+    PORT_Memset(ghash->buffer + ghash->bufLen, 0, blocksize - ghash->bufLen);
+    rv = gcm_HashMult(ghash, ghash->buffer, 1, blocksize);
+    PORT_Memset(ghash->buffer, 0, blocksize);
+    ghash->bufLen = 0;
+    return (rv == SECSuccess) ? SECSuccess : SECFailure;
+}
+
+/*
+ * This does the final sync, hashes the saved lengths, then copies the
+ * hashed output "T" to outbuf. At most blocksize bytes are copied (fewer
+ * if maxout is smaller) and the number copied is stored in *outlen.
+ * On failure outbuf and *outlen are left untouched.
+ */
+static SECStatus
+gcmHash_Final(gcmHashContext *ghash, unsigned char *outbuf,
+              unsigned int *outlen, unsigned int maxout,
+              unsigned int blocksize)
+{
+    unsigned char T[MAX_BLOCK_SIZE];
+    unsigned int copied;
+    SECStatus rv;
+
+    /* flush buffered input and save the length of the data just hashed */
+    rv = gcmHash_Sync(ghash, blocksize);
+
+    /* the saved lengths are the last block(s) to go through the hash */
+    if (rv == SECSuccess) {
+        rv = gcm_HashMult(ghash, ghash->counterBuf,
+                          (GCM_HASH_LEN_LEN * 2) / blocksize, blocksize);
+    }
+
+    if (rv == SECSuccess) {
+        GCM_TRACE_X(ghash, "GHASH(H,A,C) = ")
+        rv = gcm_getX(ghash, T, blocksize);
+    }
+
+    if (rv == SECSuccess) {
+        /* never hand back more than one block */
+        copied = PR_MIN(maxout, blocksize);
+        PORT_Memcpy(outbuf, T, copied);
+        *outlen = copied;
+    }
+
+    /* T holds the hash value; clear it whether or not we succeeded */
+    PORT_Memset(T, 0, sizeof(T));
+    return rv;
+}
+
+/* Restart the hash at X = 0 with an empty buffer and zeroed lengths, then hash
+ * the Additional Authenticated Data (if any) and sync so its length is saved. */
+static SECStatus
+gcmHash_Reset(gcmHashContext *ghash, const unsigned char *AAD,
+              unsigned int AADLen, unsigned int blocksize)
+{
+    ghash->cLen = 0;
+    PORT_Memset(ghash->counterBuf, 0, sizeof(ghash->counterBuf));
+    ghash->bufLen = 0;
+    gcm_zeroX(ghash);
+
+    /* now kick things off by hashing the Additional Authenticated Data */
+    if (AADLen == 0) {
+        return SECSuccess;
+    }
+
+    if (gcmHash_Update(ghash, AAD, AADLen, blocksize) != SECSuccess) {
+        return SECFailure;
+    }
+    if (gcmHash_Sync(ghash, blocksize) != SECSuccess) {
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/**************************************************************************
+ *           Now implement the GCM using gcmHash and CTR                  *
+ **************************************************************************/
+
+/* state to handle the full GCM operation (hash and counter) */
+struct GCMContextStr {
+    gcmHashContext ghash_context; /* GHASH state, fed the AAD and the cipher text */
+    CTRContext ctr_context; /* counter mode state used to encrypt and decrypt */
+    unsigned long tagBits; /* tag length in bits, copied from CK_GCM_PARAMS.ulTagBits */
+    unsigned char tagKey[MAX_BLOCK_SIZE]; /* first CTR output block; xored into the hash to form the tag */
+};
+
+/* Create a GCM context. params points to a CK_GCM_PARAMS (IV, AAD, tag bits);
+ * cipher is called with context to encrypt blocks. Returns NULL on failure. */
+GCMContext *
+GCM_CreateContext(void *context, freeblCipherFunc cipher,
+                  const unsigned char *params, unsigned int blocksize)
+{
+    GCMContext *gcm = NULL;
+    gcmHashContext *ghash;
+    unsigned char H[MAX_BLOCK_SIZE];
+    unsigned int tmp;
+    PRBool freeCtr = PR_FALSE;
+    PRBool freeHash = PR_FALSE;
+    const CK_GCM_PARAMS *gcmParams = (const CK_GCM_PARAMS *)params;
+    CK_AES_CTR_PARAMS ctrParams;
+    SECStatus rv;
+
+    if (blocksize > MAX_BLOCK_SIZE || blocksize > sizeof(ctrParams.cb)) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return NULL;
+    }
+    gcm = PORT_ZNew(GCMContext);
+    if (gcm == NULL) {
+        return NULL;
+    }
+    /* first fill in the ghash context: H is the encryption of a zero block */
+    ghash = &gcm->ghash_context;
+    PORT_Memset(H, 0, blocksize);
+    rv = (*cipher)(context, H, &tmp, blocksize, H, blocksize, blocksize);
+    if (rv == SECSuccess) {
+        rv = gcmHash_InitContext(ghash, H, blocksize);
+    }
+    /* H is the hash subkey: don't leave a copy of it on the stack */
+    PORT_Memset(H, 0, sizeof(H));
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+    freeHash = PR_TRUE;
+
+    /* fill in the Counter context */
+    ctrParams.ulCounterBits = 32;
+    PORT_Memset(ctrParams.cb, 0, sizeof(ctrParams.cb));
+    if ((blocksize == 16) && (gcmParams->ulIvLen == 12)) {
+        PORT_Memcpy(ctrParams.cb, gcmParams->pIv, gcmParams->ulIvLen);
+        ctrParams.cb[blocksize - 1] = 1;
+    } else {
+        rv = gcmHash_Update(ghash, gcmParams->pIv, gcmParams->ulIvLen,
+                            blocksize);
+        if (rv == SECSuccess) {
+            rv = gcmHash_Final(ghash, ctrParams.cb, &tmp, blocksize, blocksize);
+        }
+        if (rv != SECSuccess) {
+            goto loser;
+        }
+    }
+    rv = CTR_InitContext(&gcm->ctr_context, context, cipher,
+                         (unsigned char *)&ctrParams, blocksize);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+    freeCtr = PR_TRUE;
+
+    /* fill in the gcm structure */
+    gcm->tagBits = gcmParams->ulTagBits; /* save for final step */
+    /* calculate the final tag key. NOTE: gcm->tagKey is zero to start with.
+     * if this assumption changes, we would need to explicitly clear it here */
+    rv = CTR_Update(&gcm->ctr_context, gcm->tagKey, &tmp, blocksize,
+                    gcm->tagKey, blocksize, blocksize);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+
+    /* finally mix in the AAD data */
+    rv = gcmHash_Reset(ghash, gcmParams->pAAD, gcmParams->ulAADLen, blocksize);
+    if (rv == SECSuccess) {
+        return gcm;
+    }
+
+loser:
+    if (freeCtr) {
+        CTR_DestroyContext(&gcm->ctr_context, PR_FALSE);
+    }
+    if (freeHash) {
+        gcmHash_DestroyContext(&gcm->ghash_context, PR_FALSE);
+    }
+    /* gcm is never NULL here; wipe the tag key before releasing it */
+    PORT_Memset(gcm->tagKey, 0, sizeof(gcm->tagKey));
+    PORT_Free(gcm);
+    return NULL;
+}
+
+void
+GCM_DestroyContext(GCMContext *gcm, PRBool freeit)
+{
+    /* the counter and hash contexts are embedded in gcm, so they go away
+     * when gcm is freed; their destroy functions are called only to
+     * release what they allocated themselves (like mp_int's) */
+    CTR_DestroyContext(&gcm->ctr_context, PR_FALSE);
+    gcmHash_DestroyContext(&gcm->ghash_context, PR_FALSE);
+    PORT_Memset(gcm->tagKey, 0, sizeof(gcm->tagKey));
+    gcm->tagBits = 0;
+    if (freeit != PR_FALSE) {
+        PORT_Free(gcm);
+    }
+}
+
+/* Write the authentication tag to outbuf: the final hash value xored with
+ * tagKey, truncated to tagBits. If outbuf is NULL or maxout is too small,
+ * *outlen reports the bytes needed and SEC_ERROR_OUTPUT_LEN is set. */
+static SECStatus
+gcm_GetTag(GCMContext *gcm, unsigned char *outbuf,
+           unsigned int *outlen, unsigned int maxout,
+           unsigned int blocksize)
+{
+    unsigned int tagBytes;
+    unsigned int extra; /* unused low bits in the last tag byte */
+    unsigned int i;
+
+    tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+    extra = tagBytes * PR_BITS_PER_BYTE - gcm->tagBits;
+
+    /* both "no buffer" and "buffer too small" report the size needed */
+    if ((outbuf == NULL) || (maxout < tagBytes)) {
+        *outlen = tagBytes;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    /* ask for exactly tagBytes of hash, even if the buffer is larger */
+    if (gcmHash_Final(&gcm->ghash_context, outbuf, outlen, tagBytes,
+                      blocksize) != SECSuccess) {
+        return SECFailure;
+    }
+
+    GCM_TRACE_BLOCK("GHASH=", outbuf, blocksize);
+    GCM_TRACE_BLOCK("Y0=", gcm->tagKey, blocksize);
+    for (i = 0; i < *outlen; i++) {
+        outbuf[i] ^= gcm->tagKey[i];
+    }
+    GCM_TRACE_BLOCK("Y0=", gcm->tagKey, blocksize);
+    GCM_TRACE_BLOCK("T=", outbuf, blocksize);
+
+    /* a tag length that is not a whole number of bytes: clear the low bits
+     * of the last byte that fall outside the tag */
+    if (extra != 0) {
+        outbuf[tagBytes - 1] &= ~((1 << extra) - 1);
+    }
+    return SECSuccess;
+}
+
+/*
+ * Encrypt inlen bytes with counter mode, hash the resulting cipher text and
+ * append the tag: outbuf receives inlen + tagBytes bytes. If hashing or tag
+ * generation fails, the cipher text already written is cleared.
+ * See The Galois/Counter Mode of Operation, McGrew and Viega.
+ */
+SECStatus
+GCM_EncryptUpdate(GCMContext *gcm, unsigned char *outbuf,
+                  unsigned int *outlen, unsigned int maxout,
+                  const unsigned char *inbuf, unsigned int inlen,
+                  unsigned int blocksize)
+{
+    unsigned int tagBytes;
+    unsigned int tagLen;
+    SECStatus rv;
+
+    tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+    /* inlen + tagBytes must be representable before it can be compared */
+    if (UINT_MAX - inlen < tagBytes) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (maxout < inlen + tagBytes) {
+        *outlen = inlen + tagBytes;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    if (CTR_Update(&gcm->ctr_context, outbuf, outlen, maxout,
+                   inbuf, inlen, blocksize) != SECSuccess) {
+        return SECFailure;
+    }
+
+    rv = gcmHash_Update(&gcm->ghash_context, outbuf, *outlen, blocksize);
+    if (rv == SECSuccess) {
+        rv = gcm_GetTag(gcm, outbuf + *outlen, &tagLen, maxout - *outlen,
+                        blocksize);
+    }
+    if (rv != SECSuccess) {
+        PORT_Memset(outbuf, 0, *outlen); /* clear the output buffer */
+        *outlen = 0;
+        return SECFailure;
+    }
+    *outlen += tagLen;
+    return SECSuccess;
+}
+
+/*
+ * See The Galois/Counter Mode of Operation, McGrew and Viega.
+ *  The last tagBytes of inbuf are the tag; everything before it is cipher
+ *  text. The mac must always be calculated on the cipher text, not the
+ *  plain text, so unlike encrypt the mac is computed and checked first
+ *  and the CTR update only runs once the tag has matched.
+ *  Returns SECFailure with SEC_ERROR_BAD_DATA when the tag does not match.
+ */
+SECStatus
+GCM_DecryptUpdate(GCMContext *gcm, unsigned char *outbuf,
+                  unsigned int *outlen, unsigned int maxout,
+                  const unsigned char *inbuf, unsigned int inlen,
+                  unsigned int blocksize)
+{
+    unsigned char tag[MAX_BLOCK_SIZE];
+    unsigned int tagBytes;
+    unsigned int cipherLen;
+    unsigned int tagLen;
+    int mismatch;
+
+    tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+
+    /* the input must at least hold the authentication tag */
+    if (inlen < tagBytes) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    cipherLen = inlen - tagBytes;
+
+    /* compute the tag we expect for this cipher text */
+    if (gcmHash_Update(&gcm->ghash_context, inbuf, cipherLen,
+                       blocksize) != SECSuccess) {
+        return SECFailure;
+    }
+    if (gcm_GetTag(gcm, tag, &tagLen, blocksize, blocksize) != SECSuccess) {
+        return SECFailure;
+    }
+
+    /* Don't decrypt if we can't authenticate the encrypted data!
+     * This assumes that if tagBits is not a multiple of 8, the received tag
+     * will preserve the masked off missing bits.  */
+    mismatch = NSS_SecureMemcmp(tag, inbuf + cipherLen, tagBytes);
+    PORT_Memset(tag, 0, sizeof(tag));
+    if (mismatch != 0) {
+        /* force a CKR_ENCRYPTED_DATA_INVALID error in softoken */
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        return SECFailure;
+    }
+
+    /* authenticated: finish the decryption */
+    return CTR_Update(&gcm->ctr_context, outbuf, outlen, maxout,
+                      inbuf, cipherLen, blocksize);
+}
diff --git a/security/nss/lib/freebl/gcm.h b/security/nss/lib/freebl/gcm.h
new file mode 100644
index 000000000..1cdba534d
--- /dev/null
+++ b/security/nss/lib/freebl/gcm.h
@@ -0,0 +1,31 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef GCM_H
+#define GCM_H 1
+
+#include "blapii.h"
+
+typedef struct GCMContextStr GCMContext;
+
+/*
+ * The context argument is the inner cipher context to use with cipher. The
+ * GCMContext does not own context. context needs to remain valid for as long
+ * as the GCMContext is valid.
+ * The cipher argument is a block cipher in the ECB encrypt mode.
+ * The params argument points to a CK_GCM_PARAMS. Returns NULL on failure.
+ */
+GCMContext *GCM_CreateContext(void *context, freeblCipherFunc cipher,
+                              const unsigned char *params, unsigned int blocksize);
+void GCM_DestroyContext(GCMContext *gcm, PRBool freeit); /* frees gcm itself only if freeit */
+SECStatus GCM_EncryptUpdate(GCMContext *gcm, unsigned char *outbuf,
+                            unsigned int *outlen, unsigned int maxout,
+                            const unsigned char *inbuf, unsigned int inlen,
+                            unsigned int blocksize); /* outbuf: cipher text followed by the tag */
+SECStatus GCM_DecryptUpdate(GCMContext *gcm, unsigned char *outbuf,
+                            unsigned int *outlen, unsigned int maxout,
+                            const unsigned char *inbuf, unsigned int inlen,
+                            unsigned int blocksize); /* inbuf: cipher text followed by the tag */
+
+#endif /* GCM_H */
diff --git a/security/nss/lib/freebl/genload.c b/security/nss/lib/freebl/genload.c
new file mode 100644
index 000000000..832deb58c
--- /dev/null
+++ b/security/nss/lib/freebl/genload.c
@@ -0,0 +1,167 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * This file is meant to be included by other .c files.
+ * This file takes a "parameter", the scope which includes this
+ * code shall declare this variable:
+ *   const char *NameOfThisSharedLib;
+ *
+ * NameOfThisSharedLib:
+ *   The file name of the shared library that shall be used as the
+ *   "reference library". The loader will attempt to load the requested
+ *   library from the same directory as the reference library.
+ */
+
+#ifdef XP_UNIX
+#include <unistd.h>
+#define BL_MAXSYMLINKS 20
+
+/*
+ * Resolve 'link' to the pathname it ultimately refers to. With glibc this is
+ * realpath(3)'s canonical path (returned even when 'link' is not a symlink);
+ * otherwise up to BL_MAXSYMLINKS readlink(2) hops are followed and NULL is
+ * returned when 'link' is not a symbolic link. Returns NULL on failure,
+ * including allocation failure. The caller must PR_Free the result.
+ */
+static char*
+loader_GetOriginalPathname(const char* link)
+{
+#ifdef __GLIBC__
+    char* tmp = realpath(link, NULL);
+    /* Copy into PR_Malloc'ed memory because PR_Free might not be using free(). */
+    char* resolved = tmp ? PR_Malloc(strlen(tmp) + 1) : NULL;
+    if (resolved) {
+        strcpy(resolved, tmp);
+    }
+    free(tmp); /* free(NULL) is a no-op */
+    return resolved;
+#else
+    char* resolved = NULL;
+    char* input = NULL;
+    PRUint32 iterations = 0;
+    PRInt32 len = 0, retlen = 0;
+    if (!link) {
+        PR_SetError(PR_INVALID_ARGUMENT_ERROR, 0);
+        return NULL;
+    }
+    len = PR_MAX(1024, strlen(link) + 1);
+    resolved = PR_Malloc(len);
+    input = PR_Malloc(len);
+    if (!resolved || !input) {
+        if (resolved) {
+            PR_Free(resolved);
+        }
+        if (input) {
+            PR_Free(input);
+        }
+        return NULL;
+    }
+    strcpy(input, link);
+    while ((iterations++ < BL_MAXSYMLINKS) &&
+           ((retlen = readlink(input, resolved, len - 1)) > 0)) {
+        char* tmp = input;
+        resolved[retlen] = '\0'; /* readlink does not NUL-terminate */
+        input = resolved; /* the link target becomes the next path to probe */
+        resolved = tmp;
+    }
+    PR_Free(resolved); /* scratch buffer; 'input' holds the last path found */
+    if (iterations == 1 && retlen < 0) { /* the very first readlink failed */
+        PR_Free(input);
+        input = NULL;
+    }
+    return input;
+#endif
+}
+#endif /* XP_UNIX */
+
+/*
+ * Build "<directory of referencePath><name>" and try to load that library.
+ * Returns NULL if referencePath has no directory separator or on failure.
+ */
+static PRLibrary*
+loader_LoadLibInReferenceDir(const char* referencePath, const char* name)
+{
+    PRLibrary* handle = NULL;
+    PRLibSpec spec;
+    char* candidate;
+    size_t dirLen;
+    /* Keep referencePath up to and including its last directory separator. */
+    const char* sep = strrchr(referencePath, PR_GetDirectorySeparator());
+    if (!sep) {
+        return NULL;
+    }
+    dirLen = 1 + sep - referencePath;
+    candidate = (char*)PORT_Alloc(strlen(name) + dirLen + 1);
+    if (!candidate) {
+        return NULL;
+    }
+    memcpy(candidate, referencePath, dirLen);
+    strcpy(candidate + dirLen, name);
+#ifdef DEBUG_LOADER
+    PR_fprintf(PR_STDOUT, "\nAttempting to load fully-qualified %s\n", candidate);
+#endif
+    spec.type = PR_LibSpec_Pathname;
+    spec.value.pathname = candidate;
+    handle = PR_LoadLibraryWithFlags(spec, PR_LD_NOW | PR_LD_LOCAL);
+    PORT_Free(candidate);
+    return handle;
+}
+
+/*
+ * Load 'nameToLoad', preferring the directory that holds the shared library
+ * containing this function (located via PR_GetLibraryFilePathname). If that
+ * fails, fall back to loading by the bare name. Returns NULL on failure.
+ */
+
+static PRLibrary*
+loader_LoadLibrary(const char* nameToLoad)
+{
+    PRLibrary* handle = NULL;
+    PRLibSpec spec;
+    char* selfPath;
+
+    /*
+     * Ask NSPR where the library containing this very function lives. The
+     * lookup may be keyed on the base library name or on a function pointer,
+     * depending on the platform, so both are supplied. An exported symbol
+     * such as NSC_GetFunctionList is avoided because some platforms cannot
+     * find symbols in loaded implicit dependencies.
+     */
+    selfPath = PR_GetLibraryFilePathname(NameOfThisSharedLib,
+                                         (PRFuncPtr)&loader_LoadLibrary);
+    if (selfPath != NULL) {
+        handle = loader_LoadLibInReferenceDir(selfPath, nameToLoad);
+#ifdef XP_UNIX
+        if (handle == NULL) {
+            /* selfPath may be a symbolic link: resolve it and retry there. */
+            char* realSelfPath = loader_GetOriginalPathname(selfPath);
+            if (realSelfPath != NULL) {
+                PR_Free(selfPath);
+                selfPath = realSelfPath;
+                handle = loader_LoadLibInReferenceDir(selfPath, nameToLoad);
+            }
+        }
+#endif
+        PR_Free(selfPath);
+    }
+
+    if (handle != NULL) {
+        return handle;
+    }
+
+    /* Last resort: hand the bare name to PR_LoadLibraryWithFlags. */
+#ifdef DEBUG_LOADER
+    PR_fprintf(PR_STDOUT, "\nAttempting to load %s\n", nameToLoad);
+#endif
+    spec.type = PR_LibSpec_Pathname;
+    spec.value.pathname = nameToLoad;
+    handle = PR_LoadLibraryWithFlags(spec, PR_LD_NOW | PR_LD_LOCAL);
+#ifdef DEBUG_LOADER
+    if (handle == NULL) {
+        PR_fprintf(PR_STDOUT, "\nLoading failed : %s.\n", nameToLoad);
+    }
+#endif
+    return handle;
+}
diff --git a/security/nss/lib/freebl/hmacct.c b/security/nss/lib/freebl/hmacct.c
new file mode 100644
index 000000000..c7815ac05
--- /dev/null
+++ b/security/nss/lib/freebl/hmacct.c
@@ -0,0 +1,335 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "secport.h"
+#include "hasht.h"
+#include "blapit.h"
+#include "hmacct.h"
+#include "secerr.h"
+
+/* MAX_HASH_BIT_COUNT_BYTES is the maximum number of bytes in the hash's length
+ * field. (SHA-384/512 have 128-bit length.) */
+#define MAX_HASH_BIT_COUNT_BYTES 16
+
+/* Some utility functions are needed:
+ *
+ * These macros return the given value with the MSB copied to all the other
+ * bits. The MSB is shifted down to bit 0 with a logical (unsigned) shift and
+ * then negated modulo 2^N, giving 0 or all-ones without relying on the
+ * implementation-defined result of right-shifting a negative signed value.
+ *
+ * Note: the argument to these macros must be an unsigned int.
+ * */
+#define DUPLICATE_MSB_TO_ALL(x) (0u - ((unsigned int)(x) >> (sizeof(unsigned int) * 8 - 1)))
+#define DUPLICATE_MSB_TO_ALL_8(x) ((unsigned char)(DUPLICATE_MSB_TO_ALL(x)))
+
+/* constantTimeGE yields 0xff when a >= b and 0x00 when a < b. Both inputs
+ * must be below MAX_UINT/2 so that the MSB of a - b acts as a borrow flag. */
+static unsigned char
+constantTimeGE(unsigned int a, unsigned int b)
+{
+    const unsigned int diff = a - b; /* MSB is set exactly when a < b */
+    return DUPLICATE_MSB_TO_ALL_8(~diff);
+}
+
+/* constantTimeEQ8 yields 0xff when the two bytes are equal, else 0x00. */
+static unsigned char
+constantTimeEQ8(unsigned char a, unsigned char b)
+{
+    /* a ^ b is 0 only when a == b; 0 - 1 wraps to all-ones (MSB set). */
+    const unsigned int diffMinusOne = (unsigned int)(a ^ b) - 1;
+    return DUPLICATE_MSB_TO_ALL_8(diffMinusOne);
+}
+
+/* MAC performs a constant time SSLv3/TLS MAC of |dataLen| bytes of |data|,
+ * where |dataLen| includes both the authenticated bytes and the MAC tag from
+ * the sender. |dataLen| must be >= the length of the MAC tag.
+ *
+ * |dataTotalLen| is >= |dataLen| and also accounts for any padding bytes
+ * that may follow the sender's MAC. (Only a single block of padding may
+ * follow in SSLv3, or up to 255 bytes in TLS.)
+ *
+ * Since the results of decryption are secret information (otherwise a
+ * padding-oracle is created), this function is constant-time with respect to
+ * |dataLen|.
+ *
+ * |header| contains either the 13-byte TLS header (sequence number, record
+ * type etc), or the SSLv3 header with the SSLv3 padding bytes etc. Returns
+ * SECFailure if the hash context cannot be created, SECSuccess otherwise. */
+static SECStatus
+MAC(unsigned char *mdOut,
+    unsigned int *mdOutLen,
+    unsigned int mdOutMax,
+    const SECHashObject *hashObj,
+    const unsigned char *macSecret,
+    unsigned int macSecretLen,
+    const unsigned char *header,
+    unsigned int headerLen,
+    const unsigned char *data,
+    unsigned int dataLen,
+    unsigned int dataTotalLen,
+    unsigned char isSSLv3)
+{
+    void *mdState = hashObj->create();
+    const unsigned int mdSize = hashObj->length;
+    const unsigned int mdBlockSize = hashObj->blocklength;
+    /* mdLengthSize is the number of bytes in the length field that terminates
+     * the hash.
+     *
+     * This assumes that hash functions with a 64 byte block size use a 64-bit
+     * length, and otherwise they use a 128-bit length. This is true of {MD5,
+     * SHA*} (which are all of the hash functions specified for use with TLS
+     * today). */
+    const unsigned int mdLengthSize = mdBlockSize == 64 ? 8 : 16;
+
+    const unsigned int sslv3PadLen = hashObj->type == HASH_AlgMD5 ? 48 : 40;
+
+    /* varianceBlocks is the number of blocks of the hash that we have to
+     * calculate in constant time because they could be altered by the
+     * padding value.
+     *
+     * In SSLv3, the padding must be minimal so the end of the plaintext
+     * varies by, at most, 15+20 = 35 bytes. (We conservatively assume that
+     * the MAC size varies from 0..20 bytes.) In case the 9 bytes of hash
+     * termination (0x80 + 64-bit length) don't fit in the final block, we
+     * say that the final two blocks can vary based on the padding.
+     *
+     * TLSv1 has MACs up to 48 bytes long (SHA-384) and the padding is not
+     * required to be minimal. Therefore we say that the final six blocks
+     * can vary based on the padding.
+     *
+     * Later in the function, if the message is short and there obviously
+     * cannot be this many blocks then varianceBlocks can be reduced. */
+    unsigned int varianceBlocks = isSSLv3 ? 2 : 6;
+    /* From now on we're dealing with the MAC, which conceptually has 13
+     * bytes of `header' before the start of the data (TLS) or 71/75 bytes
+     * (SSLv3) */
+    const unsigned int len = dataTotalLen + headerLen;
+    /* maxMACBytes contains the maximum number of bytes in the MAC, including
+     * |header|, assuming that there's no padding. */
+    const unsigned int maxMACBytes = len - mdSize - 1;
+    /* numBlocks is the maximum number of hash blocks. */
+    const unsigned int numBlocks =
+        (maxMACBytes + 1 + mdLengthSize + mdBlockSize - 1) / mdBlockSize;
+    /* macEndOffset is the index just past the end of the data to be
+     * MACed. */
+    const unsigned int macEndOffset = dataLen + headerLen - mdSize;
+    /* c is the index of the 0x80 byte in the final hash block that
+     * contains application data. */
+    const unsigned int c = macEndOffset % mdBlockSize;
+    /* indexA is the hash block number that contains the 0x80 terminating
+     * value. */
+    const unsigned int indexA = macEndOffset / mdBlockSize;
+    /* indexB is the hash block number that contains the 64-bit hash
+     * length, in bits. */
+    const unsigned int indexB = (macEndOffset + mdLengthSize) / mdBlockSize;
+    /* bits is the hash-length in bits. It includes the additional hash
+     * block for the masked HMAC key, or whole of |header| in the case of
+     * SSLv3. */
+    unsigned int bits;
+    /* In order to calculate the MAC in constant time we have to handle
+     * the final blocks specially because the padding value could cause the
+     * end to appear somewhere in the final |varianceBlocks| blocks and we
+     * can't leak where. However, |numStartingBlocks| worth of data can
+     * be hashed right away because no padding value can affect whether
+     * they are plaintext. */
+    unsigned int numStartingBlocks = 0;
+    /* k is the starting byte offset into the conceptual header||data where
+     * we start processing. */
+    unsigned int k = 0;
+    unsigned char lengthBytes[MAX_HASH_BIT_COUNT_BYTES];
+    /* hmacPad is the masked HMAC key. */
+    unsigned char hmacPad[HASH_BLOCK_LENGTH_MAX];
+    unsigned char firstBlock[HASH_BLOCK_LENGTH_MAX];
+    unsigned char macOut[HASH_LENGTH_MAX];
+    unsigned i, j;
+
+    if (mdState == NULL) {
+        return SECFailure; /* no hash context: nothing to clean up yet */
+    }
+    /* For SSLv3, if we're going to have any starting blocks then we need
+     * at least two because the header is larger than a single block. */
+    if (numBlocks > varianceBlocks + (isSSLv3 ? 1 : 0)) {
+        numStartingBlocks = numBlocks - varianceBlocks;
+        k = mdBlockSize * numStartingBlocks;
+    }
+
+    bits = 8 * macEndOffset;
+    hashObj->begin(mdState);
+    if (!isSSLv3) {
+        /* Compute the initial HMAC block. For SSLv3, the padding and
+         * secret bytes are included in |header| because they take more
+         * than a single block. */
+        bits += 8 * mdBlockSize;
+        memset(hmacPad, 0, mdBlockSize);
+        PORT_Assert(macSecretLen <= sizeof(hmacPad));
+        memcpy(hmacPad, macSecret, macSecretLen);
+        for (i = 0; i < mdBlockSize; i++)
+            hmacPad[i] ^= 0x36;
+        hashObj->update(mdState, hmacPad, mdBlockSize);
+    }
+
+    /* With a 128-bit length field, the bit count goes in its low 64 bits. */
+    j = (mdLengthSize == 16) ? 8 : 0;
+    memset(lengthBytes, 0, sizeof(lengthBytes));
+    if (hashObj->type == HASH_AlgMD5) {
+        /* MD5 appends a little-endian length. */
+        for (i = 0; i < 4; i++) {
+            lengthBytes[i + j] = bits >> (8 * i);
+        }
+    } else {
+        /* All other TLS hash functions use a big-endian length. */
+        for (i = 0; i < 4; i++) {
+            lengthBytes[4 + i + j] = bits >> (8 * (3 - i));
+        }
+    }
+
+    if (k > 0) {
+        if (isSSLv3) {
+            /* The SSLv3 header is larger than a single block.
+             * overhang is the number of bytes beyond a single
+             * block that the header consumes: either 7 bytes
+             * (SHA1) or 11 bytes (MD5). */
+            const unsigned int overhang = headerLen - mdBlockSize;
+            hashObj->update(mdState, header, mdBlockSize);
+            memcpy(firstBlock, header + mdBlockSize, overhang);
+            memcpy(firstBlock + overhang, data, mdBlockSize - overhang);
+            hashObj->update(mdState, firstBlock, mdBlockSize);
+            for (i = 1; i < k / mdBlockSize - 1; i++) {
+                hashObj->update(mdState, data + mdBlockSize * i - overhang,
+                                mdBlockSize);
+            }
+        } else {
+            /* k is a multiple of mdBlockSize. */
+            memcpy(firstBlock, header, 13);
+            memcpy(firstBlock + 13, data, mdBlockSize - 13);
+            hashObj->update(mdState, firstBlock, mdBlockSize);
+            for (i = 1; i < k / mdBlockSize; i++) {
+                hashObj->update(mdState, data + mdBlockSize * i - 13,
+                                mdBlockSize);
+            }
+        }
+    }
+
+    memset(macOut, 0, sizeof(macOut));
+
+    /* We now process the final hash blocks. For each block, we construct
+     * it in constant time. If i == indexA then we'll include the 0x80
+     * bytes and zero pad etc. For each block we selectively copy it, in
+     * constant time, to |macOut|. */
+    for (i = numStartingBlocks; i <= numStartingBlocks + varianceBlocks; i++) {
+        unsigned char block[HASH_BLOCK_LENGTH_MAX];
+        unsigned char isBlockA = constantTimeEQ8(i, indexA);
+        unsigned char isBlockB = constantTimeEQ8(i, indexB);
+        for (j = 0; j < mdBlockSize; j++) {
+            unsigned char isPastC = isBlockA & constantTimeGE(j, c);
+            unsigned char isPastCPlus1 = isBlockA & constantTimeGE(j, c + 1);
+            unsigned char b = 0;
+            if (k < headerLen) {
+                b = header[k];
+            } else if (k < dataTotalLen + headerLen) {
+                b = data[k - headerLen];
+            }
+            k++;
+
+            /* If this is the block containing the end of the
+             * application data, and we are at the offset for the
+             * 0x80 value, then overwrite b with 0x80. */
+            b = (b & ~isPastC) | (0x80 & isPastC);
+            /* If this is the block containing the end of the
+             * application data and we're past the 0x80 value then
+             * just write zero. */
+            b = b & ~isPastCPlus1;
+            /* If this is indexB (the final block), but not
+             * indexA (the end of the data), then the 64-bit
+             * length didn't fit into indexA and we're having to
+             * add an extra block of zeros. */
+            b &= ~isBlockB | isBlockA;
+
+            /* The final bytes of one of the blocks contains the length. */
+            if (j >= mdBlockSize - mdLengthSize) {
+                /* If this is indexB, write a length byte. */
+                b = (b & ~isBlockB) |
+                    (isBlockB & lengthBytes[j - (mdBlockSize - mdLengthSize)]);
+            }
+            block[j] = b;
+        }
+
+        hashObj->update(mdState, block, mdBlockSize);
+        hashObj->end_raw(mdState, block, NULL, mdSize);
+        /* If this is indexB, copy the hash value to |macOut|. */
+        for (j = 0; j < mdSize; j++) {
+            macOut[j] |= block[j] & isBlockB;
+        }
+    }
+
+    hashObj->begin(mdState);
+
+    if (isSSLv3) {
+        /* We repurpose |hmacPad| to contain the SSLv3 pad2 block. */
+        for (i = 0; i < sslv3PadLen; i++)
+            hmacPad[i] = 0x5c;
+
+        hashObj->update(mdState, macSecret, macSecretLen);
+        hashObj->update(mdState, hmacPad, sslv3PadLen);
+        hashObj->update(mdState, macOut, mdSize);
+    } else {
+        /* Complete the HMAC in the standard manner. */
+        for (i = 0; i < mdBlockSize; i++)
+            hmacPad[i] ^= 0x6a;
+
+        hashObj->update(mdState, hmacPad, mdBlockSize);
+        hashObj->update(mdState, macOut, mdSize);
+    }
+
+    hashObj->end(mdState, mdOut, mdOutLen, mdOutMax);
+    hashObj->destroy(mdState, PR_TRUE);
+
+    return SECSuccess;
+}
+
+SECStatus
+HMAC_ConstantTime(
+    unsigned char *result,
+    unsigned int *resultLen,
+    unsigned int maxResultLen,
+    const SECHashObject *hashObj,
+    const unsigned char *secret,
+    unsigned int secretLen,
+    const unsigned char *header,
+    unsigned int headerLen,
+    const unsigned char *body,
+    unsigned int bodyLen,
+    unsigned int bodyTotalLen)
+{
+    return hashObj->end_raw == NULL /* MAC() calls end_raw on every block */
+               ? SECFailure
+               : MAC(result, resultLen, maxResultLen, hashObj, secret,
+                     secretLen, header, headerLen, body, bodyLen,
+                     bodyTotalLen, 0 /* isSSLv3: no, plain HMAC */);
+}
+
+SECStatus
+SSLv3_MAC_ConstantTime(
+    unsigned char *result,
+    unsigned int *resultLen,
+    unsigned int maxResultLen,
+    const SECHashObject *hashObj,
+    const unsigned char *secret,
+    unsigned int secretLen,
+    const unsigned char *header,
+    unsigned int headerLen,
+    const unsigned char *body,
+    unsigned int bodyLen,
+    unsigned int bodyTotalLen)
+{
+    return hashObj->end_raw == NULL /* MAC() calls end_raw on every block */
+               ? SECFailure
+               : MAC(result, resultLen, maxResultLen, hashObj, secret,
+                     secretLen, header, headerLen, body, bodyLen,
+                     bodyTotalLen, 1 /* isSSLv3: yes */);
+}
diff --git a/security/nss/lib/freebl/hmacct.h b/security/nss/lib/freebl/hmacct.h
new file mode 100644
index 000000000..a773ea89c
--- /dev/null
+++ b/security/nss/lib/freebl/hmacct.h
@@ -0,0 +1,38 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _HMACCT_H_
+#define _HMACCT_H_
+
+SEC_BEGIN_PROTOS
+
+extern SECStatus HMAC_ConstantTime(
+    unsigned char *result,
+    unsigned int *resultLen,
+    unsigned int maxResultLen,
+    const SECHashObject *hashObj,
+    const unsigned char *secret,
+    unsigned int secretLen,
+    const unsigned char *header,
+    unsigned int headerLen,
+    const unsigned char *body,
+    unsigned int bodyLen,
+    unsigned int bodyTotalLen);
+
+extern SECStatus SSLv3_MAC_ConstantTime(
+    unsigned char *result,
+    unsigned int *resultLen,
+    unsigned int maxResultLen,
+    const SECHashObject *hashObj,
+    const unsigned char *secret,
+    unsigned int secretLen,
+    const unsigned char *header,
+    unsigned int headerLen,
+    const unsigned char *body,
+    unsigned int bodyLen,
+    unsigned int bodyTotalLen);
+
+SEC_END_PROTOS
+
+#endif
diff --git a/security/nss/lib/freebl/intel-aes-x64-masm.asm b/security/nss/lib/freebl/intel-aes-x64-masm.asm
new file mode 100644
index 000000000..ef5c76ba2
--- /dev/null
+++ b/security/nss/lib/freebl/intel-aes-x64-masm.asm
@@ -0,0 +1,971 @@
+; LICENSE:
+; This submission to NSS is to be made available under the terms of the
+; Mozilla Public License, v. 2.0. You can obtain one at http:
+; //mozilla.org/MPL/2.0/.
+;###############################################################################
+; Copyright(c) 2014, Intel Corp.
+; Developers and authors:
+; Shay Gueron and Vlad Krasnov
+; Intel Corporation, Israel Development Centre, Haifa, Israel
+; Please send feedback directly to crypto.feedback.alias@intel.com
+
+
+.DATA
+ALIGN 16
+Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh  ; pshufb control used by the 128-bit key schedule
+Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h  ; pshufb control used by the 192-bit key schedule
+Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh  ; pshufb control used by the 256-bit key schedule (same value as Lmask)
+Lcon1 dd 1,1,1,1  ; starting round constant; the key schedules double it with pslld
+Lcon2 dd 1bh,1bh,1bh,1bh  ; round constant loaded for the last two 128-bit rounds
+; Register aliases shared by the ECB/CBC code generators below.
+.CODE
+
+ctx     textequ <rcx>
+output  textequ <rdx>
+input   textequ <r8>
+inputLen textequ <r9d>
+
+
+aes_rnd MACRO i  ; one aesenc round on xmm0..xmm7 using the 16 bytes at [i*16 + ctx]
+    movdqu  xmm8, [i*16 + ctx]  ; xmm8 = round key i (clobbers xmm8)
+    aesenc  xmm0, xmm8
+    aesenc  xmm1, xmm8
+    aesenc  xmm2, xmm8
+    aesenc  xmm3, xmm8
+    aesenc  xmm4, xmm8
+    aesenc  xmm5, xmm8
+    aesenc  xmm6, xmm8
+    aesenc  xmm7, xmm8
+    ENDM
+
+aes_last_rnd MACRO i  ; final aesenclast round on xmm0..xmm7 using the 16 bytes at [i*16 + ctx]
+    movdqu  xmm8, [i*16 + ctx]  ; xmm8 = round key i (clobbers xmm8)
+    aesenclast  xmm0, xmm8
+    aesenclast  xmm1, xmm8
+    aesenclast  xmm2, xmm8
+    aesenclast  xmm3, xmm8
+    aesenclast  xmm4, xmm8
+    aesenclast  xmm5, xmm8
+    aesenclast  xmm6, xmm8
+    aesenclast  xmm7, xmm8
+    ENDM
+
+aes_dec_rnd MACRO i  ; one aesdec round on xmm0..xmm7 using the 16 bytes at [i*16 + ctx]
+    movdqu  xmm8, [i*16 + ctx]  ; xmm8 = round key i (clobbers xmm8)
+    aesdec  xmm0, xmm8
+    aesdec  xmm1, xmm8
+    aesdec  xmm2, xmm8
+    aesdec  xmm3, xmm8
+    aesdec  xmm4, xmm8
+    aesdec  xmm5, xmm8
+    aesdec  xmm6, xmm8
+    aesdec  xmm7, xmm8
+    ENDM
+
+aes_dec_last_rnd MACRO i  ; final aesdeclast round on xmm0..xmm7 using the 16 bytes at [i*16 + ctx]
+    movdqu  xmm8, [i*16 + ctx]  ; xmm8 = round key i (clobbers xmm8)
+    aesdeclast  xmm0, xmm8
+    aesdeclast  xmm1, xmm8
+    aesdeclast  xmm2, xmm8
+    aesdeclast  xmm3, xmm8
+    aesdeclast  xmm4, xmm8
+    aesdeclast  xmm5, xmm8
+    aesdeclast  xmm6, xmm8
+    aesdeclast  xmm7, xmm8
+    ENDM
+
+
+gen_aes_ecb_func MACRO enc, rnds  ; emits an ECB body: enc==1 encrypts, otherwise decrypts; rnds = number of rounds
+
+LOCAL   loop8
+LOCAL   loop1
+LOCAL   bail
+
+        xor     inputLen, inputLen  ; zero r9d before it is reloaded below
+        mov     input,      [rsp + 1*8 + 8*4]  ; NOTE(review): offsets look like Win64 stack args 5 and 6
+        mov     inputLen,   [rsp + 1*8 + 8*5]  ; (return address + 32-byte shadow space) - confirm against the C prototype
+
+        sub     rsp, 3*16  ; scratch space to preserve xmm6..xmm8, restored at bail
+
+        movdqu  [rsp + 0*16], xmm6
+        movdqu  [rsp + 1*16], xmm7
+        movdqu  [rsp + 2*16], xmm8
+
+        lea     ctx, [48+ctx]  ; round keys are read relative to ctx+48; the first 48 bytes' layout is not visible here
+
+loop8:  ; bulk path: eight 16-byte blocks per iteration while inputLen >= 128
+        cmp     inputLen, 8*16
+        jb      loop1
+
+        movdqu  xmm0, [0*16 + input]
+        movdqu  xmm1, [1*16 + input]
+        movdqu  xmm2, [2*16 + input]
+        movdqu  xmm3, [3*16 + input]
+        movdqu  xmm4, [4*16 + input]
+        movdqu  xmm5, [5*16 + input]
+        movdqu  xmm6, [6*16 + input]
+        movdqu  xmm7, [7*16 + input]
+
+        movdqu  xmm8, [0*16 + ctx]  ; round key 0: initial whitening XOR
+        pxor    xmm0, xmm8
+        pxor    xmm1, xmm8
+        pxor    xmm2, xmm8
+        pxor    xmm3, xmm8
+        pxor    xmm4, xmm8
+        pxor    xmm5, xmm8
+        pxor    xmm6, xmm8
+        pxor    xmm7, xmm8
+; Select the round macros/instructions for the requested direction.
+IF enc eq 1
+        rnd textequ <aes_rnd>
+        lastrnd textequ <aes_last_rnd>
+        aesinst textequ <aesenc>
+        aeslastinst textequ <aesenclast>
+ELSE
+        rnd textequ <aes_dec_rnd>
+        lastrnd textequ <aes_dec_last_rnd>
+        aesinst textequ <aesdec>
+        aeslastinst textequ <aesdeclast>
+ENDIF
+
+        i = 1
+        WHILE i LT rnds  ; assembly-time unroll: rounds 1 .. rnds-1
+            rnd i
+            i = i+1
+            ENDM
+        lastrnd rnds  ; final round uses round key number rnds
+
+        movdqu  [0*16 + output], xmm0
+        movdqu  [1*16 + output], xmm1
+        movdqu  [2*16 + output], xmm2
+        movdqu  [3*16 + output], xmm3
+        movdqu  [4*16 + output], xmm4
+        movdqu  [5*16 + output], xmm5
+        movdqu  [6*16 + output], xmm6
+        movdqu  [7*16 + output], xmm7
+
+        lea input, [8*16 + input]
+        lea output, [8*16 + output]
+        sub inputLen, 8*16
+        jmp loop8
+
+loop1:  ; tail path: one 16-byte block per iteration while inputLen >= 16
+        cmp     inputLen, 1*16
+        jb      bail
+
+        movdqu  xmm0, [input]
+        movdqu  xmm7, [0*16 + ctx]  ; xmm7 holds the current round key in this path
+        pxor    xmm0, xmm7
+
+        i = 1
+    WHILE i LT rnds
+            movdqu  xmm7, [i*16 + ctx]
+            aesinst  xmm0, xmm7
+            i = i+1
+        ENDM
+        movdqu  xmm7, [rnds*16 + ctx]
+        aeslastinst xmm0, xmm7
+
+        movdqu  [output], xmm0
+
+        lea input, [1*16 + input]
+        lea output, [1*16 + output]
+        sub inputLen, 1*16
+        jmp loop1
+
+bail:  ; any remaining inputLen < 16 bytes is left unprocessed
+        xor rax, rax  ; return value 0
+
+        movdqu  xmm6, [rsp + 0*16]
+        movdqu  xmm7, [rsp + 1*16]
+        movdqu  xmm8, [rsp + 2*16]
+        add     rsp, 3*16
+        ret
+ENDM
+
+intel_aes_encrypt_ecb_128 PROC
+gen_aes_ecb_func 1, 10  ; encrypt, 10 rounds
+intel_aes_encrypt_ecb_128 ENDP
+
+intel_aes_encrypt_ecb_192 PROC
+gen_aes_ecb_func 1, 12  ; encrypt, 12 rounds
+intel_aes_encrypt_ecb_192 ENDP
+
+intel_aes_encrypt_ecb_256 PROC
+gen_aes_ecb_func 1, 14  ; encrypt, 14 rounds
+intel_aes_encrypt_ecb_256 ENDP
+
+intel_aes_decrypt_ecb_128 PROC
+gen_aes_ecb_func 0, 10  ; decrypt, 10 rounds
+intel_aes_decrypt_ecb_128 ENDP
+
+intel_aes_decrypt_ecb_192 PROC
+gen_aes_ecb_func 0, 12  ; decrypt, 12 rounds
+intel_aes_decrypt_ecb_192 ENDP
+
+intel_aes_decrypt_ecb_256 PROC
+gen_aes_ecb_func 0, 14  ; decrypt, 14 rounds
+intel_aes_decrypt_ecb_256 ENDP
+
+
+KEY textequ <rcx>  ; key-schedule routines: pointer the raw key is read from
+KS  textequ <rdx>  ; pointer the round keys are written to
+ITR textequ <r8>  ; scratch: constant addresses and loop counter
+
+intel_aes_encrypt_init_128  PROC  ; expand the 16-byte key at [KEY] into 11 round keys at [KS]; advances KS
+
+    movdqu  xmm1, [KEY]  ; xmm1 = most recent round key
+    movdqu  [KS], xmm1  ; round key 0 is the raw key
+    movdqa  xmm2, xmm1  ; xmm2 = working copy fed to pshufb/aesenclast
+
+    lea ITR, Lcon1
+    movdqa  xmm0, [ITR]  ; xmm0 = round constant, starts at 1
+    lea ITR, Lmask
+    movdqa  xmm4, [ITR]  ; xmm4 = pshufb control
+
+    mov ITR, 8  ; eight loop iterations produce round keys 1..8
+
+Lenc_128_ks_loop:
+        lea KS, [16 + KS]
+        dec ITR  ; sets the flags tested by jne at the loop bottom
+
+        pshufb  xmm2, xmm4
+        aesenclast  xmm2, xmm0
+        pslld   xmm0, 1  ; double the round constant for the next round
+        movdqa  xmm3, xmm1
+        pslldq  xmm3, 4
+        pxor    xmm1, xmm3
+        pslldq  xmm3, 4
+        pxor    xmm1, xmm3
+        pslldq  xmm3, 4
+        pxor    xmm1, xmm3
+        pxor    xmm1, xmm2
+        movdqu  [KS], xmm1
+        movdqa  xmm2, xmm1
+
+        jne Lenc_128_ks_loop  ; relies on the flags set by 'dec ITR' above
+
+    lea ITR, Lcon2
+    movdqa  xmm0, [ITR]  ; switch to round constant 1bh for round key 9
+
+    pshufb  xmm2, xmm4
+    aesenclast  xmm2, xmm0
+    pslld   xmm0, 1  ; 1bh doubled for round key 10
+    movdqa  xmm3, xmm1
+    pslldq  xmm3, 4
+    pxor    xmm1, xmm3
+    pslldq  xmm3, 4
+    pxor    xmm1, xmm3
+    pslldq  xmm3, 4
+    pxor    xmm1, xmm3
+    pxor    xmm1, xmm2
+    movdqu  [16 + KS], xmm1  ; round key 9 (KS points at round key 8 here)
+    movdqa  xmm2, xmm1
+
+    pshufb  xmm2, xmm4
+    aesenclast  xmm2, xmm0
+    movdqa  xmm3, xmm1
+    pslldq  xmm3, 4
+    pxor    xmm1, xmm3
+    pslldq  xmm3, 4
+    pxor    xmm1, xmm3
+    pslldq  xmm3, 4
+    pxor    xmm1, xmm3
+    pxor    xmm1, xmm2
+    movdqu  [32 + KS], xmm1  ; round key 10
+    movdqa  xmm2, xmm1
+
+    ret
+intel_aes_encrypt_init_128  ENDP
+
+
+intel_aes_decrypt_init_128  PROC  ; build the decryption schedule: encryption schedule reversed, inner keys passed through aesimc
+
+    push    KS  ; the callee advances KS, so keep the original pointers
+    push    KEY
+
+    call    intel_aes_encrypt_init_128
+
+    pop     KEY
+    pop     KS
+
+    movdqu  xmm0, [0*16 + KS]  ; swap round keys 0 and 10 unchanged
+    movdqu  xmm1, [10*16 + KS]
+    movdqu  [10*16 + KS], xmm0
+    movdqu  [0*16 + KS], xmm1
+
+    i = 1
+    WHILE i LT 5  ; swap round keys i and 10-i, applying aesimc to both
+        movdqu  xmm0, [i*16 + KS]
+        movdqu  xmm1, [(10-i)*16 + KS]
+
+        aesimc  xmm0, xmm0
+        aesimc  xmm1, xmm1
+
+        movdqu  [(10-i)*16 + KS], xmm0
+        movdqu  [i*16 + KS], xmm1
+
+        i = i+1
+    ENDM
+
+    movdqu  xmm0, [5*16 + KS]  ; middle key stays in place, only aesimc is applied
+    aesimc  xmm0, xmm0
+    movdqu  [5*16 + KS], xmm0
+    ret
+intel_aes_decrypt_init_128  ENDP
+
+
+intel_aes_encrypt_init_192  PROC  ; expand the 24-byte key at [KEY] into round keys at [KS]; advances KS
+
+    sub     rsp, 16*2  ; preserve xmm6/xmm7, restored before ret
+    movdqu  [16*0 + rsp], xmm6
+    movdqu  [16*1 + rsp], xmm7
+
+    movdqu  xmm1, [KEY]  ; first 16 key bytes
+    mov     ITR, [16 + KEY]  ; remaining 8 key bytes
+    movd    xmm3, ITR
+
+    movdqu  [KS], xmm1  ; round key 0 = first 16 key bytes
+    movdqa  xmm5, xmm3
+
+    lea ITR, Lcon1
+    movdqu  xmm0, [ITR]  ; xmm0 = round constant, starts at 1
+    lea ITR, Lmask192
+    movdqu  xmm4, [ITR]  ; xmm4 = pshufb control
+
+    mov ITR, 4  ; four iterations, each storing three 16-byte round keys
+
+Lenc_192_ks_loop:
+        movdqa  xmm2, xmm3
+        pshufb  xmm2, xmm4
+        aesenclast xmm2, xmm0
+        pslld   xmm0, 1  ; double the round constant
+
+        movdqa  xmm6, xmm1
+        movdqa  xmm7, xmm3
+        pslldq  xmm6, 4
+        pslldq  xmm7, 4
+        pxor    xmm1, xmm6
+        pxor    xmm3, xmm7
+        pslldq  xmm6, 4
+        pxor    xmm1, xmm6
+        pslldq  xmm6, 4
+        pxor    xmm1, xmm6
+        pxor    xmm1, xmm2
+        pshufd  xmm2, xmm1, 0ffh
+        pxor    xmm3, xmm2
+
+        movdqa  xmm6, xmm1
+        shufpd  xmm5, xmm1, 00h  ; repack the 24-byte schedule steps into 16-byte round keys
+        shufpd  xmm6, xmm3, 01h
+
+        movdqu  [16 + KS], xmm5
+        movdqu  [32 + KS], xmm6
+
+        movdqa  xmm2, xmm3
+        pshufb  xmm2, xmm4
+        aesenclast  xmm2, xmm0
+        pslld   xmm0, 1  ; double the round constant
+
+        movdqa  xmm6, xmm1
+        movdqa  xmm7, xmm3
+        pslldq  xmm6, 4
+        pslldq  xmm7, 4
+        pxor    xmm1, xmm6
+        pxor    xmm3, xmm7
+        pslldq  xmm6, 4
+        pxor    xmm1, xmm6
+        pslldq  xmm6, 4
+        pxor    xmm1, xmm6
+        pxor    xmm1, xmm2
+        pshufd  xmm2, xmm1, 0ffh
+        pxor    xmm3, xmm2
+
+        movdqu  [48 + KS], xmm1
+        movdqa  xmm5, xmm3
+
+        lea KS, [48 + KS]  ; advance past the three round keys just written
+
+        dec ITR
+        jnz Lenc_192_ks_loop
+
+    movdqu  [16 + KS], xmm5  ; NOTE(review): writes one 16-byte slot past the last loop store - confirm the buffer has room
+
+    movdqu  xmm7, [16*1 + rsp]
+    movdqu  xmm6, [16*0 + rsp]
+    add rsp, 16*2
+    ret
+intel_aes_encrypt_init_192  ENDP
+
+intel_aes_decrypt_init_192  PROC  ; build the decryption schedule: encryption schedule reversed, inner keys passed through aesimc
+    push    KS  ; the callee advances KS, so keep the original pointers
+    push    KEY
+
+    call    intel_aes_encrypt_init_192
+
+    pop     KEY
+    pop     KS
+
+    movdqu  xmm0, [0*16 + KS]  ; swap round keys 0 and 12 unchanged
+    movdqu  xmm1, [12*16 + KS]
+    movdqu  [12*16 + KS], xmm0
+    movdqu  [0*16 + KS], xmm1
+
+    i = 1
+    WHILE i LT 6  ; swap round keys i and 12-i, applying aesimc to both
+        movdqu  xmm0, [i*16 + KS]
+        movdqu  xmm1, [(12-i)*16 + KS]
+
+        aesimc  xmm0, xmm0
+        aesimc  xmm1, xmm1
+
+        movdqu  [(12-i)*16 + KS], xmm0
+        movdqu  [i*16 + KS], xmm1
+
+        i = i+1
+    ENDM
+
+    movdqu  xmm0, [6*16 + KS]  ; middle key stays in place, only aesimc is applied
+    aesimc  xmm0, xmm0
+    movdqu  [6*16 + KS], xmm0
+    ret
+intel_aes_decrypt_init_192  ENDP
+
+
+intel_aes_encrypt_init_256  PROC  ; expand the 32-byte key at [KEY] into 15 round keys at [KS]; advances KS
+    sub     rsp, 16*2  ; preserve xmm6/xmm7, restored before ret
+    movdqu  [16*0 + rsp], xmm6
+    movdqu  [16*1 + rsp], xmm7
+
+    movdqu  xmm1, [16*0 + KEY]  ; xmm1 = low 16 key bytes
+    movdqu  xmm3, [16*1 + KEY]  ; xmm3 = high 16 key bytes
+
+    movdqu  [16*0 + KS], xmm1  ; round keys 0 and 1 are the raw key halves
+    movdqu  [16*1 + KS], xmm3
+
+    lea ITR, Lcon1
+    movdqu  xmm0, [ITR]  ; xmm0 = round constant, starts at 1
+    lea ITR, Lmask256
+    movdqu  xmm5, [ITR]  ; xmm5 = pshufb control
+
+    pxor    xmm6, xmm6  ; xmm6 = 0, used as the aesenclast key for odd round keys
+
+    mov ITR, 6  ; six iterations, each storing two round keys
+
+Lenc_256_ks_loop:
+
+        movdqa  xmm2, xmm3
+        pshufb  xmm2, xmm5
+        aesenclast  xmm2, xmm0
+        pslld   xmm0, 1  ; double the round constant
+        movdqa  xmm4, xmm1
+        pslldq  xmm4, 4
+        pxor    xmm1, xmm4
+        pslldq  xmm4, 4
+        pxor    xmm1, xmm4
+        pslldq  xmm4, 4
+        pxor    xmm1, xmm4
+        pxor    xmm1, xmm2
+        movdqu  [16*2 + KS], xmm1  ; even-numbered round key
+
+        pshufd  xmm2, xmm1, 0ffh
+        aesenclast  xmm2, xmm6
+        movdqa  xmm4, xmm3
+        pslldq  xmm4, 4
+        pxor    xmm3, xmm4
+        pslldq  xmm4, 4
+        pxor    xmm3, xmm4
+        pslldq  xmm4, 4
+        pxor    xmm3, xmm4
+        pxor    xmm3, xmm2
+        movdqu  [16*3 + KS], xmm3  ; odd-numbered round key
+
+        lea KS, [32 + KS]  ; advance past the two round keys just written
+        dec ITR
+        jnz Lenc_256_ks_loop
+
+    movdqa  xmm2, xmm3  ; one more even step yields the final round key (14)
+    pshufb  xmm2, xmm5
+    aesenclast  xmm2, xmm0
+    movdqa  xmm4, xmm1
+    pslldq  xmm4, 4
+    pxor    xmm1, xmm4
+    pslldq  xmm4, 4
+    pxor    xmm1, xmm4
+    pslldq  xmm4, 4
+    pxor    xmm1, xmm4
+    pxor    xmm1, xmm2
+    movdqu  [16*2 + KS], xmm1
+
+    movdqu  xmm7, [16*1 + rsp]
+    movdqu  xmm6, [16*0 + rsp]
+    add rsp, 16*2
+    ret
+
+intel_aes_encrypt_init_256  ENDP
+
+
+intel_aes_decrypt_init_256  PROC  ; builds the AES-256 decryption key schedule at KS from the key at KEY
+    push    KS  ; the encryption expansion advances KS, so keep the original pointers on the stack
+    push    KEY
+
+    call    intel_aes_encrypt_init_256  ; KS[0..14] = encryption round keys
+
+    pop     KEY
+    pop     KS
+
+    movdqu  xmm0, [0*16 + KS]  ; swap round keys 0 and 14 as they are (no aesimc on the two outer keys)
+    movdqu  xmm1, [14*16 + KS]
+    movdqu  [14*16 + KS], xmm0
+    movdqu  [0*16 + KS], xmm1
+
+    i = 1  ; assembly-time loop: the body below is emitted for i = 1..6
+    WHILE i LT 7
+        movdqu  xmm0, [i*16 + KS]  ; swap round key i with round key 14-i ...
+        movdqu  xmm1, [(14-i)*16 + KS]
+
+        aesimc  xmm0, xmm0  ; ... applying InvMixColumns to both, as aesdec expects
+        aesimc  xmm1, xmm1
+
+        movdqu  [(14-i)*16 + KS], xmm0
+        movdqu  [i*16 + KS], xmm1
+
+        i = i+1
+    ENDM
+
+    movdqu  xmm0, [7*16 + KS]  ; the middle key stays in place and is only transformed
+    aesimc  xmm0, xmm0
+    movdqu  [7*16 + KS], xmm0
+    ret
+intel_aes_decrypt_init_256  ENDP
+
+
+
+gen_aes_cbc_enc_func MACRO rnds
+
+LOCAL   loop1
+LOCAL   bail
+; Body of a CBC encryption routine with rnds AES rounds: one 16-byte block per pass, rax = 0 on return.
+        mov     input,      [rsp + 1*8 + 8*4]  ; stack argument at return address + 4*8 (fifth argument)
+        mov     inputLen,   [rsp + 1*8 + 8*5]  ; the following stack argument (sixth argument)
+
+        sub     rsp, 3*16  ; scratch space to preserve xmm6, xmm7 and xmm8
+
+        movdqu  [rsp + 0*16], xmm6
+        movdqu  [rsp + 1*16], xmm7
+        movdqu  [rsp + 2*16], xmm8
+
+        lea     ctx, [48+ctx]  ; ctx now points at round key 0; the chaining block sits 32 bytes below it
+
+        movdqu  xmm0, [-32+ctx]  ; xmm0 = chaining block (IV or previous ciphertext)
+
+        movdqu  xmm2, [0*16 + ctx]  ; keep round keys 0..5 in xmm2..xmm7 for the whole loop
+        movdqu  xmm3, [1*16 + ctx]
+        movdqu  xmm4, [2*16 + ctx]
+        movdqu  xmm5, [3*16 + ctx]
+        movdqu  xmm6, [4*16 + ctx]
+        movdqu  xmm7, [5*16 + ctx]
+
+loop1:
+        cmp     inputLen, 1*16  ; stop when less than one whole block is left
+        jb      bail
+
+        movdqu  xmm1, [input]  ; plaintext block
+        pxor    xmm1, xmm2  ; fold in round key 0
+        pxor    xmm0, xmm1  ; xmm0 = chaining block xor plaintext xor round key 0
+
+        aesenc  xmm0, xmm3  ; rounds 1..5 use the cached keys
+        aesenc  xmm0, xmm4
+        aesenc  xmm0, xmm5
+        aesenc  xmm0, xmm6
+        aesenc  xmm0, xmm7
+
+        i = 6  ; remaining middle rounds load their key through xmm8 (unrolled at assembly time)
+    WHILE i LT rnds
+            movdqu  xmm8, [i*16 + ctx]
+            aesenc  xmm0, xmm8
+            i = i+1
+        ENDM
+        movdqu  xmm8, [rnds*16 + ctx]
+        aesenclast xmm0, xmm8  ; xmm0 = ciphertext, which is also the next chaining block
+
+        movdqu  [output], xmm0
+
+        lea input, [1*16 + input]
+        lea output, [1*16 + output]
+        sub inputLen, 1*16
+        jmp loop1
+
+bail:
+        movdqu  [-32+ctx], xmm0  ; write the chaining block back so a later call continues the chain
+
+        xor rax, rax  ; return value 0
+
+        movdqu  xmm6, [rsp + 0*16]  ; restore the preserved registers
+        movdqu  xmm7, [rsp + 1*16]
+        movdqu  xmm8, [rsp + 2*16]
+        add     rsp, 3*16
+        ret
+
+ENDM
+
+gen_aes_cbc_dec_func MACRO rnds
+
+LOCAL   loop8
+LOCAL   loop1
+LOCAL   dec1
+LOCAL   bail
+; Body of a CBC decryption routine with rnds AES rounds: eight blocks per pass while possible, then single blocks; rax = 0 on return.
+        mov     input,      [rsp + 1*8 + 8*4]  ; stack argument at return address + 4*8 (fifth argument)
+        mov     inputLen,   [rsp + 1*8 + 8*5]  ; the following stack argument (sixth argument)
+
+        sub     rsp, 3*16  ; scratch space to preserve xmm6, xmm7 and xmm8
+
+        movdqu  [rsp + 0*16], xmm6
+        movdqu  [rsp + 1*16], xmm7
+        movdqu  [rsp + 2*16], xmm8
+
+        lea     ctx, [48+ctx]  ; ctx now points at round key 0; the chaining block sits 32 bytes below it
+
+loop8:
+        cmp     inputLen, 8*16  ; fewer than eight blocks left: continue with the single-block path
+        jb      dec1
+
+        movdqu  xmm0, [0*16 + input]  ; load eight ciphertext blocks
+        movdqu  xmm1, [1*16 + input]
+        movdqu  xmm2, [2*16 + input]
+        movdqu  xmm3, [3*16 + input]
+        movdqu  xmm4, [4*16 + input]
+        movdqu  xmm5, [5*16 + input]
+        movdqu  xmm6, [6*16 + input]
+        movdqu  xmm7, [7*16 + input]
+
+        movdqu  xmm8, [0*16 + ctx]  ; initial AddRoundKey with key 0 of the decryption schedule
+        pxor    xmm0, xmm8
+        pxor    xmm1, xmm8
+        pxor    xmm2, xmm8
+        pxor    xmm3, xmm8
+        pxor    xmm4, xmm8
+        pxor    xmm5, xmm8
+        pxor    xmm6, xmm8
+        pxor    xmm7, xmm8
+
+        i = 1  ; aes_dec_rnd and aes_dec_last_rnd are defined outside this block; presumably one round on xmm0..xmm7 each - confirm there
+        WHILE i LT rnds
+            aes_dec_rnd i
+            i = i+1
+            ENDM
+        aes_dec_last_rnd rnds
+
+        movdqu  xmm8, [-32 + ctx]  ; block 0 is XORed with the stored chaining block ...
+        pxor    xmm0, xmm8
+        movdqu  xmm8, [0*16 + input]  ; ... and block k with ciphertext block k-1, re-read from input
+        pxor    xmm1, xmm8
+        movdqu  xmm8, [1*16 + input]
+        pxor    xmm2, xmm8
+        movdqu  xmm8, [2*16 + input]
+        pxor    xmm3, xmm8
+        movdqu  xmm8, [3*16 + input]
+        pxor    xmm4, xmm8
+        movdqu  xmm8, [4*16 + input]
+        pxor    xmm5, xmm8
+        movdqu  xmm8, [5*16 + input]
+        pxor    xmm6, xmm8
+        movdqu  xmm8, [6*16 + input]
+        pxor    xmm7, xmm8
+        movdqu  xmm8, [7*16 + input]  ; last ciphertext block of this batch becomes the new chaining block
+
+        movdqu  [0*16 + output], xmm0  ; all reads from input are done before the first store to output
+        movdqu  [1*16 + output], xmm1
+        movdqu  [2*16 + output], xmm2
+        movdqu  [3*16 + output], xmm3
+        movdqu  [4*16 + output], xmm4
+        movdqu  [5*16 + output], xmm5
+        movdqu  [6*16 + output], xmm6
+        movdqu  [7*16 + output], xmm7
+        movdqu  [-32 + ctx], xmm8  ; persist the chaining block after every batch
+
+        lea input, [8*16 + input]
+        lea output, [8*16 + output]
+        sub inputLen, 8*16
+        jmp loop8
+dec1:
+; single-block path: xmm3 carries the chaining block from one pass to the next
+        movdqu  xmm3, [-32 + ctx]
+
+loop1:
+        cmp     inputLen, 1*16  ; stop when less than one whole block is left
+        jb      bail
+
+        movdqu  xmm0, [input]
+        movdqa  xmm4, xmm0  ; keep the ciphertext: it is the chaining block for the next pass
+        movdqu  xmm7, [0*16 + ctx]
+        pxor    xmm0, xmm7
+
+        i = 1  ; rounds 1..rnds-1, unrolled at assembly time
+    WHILE i LT rnds
+            movdqu  xmm7, [i*16 + ctx]
+            aesdec  xmm0, xmm7
+            i = i+1
+        ENDM
+        movdqu  xmm7, [rnds*16 + ctx]
+        aesdeclast xmm0, xmm7
+        pxor    xmm3, xmm0  ; plaintext = decrypted block xor chaining block
+
+        movdqu  [output], xmm3
+        movdqa  xmm3, xmm4
+
+        lea input, [1*16 + input]
+        lea output, [1*16 + output]
+        sub inputLen, 1*16
+        jmp loop1
+
+bail:
+        movdqu  [-32 + ctx], xmm3  ; write the chaining block back for a later call
+        xor rax, rax  ; return value 0
+
+        movdqu  xmm6, [rsp + 0*16]  ; restore the preserved registers
+        movdqu  xmm7, [rsp + 1*16]
+        movdqu  xmm8, [rsp + 2*16]
+        add     rsp, 3*16
+        ret
+ENDM
+
+intel_aes_encrypt_cbc_128 PROC  ; CBC encrypt, 10 rounds; the macro expansion is the whole body, including ret
+gen_aes_cbc_enc_func  10
+intel_aes_encrypt_cbc_128 ENDP
+
+intel_aes_encrypt_cbc_192 PROC  ; CBC encrypt, 12 rounds
+gen_aes_cbc_enc_func  12
+intel_aes_encrypt_cbc_192 ENDP
+
+intel_aes_encrypt_cbc_256 PROC  ; CBC encrypt, 14 rounds
+gen_aes_cbc_enc_func  14
+intel_aes_encrypt_cbc_256 ENDP
+
+intel_aes_decrypt_cbc_128 PROC  ; CBC decrypt, 10 rounds
+gen_aes_cbc_dec_func  10
+intel_aes_decrypt_cbc_128 ENDP
+
+intel_aes_decrypt_cbc_192 PROC  ; CBC decrypt, 12 rounds
+gen_aes_cbc_dec_func  12
+intel_aes_decrypt_cbc_192 ENDP
+
+intel_aes_decrypt_cbc_256 PROC  ; CBC decrypt, 14 rounds
+gen_aes_cbc_dec_func  14
+intel_aes_decrypt_cbc_256 ENDP
+
+
+
+ctrCtx textequ <r10>  ; pointer to the counter-mode context (copied from ctx on entry)
+CTR textequ <r11d>  ; scratch: byte-swapped, key-masked counter dword
+CTRSave textequ <eax>  ; running 32-bit counter in host byte order (eax is cleared again before ret)
+
+gen_aes_ctr_func MACRO rnds
+
+LOCAL   loop8
+LOCAL   loop1
+LOCAL   enc1
+LOCAL   bail
+; Body of a CTR routine with rnds AES rounds: eight blocks per pass while possible, then single blocks; rax = 0 on return.
+        mov     input,      [rsp + 8*1 + 4*8]  ; stack argument at return address + 4*8 (fifth argument)
+        mov     inputLen,   [rsp + 8*1 + 5*8]  ; the following stack argument (sixth argument)
+
+        mov     ctrCtx, ctx  ; the first argument is the counter context, not the AES context
+        mov     ctx, [8+ctrCtx]  ; pointer stored at offset 8 of the counter context, used below as the AES context
+        lea     ctx, [48+ctx]  ; ctx now points at round key 0
+
+        sub     rsp, 3*16  ; scratch space to preserve xmm6, xmm7 and xmm8
+        movdqu  [rsp + 0*16], xmm6
+        movdqu  [rsp + 1*16], xmm7
+        movdqu  [rsp + 2*16], xmm8
+
+; frame: rbp remembers rsp, then eight 16-byte aligned slots are carved out for prepared counter blocks
+        push    rbp
+        mov     rbp, rsp
+        sub     rsp, 8*16
+        and     rsp, -16  ; align so the movdqa stores below are legal
+
+
+        movdqu  xmm0, [16+ctrCtx]  ; current 16-byte counter block
+        mov     CTRSave, DWORD PTR [ctrCtx + 16 + 3*4]  ; its last dword ...
+        bswap   CTRSave  ; ... converted from big-endian to a host integer
+        movdqu  xmm1, [ctx + 0*16]
+
+        pxor    xmm0, xmm1  ; slots hold counter xor round key 0, so round 0 is already applied
+
+        movdqa  [rsp + 0*16], xmm0
+        movdqa  [rsp + 1*16], xmm0
+        movdqa  [rsp + 2*16], xmm0
+        movdqa  [rsp + 3*16], xmm0
+        movdqa  [rsp + 4*16], xmm0
+        movdqa  [rsp + 5*16], xmm0
+        movdqa  [rsp + 6*16], xmm0
+        movdqa  [rsp + 7*16], xmm0
+
+        inc     CTRSave  ; slot k (k = 1..7) gets counter + k: only the last dword is patched, so the increment wraps at 32 bits
+        mov     CTR, CTRSave
+        bswap   CTR
+        xor     CTR, DWORD PTR [ctx + 3*4]  ; re-apply the last dword of round key 0
+        mov     DWORD PTR [rsp + 1*16 + 3*4], CTR
+
+        inc     CTRSave
+        mov     CTR, CTRSave
+        bswap   CTR
+        xor     CTR, DWORD PTR [ctx + 3*4]
+        mov     DWORD PTR [rsp + 2*16 + 3*4], CTR
+
+        inc     CTRSave
+        mov     CTR, CTRSave
+        bswap   CTR
+        xor     CTR, DWORD PTR [ctx + 3*4]
+        mov     DWORD PTR [rsp + 3*16 + 3*4], CTR
+
+        inc     CTRSave
+        mov     CTR, CTRSave
+        bswap   CTR
+        xor     CTR, DWORD PTR [ctx + 3*4]
+        mov     DWORD PTR [rsp + 4*16 + 3*4], CTR
+
+        inc     CTRSave
+        mov     CTR, CTRSave
+        bswap   CTR
+        xor     CTR, DWORD PTR [ctx + 3*4]
+        mov     DWORD PTR [rsp + 5*16 + 3*4], CTR
+
+        inc     CTRSave
+        mov     CTR, CTRSave
+        bswap   CTR
+        xor     CTR, DWORD PTR [ctx + 3*4]
+        mov     DWORD PTR [rsp + 6*16 + 3*4], CTR
+
+        inc     CTRSave
+        mov     CTR, CTRSave
+        bswap   CTR
+        xor     CTR, DWORD PTR [ctx + 3*4]
+        mov     DWORD PTR [rsp + 7*16 + 3*4], CTR
+
+
+loop8:
+        cmp     inputLen, 8*16  ; fewer than eight blocks left: continue with the single-block path
+        jb      loop1
+
+        movdqu  xmm0, [0*16 + rsp]  ; load the eight prepared counter blocks
+        movdqu  xmm1, [1*16 + rsp]
+        movdqu  xmm2, [2*16 + rsp]
+        movdqu  xmm3, [3*16 + rsp]
+        movdqu  xmm4, [4*16 + rsp]
+        movdqu  xmm5, [5*16 + rsp]
+        movdqu  xmm6, [6*16 + rsp]
+        movdqu  xmm7, [7*16 + rsp]
+
+        i = 1  ; rounds 1..8 are interleaved with refreshing slot i-1 for the next batch
+        WHILE i LE 8
+            aes_rnd i
+
+            inc     CTRSave  ; next counter value, written over the slot that was just loaded
+            mov     CTR, CTRSave
+            bswap   CTR
+            xor     CTR, DWORD PTR [ctx + 3*4]
+            mov     DWORD PTR [rsp + (i-1)*16 + 3*4], CTR
+
+            i = i+1
+        ENDM
+        WHILE i LT rnds
+            aes_rnd i
+            i = i+1
+            ENDM
+        aes_last_rnd rnds
+
+        movdqu  xmm8, [0*16 + input]  ; key stream xor input gives the output
+        pxor    xmm0, xmm8
+        movdqu  xmm8, [1*16 + input]
+        pxor    xmm1, xmm8
+        movdqu  xmm8, [2*16 + input]
+        pxor    xmm2, xmm8
+        movdqu  xmm8, [3*16 + input]
+        pxor    xmm3, xmm8
+        movdqu  xmm8, [4*16 + input]
+        pxor    xmm4, xmm8
+        movdqu  xmm8, [5*16 + input]
+        pxor    xmm5, xmm8
+        movdqu  xmm8, [6*16 + input]
+        pxor    xmm6, xmm8
+        movdqu  xmm8, [7*16 + input]
+        pxor    xmm7, xmm8
+
+        movdqu  [0*16 + output], xmm0
+        movdqu  [1*16 + output], xmm1
+        movdqu  [2*16 + output], xmm2
+        movdqu  [3*16 + output], xmm3
+        movdqu  [4*16 + output], xmm4
+        movdqu  [5*16 + output], xmm5
+        movdqu  [6*16 + output], xmm6
+        movdqu  [7*16 + output], xmm7
+
+        lea input, [8*16 + input]
+        lea output, [8*16 + output]
+        sub inputLen, 8*16
+        jmp loop8
+
+
+loop1:
+        cmp     inputLen, 1*16  ; stop when less than one whole block is left
+        jb      bail
+
+        movdqu  xmm0, [rsp]  ; take the next prepared counter block ...
+        add     rsp, 16  ; ... and consume its slot (at most seven are used here, since inputLen is below 8*16)
+
+        i = 1  ; rounds 1..rnds-1, unrolled at assembly time
+    WHILE i LT rnds
+            movdqu  xmm7, [i*16 + ctx]
+            aesenc  xmm0, xmm7
+            i = i+1
+        ENDM
+        movdqu  xmm7, [rnds*16 + ctx]
+        aesenclast xmm0, xmm7
+
+        movdqu  xmm7, [input]
+        pxor    xmm0, xmm7
+        movdqu  [output], xmm0
+
+        lea input, [1*16 + input]
+        lea output, [1*16 + output]
+        sub inputLen, 1*16
+        jmp loop1
+
+bail:
+; the first unused slot holds the next counter xor round key 0: undo the XOR and store it back
+        movdqu  xmm0, [rsp]
+        movdqu  xmm1, [ctx + 0*16]
+        pxor    xmm0, xmm1
+        movdqu  [16+ctrCtx], xmm0
+
+
+        xor     rax, rax  ; return value 0 (also discards the counter kept in eax)
+        mov     rsp, rbp  ; drop the slot area regardless of how many slots were consumed
+        pop     rbp
+
+        movdqu  xmm6, [rsp + 0*16]  ; restore the preserved registers
+        movdqu  xmm7, [rsp + 1*16]
+        movdqu  xmm8, [rsp + 2*16]
+        add     rsp, 3*16
+
+        ret
+ENDM
+
+
+intel_aes_encrypt_ctr_128 PROC  ; CTR mode, 10 rounds; the macro expansion is the whole body, including ret
+gen_aes_ctr_func  10
+intel_aes_encrypt_ctr_128 ENDP
+
+intel_aes_encrypt_ctr_192 PROC  ; CTR mode, 12 rounds
+gen_aes_ctr_func  12
+intel_aes_encrypt_ctr_192 ENDP
+
+intel_aes_encrypt_ctr_256 PROC  ; CTR mode, 14 rounds
+gen_aes_ctr_func  14
+intel_aes_encrypt_ctr_256 ENDP
+
+
+END
diff --git a/security/nss/lib/freebl/intel-aes-x86-masm.asm b/security/nss/lib/freebl/intel-aes-x86-masm.asm
new file mode 100644
index 000000000..7d805e766
--- /dev/null
+++ b/security/nss/lib/freebl/intel-aes-x86-masm.asm
@@ -0,0 +1,949 @@
+; LICENSE:
+; This submission to NSS is to be made available under the terms of the
+; Mozilla Public License, v. 2.0. You can obtain one at
+; http://mozilla.org/MPL/2.0/.
+;###############################################################################
+; Copyright(c) 2014, Intel Corp.
+; Developers and authors:
+; Shay Gueron and Vlad Krasnov
+; Intel Corporation, Israel Development Centre, Haifa, Israel
+; Please send feedback directly to crypto.feedback.alias@intel.com
+
+
+.MODEL FLAT, C  ; 32-bit flat model, C language type for the procedures below
+.XMM  ; enable the SSE/XMM instruction mnemonics
+
+.DATA
+ALIGN 16  ; intel_aes_encrypt_init_128 loads these tables with movdqa, which requires 16-byte alignment
+Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh  ; pshufb control: every dword = source bytes 13,14,15,12 (top dword rotated by one byte)
+Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h  ; pshufb control: every dword = source bytes 5,6,7,4 (dword 1 rotated by one byte)
+Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh  ; same pattern as Lmask
+Lcon1 dd 1,1,1,1  ; initial round constant 01h in every dword; the expansion loops double it with pslld
+Lcon2 dd 1bh,1bh,1bh,1bh  ; round constant 1bh, loaded by the 128-bit expansion for its last two keys
+
+.CODE
+
+ctx     textequ <ecx>  ; AES context pointer, later advanced to round key 0
+output  textequ <edx>  ; destination pointer
+input   textequ <eax>  ; source pointer (eax is zeroed for the return value at the end)
+inputLen textequ <edi>  ; remaining byte count; edi is pushed and popped by every routine that uses it
+
+
+aes_rnd MACRO i
+    movdqu  xmm7, [i*16 + ctx]  ; xmm7 = round key number i (ctx must already point at round key 0)
+    aesenc  xmm0, xmm7  ; one middle encryption round on each of the seven blocks in xmm0..xmm6
+    aesenc  xmm1, xmm7
+    aesenc  xmm2, xmm7
+    aesenc  xmm3, xmm7
+    aesenc  xmm4, xmm7
+    aesenc  xmm5, xmm7
+    aesenc  xmm6, xmm7
+    ENDM
+
+aes_last_rnd MACRO i
+    movdqu  xmm7, [i*16 + ctx]  ; xmm7 = final round key (ctx must already point at round key 0)
+    aesenclast  xmm0, xmm7  ; final encryption round on each of the seven blocks in xmm0..xmm6
+    aesenclast  xmm1, xmm7
+    aesenclast  xmm2, xmm7
+    aesenclast  xmm3, xmm7
+    aesenclast  xmm4, xmm7
+    aesenclast  xmm5, xmm7
+    aesenclast  xmm6, xmm7
+    ENDM
+
+aes_dec_rnd MACRO i
+    movdqu  xmm7, [i*16 + ctx]  ; xmm7 = key number i of the decryption schedule (ctx must already point at key 0)
+    aesdec  xmm0, xmm7  ; one middle decryption round on each of the seven blocks in xmm0..xmm6
+    aesdec  xmm1, xmm7
+    aesdec  xmm2, xmm7
+    aesdec  xmm3, xmm7
+    aesdec  xmm4, xmm7
+    aesdec  xmm5, xmm7
+    aesdec  xmm6, xmm7
+    ENDM
+
+aes_dec_last_rnd MACRO i
+    movdqu  xmm7, [i*16 + ctx]  ; xmm7 = final key of the decryption schedule (ctx must already point at key 0)
+    aesdeclast  xmm0, xmm7  ; final decryption round on each of the seven blocks in xmm0..xmm6
+    aesdeclast  xmm1, xmm7
+    aesdeclast  xmm2, xmm7
+    aesdeclast  xmm3, xmm7
+    aesdeclast  xmm4, xmm7
+    aesdeclast  xmm5, xmm7
+    aesdeclast  xmm6, xmm7
+    ENDM
+
+
+gen_aes_ecb_func MACRO enc, rnds
+
+LOCAL   loop7
+LOCAL   loop1
+LOCAL   bail
+; Body of an ECB routine (enc = 1 encrypts, anything else decrypts) with rnds rounds: seven blocks per pass, then single blocks; eax = 0 on return.
+        push    inputLen  ; edi is preserved for the caller
+
+        mov     ctx,    [esp + 2*4 + 0*4]  ; stack arguments 0, 1, 4 and 5; arguments 2, 3 and 6 are never read
+        mov     output,     [esp + 2*4 + 1*4]
+        mov     input,      [esp + 2*4 + 4*4]
+        mov     inputLen,   [esp + 2*4 + 5*4]
+
+        lea     ctx, [44+ctx]  ; ctx now points at round key 0 (offset 44 of the context)
+
+loop7:
+        cmp     inputLen, 7*16  ; fewer than seven blocks left: continue with the single-block path
+        jb      loop1
+
+        movdqu  xmm0, [0*16 + input]  ; load seven blocks
+        movdqu  xmm1, [1*16 + input]
+        movdqu  xmm2, [2*16 + input]
+        movdqu  xmm3, [3*16 + input]
+        movdqu  xmm4, [4*16 + input]
+        movdqu  xmm5, [5*16 + input]
+        movdqu  xmm6, [6*16 + input]
+
+        movdqu  xmm7, [0*16 + ctx]  ; initial AddRoundKey with key 0
+        pxor    xmm0, xmm7
+        pxor    xmm1, xmm7
+        pxor    xmm2, xmm7
+        pxor    xmm3, xmm7
+        pxor    xmm4, xmm7
+        pxor    xmm5, xmm7
+        pxor    xmm6, xmm7
+; pick the seven-block round macros and the single-block instructions for the requested direction
+IF enc eq 1
+        rnd textequ <aes_rnd>
+        lastrnd textequ <aes_last_rnd>
+        aesinst textequ <aesenc>
+        aeslastinst textequ <aesenclast>
+ELSE
+        rnd textequ <aes_dec_rnd>
+        lastrnd textequ <aes_dec_last_rnd>
+        aesinst textequ <aesdec>
+        aeslastinst textequ <aesdeclast>
+ENDIF
+
+        i = 1  ; rounds 1..rnds-1, unrolled at assembly time
+        WHILE i LT rnds
+            rnd i
+            i = i+1
+            ENDM
+        lastrnd rnds
+
+        movdqu  [0*16 + output], xmm0
+        movdqu  [1*16 + output], xmm1
+        movdqu  [2*16 + output], xmm2
+        movdqu  [3*16 + output], xmm3
+        movdqu  [4*16 + output], xmm4
+        movdqu  [5*16 + output], xmm5
+        movdqu  [6*16 + output], xmm6
+
+        lea input, [7*16 + input]
+        lea output, [7*16 + output]
+        sub inputLen, 7*16
+        jmp loop7
+
+loop1:
+        cmp     inputLen, 1*16  ; stop when less than one whole block is left; trailing bytes are ignored
+        jb      bail
+
+        movdqu  xmm0, [input]
+        movdqu  xmm7, [0*16 + ctx]
+        pxor    xmm0, xmm7
+
+        i = 1  ; rounds 1..rnds-1 on the single block
+    WHILE i LT rnds
+            movdqu  xmm7, [i*16 + ctx]
+            aesinst  xmm0, xmm7
+            i = i+1
+        ENDM
+        movdqu  xmm7, [rnds*16 + ctx]
+        aeslastinst xmm0, xmm7
+
+        movdqu  [output], xmm0
+
+        lea input, [1*16 + input]
+        lea output, [1*16 + output]
+        sub inputLen, 1*16
+        jmp loop1
+
+bail:
+        xor eax, eax  ; return value 0
+        pop     inputLen
+        ret
+
+ENDM
+
+ALIGN 16
+intel_aes_encrypt_ecb_128 PROC  ; ECB encrypt, 10 rounds; the macro expansion is the whole body, including ret
+gen_aes_ecb_func 1, 10
+intel_aes_encrypt_ecb_128 ENDP
+
+ALIGN 16
+intel_aes_encrypt_ecb_192 PROC  ; ECB encrypt, 12 rounds
+gen_aes_ecb_func 1, 12
+intel_aes_encrypt_ecb_192 ENDP
+
+ALIGN 16
+intel_aes_encrypt_ecb_256 PROC  ; ECB encrypt, 14 rounds
+gen_aes_ecb_func 1, 14
+intel_aes_encrypt_ecb_256 ENDP
+
+ALIGN 16
+intel_aes_decrypt_ecb_128 PROC  ; ECB decrypt, 10 rounds
+gen_aes_ecb_func 0, 10
+intel_aes_decrypt_ecb_128 ENDP
+
+ALIGN 16
+intel_aes_decrypt_ecb_192 PROC  ; ECB decrypt, 12 rounds
+gen_aes_ecb_func 0, 12
+intel_aes_decrypt_ecb_192 ENDP
+
+ALIGN 16
+intel_aes_decrypt_ecb_256 PROC  ; ECB decrypt, 14 rounds
+gen_aes_ecb_func 0, 14
+intel_aes_decrypt_ecb_256 ENDP
+
+
+KEY textequ <ecx>  ; key pointer for the key-schedule routines (same register as ctx)
+KS  textequ <edx>  ; expanded-key write pointer (same register as output); the encrypt_init routines advance it
+ITR textequ <eax>  ; table address first, then loop counter (same register as input)
+
+ALIGN 16
+intel_aes_encrypt_init_128  PROC  ; AES-128 key expansion: 16-byte key in, 11 round keys (176 bytes) out
+
+    mov     KEY,        [esp + 1*4 + 0*4]  ; first stack argument: key
+    mov     KS,         [esp + 1*4 + 1*4]  ; second stack argument: expanded key buffer
+
+
+    movdqu  xmm1, [KEY]  ; xmm1 = running round key
+    movdqu  [KS], xmm1  ; round key 0 is the raw key
+    movdqa  xmm2, xmm1
+
+    lea ITR, Lcon1
+    movdqa  xmm0, [ITR]  ; round constant 01h (aligned load, relies on ALIGN 16 in .DATA)
+    lea ITR, Lmask
+    movdqa  xmm4, [ITR]  ; pshufb control: rotated top dword in every lane
+
+    mov ITR, 8  ; eight passes with constants 01h..80h give round keys 1..8
+
+Lenc_128_ks_loop:
+        lea KS, [16 + KS]  ; lea leaves the flags alone
+        dec ITR  ; sets ZF for the jne at the bottom; none of the instructions in between writes EFLAGS
+
+        pshufb  xmm2, xmm4
+        aesenclast  xmm2, xmm0  ; S-box step on the rotated word, then XOR with the round constant
+        pslld   xmm0, 1  ; double the round constant
+        movdqa  xmm3, xmm1
+        pslldq  xmm3, 4  ; three shift-by-one-dword + XOR steps leave dword k = d0 xor ... xor dk
+        pxor    xmm1, xmm3
+        pslldq  xmm3, 4
+        pxor    xmm1, xmm3
+        pslldq  xmm3, 4
+        pxor    xmm1, xmm3
+        pxor    xmm1, xmm2
+        movdqu  [KS], xmm1  ; store the new round key
+        movdqa  xmm2, xmm1
+
+        jne Lenc_128_ks_loop
+
+    lea ITR, Lcon2  ; doubling 80h would overflow the byte, so the next constant 1bh comes from a table
+    movdqa  xmm0, [ITR]
+
+    pshufb  xmm2, xmm4  ; round key 9, constant 1bh
+    aesenclast  xmm2, xmm0
+    pslld   xmm0, 1  ; 1bh becomes 36h for the last key
+    movdqa  xmm3, xmm1
+    pslldq  xmm3, 4
+    pxor    xmm1, xmm3
+    pslldq  xmm3, 4
+    pxor    xmm1, xmm3
+    pslldq  xmm3, 4
+    pxor    xmm1, xmm3
+    pxor    xmm1, xmm2
+    movdqu  [16 + KS], xmm1  ; KS still points at round key 8
+    movdqa  xmm2, xmm1
+
+    pshufb  xmm2, xmm4  ; round key 10, constant 36h
+    aesenclast  xmm2, xmm0
+    movdqa  xmm3, xmm1
+    pslldq  xmm3, 4
+    pxor    xmm1, xmm3
+    pslldq  xmm3, 4
+    pxor    xmm1, xmm3
+    pslldq  xmm3, 4
+    pxor    xmm1, xmm3
+    pxor    xmm1, xmm2
+    movdqu  [32 + KS], xmm1
+    movdqa  xmm2, xmm1  ; no effect on the result: xmm2 is not read again before ret
+
+    ret
+intel_aes_encrypt_init_128  ENDP
+
+
+ALIGN 16
+intel_aes_decrypt_init_128  PROC  ; builds the AES-128 decryption key schedule from the key
+
+    mov     KEY,        [esp + 1*4 + 0*4]  ; first stack argument: key
+    mov     KS,         [esp + 1*4 + 1*4]  ; second stack argument: expanded key buffer
+
+    push    KS  ; re-push both as stack arguments for the callee (last argument first)
+    push    KEY
+
+    call    intel_aes_encrypt_init_128  ; KS[0..10] = encryption round keys; the callee advances edx
+
+    pop     KEY  ; removes the arguments and restores the original pointers
+    pop     KS
+
+    movdqu  xmm0, [0*16 + KS]  ; swap round keys 0 and 10 as they are (no aesimc on the two outer keys)
+    movdqu  xmm1, [10*16 + KS]
+    movdqu  [10*16 + KS], xmm0
+    movdqu  [0*16 + KS], xmm1
+
+    i = 1  ; assembly-time loop: the body below is emitted for i = 1..4
+    WHILE i LT 5
+        movdqu  xmm0, [i*16 + KS]  ; swap round key i with round key 10-i ...
+        movdqu  xmm1, [(10-i)*16 + KS]
+
+        aesimc  xmm0, xmm0  ; ... applying InvMixColumns to both, as aesdec expects
+        aesimc  xmm1, xmm1
+
+        movdqu  [(10-i)*16 + KS], xmm0
+        movdqu  [i*16 + KS], xmm1
+
+        i = i+1
+    ENDM
+
+    movdqu  xmm0, [5*16 + KS]  ; the middle key stays in place and is only transformed
+    aesimc  xmm0, xmm0
+    movdqu  [5*16 + KS], xmm0
+    ret
+intel_aes_decrypt_init_128  ENDP
+
+
+ALIGN 16
+intel_aes_encrypt_init_192  PROC  ; AES-192 key expansion: 24-byte key in, round keys 0..12 out, plus one extra 16-byte store (see the end)
+
+    mov     KEY, [esp + 1*4 + 0*4]  ; first stack argument: key
+    mov     KS,  [esp + 1*4 + 1*4]  ; second stack argument: expanded key buffer
+
+    pxor    xmm3, xmm3
+    movdqu  xmm1, [KEY]  ; xmm1 = key words 0..3
+    pinsrd  xmm3, DWORD PTR [16 + KEY], 0  ; xmm3 = key words 4 and 5 in the low half, zero above
+    pinsrd  xmm3, DWORD PTR [20 + KEY], 1
+
+    movdqu  [KS], xmm1  ; round key 0
+    movdqa  xmm5, xmm3  ; xmm5 carries the two words that are not written out yet
+
+    lea ITR, Lcon1
+    movdqu  xmm0, [ITR]  ; round constant
+    lea ITR, Lmask192
+    movdqu  xmm4, [ITR]  ; pshufb control: rotated dword 1 of xmm3 in every lane
+
+    mov ITR, 4  ; four passes, each producing twelve words = three round keys
+
+Lenc_192_ks_loop:
+        movdqa  xmm2, xmm3  ; --- first six-word step ---
+        pshufb  xmm2, xmm4
+        aesenclast xmm2, xmm0  ; S-box step on the rotated word, then XOR with the round constant
+        pslld   xmm0, 1  ; double the round constant
+
+        movdqa  xmm6, xmm1
+        movdqa  xmm7, xmm3
+        pslldq  xmm6, 4
+        pslldq  xmm7, 4
+        pxor    xmm1, xmm6  ; start of the prefix-XOR over the four dwords of xmm1
+        pxor    xmm3, xmm7  ; folds dword 0 of xmm3 into dword 1 (only its low two dwords are used later)
+        pslldq  xmm6, 4
+        pxor    xmm1, xmm6
+        pslldq  xmm6, 4
+        pxor    xmm1, xmm6
+        pxor    xmm1, xmm2  ; xmm1 = next four schedule words
+        pshufd  xmm2, xmm1, 0ffh  ; broadcast the newest word
+        pxor    xmm3, xmm2  ; low half of xmm3 = the two schedule words after those
+
+        movdqa  xmm6, xmm1
+        shufpd  xmm5, xmm1, 00h  ; xmm5 = (two pending words, low half of xmm1)
+        shufpd  xmm6, xmm3, 01h  ; xmm6 = (high half of xmm1, low half of xmm3)
+
+        movdqu  [16 + KS], xmm5  ; two round keys assembled from the six new words plus the pending two
+        movdqu  [32 + KS], xmm6
+
+        movdqa  xmm2, xmm3  ; --- second six-word step, same arithmetic ---
+        pshufb  xmm2, xmm4
+        aesenclast  xmm2, xmm0
+        pslld   xmm0, 1
+
+        movdqa  xmm6, xmm1
+        movdqa  xmm7, xmm3
+        pslldq  xmm6, 4
+        pslldq  xmm7, 4
+        pxor    xmm1, xmm6
+        pxor    xmm3, xmm7
+        pslldq  xmm6, 4
+        pxor    xmm1, xmm6
+        pslldq  xmm6, 4
+        pxor    xmm1, xmm6
+        pxor    xmm1, xmm2
+        pshufd  xmm2, xmm1, 0ffh
+        pxor    xmm3, xmm2
+
+        movdqu  [48 + KS], xmm1  ; third round key of this pass is xmm1 as a whole
+        movdqa  xmm5, xmm3  ; low half = two words pending for the next pass
+
+        lea KS, [48 + KS]  ; advance by three round keys
+
+        dec ITR
+        jnz Lenc_192_ks_loop
+
+    movdqu  [16 + KS], xmm5  ; writes 16 more bytes at offset 208, past round key 12; NOTE(review): the buffer must hold at least 224 bytes
+ret
+intel_aes_encrypt_init_192  ENDP
+
+ALIGN 16
+intel_aes_decrypt_init_192  PROC  ; builds the AES-192 decryption key schedule from the key
+    mov     KEY,        [esp + 1*4 + 0*4]  ; first stack argument: key
+    mov     KS,         [esp + 1*4 + 1*4]  ; second stack argument: expanded key buffer
+
+    push    KS  ; re-push both as stack arguments for the callee (last argument first)
+    push    KEY
+
+    call    intel_aes_encrypt_init_192  ; KS[0..12] = encryption round keys; the callee advances edx
+
+    pop     KEY  ; removes the arguments and restores the original pointers
+    pop     KS
+
+    movdqu  xmm0, [0*16 + KS]  ; swap round keys 0 and 12 as they are (no aesimc on the two outer keys)
+    movdqu  xmm1, [12*16 + KS]
+    movdqu  [12*16 + KS], xmm0
+    movdqu  [0*16 + KS], xmm1
+
+    i = 1  ; assembly-time loop: the body below is emitted for i = 1..5
+    WHILE i LT 6
+        movdqu  xmm0, [i*16 + KS]  ; swap round key i with round key 12-i ...
+        movdqu  xmm1, [(12-i)*16 + KS]
+
+        aesimc  xmm0, xmm0  ; ... applying InvMixColumns to both, as aesdec expects
+        aesimc  xmm1, xmm1
+
+        movdqu  [(12-i)*16 + KS], xmm0
+        movdqu  [i*16 + KS], xmm1
+
+        i = i+1
+    ENDM
+
+    movdqu  xmm0, [6*16 + KS]  ; the middle key stays in place and is only transformed
+    aesimc  xmm0, xmm0
+    movdqu  [6*16 + KS], xmm0
+    ret
+intel_aes_decrypt_init_192  ENDP
+
+ALIGN 16
+intel_aes_encrypt_init_256  PROC  ; AES-256 key expansion: 32-byte key in, 15 round keys (240 bytes) out
+
+    mov     KEY,    [esp + 1*4 + 0*4]  ; first stack argument: key
+    mov     KS,     [esp + 1*4 + 1*4]  ; second stack argument: expanded key buffer
+    movdqu  xmm1, [16*0 + KEY]  ; xmm1 = key bytes 0..15, afterwards the running even-numbered round key
+    movdqu  xmm3, [16*1 + KEY]  ; xmm3 = key bytes 16..31, afterwards the running odd-numbered round key
+
+    movdqu  [16*0 + KS], xmm1  ; round keys 0 and 1 are the raw key
+    movdqu  [16*1 + KS], xmm3
+
+    lea ITR, Lcon1
+    movdqu  xmm0, [ITR]  ; round constant 01h, doubled after each use in the loop
+    lea ITR, Lmask256
+    movdqu  xmm5, [ITR]  ; pshufb control: rotated top dword in every lane
+
+    pxor    xmm6, xmm6  ; xmm6 = 0: a null key so the second aesenclast adds no constant
+
+    mov ITR, 6  ; six passes, each emitting two round keys (keys 2..13)
+
+Lenc_256_ks_loop:
+; even-numbered key: prefix-XOR of the four dwords of xmm1, XORed with a transformed copy of xmm3
+        movdqa  xmm2, xmm3
+        pshufb  xmm2, xmm5
+        aesenclast  xmm2, xmm0  ; S-box step on the rotated word, then XOR with the round constant
+        pslld   xmm0, 1  ; double the round constant
+        movdqa  xmm4, xmm1
+        pslldq  xmm4, 4  ; three shift-by-one-dword + XOR steps leave dword k = d0 xor ... xor dk
+        pxor    xmm1, xmm4
+        pslldq  xmm4, 4
+        pxor    xmm1, xmm4
+        pslldq  xmm4, 4
+        pxor    xmm1, xmm4
+        pxor    xmm1, xmm2
+        movdqu  [16*2 + KS], xmm1  ; store the new even-numbered round key
+; odd-numbered key: same prefix-XOR on xmm3, keyed by the newest dword of xmm1
+        pshufd  xmm2, xmm1, 0ffh  ; broadcast the top dword of the key just produced
+        aesenclast  xmm2, xmm6  ; S-box step only: no rotation and, with xmm6 = 0, no constant
+        movdqa  xmm4, xmm3
+        pslldq  xmm4, 4
+        pxor    xmm3, xmm4
+        pslldq  xmm4, 4
+        pxor    xmm3, xmm4
+        pslldq  xmm4, 4
+        pxor    xmm3, xmm4
+        pxor    xmm3, xmm2
+        movdqu  [16*3 + KS], xmm3  ; store the new odd-numbered round key
+
+        lea KS, [32 + KS]  ; advance the output pointer by two round keys
+        dec ITR
+        jnz Lenc_256_ks_loop
+; final half pass: one more even-numbered key (round key 14); the constant is not shifted again
+    movdqa  xmm2, xmm3
+    pshufb  xmm2, xmm5
+    aesenclast  xmm2, xmm0
+    movdqa  xmm4, xmm1
+    pslldq  xmm4, 4
+    pxor    xmm1, xmm4
+    pslldq  xmm4, 4
+    pxor    xmm1, xmm4
+    pslldq  xmm4, 4
+    pxor    xmm1, xmm4
+    pxor    xmm1, xmm2
+    movdqu  [16*2 + KS], xmm1  ; KS has advanced by 6*32 bytes, so this lands at offset 224 of the original buffer
+
+    ret
+intel_aes_encrypt_init_256  ENDP
+
+ALIGN 16
+intel_aes_decrypt_init_256  PROC  ; builds the AES-256 decryption key schedule from the key
+    mov     KEY,        [esp + 1*4 + 0*4]  ; first stack argument: key
+    mov     KS,         [esp + 1*4 + 1*4]  ; second stack argument: expanded key buffer
+
+    push    KS  ; re-push both as stack arguments for the callee (last argument first)
+    push    KEY
+
+    call    intel_aes_encrypt_init_256  ; KS[0..14] = encryption round keys; the callee advances edx
+
+    pop     KEY  ; removes the arguments and restores the original pointers
+    pop     KS
+
+    movdqu  xmm0, [0*16 + KS]  ; swap round keys 0 and 14 as they are (no aesimc on the two outer keys)
+    movdqu  xmm1, [14*16 + KS]
+    movdqu  [14*16 + KS], xmm0
+    movdqu  [0*16 + KS], xmm1
+
+    i = 1  ; assembly-time loop: the body below is emitted for i = 1..6
+    WHILE i LT 7
+        movdqu  xmm0, [i*16 + KS]  ; swap round key i with round key 14-i ...
+        movdqu  xmm1, [(14-i)*16 + KS]
+
+        aesimc  xmm0, xmm0  ; ... applying InvMixColumns to both, as aesdec expects
+        aesimc  xmm1, xmm1
+
+        movdqu  [(14-i)*16 + KS], xmm0
+        movdqu  [i*16 + KS], xmm1
+
+        i = i+1
+    ENDM
+
+    movdqu  xmm0, [7*16 + KS]  ; the middle key stays in place and is only transformed
+    aesimc  xmm0, xmm0
+    movdqu  [7*16 + KS], xmm0
+    ret
+intel_aes_decrypt_init_256  ENDP
+
+
+
+gen_aes_cbc_enc_func MACRO rnds
+
+LOCAL   loop1
+LOCAL   bail
+; Body of a CBC encryption routine with rnds AES rounds: one 16-byte block per pass, eax = 0 on return.
+        push    inputLen  ; edi is preserved for the caller
+
+        mov     ctx,    [esp + 2*4 + 0*4]  ; stack arguments 0, 1, 4 and 5; arguments 2, 3 and 6 are never read
+        mov     output,     [esp + 2*4 + 1*4]
+        mov     input,      [esp + 2*4 + 4*4]
+        mov     inputLen,   [esp + 2*4 + 5*4]
+
+        lea     ctx, [44+ctx]  ; ctx now points at round key 0; the chaining block sits 32 bytes below it
+
+        movdqu  xmm0, [-32+ctx]  ; xmm0 = chaining block (IV or previous ciphertext)
+
+        movdqu  xmm2, [0*16 + ctx]  ; keep round keys 0..4 in xmm2..xmm6 for the whole loop
+        movdqu  xmm3, [1*16 + ctx]
+        movdqu  xmm4, [2*16 + ctx]
+        movdqu  xmm5, [3*16 + ctx]
+        movdqu  xmm6, [4*16 + ctx]
+
+loop1:
+        cmp     inputLen, 1*16  ; stop when less than one whole block is left
+        jb      bail
+
+        movdqu  xmm1, [input]  ; plaintext block
+        pxor    xmm1, xmm2  ; fold in round key 0
+        pxor    xmm0, xmm1  ; xmm0 = chaining block xor plaintext xor round key 0
+
+        aesenc  xmm0, xmm3  ; rounds 1..4 use the cached keys
+        aesenc  xmm0, xmm4
+        aesenc  xmm0, xmm5
+        aesenc  xmm0, xmm6
+
+        i = 5  ; remaining middle rounds load their key through xmm7 (unrolled at assembly time)
+    WHILE i LT rnds
+            movdqu  xmm7, [i*16 + ctx]
+            aesenc  xmm0, xmm7
+            i = i+1
+        ENDM
+        movdqu  xmm7, [rnds*16 + ctx]
+        aesenclast xmm0, xmm7  ; xmm0 = ciphertext, which is also the next chaining block
+
+        movdqu  [output], xmm0
+
+        lea input, [1*16 + input]
+        lea output, [1*16 + output]
+        sub inputLen, 1*16
+        jmp loop1
+
+bail:
+        movdqu  [-32+ctx], xmm0  ; write the chaining block back so a later call continues the chain
+
+        xor eax, eax  ; return value 0
+        pop inputLen
+        ret
+
+ENDM
+
+gen_aes_cbc_dec_func MACRO rnds
+
+LOCAL   loop7
+LOCAL   loop1
+LOCAL   dec1
+LOCAL   bail
+; Body of a CBC decryption routine with rnds AES rounds: seven blocks per pass while possible, then single blocks; eax = 0 on return.
+        push    inputLen  ; edi is preserved for the caller
+
+        mov     ctx,    [esp + 2*4 + 0*4]  ; stack arguments 0, 1, 4 and 5; arguments 2, 3 and 6 are never read
+        mov     output,     [esp + 2*4 + 1*4]
+        mov     input,      [esp + 2*4 + 4*4]
+        mov     inputLen,   [esp + 2*4 + 5*4]
+
+        lea     ctx, [44+ctx]  ; ctx now points at round key 0; the chaining block sits 32 bytes below it
+
+loop7:
+        cmp     inputLen, 7*16  ; fewer than seven blocks left: continue with the single-block path
+        jb      dec1
+
+        movdqu  xmm0, [0*16 + input]  ; load seven ciphertext blocks
+        movdqu  xmm1, [1*16 + input]
+        movdqu  xmm2, [2*16 + input]
+        movdqu  xmm3, [3*16 + input]
+        movdqu  xmm4, [4*16 + input]
+        movdqu  xmm5, [5*16 + input]
+        movdqu  xmm6, [6*16 + input]
+
+        movdqu  xmm7, [0*16 + ctx]  ; initial AddRoundKey with key 0 of the decryption schedule
+        pxor    xmm0, xmm7
+        pxor    xmm1, xmm7
+        pxor    xmm2, xmm7
+        pxor    xmm3, xmm7
+        pxor    xmm4, xmm7
+        pxor    xmm5, xmm7
+        pxor    xmm6, xmm7
+
+        i = 1  ; rounds 1..rnds-1 on all seven blocks, unrolled at assembly time
+        WHILE i LT rnds
+            aes_dec_rnd i
+            i = i+1
+            ENDM
+        aes_dec_last_rnd rnds
+
+        movdqu  xmm7, [-32 + ctx]  ; block 0 is XORed with the stored chaining block ...
+        pxor    xmm0, xmm7
+        movdqu  xmm7, [0*16 + input]  ; ... and block k with ciphertext block k-1, re-read from input
+        pxor    xmm1, xmm7
+        movdqu  xmm7, [1*16 + input]
+        pxor    xmm2, xmm7
+        movdqu  xmm7, [2*16 + input]
+        pxor    xmm3, xmm7
+        movdqu  xmm7, [3*16 + input]
+        pxor    xmm4, xmm7
+        movdqu  xmm7, [4*16 + input]
+        pxor    xmm5, xmm7
+        movdqu  xmm7, [5*16 + input]
+        pxor    xmm6, xmm7
+        movdqu  xmm7, [6*16 + input]  ; last ciphertext block of this batch becomes the new chaining block
+
+        movdqu  [0*16 + output], xmm0  ; all reads from input are done before the first store to output
+        movdqu  [1*16 + output], xmm1
+        movdqu  [2*16 + output], xmm2
+        movdqu  [3*16 + output], xmm3
+        movdqu  [4*16 + output], xmm4
+        movdqu  [5*16 + output], xmm5
+        movdqu  [6*16 + output], xmm6
+        movdqu  [-32 + ctx], xmm7  ; persist the chaining block after every batch
+
+        lea input, [7*16 + input]
+        lea output, [7*16 + output]
+        sub inputLen, 7*16
+        jmp loop7
+dec1:
+; single-block path: xmm3 carries the chaining block from one pass to the next
+        movdqu  xmm3, [-32 + ctx]
+
+loop1:
+        cmp     inputLen, 1*16  ; stop when less than one whole block is left
+        jb      bail
+
+        movdqu  xmm0, [input]
+        movdqa  xmm4, xmm0  ; keep the ciphertext: it is the chaining block for the next pass
+        movdqu  xmm7, [0*16 + ctx]
+        pxor    xmm0, xmm7
+
+        i = 1  ; rounds 1..rnds-1 on the single block
+    WHILE i LT rnds
+            movdqu  xmm7, [i*16 + ctx]
+            aesdec  xmm0, xmm7
+            i = i+1
+        ENDM
+        movdqu  xmm7, [rnds*16 + ctx]
+        aesdeclast xmm0, xmm7
+        pxor    xmm3, xmm0  ; plaintext = decrypted block xor chaining block
+
+        movdqu  [output], xmm3
+        movdqa  xmm3, xmm4
+
+        lea input, [1*16 + input]
+        lea output, [1*16 + output]
+        sub inputLen, 1*16
+        jmp loop1
+
+bail:
+        movdqu  [-32 + ctx], xmm3  ; write the chaining block back for a later call
+        xor eax, eax  ; return value 0
+        pop     inputLen
+        ret
+ENDM
+
+ALIGN 16
+intel_aes_encrypt_cbc_128 PROC  ; CBC encrypt, 10 rounds; the macro expansion is the whole body, including ret
+gen_aes_cbc_enc_func  10
+intel_aes_encrypt_cbc_128 ENDP
+
+ALIGN 16
+intel_aes_encrypt_cbc_192 PROC  ; CBC encrypt, 12 rounds
+gen_aes_cbc_enc_func  12
+intel_aes_encrypt_cbc_192 ENDP
+
+ALIGN 16
+intel_aes_encrypt_cbc_256 PROC  ; CBC encrypt, 14 rounds
+gen_aes_cbc_enc_func  14
+intel_aes_encrypt_cbc_256 ENDP
+
+ALIGN 16
+intel_aes_decrypt_cbc_128 PROC  ; CBC decrypt, 10 rounds
+gen_aes_cbc_dec_func  10
+intel_aes_decrypt_cbc_128 ENDP
+
+ALIGN 16
+intel_aes_decrypt_cbc_192 PROC  ; CBC decrypt, 12 rounds
+gen_aes_cbc_dec_func  12
+intel_aes_decrypt_cbc_192 ENDP
+
+ALIGN 16
+intel_aes_decrypt_cbc_256 PROC  ; CBC decrypt, 14 rounds
+gen_aes_cbc_dec_func  14
+intel_aes_decrypt_cbc_256 ENDP
+
+
+
+ctrCtx textequ <esi>  ; counter-context pointer on entry and at exit; holds the running 32-bit counter in between
+CTR textequ <ebx>  ; scratch: byte-swapped, key-masked counter dword
+
+gen_aes_ctr_func MACRO rnds
+
+LOCAL   loop7
+LOCAL   loop1
+LOCAL   enc1
+LOCAL   bail
+; Body of a CTR routine with rnds AES rounds: seven blocks per pass while possible, then single blocks; eax = 0 on return.
+        push    inputLen  ; edi, esi, ebx and ebp are preserved for the caller
+        push    ctrCtx
+        push    CTR
+        push    ebp
+
+        mov     ctrCtx, [esp + 4*5 + 0*4]  ; stack arguments 0, 1, 4 and 5 (four pushes + return address = 4*5)
+        mov     output, [esp + 4*5 + 1*4]
+        mov     input,  [esp + 4*5 + 4*4]
+        mov     inputLen, [esp + 4*5 + 5*4]
+
+        mov     ctx, [4+ctrCtx]  ; pointer stored at offset 4 of the counter context, used below as the AES context
+        lea     ctx, [44+ctx]  ; ctx now points at round key 0
+
+        mov     ebp, esp  ; frame: ebp remembers esp, then seven aligned 16-byte slots are carved out
+        sub     esp, 7*16
+        and     esp, -16  ; align so the movdqa stores below are legal
+
+        movdqu  xmm0, [8+ctrCtx]  ; current 16-byte counter block
+        mov     ctrCtx, [ctrCtx + 8 + 3*4]  ; esi stops being a pointer here: it now holds the last counter dword ...
+        bswap   ctrCtx  ; ... converted from big-endian to a host integer
+        movdqu  xmm1, [ctx + 0*16]
+
+        pxor    xmm0, xmm1  ; slots hold counter xor round key 0, so round 0 is already applied
+
+        movdqa  [esp + 0*16], xmm0
+        movdqa  [esp + 1*16], xmm0
+        movdqa  [esp + 2*16], xmm0
+        movdqa  [esp + 3*16], xmm0
+        movdqa  [esp + 4*16], xmm0
+        movdqa  [esp + 5*16], xmm0
+        movdqa  [esp + 6*16], xmm0
+
+        inc     ctrCtx  ; slot k (k = 1..6) gets counter + k: only the last dword is patched, so the increment wraps at 32 bits
+        mov     CTR, ctrCtx
+        bswap   CTR
+        xor     CTR, [ctx + 3*4]  ; re-apply the last dword of round key 0
+        mov     [esp + 1*16 + 3*4], CTR
+
+        inc     ctrCtx
+        mov     CTR, ctrCtx
+        bswap   CTR
+        xor     CTR, [ctx + 3*4]
+        mov     [esp + 2*16 + 3*4], CTR
+
+        inc     ctrCtx
+        mov     CTR, ctrCtx
+        bswap   CTR
+        xor     CTR, [ctx + 3*4]
+        mov     [esp + 3*16 + 3*4], CTR
+
+        inc     ctrCtx
+        mov     CTR, ctrCtx
+        bswap   CTR
+        xor     CTR, [ctx + 3*4]
+        mov     [esp + 4*16 + 3*4], CTR
+
+        inc     ctrCtx
+        mov     CTR, ctrCtx
+        bswap   CTR
+        xor     CTR, [ctx + 3*4]
+        mov     [esp + 5*16 + 3*4], CTR
+
+        inc     ctrCtx
+        mov     CTR, ctrCtx
+        bswap   CTR
+        xor     CTR, [ctx + 3*4]
+        mov     [esp + 6*16 + 3*4], CTR
+
+
+loop7:
+        cmp     inputLen, 7*16  ; fewer than seven blocks left: continue with the single-block path
+        jb      loop1
+
+        movdqu  xmm0, [0*16 + esp]  ; load the seven prepared counter blocks
+        movdqu  xmm1, [1*16 + esp]
+        movdqu  xmm2, [2*16 + esp]
+        movdqu  xmm3, [3*16 + esp]
+        movdqu  xmm4, [4*16 + esp]
+        movdqu  xmm5, [5*16 + esp]
+        movdqu  xmm6, [6*16 + esp]
+
+        i = 1  ; rounds 1..7 are interleaved with refreshing slot i-1 for the next batch
+        WHILE i LE 7
+            aes_rnd i
+
+            inc     ctrCtx  ; next counter value, written over the slot that was just loaded
+            mov     CTR, ctrCtx
+            bswap   CTR
+            xor     CTR, [ctx + 3*4]
+            mov     [esp + (i-1)*16 + 3*4], CTR
+
+            i = i+1
+        ENDM
+        WHILE i LT rnds
+            aes_rnd i
+            i = i+1
+            ENDM
+        aes_last_rnd rnds
+
+        movdqu  xmm7, [0*16 + input]  ; key stream xor input gives the output
+        pxor    xmm0, xmm7
+        movdqu  xmm7, [1*16 + input]
+        pxor    xmm1, xmm7
+        movdqu  xmm7, [2*16 + input]
+        pxor    xmm2, xmm7
+        movdqu  xmm7, [3*16 + input]
+        pxor    xmm3, xmm7
+        movdqu  xmm7, [4*16 + input]
+        pxor    xmm4, xmm7
+        movdqu  xmm7, [5*16 + input]
+        pxor    xmm5, xmm7
+        movdqu  xmm7, [6*16 + input]
+        pxor    xmm6, xmm7
+
+        movdqu  [0*16 + output], xmm0
+        movdqu  [1*16 + output], xmm1
+        movdqu  [2*16 + output], xmm2
+        movdqu  [3*16 + output], xmm3
+        movdqu  [4*16 + output], xmm4
+        movdqu  [5*16 + output], xmm5
+        movdqu  [6*16 + output], xmm6
+
+        lea input, [7*16 + input]
+        lea output, [7*16 + output]
+        sub inputLen, 7*16
+        jmp loop7
+
+
+loop1:
+        cmp     inputLen, 1*16  ; stop when less than one whole block is left
+        jb      bail
+
+        movdqu  xmm0, [esp]  ; take the next prepared counter block ...
+        add     esp, 16  ; ... and consume its slot (at most six are used here, since inputLen is below 7*16)
+
+        i = 1  ; rounds 1..rnds-1, unrolled at assembly time
+    WHILE i LT rnds
+            movdqu  xmm7, [i*16 + ctx]
+            aesenc  xmm0, xmm7
+            i = i+1
+        ENDM
+        movdqu  xmm7, [rnds*16 + ctx]
+        aesenclast xmm0, xmm7
+
+        movdqu  xmm7, [input]
+        pxor    xmm0, xmm7
+        movdqu  [output], xmm0
+
+        lea input, [1*16 + input]
+        lea output, [1*16 + output]
+        sub inputLen, 1*16
+        jmp loop1
+
+bail:
+; the first unused slot holds the next counter xor round key 0: undo the XOR and store it back
+        mov     ctrCtx, [ebp + 4*5 + 0*4]  ; reload the counter-context pointer (esi was reused as the counter)
+        movdqu  xmm0, [esp]
+        movdqu  xmm1, [ctx + 0*16]
+        pxor    xmm0, xmm1
+        movdqu  [8+ctrCtx], xmm0
+
+
+        xor     eax, eax  ; return value 0
+        mov     esp, ebp  ; drop the slot area regardless of how many slots were consumed
+        pop     ebp
+        pop     CTR
+        pop     ctrCtx
+        pop     inputLen
+        ret
+ENDM
+
+
+ALIGN 16
+intel_aes_encrypt_ctr_128 PROC  ; CTR mode, 10 rounds; the macro expansion is the whole body, including ret
+gen_aes_ctr_func  10
+intel_aes_encrypt_ctr_128 ENDP
+
+ALIGN 16
+intel_aes_encrypt_ctr_192 PROC  ; CTR mode, 12 rounds
+gen_aes_ctr_func  12
+intel_aes_encrypt_ctr_192 ENDP
+
+ALIGN 16
+intel_aes_encrypt_ctr_256 PROC  ; CTR mode, 14 rounds
+gen_aes_ctr_func  14
+intel_aes_encrypt_ctr_256 ENDP
+
+
+END
diff --git a/security/nss/lib/freebl/intel-aes.h b/security/nss/lib/freebl/intel-aes.h
new file mode 100644
index 000000000..d5bd2d8ca
--- /dev/null
+++ b/security/nss/lib/freebl/intel-aes.h
@@ -0,0 +1,143 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Prototypes of the AES-NI routines implemented in assembly (e.g. intel-aes.s). */
+void intel_aes_encrypt_init_128(const unsigned char *key, PRUint32 *expanded);
+void intel_aes_encrypt_init_192(const unsigned char *key, PRUint32 *expanded);
+void intel_aes_encrypt_init_256(const unsigned char *key, PRUint32 *expanded);
+void intel_aes_decrypt_init_128(const unsigned char *key, PRUint32 *expanded);
+void intel_aes_decrypt_init_192(const unsigned char *key, PRUint32 *expanded);
+void intel_aes_decrypt_init_256(const unsigned char *key, PRUint32 *expanded);
+SECStatus intel_aes_encrypt_ecb_128(AESContext *cx, unsigned char *output,
+                                    unsigned int *outputLen,   /* intel-aes.s: already filled in by caller */
+                                    unsigned int maxOutputLen, /* intel-aes.s: already checked by caller */
+                                    const unsigned char *input,
+                                    unsigned int inputLen,
+                                    unsigned int blocksize);   /* intel-aes.s: always 16, unused */
+SECStatus intel_aes_decrypt_ecb_128(AESContext *cx, unsigned char *output,
+                                    unsigned int *outputLen,
+                                    unsigned int maxOutputLen,
+                                    const unsigned char *input,
+                                    unsigned int inputLen,
+                                    unsigned int blocksize);
+SECStatus intel_aes_encrypt_cbc_128(AESContext *cx, unsigned char *output,
+                                    unsigned int *outputLen,
+                                    unsigned int maxOutputLen,
+                                    const unsigned char *input,
+                                    unsigned int inputLen,
+                                    unsigned int blocksize);
+SECStatus intel_aes_decrypt_cbc_128(AESContext *cx, unsigned char *output,
+                                    unsigned int *outputLen,
+                                    unsigned int maxOutputLen,
+                                    const unsigned char *input,
+                                    unsigned int inputLen,
+                                    unsigned int blocksize);
+SECStatus intel_aes_encrypt_ctr_128(CTRContext *cx, unsigned char *output,
+                                    unsigned int *outputLen,
+                                    unsigned int maxOutputLen,
+                                    const unsigned char *input,
+                                    unsigned int inputLen,
+                                    unsigned int blocksize);
+SECStatus intel_aes_encrypt_ecb_192(AESContext *cx, unsigned char *output,
+                                    unsigned int *outputLen,
+                                    unsigned int maxOutputLen,
+                                    const unsigned char *input,
+                                    unsigned int inputLen,
+                                    unsigned int blocksize);
+SECStatus intel_aes_decrypt_ecb_192(AESContext *cx, unsigned char *output,
+                                    unsigned int *outputLen,
+                                    unsigned int maxOutputLen,
+                                    const unsigned char *input,
+                                    unsigned int inputLen,
+                                    unsigned int blocksize);
+SECStatus intel_aes_encrypt_cbc_192(AESContext *cx, unsigned char *output,
+                                    unsigned int *outputLen,
+                                    unsigned int maxOutputLen,
+                                    const unsigned char *input,
+                                    unsigned int inputLen,
+                                    unsigned int blocksize);
+SECStatus intel_aes_decrypt_cbc_192(AESContext *cx, unsigned char *output,
+                                    unsigned int *outputLen,
+                                    unsigned int maxOutputLen,
+                                    const unsigned char *input,
+                                    unsigned int inputLen,
+                                    unsigned int blocksize);
+SECStatus intel_aes_encrypt_ctr_192(CTRContext *cx, unsigned char *output,
+                                    unsigned int *outputLen,
+                                    unsigned int maxOutputLen,
+                                    const unsigned char *input,
+                                    unsigned int inputLen,
+                                    unsigned int blocksize);
+SECStatus intel_aes_encrypt_ecb_256(AESContext *cx, unsigned char *output,
+                                    unsigned int *outputLen,
+                                    unsigned int maxOutputLen,
+                                    const unsigned char *input,
+                                    unsigned int inputLen,
+                                    unsigned int blocksize);
+SECStatus intel_aes_decrypt_ecb_256(AESContext *cx, unsigned char *output,
+                                    unsigned int *outputLen,
+                                    unsigned int maxOutputLen,
+                                    const unsigned char *input,
+                                    unsigned int inputLen,
+                                    unsigned int blocksize);
+SECStatus intel_aes_encrypt_cbc_256(AESContext *cx, unsigned char *output,
+                                    unsigned int *outputLen,
+                                    unsigned int maxOutputLen,
+                                    const unsigned char *input,
+                                    unsigned int inputLen,
+                                    unsigned int blocksize);
+SECStatus intel_aes_decrypt_cbc_256(AESContext *cx, unsigned char *output,
+                                    unsigned int *outputLen,
+                                    unsigned int maxOutputLen,
+                                    const unsigned char *input,
+                                    unsigned int inputLen,
+                                    unsigned int blocksize);
+SECStatus intel_aes_encrypt_ctr_256(CTRContext *cx, unsigned char *output,
+                                    unsigned int *outputLen,
+                                    unsigned int maxOutputLen,
+                                    const unsigned char *input,
+                                    unsigned int inputLen,
+                                    unsigned int blocksize);
+
+#define intel_aes_ecb_worker(encrypt, keysize) /* yields an ECB routine */ \
+    ((encrypt) /* nonzero picks encrypt, zero picks decrypt */             \
+         ? ((keysize) == 16 ? intel_aes_encrypt_ecb_128                    \
+                            : (keysize) == 24 ? intel_aes_encrypt_ecb_192  \
+                                              : intel_aes_encrypt_ecb_256) \
+         : ((keysize) == 16 ? intel_aes_decrypt_ecb_128 /* not 16/24: 256 */ \
+                            : (keysize) == 24 ? intel_aes_decrypt_ecb_192  \
+                                              : intel_aes_decrypt_ecb_256))
+
+#define intel_aes_cbc_worker(encrypt, keysize) /* yields a CBC routine */  \
+    ((encrypt) /* nonzero picks encrypt, zero picks decrypt */             \
+         ? ((keysize) == 16 ? intel_aes_encrypt_cbc_128                    \
+                            : (keysize) == 24 ? intel_aes_encrypt_cbc_192  \
+                                              : intel_aes_encrypt_cbc_256) \
+         : ((keysize) == 16 ? intel_aes_decrypt_cbc_128 /* not 16/24: 256 */ \
+                            : (keysize) == 24 ? intel_aes_decrypt_cbc_192  \
+                                              : intel_aes_decrypt_cbc_256))
+
+#define intel_aes_ctr_worker(nr) /* nr: presumably the AES round count */ \
+    ((nr) == 10 ? intel_aes_encrypt_ctr_128              \
+                : (nr) == 12 ? intel_aes_encrypt_ctr_192 \
+                             : intel_aes_encrypt_ctr_256) /* any other nr */
+
+#define intel_aes_init(encrypt, keysize) /* uses 'key' and 'cx' from the call site */ \
+    do {                                                          \
+        if (encrypt) { /* nonzero: build the encryption schedule */ \
+            if ((keysize) == 16)                                  \
+                intel_aes_encrypt_init_128(key, cx->expandedKey); \
+            else if ((keysize) == 24)                             \
+                intel_aes_encrypt_init_192(key, cx->expandedKey); \
+            else /* any other keysize uses the 256-bit routine */ \
+                intel_aes_encrypt_init_256(key, cx->expandedKey); \
+        } else { /* zero: build the decryption schedule */        \
+            if ((keysize) == 16)                                  \
+                intel_aes_decrypt_init_128(key, cx->expandedKey); \
+            else if ((keysize) == 24)                             \
+                intel_aes_decrypt_init_192(key, cx->expandedKey); \
+            else /* any other keysize uses the 256-bit routine */ \
+                intel_aes_decrypt_init_256(key, cx->expandedKey); \
+        }                                                         \
+    } while (0)
diff --git a/security/nss/lib/freebl/intel-aes.s b/security/nss/lib/freebl/intel-aes.s
new file mode 100644
index 000000000..2dfcfa15b
--- /dev/null
+++ b/security/nss/lib/freebl/intel-aes.s
@@ -0,0 +1,2514 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+	.text
+
+#define IV_OFFSET 16
+#define EXPANDED_KEY_OFFSET 48
+
+/*
+ * Warning: the length values used in this module are "unsigned int"
+ * in C, which is 32-bit.  When they're passed in registers, use only
+ * the low 32 bits, because the top half is unspecified.
+ *
+ * This is called from C code, so the contents of those bits can
+ * depend on the C compiler's optimization decisions.  This means that
+ * mistakes might not be obvious in testing if those bits happen to be
+ * zero in your build.
+ *
+ * Exception: 32-bit lea instructions use a 64-bit address because the
+ * address size doesn't affect the result, and that form is more
+ * compactly encoded and preferred by compilers over a 32-bit address.
+ */
+
+/* in %rdi : the 16-byte AES-128 key (read with a single unaligned load)
+   in %rsi : buffer for expanded key; receives 11 round keys (176 bytes)
+   clobbers %rsi and %xmm1-%xmm3; leaves %eax at 0 */
+	.type intel_aes_encrypt_init_128,@function
+	.globl intel_aes_encrypt_init_128
+	.align	16
+intel_aes_encrypt_init_128:
+	movups	(%rdi), %xmm1	/* %xmm1 = user key, which is round key 0 */
+	movups	%xmm1, (%rsi)
+	leaq	16(%rsi), %rsi	/* %rsi -> slot for round key 1 */
+	xorl	%eax, %eax	/* key_expansion128 loads %xmm3 from %eax */
+
+	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01	/* aeskeygenassist $0x01, %xmm1, %xmm2 */
+	call key_expansion128
+	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02	/* aeskeygenassist $0x02, %xmm1, %xmm2 */
+	call key_expansion128
+	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04	/* aeskeygenassist $0x04, %xmm1, %xmm2 */
+	call key_expansion128
+	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08	/* aeskeygenassist $0x08, %xmm1, %xmm2 */
+	call key_expansion128
+	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10	/* aeskeygenassist $0x10, %xmm1, %xmm2 */
+	call key_expansion128
+	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20	/* aeskeygenassist $0x20, %xmm1, %xmm2 */
+	call key_expansion128
+	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40	/* aeskeygenassist $0x40, %xmm1, %xmm2 */
+	call key_expansion128
+	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80	/* aeskeygenassist $0x80, %xmm1, %xmm2 */
+	call key_expansion128
+	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b	/* aeskeygenassist $0x1b, %xmm1, %xmm2 */
+	call key_expansion128
+	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36	/* aeskeygenassist $0x36, %xmm1, %xmm2 */
+	call key_expansion128
+
+	ret
+	.size intel_aes_encrypt_init_128, .-intel_aes_encrypt_init_128
+
+
+/* in %rdi : the 16-byte AES-128 key
+   in %rsi : buffer for expanded key; round keys 1-9 are stored after aesimc,
+             round keys 0 and 10 are stored unmodified (11 keys, 176 bytes) */
+	.type intel_aes_decrypt_init_128,@function
+	.globl intel_aes_decrypt_init_128
+	.align	16
+intel_aes_decrypt_init_128:
+	movups	(%rdi), %xmm1	/* %xmm1 = user key, which is round key 0 */
+	movups	%xmm1, (%rsi)
+	leaq	16(%rsi), %rsi	/* %rsi -> slot for round key 1 */
+	xorl	%eax, %eax	/* key_expansion128 loads %xmm3 from %eax */
+
+	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01	/* aeskeygenassist $0x01, %xmm1, %xmm2 */
+	call key_expansion128
+	.byte 0x66,0x0f,0x38,0xdb,0xd1	/* aesimc	%xmm1, %xmm2 */
+	movups	%xmm2, -16(%rsi)	/* overwrite the key just stored; %xmm1 stays raw */
+	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02	/* aeskeygenassist $0x02, %xmm1, %xmm2 */
+	call key_expansion128
+	.byte 0x66,0x0f,0x38,0xdb,0xd1	/* aesimc	%xmm1, %xmm2 */
+	movups	%xmm2, -16(%rsi)
+	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04	/* aeskeygenassist $0x04, %xmm1, %xmm2 */
+	call key_expansion128
+	.byte 0x66,0x0f,0x38,0xdb,0xd1	/* aesimc	%xmm1, %xmm2 */
+	movups	%xmm2, -16(%rsi)
+	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08	/* aeskeygenassist $0x08, %xmm1, %xmm2 */
+	call key_expansion128
+	.byte 0x66,0x0f,0x38,0xdb,0xd1	/* aesimc	%xmm1, %xmm2 */
+	movups	%xmm2, -16(%rsi)
+	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10	/* aeskeygenassist $0x10, %xmm1, %xmm2 */
+	call key_expansion128
+	.byte 0x66,0x0f,0x38,0xdb,0xd1	/* aesimc	%xmm1, %xmm2 */
+	movups	%xmm2, -16(%rsi)
+	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20	/* aeskeygenassist $0x20, %xmm1, %xmm2 */
+	call key_expansion128
+	.byte 0x66,0x0f,0x38,0xdb,0xd1	/* aesimc	%xmm1, %xmm2 */
+	movups	%xmm2, -16(%rsi)
+	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40	/* aeskeygenassist $0x40, %xmm1, %xmm2 */
+	call key_expansion128
+	.byte 0x66,0x0f,0x38,0xdb,0xd1	/* aesimc	%xmm1, %xmm2 */
+	movups	%xmm2, -16(%rsi)
+	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80	/* aeskeygenassist $0x80, %xmm1, %xmm2 */
+	call key_expansion128
+	.byte 0x66,0x0f,0x38,0xdb,0xd1	/* aesimc	%xmm1, %xmm2 */
+	movups	%xmm2, -16(%rsi)
+	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b	/* aeskeygenassist $0x1b, %xmm1, %xmm2 */
+	call key_expansion128
+	.byte 0x66,0x0f,0x38,0xdb,0xd1	/* aesimc	%xmm1, %xmm2 */
+	movups	%xmm2, -16(%rsi)
+	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36	/* aeskeygenassist $0x36, %xmm1, %xmm2 */
+	call key_expansion128	/* last round key: no aesimc afterwards */
+
+	ret
+	.size intel_aes_decrypt_init_128, .-intel_aes_decrypt_init_128
+
+
+	.type key_expansion128,@function	/* file-local helper (no .globl) */
+	.align	16
+key_expansion128:	/* in: %xmm1 = previous round key, %xmm2 = aeskeygenassist result, %eax = 0 (callers clear it) */
+	movd	%eax, %xmm3	/* %xmm3 = %eax zero-extended, i.e. all zero when %eax is 0 */
+	pshufd	$0xff, %xmm2, %xmm2	/* copy the top dword of %xmm2 into all four lanes */
+	shufps	$0x10, %xmm1, %xmm3	/* shufps/pxor pairs fold earlier key words into */
+	pxor	%xmm3, %xmm1	/* %xmm1 (presumably the usual AES-128 schedule step) */
+	shufps	$0x8c, %xmm1, %xmm3
+	pxor	%xmm2, %xmm1
+	pxor	%xmm3, %xmm1	/* %xmm1 = new round key */
+	movdqu	%xmm1, (%rsi)	/* store it ... */
+	addq	$16, %rsi	/* ... and advance %rsi to the next slot */
+	ret
+	.size key_expansion128, .-key_expansion128
+
+
+/* AES-128 ECB encryption; always returns 0 in %eax.
+   in %rdi : cx - context (expanded key is read from cx + 48)
+   in %rsi : output - pointer to output buffer
+   in %rdx : outputLen - pointer to output length (already filled in by
+             caller; not touched here)
+   in %ecx : maxOutputLen - length of output buffer (already checked by caller)
+   in %r8  : input - pointer to input buffer
+   in %r9d : inputLen - length of input in bytes; the loops only terminate
+             correctly when this is a multiple of 16
+   on stack: blocksize - AES blocksize (always 16, unused) */
+	.type intel_aes_encrypt_ecb_128,@function
+	.globl intel_aes_encrypt_ecb_128
+	.align	16
+intel_aes_encrypt_ecb_128:
+//	leaq	EXPANDED_KEY_OFFSET(%rdi), %rdi
+	leaq	48(%rdi), %rdi	/* %rdi -> expanded key */
+
+	movdqu	(%rdi), %xmm2	/* round key 0 */
+	movdqu	160(%rdi), %xmm12	/* round key 10 (last) */
+	xor	%eax, %eax	/* %eax = byte offset into input and output */
+//	cmpl	$8*16, %r9d
+	cmpl	$128, %r9d
+	jb	1f	/* fewer than 8 blocks: single-block loop only */
+//	leal	-8*16(%r9), %r11d
+	leal	-128(%r9), %r11d	/* last offset that still has 8 blocks after it */
+2:	movdqu	(%r8, %rax), %xmm3	/* main loop: 8 blocks per iteration */
+	movdqu	16(%r8, %rax), %xmm4
+	movdqu	32(%r8, %rax), %xmm5
+	movdqu	48(%r8, %rax), %xmm6
+	movdqu	64(%r8, %rax), %xmm7
+	movdqu	80(%r8, %rax), %xmm8
+	movdqu	96(%r8, %rax), %xmm9
+	movdqu	112(%r8, %rax), %xmm10
+	pxor	%xmm2, %xmm3
+	pxor	%xmm2, %xmm4
+	pxor	%xmm2, %xmm5
+	pxor	%xmm2, %xmm6
+	pxor	%xmm2, %xmm7
+	pxor	%xmm2, %xmm8
+	pxor	%xmm2, %xmm9
+	pxor	%xmm2, %xmm10
+
+// complete loop unrolling: round keys 1-9 are loaded two at a time into %xmm1/%xmm11
+	movdqu 16(%rdi), %xmm1
+	movdqu 32(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc	%xmm11, %xmm10 */
+
+	movdqu 48(%rdi), %xmm1
+	movdqu 64(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc	%xmm11, %xmm10 */
+
+	movdqu 80(%rdi), %xmm1
+	movdqu 96(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc	%xmm11, %xmm10 */
+
+	movdqu 112(%rdi), %xmm1
+	movdqu 128(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc	%xmm11, %xmm10 */
+
+	movdqu 144(%rdi), %xmm1
+	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xdd,0xdc 	/* aesenclast 	%xmm12, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xdd,0xe4 	/* aesenclast 	%xmm12, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xdd,0xec 	/* aesenclast 	%xmm12, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xdd,0xf4 	/* aesenclast 	%xmm12, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xdd,0xfc 	/* aesenclast 	%xmm12, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xdd,0xc4 	/* aesenclast 	%xmm12, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xdd,0xcc 	/* aesenclast 	%xmm12, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xdd,0xd4 	/* aesenclast 	%xmm12, %xmm10 */
+
+	movdqu	%xmm3, (%rsi, %rax)
+	movdqu	%xmm4, 16(%rsi, %rax)
+	movdqu	%xmm5, 32(%rsi, %rax)
+	movdqu	%xmm6, 48(%rsi, %rax)
+	movdqu	%xmm7, 64(%rsi, %rax)
+	movdqu	%xmm8, 80(%rsi, %rax)
+	movdqu	%xmm9, 96(%rsi, %rax)
+	movdqu	%xmm10, 112(%rsi, %rax)
+//	addl	$8*16, %eax
+	addl	$128, %eax
+	cmpl	%r11d, %eax
+	jbe	2b	/* repeat while at least 8 blocks remain */
+1:	cmpl	%eax, %r9d
+	je	5f	/* nothing left over */
+
+	movdqu	16(%rdi), %xmm3	/* round keys 1-9 stay in %xmm3-%xmm11 for the tail loop */
+	movdqu	32(%rdi), %xmm4
+	movdqu	48(%rdi), %xmm5
+	movdqu	64(%rdi), %xmm6
+	movdqu	80(%rdi), %xmm7
+	movdqu	96(%rdi), %xmm8
+	movdqu	112(%rdi), %xmm9
+	movdqu	128(%rdi), %xmm10
+	movdqu	144(%rdi), %xmm11
+
+4:	movdqu	(%r8, %rax), %xmm1	/* tail loop: one block per iteration */
+	pxor	%xmm2, %xmm1
+	.byte 0x66,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm3, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xcc	/* aesenc	%xmm4, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xcd	/* aesenc	%xmm5, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xce	/* aesenc	%xmm6, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xcf	/* aesenc	%xmm7, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc	%xmm8, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm9, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc	%xmm10, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm11, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdd,0xcc	/* aesenclast %xmm12, %xmm1 */
+	movdqu	%xmm1, (%rsi, %rax)
+	addl	$16, %eax
+	cmpl	%eax, %r9d
+	jne	4b	/* exits only when the offset equals inputLen exactly */
+
+5:	xor	%eax, %eax	/* return value 0 */
+	ret
+	.size intel_aes_encrypt_ecb_128, .-intel_aes_encrypt_ecb_128
+
+
+/* AES-128 ECB decryption; always returns 0 in %eax.
+   in %rdi : cx - context (expanded key is read from cx + 48)
+   in %rsi : output - pointer to output buffer
+   in %rdx : outputLen - pointer to output length (already filled in by
+             caller; not touched here)
+   in %ecx : maxOutputLen - length of output buffer (already checked by caller)
+   in %r8  : input - pointer to input buffer
+   in %r9d : inputLen - length of input in bytes; the loops only terminate
+             correctly when this is a multiple of 16
+   on stack: blocksize - AES blocksize (always 16, unused) */
+	.type intel_aes_decrypt_ecb_128,@function
+	.globl intel_aes_decrypt_ecb_128
+	.align	16
+intel_aes_decrypt_ecb_128:
+//	leaq	EXPANDED_KEY_OFFSET(%rdi), %rdi
+	leaq	48(%rdi), %rdi	/* %rdi -> expanded key */
+
+	movdqu	(%rdi), %xmm2	/* round key 0, applied last (aesdeclast) */
+	movdqu	160(%rdi), %xmm12	/* round key 10, applied first (pxor) */
+	xorl	%eax, %eax	/* %eax = byte offset into input and output */
+//	cmpl	$8*16, %r9d
+	cmpl	$128, %r9d
+	jb	1f	/* fewer than 8 blocks: single-block loop only */
+//	leal	-8*16(%r9), %r11d
+	leal	-128(%r9), %r11d	/* last offset that still has 8 blocks after it */
+2:	movdqu	(%r8, %rax), %xmm3	/* main loop: 8 blocks per iteration */
+	movdqu	16(%r8, %rax), %xmm4
+	movdqu	32(%r8, %rax), %xmm5
+	movdqu	48(%r8, %rax), %xmm6
+	movdqu	64(%r8, %rax), %xmm7
+	movdqu	80(%r8, %rax), %xmm8
+	movdqu	96(%r8, %rax), %xmm9
+	movdqu	112(%r8, %rax), %xmm10
+	pxor	%xmm12, %xmm3
+	pxor	%xmm12, %xmm4
+	pxor	%xmm12, %xmm5
+	pxor	%xmm12, %xmm6
+	pxor	%xmm12, %xmm7
+	pxor	%xmm12, %xmm8
+	pxor	%xmm12, %xmm9
+	pxor	%xmm12, %xmm10
+
+// complete loop unrolling: round keys 9 down to 1 are loaded two at a time into %xmm1/%xmm11
+	movdqu 144(%rdi), %xmm1
+	movdqu 128(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 112(%rdi), %xmm1
+	movdqu 96(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 80(%rdi), %xmm1
+	movdqu 64(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 48(%rdi), %xmm1
+	movdqu 32(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 16(%rdi), %xmm1
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x0f,0x38,0xdf,0xda		/* aesdeclast 	%xmm2, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdf,0xe2		/* aesdeclast 	%xmm2, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdf,0xea		/* aesdeclast 	%xmm2, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdf,0xf2		/* aesdeclast 	%xmm2, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdf,0xfa		/* aesdeclast 	%xmm2, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2	/* aesdeclast 	%xmm2, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca	/* aesdeclast 	%xmm2, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2	/* aesdeclast 	%xmm2, %xmm10 */
+
+	movdqu	%xmm3, (%rsi, %rax)
+	movdqu	%xmm4, 16(%rsi, %rax)
+	movdqu	%xmm5, 32(%rsi, %rax)
+	movdqu	%xmm6, 48(%rsi, %rax)
+	movdqu	%xmm7, 64(%rsi, %rax)
+	movdqu	%xmm8, 80(%rsi, %rax)
+	movdqu	%xmm9, 96(%rsi, %rax)
+	movdqu	%xmm10, 112(%rsi, %rax)
+//	addl	$8*16, %eax
+	addl	$128, %eax
+	cmpl	%r11d, %eax
+	jbe	2b	/* repeat while at least 8 blocks remain */
+1:	cmpl	%eax, %r9d
+	je	5f	/* nothing left over */
+
+	movdqu	16(%rdi), %xmm3	/* round keys 1-9 stay in %xmm3-%xmm11 for the tail loop */
+	movdqu	32(%rdi), %xmm4
+	movdqu	48(%rdi), %xmm5
+	movdqu	64(%rdi), %xmm6
+	movdqu	80(%rdi), %xmm7
+	movdqu	96(%rdi), %xmm8
+	movdqu	112(%rdi), %xmm9
+	movdqu	128(%rdi), %xmm10
+	movdqu	144(%rdi), %xmm11
+
+4:	movdqu	(%r8, %rax), %xmm1	/* tail loop: one block per iteration */
+	pxor	%xmm12, %xmm1
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xca	/* aesdec	%xmm10, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm9, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8	/* aesdec	%xmm8, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcf	/* aesdec	%xmm7, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xce	/* aesdec	%xmm6, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcd	/* aesdec	%xmm5, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcc	/* aesdec	%xmm4, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm3, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdf,0xca	/* aesdeclast %xmm2, %xmm1 */
+	movdqu	%xmm1, (%rsi, %rax)
+	addl	$16, %eax
+	cmpl	%eax, %r9d
+	jne	4b	/* exits only when the offset equals inputLen exactly */
+
+5:	xor	%eax, %eax	/* return value 0 */
+	ret
+	.size intel_aes_decrypt_ecb_128, .-intel_aes_decrypt_ecb_128
+
+
+/* AES-128 CBC encryption; always returns 0 in %eax.
+   in %rdi : cx - context (IV at cx + 16, expanded key at cx + 48)
+   in %rsi : output - pointer to output buffer
+   in %rdx : outputLen - pointer to output length (already filled in by
+             caller); the register is reused below as the IV pointer
+   in %ecx : maxOutputLen - length of output buffer (already checked by caller)
+   in %r8  : input - pointer to input buffer
+   in %r9d : inputLen - length of input in bytes; 0 returns immediately,
+             otherwise the loop ends only when the offset equals inputLen
+   on stack: blocksize - AES blocksize (always 16, unused) */
+	.type intel_aes_encrypt_cbc_128,@function
+	.globl intel_aes_encrypt_cbc_128
+	.align	16
+intel_aes_encrypt_cbc_128:
+	testl	%r9d, %r9d
+	je	2f	/* empty input: leave the IV untouched */
+
+//	leaq	IV_OFFSET(%rdi), %rdx
+//	leaq	EXPANDED_KEY_OFFSET(%rdi), %rdi
+	leaq	16(%rdi), %rdx	/* %rdx -> IV (the outputLen argument is discarded) */
+	leaq	48(%rdi), %rdi	/* %rdi -> expanded key */
+
+	movdqu	(%rdx), %xmm0	/* %xmm0 = chaining value, initially the IV */
+	movdqu	(%rdi), %xmm2	/* round keys 0-10 live in %xmm2-%xmm12 */
+	movdqu	16(%rdi), %xmm3
+	movdqu	32(%rdi), %xmm4
+	movdqu	48(%rdi), %xmm5
+	movdqu	64(%rdi), %xmm6
+	movdqu	80(%rdi), %xmm7
+	movdqu	96(%rdi), %xmm8
+	movdqu	112(%rdi), %xmm9
+	movdqu	128(%rdi), %xmm10
+	movdqu	144(%rdi), %xmm11
+	movdqu	160(%rdi), %xmm12
+
+	xorl	%eax, %eax	/* %eax = byte offset into input and output */
+1:	movdqu	(%r8, %rax), %xmm1	/* one block per iteration */
+	pxor	%xmm0, %xmm1	/* XOR with the previous ciphertext block (or IV) */
+	pxor	%xmm2, %xmm1	/* XOR with round key 0 */
+	.byte 0x66,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm3, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xcc	/* aesenc	%xmm4, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xcd	/* aesenc	%xmm5, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xce	/* aesenc	%xmm6, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xcf	/* aesenc	%xmm7, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc	%xmm8, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm9, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc	%xmm10, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm11, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdd,0xcc	/* aesenclast %xmm12, %xmm1 */
+	movdqu	%xmm1, (%rsi, %rax)
+	movdqa	%xmm1, %xmm0	/* this ciphertext block chains into the next one */
+	addl	$16, %eax
+	cmpl	%eax, %r9d
+	jne	1b
+
+	movdqu	%xmm0, (%rdx)	/* save the last ciphertext block as the new IV */
+
+2:	xor	%eax, %eax	/* return value 0 */
+	ret
+	.size intel_aes_encrypt_cbc_128, .-intel_aes_encrypt_cbc_128
+
+
+/* AES-128 CBC decryption with AES-NI: 8 blocks per pass, then single blocks.
+   in %rdi : cx - context (IV is read at offset 16, round keys at offset 48)
+   in %rsi : output - pointer to output buffer
+   in %rdx : outputLen - pointer to variable for length of output
+             (already filled in by caller; %rdx is reused below as IV pointer)
+   in %ecx : maxOutputLen - length of output buffer (already checked by caller)
+   in %r8  : input - pointer to input buffer
+   in %r9d : inputLen - length of input buffer
+   on stack: blocksize - AES blocksize (always 16, unused)
+   out %eax: always 0 */
+	.type intel_aes_decrypt_cbc_128,@function
+	.globl intel_aes_decrypt_cbc_128
+	.align	16
+intel_aes_decrypt_cbc_128:
+//	leaq	IV_OFFSET(%rdi), %rdx
+//	leaq	EXPANDED_KEY_OFFSET(%rdi), %rdi
+	leaq	16(%rdi), %rdx	/* %rdx = address of the IV inside cx (IV_OFFSET hard-coded) */
+	leaq	48(%rdi), %rdi	/* %rdi = expanded key inside cx (EXPANDED_KEY_OFFSET hard-coded) */
+
+	movdqu	(%rdx), %xmm0   /* iv */
+	movdqu	(%rdi), %xmm2   /* first key block */
+	movdqu	160(%rdi), %xmm12 /* last key block */
+	xorl	%eax, %eax	/* %eax = byte offset into input and output */
+	cmpl	$128, %r9d	/* fewer than 8 blocks: skip the unrolled loop */
+	jb	1f
+	leal	-128(%r9), %r11d	/* %r11d = largest offset that still leaves 8 blocks */
+2:	movdqu	(%r8, %rax), %xmm3 /* 1st data block */
+	movdqu	16(%r8, %rax), %xmm4 /* 2nd data block */
+	movdqu	32(%r8, %rax), %xmm5
+	movdqu	48(%r8, %rax), %xmm6
+	movdqu	64(%r8, %rax), %xmm7
+	movdqu	80(%r8, %rax), %xmm8
+	movdqu	96(%r8, %rax), %xmm9
+	movdqu	112(%r8, %rax), %xmm10
+	pxor	%xmm12, %xmm3	/* decryption starts by XORing in the last key block */
+	pxor	%xmm12, %xmm4
+	pxor	%xmm12, %xmm5
+	pxor	%xmm12, %xmm6
+	pxor	%xmm12, %xmm7
+	pxor	%xmm12, %xmm8
+	pxor	%xmm12, %xmm9
+	pxor	%xmm12, %xmm10
+
+// complete loop unrolling
+	movdqu 144(%rdi), %xmm1	/* round keys are reloaded two at a time, highest offset first */
+	movdqu 128(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 112(%rdi), %xmm1
+	movdqu 96(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 80(%rdi), %xmm1
+	movdqu 64(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 48(%rdi), %xmm1
+	movdqu 32(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 16(%rdi), %xmm1
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x0f,0x38,0xdf,0xda		/* aesdeclast 	%xmm2, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdf,0xe2		/* aesdeclast 	%xmm2, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdf,0xea		/* aesdeclast 	%xmm2, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdf,0xf2		/* aesdeclast 	%xmm2, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdf,0xfa		/* aesdeclast 	%xmm2, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2	/* aesdeclast 	%xmm2, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca	/* aesdeclast 	%xmm2, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2	/* aesdeclast 	%xmm2, %xmm10 */
+
+ 	pxor	%xmm0, %xmm3	/* block 0 is XORed with the incoming chaining value */
+	movdqu	(%r8, %rax), %xmm0	/* re-read ciphertext block i to XOR into block i+1 */
+	pxor	%xmm0, %xmm4
+	movdqu	16(%r8, %rax), %xmm0
+	pxor	%xmm0, %xmm5
+	movdqu	32(%r8, %rax), %xmm0
+	pxor	%xmm0, %xmm6
+	movdqu	48(%r8, %rax), %xmm0
+	pxor	%xmm0, %xmm7
+	movdqu	64(%r8, %rax), %xmm0
+	pxor	%xmm0, %xmm8
+	movdqu	80(%r8, %rax), %xmm0
+	pxor	%xmm0, %xmm9
+	movdqu	96(%r8, %rax), %xmm0
+	pxor	%xmm0, %xmm10
+	movdqu	112(%r8, %rax), %xmm0	/* 8th ciphertext block is the next chaining value */
+	movdqu	%xmm3, (%rsi, %rax)	/* stores are issued only after all input re-reads above */
+	movdqu	%xmm4, 16(%rsi, %rax)
+	movdqu	%xmm5, 32(%rsi, %rax)
+	movdqu	%xmm6, 48(%rsi, %rax)
+	movdqu	%xmm7, 64(%rsi, %rax)
+	movdqu	%xmm8, 80(%rsi, %rax)
+	movdqu	%xmm9, 96(%rsi, %rax)
+	movdqu	%xmm10, 112(%rsi, %rax)
+	addl	$128, %eax
+	cmpl	%r11d, %eax	/* unsigned: repeat while 8 more blocks remain */
+	jbe	2b
+1:	cmpl	%eax, %r9d	/* all input consumed? */
+	je	5f
+
+	movdqu	16(%rdi), %xmm3	/* keep the middle round keys in %xmm3..%xmm11 for the tail loop */
+	movdqu	32(%rdi), %xmm4
+	movdqu	48(%rdi), %xmm5
+	movdqu	64(%rdi), %xmm6
+	movdqu	80(%rdi), %xmm7
+	movdqu	96(%rdi), %xmm8
+	movdqu	112(%rdi), %xmm9
+	movdqu	128(%rdi), %xmm10
+	movdqu	144(%rdi), %xmm11
+
+4:	movdqu	(%r8, %rax), %xmm1
+	movdqa	%xmm1, %xmm13	/* save the ciphertext: it is the next chaining value */
+	pxor	%xmm12, %xmm1
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xca	/* aesdec	%xmm10, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm9, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8	/* aesdec	%xmm8, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcf	/* aesdec	%xmm7, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xce	/* aesdec	%xmm6, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcd	/* aesdec	%xmm5, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcc	/* aesdec	%xmm4, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm3, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdf,0xca	/* aesdeclast %xmm2, %xmm1 */
+	pxor	%xmm0, %xmm1	/* XOR with the chaining value to get the plaintext */
+	movdqu	%xmm1, (%rsi, %rax)
+	movdqa	%xmm13, %xmm0
+	addl	$16, %eax
+	cmpl	%eax, %r9d	/* loop exits only when the offset equals inputLen exactly */
+	jne	4b
+
+5:	movdqu	%xmm0, (%rdx)	/* write the final chaining value back as the new IV */
+
+	xor	%eax, %eax	/* return 0 */
+	ret
+	.size intel_aes_decrypt_cbc_128, .-intel_aes_decrypt_cbc_128
+        
+/* AES-192 encryption key schedule.  in %rdi : the key (24 bytes are read)
+   in %rsi : buffer for expanded key (24 + 8*24 = 216 bytes are written)
+   clobbers %eax, %rsi, %xmm1-%xmm5 (partly inside key_expansion192) */
+	.type intel_aes_encrypt_init_192,@function
+	.globl intel_aes_encrypt_init_192
+	.align	16
+intel_aes_encrypt_init_192:
+	movdqu	(%rdi), %xmm1	/* %xmm1 = key bytes 0-15 */
+	movq	16(%rdi), %xmm3	/* low half of %xmm3 = key bytes 16-23 */
+	movdqu	%xmm1, (%rsi)	/* the raw key is the first 24 bytes of the schedule */
+	movq	%xmm3, 16(%rsi)
+	leaq	24(%rsi), %rsi	/* %rsi = write pointer, advanced by key_expansion192 */
+
+	/* each step below appends 24 bytes; the immediate doubles from 0x01 to 0x80 */
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01	/* aeskeygenassist $0x01, %xmm3, %xmm2 */
+	call key_expansion192
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02	/* aeskeygenassist $0x02, %xmm3, %xmm2 */
+	call key_expansion192
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04	/* aeskeygenassist $0x04, %xmm3, %xmm2 */
+	call key_expansion192
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08	/* aeskeygenassist $0x08, %xmm3, %xmm2 */
+	call key_expansion192
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10	/* aeskeygenassist $0x10, %xmm3, %xmm2 */
+	call key_expansion192
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20	/* aeskeygenassist $0x20, %xmm3, %xmm2 */
+	call key_expansion192
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40	/* aeskeygenassist $0x40, %xmm3, %xmm2 */
+	call key_expansion192
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80	/* aeskeygenassist $0x80, %xmm3, %xmm2 */
+	call key_expansion192
+
+	ret
+	.size intel_aes_encrypt_init_192, .-intel_aes_encrypt_init_192
+
+
+/* AES-192 decryption key schedule: same expansion as the encrypt variant, then aesimc on the 16-byte keys at offsets 16..176.
+   in %rdi : the key (24 bytes are read); in %rsi : buffer for expanded key (216 bytes are written)
+   The keys at offsets 0 and 192 are left untransformed.  Clobbers %eax, %rsi, %xmm1-%xmm5. */
+	.type intel_aes_decrypt_init_192,@function
+	.globl intel_aes_decrypt_init_192
+	.align	16
+intel_aes_decrypt_init_192:
+	movdqu	(%rdi), %xmm1	/* %xmm1 = key bytes 0-15 */
+	movq	16(%rdi), %xmm3	/* low half of %xmm3 = key bytes 16-23 */
+	movdqu	%xmm1, (%rsi)
+	movq	%xmm3, 16(%rsi)
+	leaq	24(%rsi), %rsi	/* %rsi = write pointer, advanced by 24 per key_expansion192 call */
+
+	/* each 24-byte step completes either two 16-byte keys (at -32/-16 from %rsi) or one (at -24) */
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01	/* aeskeygenassist $0x01, %xmm3, %xmm2 */
+	call key_expansion192
+	movups	-32(%rsi), %xmm2	/* keys at buffer offsets 16 and 32 are now complete */
+	movups	-16(%rsi), %xmm4
+	.byte 0x66,0x0f,0x38,0xdb,0xd2	/* aesimc	%xmm2, %xmm2 */
+	.byte 0x66,0x0f,0x38,0xdb,0xe4	/* aesimc	%xmm4, %xmm4 */
+	movups	%xmm2, -32(%rsi)
+	movups	%xmm4, -16(%rsi)
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02	/* aeskeygenassist $0x02, %xmm3, %xmm2 */
+	call key_expansion192
+	.byte 0x66,0x0f,0x38,0xdb,0xd1	/* aesimc	%xmm1, %xmm2 */
+	movups	%xmm2, -24(%rsi)	/* overwrite the 16 bytes just stored from %xmm1 (offset 48) */
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04	/* aeskeygenassist $0x04, %xmm3, %xmm2 */
+	call key_expansion192
+	movups	-32(%rsi), %xmm2	/* keys at buffer offsets 64 and 80 */
+	movups	-16(%rsi), %xmm4
+	.byte 0x66,0x0f,0x38,0xdb,0xd2	/* aesimc	%xmm2, %xmm2 */
+	.byte 0x66,0x0f,0x38,0xdb,0xe4	/* aesimc	%xmm4, %xmm4 */
+	movups	%xmm2, -32(%rsi)
+	movups	%xmm4, -16(%rsi)
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08	/* aeskeygenassist $0x08, %xmm3, %xmm2 */
+	call key_expansion192
+	.byte 0x66,0x0f,0x38,0xdb,0xd1	/* aesimc	%xmm1, %xmm2 */
+	movups	%xmm2, -24(%rsi)	/* key at buffer offset 96 */
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10	/* aeskeygenassist $0x10, %xmm3, %xmm2 */
+	call key_expansion192
+	movups	-32(%rsi), %xmm2	/* keys at buffer offsets 112 and 128 */
+	movups	-16(%rsi), %xmm4
+	.byte 0x66,0x0f,0x38,0xdb,0xd2	/* aesimc	%xmm2, %xmm2 */
+	.byte 0x66,0x0f,0x38,0xdb,0xe4	/* aesimc	%xmm4, %xmm4 */
+	movups	%xmm2, -32(%rsi)
+	movups	%xmm4, -16(%rsi)
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20	/* aeskeygenassist $0x20, %xmm3, %xmm2 */
+	call key_expansion192
+	.byte 0x66,0x0f,0x38,0xdb,0xd1	/* aesimc	%xmm1, %xmm2 */
+	movups	%xmm2, -24(%rsi)	/* key at buffer offset 144 */
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40	/* aeskeygenassist $0x40, %xmm3, %xmm2 */
+	call key_expansion192
+	movups	-32(%rsi), %xmm2	/* keys at buffer offsets 160 and 176 */
+	movups	-16(%rsi), %xmm4
+	.byte 0x66,0x0f,0x38,0xdb,0xd2	/* aesimc	%xmm2, %xmm2 */
+	.byte 0x66,0x0f,0x38,0xdb,0xe4	/* aesimc	%xmm4, %xmm4 */
+	movups	%xmm2, -32(%rsi)
+	movups	%xmm4, -16(%rsi)
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80	/* aeskeygenassist $0x80, %xmm3, %xmm2 */
+	call key_expansion192
+	/* the key produced by the final step (buffer offset 192) gets no aesimc */
+	ret
+	.size intel_aes_decrypt_init_192, .-intel_aes_decrypt_init_192
+
+
+	.type key_expansion192,@function	/* one 24-byte step of the 192-bit key schedule; called by both 192-bit init routines */
+	.align	16	/* in: %xmm1 = previous key dwords 0-3, low half of %xmm3 = dwords 4-5, %xmm2 = aeskeygenassist result, %rsi = write pointer */
+key_expansion192:	/* out: %xmm1/%xmm3 updated, 24 bytes stored, %rsi += 24; clobbers %eax (zeroed), %xmm2, %xmm4, %xmm5 */
+	pshufd	$0x55, %xmm2, %xmm2	/* broadcast dword 1 of the aeskeygenassist result */
+	xor	%eax, %eax
+	movd	%eax, %xmm4	/* %xmm4 = 0 */
+	shufps	$0x10, %xmm1, %xmm4	/* %xmm4 = {0, 0, w1, w0} */
+	pxor	%xmm4, %xmm1	/* %xmm1 = {w0, w1, w2^w1, w3^w0} */
+	shufps	$0x8c, %xmm1, %xmm4	/* %xmm4 = {0, w0, w0, w1^w2} */
+	pxor	%xmm2, %xmm1
+	pxor	%xmm4, %xmm1	/* each dword is now the running XOR of w0..wi with the broadcast value */
+	movdqu	%xmm1, (%rsi)	/* store the four new dwords */
+	addq	$16, %rsi
+
+	pshufd	$0xff, %xmm1, %xmm4	/* broadcast the newest dword (dword 3 of %xmm1) */
+	movd	%eax, %xmm5	/* %xmm5 = 0 (%eax is still zero) */
+	shufps	$0x00, %xmm3, %xmm5	/* %xmm5 = {0, 0, w4, w4} */
+	shufps	$0x08, %xmm3, %xmm5	/* %xmm5 = {0, w4, w4, w4} */
+	pxor	%xmm4, %xmm3
+	pxor	%xmm5, %xmm3	/* low half of %xmm3 = the two remaining new dwords */
+	movq	%xmm3, (%rsi)	/* only the low 8 bytes are stored */
+	addq	$8, %rsi
+	ret
+	.size key_expansion192, .-key_expansion192
+
+
+/* AES-192 ECB encryption with AES-NI: 8 blocks per pass, then single blocks.
+   in %rdi : cx - context (the 13 round keys are read starting at offset 48)
+   in %rsi : output - pointer to output buffer
+   in %rdx : outputLen - pointer to variable for length of output
+             (already filled in by caller)
+   in %ecx : maxOutputLen - length of output buffer (already checked by caller)
+   in %r8  : input - pointer to input buffer
+   in %r9d : inputLen - length of input buffer
+   on stack: blocksize - AES blocksize (always 16, unused)
+   out %eax: always 0 */
+	.type intel_aes_encrypt_ecb_192,@function
+	.globl intel_aes_encrypt_ecb_192
+	.align	16
+intel_aes_encrypt_ecb_192:
+//	leaq	EXPANDED_KEY_OFFSET(%rdi), %rdi
+	leaq	48(%rdi), %rdi	/* %rdi = expanded key inside cx (EXPANDED_KEY_OFFSET hard-coded) */
+
+	movdqu	(%rdi), %xmm2	/* %xmm2 = first round key */
+	movdqu	192(%rdi), %xmm14	/* %xmm14 = last round key */
+	xorl	%eax, %eax	/* %eax = byte offset into input and output */
+//	cmpl	$8*16, %r9d
+	cmpl	$128, %r9d	/* fewer than 8 blocks: skip the unrolled loop */
+	jb	1f
+//	leal	-8*16(%r9), %r11d
+	leal	-128(%r9), %r11d	/* %r11d = largest offset that still leaves 8 blocks */
+2:	movdqu	(%r8, %rax), %xmm3
+	movdqu	16(%r8, %rax), %xmm4
+	movdqu	32(%r8, %rax), %xmm5
+	movdqu	48(%r8, %rax), %xmm6
+	movdqu	64(%r8, %rax), %xmm7
+	movdqu	80(%r8, %rax), %xmm8
+	movdqu	96(%r8, %rax), %xmm9
+	movdqu	112(%r8, %rax), %xmm10
+	pxor	%xmm2, %xmm3	/* XOR every block with the first round key */
+	pxor	%xmm2, %xmm4
+	pxor	%xmm2, %xmm5
+	pxor	%xmm2, %xmm6
+	pxor	%xmm2, %xmm7
+	pxor	%xmm2, %xmm8
+	pxor	%xmm2, %xmm9
+	pxor	%xmm2, %xmm10
+
+// complete loop unrolling
+	movdqu 16(%rdi), %xmm1	/* round keys are reloaded two at a time, lowest offset first */
+	movdqu 32(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc	%xmm11, %xmm10 */
+
+	movdqu 48(%rdi), %xmm1
+	movdqu 64(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc	%xmm11, %xmm10 */
+
+	movdqu 80(%rdi), %xmm1
+	movdqu 96(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc	%xmm11, %xmm10 */
+
+	movdqu 112(%rdi), %xmm1
+	movdqu 128(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc	%xmm11, %xmm10 */
+
+	movdqu 144(%rdi), %xmm1
+	movdqu 160(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc	%xmm11, %xmm10 */
+
+	movdqu 176(%rdi), %xmm1
+	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xdd,0xde	/* aesenclast 	%xmm14, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xdd,0xe6	/* aesenclast 	%xmm14, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xdd,0xee	/* aesenclast 	%xmm14, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xdd,0xf6	/* aesenclast 	%xmm14, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xdd,0xfe	/* aesenclast 	%xmm14, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xdd,0xc6	/* aesenclast 	%xmm14, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xdd,0xce	/* aesenclast 	%xmm14, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xdd,0xd6	/* aesenclast 	%xmm14, %xmm10 */
+
+	movdqu	%xmm3, (%rsi, %rax)
+	movdqu	%xmm4, 16(%rsi, %rax)
+	movdqu	%xmm5, 32(%rsi, %rax)
+	movdqu	%xmm6, 48(%rsi, %rax)
+	movdqu	%xmm7, 64(%rsi, %rax)
+	movdqu	%xmm8, 80(%rsi, %rax)
+	movdqu	%xmm9, 96(%rsi, %rax)
+	movdqu	%xmm10, 112(%rsi, %rax)
+//	addl	$8*16, %eax
+	addl	$128, %eax
+	cmpl	%r11d, %eax	/* unsigned: repeat while 8 more blocks remain */
+	jbe	2b
+1:	cmpl	%eax, %r9d	/* all input consumed? */
+	je	5f
+
+	movdqu	16(%rdi), %xmm3	/* keep the middle round keys in %xmm3..%xmm13 for the tail loop */
+	movdqu	32(%rdi), %xmm4
+	movdqu	48(%rdi), %xmm5
+	movdqu	64(%rdi), %xmm6
+	movdqu	80(%rdi), %xmm7
+	movdqu	96(%rdi), %xmm8
+	movdqu	112(%rdi), %xmm9
+	movdqu	128(%rdi), %xmm10
+	movdqu	144(%rdi), %xmm11
+	movdqu	160(%rdi), %xmm12
+	movdqu	176(%rdi), %xmm13
+
+4:	movdqu	(%r8, %rax), %xmm1
+	pxor	%xmm2, %xmm1
+	.byte 0x66,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm3, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xcc	/* aesenc	%xmm4, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xcd	/* aesenc	%xmm5, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xce	/* aesenc	%xmm6, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xcf	/* aesenc	%xmm7, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc	%xmm8, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm9, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc	%xmm10, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm11, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcc	/* aesenc	%xmm12, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd	/* aesenc	%xmm13, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdd,0xce	/* aesenclast %xmm14, %xmm1 */
+	movdqu	%xmm1, (%rsi, %rax)
+	addl	$16, %eax
+	cmpl	%eax, %r9d	/* loop exits only when the offset equals inputLen exactly */
+	jne	4b
+
+5:	xor	%eax, %eax	/* return 0 */
+	ret
+	.size intel_aes_encrypt_ecb_192, .-intel_aes_encrypt_ecb_192
+
+
+/* AES-192 ECB decryption with AES-NI: 8 blocks per pass, then single blocks.
+   in %rdi : cx - context (round keys at offset 48, applied from +192 down to +0)
+   in %rsi : output - pointer to output buffer
+   in %rdx : outputLen - pointer to variable for length of output
+             (already filled in by caller)
+   in %ecx : maxOutputLen - length of output buffer (already checked by caller)
+   in %r8  : input - pointer to input buffer
+   in %r9d : inputLen - length of input buffer
+   on stack: blocksize - AES blocksize (always 16, unused)
+   out %eax: always 0 */
+	.type intel_aes_decrypt_ecb_192,@function
+	.globl intel_aes_decrypt_ecb_192
+	.align	16
+intel_aes_decrypt_ecb_192:
+//	leaq	EXPANDED_KEY_OFFSET(%rdi), %rdi
+	leaq	48(%rdi), %rdi	/* %rdi = expanded key inside cx (EXPANDED_KEY_OFFSET hard-coded) */
+
+	movdqu	(%rdi), %xmm2	/* %xmm2 = first key block, used by aesdeclast */
+	movdqu	192(%rdi), %xmm14	/* %xmm14 = last key block, XORed in first */
+	xorl	%eax, %eax	/* %eax = byte offset into input and output */
+//	cmpl	$8*16, %r9d
+	cmpl	$128, %r9d	/* fewer than 8 blocks: skip the unrolled loop */
+	jb	1f
+//	leal	-8*16(%r9), %r11d
+	leal	-128(%r9), %r11d	/* %r11d = largest offset that still leaves 8 blocks */
+2:	movdqu	(%r8, %rax), %xmm3
+	movdqu	16(%r8, %rax), %xmm4
+	movdqu	32(%r8, %rax), %xmm5
+	movdqu	48(%r8, %rax), %xmm6
+	movdqu	64(%r8, %rax), %xmm7
+	movdqu	80(%r8, %rax), %xmm8
+	movdqu	96(%r8, %rax), %xmm9
+	movdqu	112(%r8, %rax), %xmm10
+	pxor	%xmm14, %xmm3	/* decryption starts by XORing in the last key block */
+	pxor	%xmm14, %xmm4
+	pxor	%xmm14, %xmm5
+	pxor	%xmm14, %xmm6
+	pxor	%xmm14, %xmm7
+	pxor	%xmm14, %xmm8
+	pxor	%xmm14, %xmm9
+	pxor	%xmm14, %xmm10
+
+// complete loop unrolling
+	movdqu 176(%rdi), %xmm1	/* round keys are reloaded two at a time, highest offset first */
+	movdqu 160(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 144(%rdi), %xmm1
+	movdqu 128(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 112(%rdi), %xmm1
+	movdqu 96(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 80(%rdi), %xmm1
+	movdqu 64(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 48(%rdi), %xmm1
+	movdqu 32(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 16(%rdi), %xmm1
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x0f,0x38,0xdf,0xda		/* aesdeclast 	%xmm2, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdf,0xe2		/* aesdeclast 	%xmm2, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdf,0xea		/* aesdeclast 	%xmm2, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdf,0xf2		/* aesdeclast 	%xmm2, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdf,0xfa		/* aesdeclast 	%xmm2, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2	/* aesdeclast 	%xmm2, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca	/* aesdeclast 	%xmm2, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2	/* aesdeclast 	%xmm2, %xmm10 */
+
+	movdqu	%xmm3, (%rsi, %rax)
+	movdqu	%xmm4, 16(%rsi, %rax)
+	movdqu	%xmm5, 32(%rsi, %rax)
+	movdqu	%xmm6, 48(%rsi, %rax)
+	movdqu	%xmm7, 64(%rsi, %rax)
+	movdqu	%xmm8, 80(%rsi, %rax)
+	movdqu	%xmm9, 96(%rsi, %rax)
+	movdqu	%xmm10, 112(%rsi, %rax)
+//	addl	$8*16, %eax
+	addl	$128, %eax
+	cmpl	%r11d, %eax	/* unsigned: repeat while 8 more blocks remain */
+	jbe	2b
+1:	cmpl	%eax, %r9d	/* all input consumed? */
+	je	5f
+
+	movdqu	16(%rdi), %xmm3	/* keep the middle round keys in %xmm3..%xmm13 for the tail loop */
+	movdqu	32(%rdi), %xmm4
+	movdqu	48(%rdi), %xmm5
+	movdqu	64(%rdi), %xmm6
+	movdqu	80(%rdi), %xmm7
+	movdqu	96(%rdi), %xmm8
+	movdqu	112(%rdi), %xmm9
+	movdqu	128(%rdi), %xmm10
+	movdqu	144(%rdi), %xmm11
+	movdqu	160(%rdi), %xmm12
+	movdqu	176(%rdi), %xmm13
+
+4:	movdqu	(%r8, %rax), %xmm1
+	pxor	%xmm14, %xmm1
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xcd	/* aesdec	%xmm13, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xcc	/* aesdec	%xmm12, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xca	/* aesdec	%xmm10, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm9, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8	/* aesdec	%xmm8, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcf	/* aesdec	%xmm7, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xce	/* aesdec	%xmm6, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcd	/* aesdec	%xmm5, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcc	/* aesdec	%xmm4, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm3, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdf,0xca	/* aesdeclast %xmm2, %xmm1 */
+	movdqu	%xmm1, (%rsi, %rax)
+	addl	$16, %eax
+	cmpl	%eax, %r9d	/* loop exits only when the offset equals inputLen exactly */
+	jne	4b
+
+5:	xor	%eax, %eax	/* return 0 */
+	ret
+	.size intel_aes_decrypt_ecb_192, .-intel_aes_decrypt_ecb_192
+
+
+/* AES-192 CBC encryption with AES-NI, one 16-byte block per loop iteration.
+   in %rdi : cx - context (IV is read at offset 16, round keys at offset 48)
+   in %rsi : output - pointer to output buffer
+   in %rdx : outputLen - pointer to variable for length of output
+             (already filled in by caller; %rdx is reused below as IV pointer)
+   in %ecx : maxOutputLen - length of output buffer (already checked by caller)
+   in %r8  : input - pointer to input buffer
+   in %r9d : inputLen - length of input buffer
+   on stack: blocksize - AES blocksize (always 16, unused)
+   out %eax: always 0 */
+	.type intel_aes_encrypt_cbc_192,@function
+	.globl intel_aes_encrypt_cbc_192
+	.align	16
+intel_aes_encrypt_cbc_192:
+	testl	%r9d, %r9d	/* empty input: return without touching the IV */
+	je	2f
+
+//	leaq	IV_OFFSET(%rdi), %rdx
+//	leaq	EXPANDED_KEY_OFFSET(%rdi), %rdi
+	leaq	16(%rdi), %rdx	/* %rdx = address of the IV inside cx (IV_OFFSET hard-coded) */
+	leaq	48(%rdi), %rdi	/* %rdi = expanded key inside cx (EXPANDED_KEY_OFFSET hard-coded) */
+
+	movdqu	(%rdx), %xmm0	/* %xmm0 = chaining value, initially the IV */
+	movdqu	(%rdi), %xmm2	/* %xmm2..%xmm14 = the 13 round keys, in order */
+	movdqu	16(%rdi), %xmm3
+	movdqu	32(%rdi), %xmm4
+	movdqu	48(%rdi), %xmm5
+	movdqu	64(%rdi), %xmm6
+	movdqu	80(%rdi), %xmm7
+	movdqu	96(%rdi), %xmm8
+	movdqu	112(%rdi), %xmm9
+	movdqu	128(%rdi), %xmm10
+	movdqu	144(%rdi), %xmm11
+	movdqu	160(%rdi), %xmm12
+	movdqu	176(%rdi), %xmm13
+	movdqu	192(%rdi), %xmm14
+
+	xorl	%eax, %eax	/* %eax = byte offset into input and output */
+1:	movdqu	(%r8, %rax), %xmm1
+	pxor	%xmm0, %xmm1	/* XOR plaintext with the chaining value */
+	pxor	%xmm2, %xmm1	/* XOR with round key 0 before the aesenc rounds */
+	.byte 0x66,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm3, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xcc	/* aesenc	%xmm4, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xcd	/* aesenc	%xmm5, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xce	/* aesenc	%xmm6, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xcf	/* aesenc	%xmm7, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc	%xmm8, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm9, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc	%xmm10, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm11, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcc	/* aesenc	%xmm12, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd	/* aesenc	%xmm13, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdd,0xce	/* aesenclast %xmm14, %xmm1 */
+	movdqu	%xmm1, (%rsi, %rax)
+	movdqa	%xmm1, %xmm0	/* this ciphertext block chains into the next block */
+	addl	$16, %eax
+	cmpl	%eax, %r9d	/* loop exits only when the offset equals inputLen exactly */
+	jne	1b
+
+	movdqu	%xmm0, (%rdx)	/* store the last ciphertext block back as the new IV */
+
+2:	xor	%eax, %eax	/* return 0 */
+	ret
+	.size intel_aes_encrypt_cbc_192, .-intel_aes_encrypt_cbc_192
+
+
+/* in %rdi : cx - context
+   in %rsi : output - pointer to output buffer
+   in %rdx : outputLen - pointer to variable for length of output
+             (already filled in by caller)
+   in %ecx : maxOutputLen - length of output buffer
+             (already checked by caller)
+   in %r8  : input - pointer to input buffer
+   in %r9d : inputLen - length of input buffer
+   on stack: blocksize - AES blocksize (always 16, unused)
+*/
+	.type intel_aes_decrypt_cbc_192,@function	/* AES-192 CBC decryption of inputLen bytes from input to output */
+	.globl intel_aes_decrypt_cbc_192
+	.align	16
+intel_aes_decrypt_cbc_192:
+	leaq	16(%rdi), %rdx	/* %rdx = IV slot (cx + 16); the outputLen argument is discarded */
+	leaq	48(%rdi), %rdi	/* %rdi = expanded key schedule (cx + 48) */
+/* %xmm0 = chaining value, %xmm2 = key at offset 0 (applied last), %xmm14 = key at offset 192 (applied first). */
+	movdqu	(%rdx), %xmm0
+	movdqu	(%rdi), %xmm2
+	movdqu	192(%rdi), %xmm14
+	xorl	%eax, %eax
+	cmpl	$128, %r9d	/* fewer than 8 blocks: go straight to the one-block loop */
+	jb	1f
+	leal	-128(%r9), %r11d	/* highest offset at which a full 8-block batch still fits */
+2:	movdqu	(%r8, %rax), %xmm3	/* load 8 ciphertext blocks into %xmm3..%xmm10 */
+	movdqu	16(%r8, %rax), %xmm4
+	movdqu	32(%r8, %rax), %xmm5
+	movdqu	48(%r8, %rax), %xmm6
+	movdqu	64(%r8, %rax), %xmm7
+	movdqu	80(%r8, %rax), %xmm8
+	movdqu	96(%r8, %rax), %xmm9
+	movdqu	112(%r8, %rax), %xmm10
+	pxor	%xmm14, %xmm3	/* initial AddRoundKey with the key at offset 192 */
+	pxor	%xmm14, %xmm4
+	pxor	%xmm14, %xmm5
+	pxor	%xmm14, %xmm6
+	pxor	%xmm14, %xmm7
+	pxor	%xmm14, %xmm8
+	pxor	%xmm14, %xmm9
+	pxor	%xmm14, %xmm10
+/* Middle rounds: keys are fetched two at a time into %xmm1/%xmm11, walking the schedule from offset 176 down to 16. */
+// complete loop unrolling
+	movdqu 176(%rdi), %xmm1
+	movdqu 160(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 144(%rdi), %xmm1
+	movdqu 128(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 112(%rdi), %xmm1
+	movdqu 96(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 80(%rdi), %xmm1
+	movdqu 64(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 48(%rdi), %xmm1
+	movdqu 32(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 16(%rdi), %xmm1
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x0f,0x38,0xdf,0xda		/* aesdeclast 	%xmm2, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdf,0xe2		/* aesdeclast 	%xmm2, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdf,0xea		/* aesdeclast 	%xmm2, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdf,0xf2		/* aesdeclast 	%xmm2, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdf,0xfa		/* aesdeclast 	%xmm2, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2	/* aesdeclast 	%xmm2, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca	/* aesdeclast 	%xmm2, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2	/* aesdeclast 	%xmm2, %xmm10 */
+/* CBC unchaining: XOR each block with the preceding ciphertext, which is re-read from input before any output is stored. */
+ 	pxor	%xmm0, %xmm3
+	movdqu	(%r8, %rax), %xmm0
+	pxor	%xmm0, %xmm4
+	movdqu	16(%r8, %rax), %xmm0
+	pxor	%xmm0, %xmm5
+	movdqu	32(%r8, %rax), %xmm0
+	pxor	%xmm0, %xmm6
+	movdqu	48(%r8, %rax), %xmm0
+	pxor	%xmm0, %xmm7
+	movdqu	64(%r8, %rax), %xmm0
+	pxor	%xmm0, %xmm8
+	movdqu	80(%r8, %rax), %xmm0
+	pxor	%xmm0, %xmm9
+	movdqu	96(%r8, %rax), %xmm0
+	pxor	%xmm0, %xmm10
+	movdqu	112(%r8, %rax), %xmm0	/* last ciphertext of the batch becomes the next chaining value */
+	movdqu	%xmm3, (%rsi, %rax)
+	movdqu	%xmm4, 16(%rsi, %rax)
+	movdqu	%xmm5, 32(%rsi, %rax)
+	movdqu	%xmm6, 48(%rsi, %rax)
+	movdqu	%xmm7, 64(%rsi, %rax)
+	movdqu	%xmm8, 80(%rsi, %rax)
+	movdqu	%xmm9, 96(%rsi, %rax)
+	movdqu	%xmm10, 112(%rsi, %rax)
+	addl	$128, %eax
+	cmpl	%r11d, %eax
+	jbe	2b	/* unsigned: repeat while offset <= inputLen - 128 */
+1:	cmpl	%eax, %r9d	/* all input consumed? then only the IV write-back remains */
+	je	5f
+/* Tail: keep the keys at offsets 16..176 resident in %xmm3..%xmm13 and decrypt one block per pass. */
+	movdqu	16(%rdi), %xmm3
+	movdqu	32(%rdi), %xmm4
+	movdqu	48(%rdi), %xmm5
+	movdqu	64(%rdi), %xmm6
+	movdqu	80(%rdi), %xmm7
+	movdqu	96(%rdi), %xmm8
+	movdqu	112(%rdi), %xmm9
+	movdqu	128(%rdi), %xmm10
+	movdqu	144(%rdi), %xmm11
+	movdqu	160(%rdi), %xmm12
+	movdqu	176(%rdi), %xmm13
+
+4:	movdqu	(%r8, %rax), %xmm1
+	movdqa	%xmm1, %xmm15	/* keep the ciphertext: it is the next chaining value */
+	pxor	%xmm14, %xmm1
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xcd	/* aesdec	%xmm13, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xcc	/* aesdec	%xmm12, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xca	/* aesdec	%xmm10, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm9, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8	/* aesdec	%xmm8, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcf	/* aesdec	%xmm7, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xce	/* aesdec	%xmm6, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcd	/* aesdec	%xmm5, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcc	/* aesdec	%xmm4, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm3, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdf,0xca	/* aesdeclast %xmm2, %xmm1 */
+	pxor	%xmm0, %xmm1	/* XOR with the previous ciphertext (or IV) to get plaintext */
+	movdqu	%xmm1, (%rsi, %rax)
+	movdqa	%xmm15, %xmm0
+	addl	$16, %eax
+	cmpl	%eax, %r9d
+	jne	4b	/* exits only when offset == inputLen exactly; presumably inputLen is a multiple of 16 (not checked here) */
+/* Store the final chaining value back into the IV slot of the context (also reached when inputLen is 0). */
+5:	movdqu	%xmm0, (%rdx)
+
+	xor	%eax, %eax	/* return 0 */
+	ret
+	.size intel_aes_decrypt_cbc_192, .-intel_aes_decrypt_cbc_192
+
+/* in %rdi : the key
+   in %rsi : buffer for expanded key
+*/
+	.type intel_aes_encrypt_init_256,@function	/* expand a 256-bit key into 15 encryption round keys (240 bytes) at %rsi */
+	.globl intel_aes_encrypt_init_256
+	.align	16
+intel_aes_encrypt_init_256:
+	movdqu	(%rdi), %xmm1	/* first 16 key bytes: stored unchanged as round key 0 */
+	movdqu	16(%rdi), %xmm3	/* second 16 key bytes: stored unchanged as round key 1 */
+	movdqu	%xmm1, (%rsi)
+	movdqu	%xmm3, 16(%rsi)
+	leaq	32(%rsi), %rsi	/* %rsi = slot for the next round key */
+	xor	%eax, %eax	/* key_expansion256 seeds %xmm6 from %eax, so it must be zero */
+/* Each aeskeygenassist + call pair appends two round keys and advances %rsi by 32; the immediate is the round constant. */
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01	/* aeskeygenassist $0x01, %xmm3, %xmm2 */
+	call key_expansion256
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02	/* aeskeygenassist $0x02, %xmm3, %xmm2 */
+	call key_expansion256
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04	/* aeskeygenassist $0x04, %xmm3, %xmm2 */
+	call key_expansion256
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08	/* aeskeygenassist $0x08, %xmm3, %xmm2 */
+	call key_expansion256
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10	/* aeskeygenassist $0x10, %xmm3, %xmm2 */
+	call key_expansion256
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20	/* aeskeygenassist $0x20, %xmm3, %xmm2 */
+	call key_expansion256
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40	/* aeskeygenassist $0x40, %xmm3, %xmm2 */
+	pxor	%xmm6, %xmm6	/* 15th key: only one more is needed, so the first half of key_expansion256 is repeated inline */
+	pshufd	$0xff, %xmm2, %xmm2
+	shufps	$0x10, %xmm1, %xmm6
+	pxor	%xmm6, %xmm1
+	shufps	$0x8c, %xmm1, %xmm6
+	pxor	%xmm2, %xmm1
+	pxor	%xmm6, %xmm1
+	movdqu	%xmm1, (%rsi)	/* last round key (offset 224 from the start of the buffer) */
+
+	ret
+	.size intel_aes_encrypt_init_256, .-intel_aes_encrypt_init_256
+
+
+/* in %rdi : the key
+   in %rsi : buffer for expanded key
+*/
+	.type intel_aes_decrypt_init_256,@function	/* expand a 256-bit key into 15 round keys for aesdec: keys 1..13 are passed through aesimc */
+	.globl intel_aes_decrypt_init_256
+	.align	16
+intel_aes_decrypt_init_256:
+	movdqu	(%rdi), %xmm1
+	movdqu	16(%rdi), %xmm3
+	movdqu	%xmm1, (%rsi)	/* round key 0 is stored untransformed */
+	.byte 0x66,0x0f,0x38,0xdb,0xe3	/* aesimc	%xmm3, %xmm4 */
+	movdqu	%xmm4, 16(%rsi)	/* round key 1 is stored in aesimc form; %xmm3 keeps the raw value for expansion */
+	leaq	32(%rsi), %rsi
+	xor	%eax, %eax	/* key_expansion256 seeds %xmm6 from %eax, so it must be zero */
+/* Each step: key_expansion256 stores two raw keys and advances %rsi; they are then overwritten in place with their aesimc forms. */
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01	/* aeskeygenassist $0x01, %xmm3, %xmm2 */
+	call key_expansion256
+	.byte 0x66,0x0f,0x38,0xdb,0xe1	/* aesimc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdb,0xeb	/* aesimc	%xmm3, %xmm5 */
+	movdqu	%xmm4, -32(%rsi)
+	movdqu	%xmm5, -16(%rsi)
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02	/* aeskeygenassist $0x02, %xmm3, %xmm2 */
+	call key_expansion256
+	.byte 0x66,0x0f,0x38,0xdb,0xe1	/* aesimc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdb,0xeb	/* aesimc	%xmm3, %xmm5 */
+	movdqu	%xmm4, -32(%rsi)
+	movdqu	%xmm5, -16(%rsi)
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04	/* aeskeygenassist $0x04, %xmm3, %xmm2 */
+	call key_expansion256
+	.byte 0x66,0x0f,0x38,0xdb,0xe1	/* aesimc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdb,0xeb	/* aesimc	%xmm3, %xmm5 */
+	movdqu	%xmm4, -32(%rsi)
+	movdqu	%xmm5, -16(%rsi)
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08	/* aeskeygenassist $0x08, %xmm3, %xmm2 */
+	call key_expansion256
+	.byte 0x66,0x0f,0x38,0xdb,0xe1	/* aesimc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdb,0xeb	/* aesimc	%xmm3, %xmm5 */
+	movdqu	%xmm4, -32(%rsi)
+	movdqu	%xmm5, -16(%rsi)
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10	/* aeskeygenassist $0x10, %xmm3, %xmm2 */
+	call key_expansion256
+	.byte 0x66,0x0f,0x38,0xdb,0xe1	/* aesimc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdb,0xeb	/* aesimc	%xmm3, %xmm5 */
+	movdqu	%xmm4, -32(%rsi)
+	movdqu	%xmm5, -16(%rsi)
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20	/* aeskeygenassist $0x20, %xmm3, %xmm2 */
+	call key_expansion256
+	.byte 0x66,0x0f,0x38,0xdb,0xe1	/* aesimc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdb,0xeb	/* aesimc	%xmm3, %xmm5 */
+	movdqu	%xmm4, -32(%rsi)
+	movdqu	%xmm5, -16(%rsi)
+	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40	/* aeskeygenassist $0x40, %xmm3, %xmm2 */
+	pxor	%xmm6, %xmm6	/* 15th key: the first half of key_expansion256 is repeated inline */
+	pshufd	$0xff, %xmm2, %xmm2
+	shufps	$0x10, %xmm1, %xmm6
+	pxor	%xmm6, %xmm1
+	shufps	$0x8c, %xmm1, %xmm6
+	pxor	%xmm2, %xmm1
+	pxor	%xmm6, %xmm1
+	movdqu	%xmm1, (%rsi)	/* last round key is stored untransformed (no aesimc) */
+
+	ret
+	.size intel_aes_decrypt_init_256, .-intel_aes_decrypt_init_256
+
+
+	.type key_expansion256,@function	/* helper (no .globl): derive the next two AES-256 round keys from %xmm1/%xmm3 and store them at %rsi */
+	.align	16
+key_expansion256:
+	movd	%eax, %xmm6	/* %xmm6 = %eax zero-extended; the callers visible here clear %eax beforehand */
+	pshufd	$0xff, %xmm2, %xmm2	/* broadcast dword 3 of the caller's aeskeygenassist result */
+	shufps	$0x10, %xmm1, %xmm6
+	pxor	%xmm6, %xmm1
+	shufps	$0x8c, %xmm1, %xmm6
+	pxor	%xmm2, %xmm1
+	pxor	%xmm6, %xmm1	/* net effect: each word of %xmm1 = XOR of itself and all lower words, XOR the broadcast word */
+	movdqu	%xmm1, (%rsi)
+/* Second key: same word-folding applied to %xmm3, keyed by dword 2 of aeskeygenassist($0) on the key just produced. */
+	addq	$16, %rsi
+	.byte 0x66,0x0f,0x3a,0xdf,0xe1,0x00	/* aeskeygenassist $0, %xmm1, %xmm4 */
+	pshufd	$0xaa, %xmm4, %xmm4
+	shufps	$0x10, %xmm3, %xmm6	/* relies on the low dword of %xmm6 still being zero after the shuffles above */
+	pxor	%xmm6, %xmm3
+	shufps	$0x8c, %xmm3, %xmm6
+	pxor	%xmm4, %xmm3
+	pxor	%xmm6, %xmm3
+	movdqu	%xmm3, (%rsi)
+	addq	$16, %rsi	/* on return %rsi has advanced by 32; %xmm2, %xmm4 and %xmm6 are clobbered */
+	ret
+	.size key_expansion256, .-key_expansion256
+
+
+/* in %rdi : cx - context
+   in %rsi : output - pointer to output buffer
+   in %rdx : outputLen - pointer to variable for length of output
+             (already filled in by caller)
+   in %ecx : maxOutputLen - length of output buffer
+             (already checked by caller)
+   in %r8  : input - pointer to input buffer
+   in %r9d : inputLen - length of input buffer
+   on stack: blocksize - AES blocksize (always 16, unused)
+*/
+	.type intel_aes_encrypt_ecb_256,@function	/* AES-256 ECB encryption of inputLen bytes from input to output */
+	.globl intel_aes_encrypt_ecb_256
+	.align	16
+intel_aes_encrypt_ecb_256:
+//	leaq	EXPANDED_KEY_OFFSET(%rdi), %rdi
+	leaq	48(%rdi), %rdi
+/* %xmm2 = round key 0 (offset 0), %xmm15 = last round key (offset 224); %eax is the byte offset. */
+	movdqu	(%rdi), %xmm2
+	movdqu	224(%rdi), %xmm15
+	xorl	%eax, %eax
+//	cmpl	$8*16, %r9d
+	cmpl	$128, %r9d	/* fewer than 8 blocks: go straight to the one-block loop */
+	jb	1f
+//	leal	-8*16(%r9), %r11d
+	leal	-128(%r9), %r11d	/* highest offset at which a full 8-block batch still fits */
+2:	movdqu	(%r8, %rax), %xmm3	/* load 8 plaintext blocks into %xmm3..%xmm10 */
+	movdqu	16(%r8, %rax), %xmm4
+	movdqu	32(%r8, %rax), %xmm5
+	movdqu	48(%r8, %rax), %xmm6
+	movdqu	64(%r8, %rax), %xmm7
+	movdqu	80(%r8, %rax), %xmm8
+	movdqu	96(%r8, %rax), %xmm9
+	movdqu	112(%r8, %rax), %xmm10
+	pxor	%xmm2, %xmm3	/* initial AddRoundKey with round key 0 */
+	pxor	%xmm2, %xmm4
+	pxor	%xmm2, %xmm5
+	pxor	%xmm2, %xmm6
+	pxor	%xmm2, %xmm7
+	pxor	%xmm2, %xmm8
+	pxor	%xmm2, %xmm9
+	pxor	%xmm2, %xmm10
+/* Middle rounds: keys are fetched two at a time into %xmm1/%xmm11, walking the schedule from offset 16 up to 208. */
+// complete loop unrolling
+	movdqu 16(%rdi), %xmm1
+	movdqu 32(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc	%xmm11, %xmm10 */
+
+	movdqu 48(%rdi), %xmm1
+	movdqu 64(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc	%xmm11, %xmm10 */
+
+	movdqu 80(%rdi), %xmm1
+	movdqu 96(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc	%xmm11, %xmm10 */
+
+	movdqu 112(%rdi), %xmm1
+	movdqu 128(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc	%xmm11, %xmm10 */
+
+	movdqu 144(%rdi), %xmm1
+	movdqu 160(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc	%xmm11, %xmm10 */
+
+	movdqu 176(%rdi), %xmm1
+	movdqu 192(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc	%xmm11, %xmm10 */
+
+	movdqu 208(%rdi), %xmm1
+	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xdd,0xdf	/* aesenclast 	%xmm15, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xdd,0xe7	/* aesenclast 	%xmm15, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xdd,0xef	/* aesenclast 	%xmm15, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xdd,0xf7	/* aesenclast 	%xmm15, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xdd,0xff	/* aesenclast 	%xmm15, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xdd,0xc7	/* aesenclast 	%xmm15, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xdd,0xcf	/* aesenclast 	%xmm15, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xdd,0xd7	/* aesenclast 	%xmm15, %xmm10 */
+/* Store the 8 encrypted blocks and advance the offset by 128 bytes. */
+	movdqu	%xmm3, (%rsi, %rax)
+	movdqu	%xmm4, 16(%rsi, %rax)
+	movdqu	%xmm5, 32(%rsi, %rax)
+	movdqu	%xmm6, 48(%rsi, %rax)
+	movdqu	%xmm7, 64(%rsi, %rax)
+	movdqu	%xmm8, 80(%rsi, %rax)
+	movdqu	%xmm9, 96(%rsi, %rax)
+	movdqu	%xmm10, 112(%rsi, %rax)
+//	addl	$8*16, %eax
+	addl	$128, %eax
+	cmpl	%r11d, %eax
+	jbe	2b	/* unsigned: repeat while offset <= inputLen - 128 */
+1:	cmpl	%eax, %r9d	/* all input consumed? */
+	je	5f
+/* Tail: one block per pass. %xmm8 alternates between the keys at offsets 0 and 112; the other keys stay resident. */
+	movdqu	(%rdi), %xmm8
+	movdqu	16(%rdi), %xmm2	/* %xmm2 now holds the key at offset 16, no longer round key 0 */
+	movdqu	32(%rdi), %xmm3
+	movdqu	48(%rdi), %xmm4
+	movdqu	64(%rdi), %xmm5
+	movdqu	80(%rdi), %xmm6
+	movdqu	96(%rdi), %xmm7
+	movdqu	128(%rdi), %xmm9
+	movdqu	144(%rdi), %xmm10
+	movdqu	160(%rdi), %xmm11
+	movdqu	176(%rdi), %xmm12
+	movdqu	192(%rdi), %xmm13
+	movdqu	208(%rdi), %xmm14
+
+4:	movdqu	(%r8, %rax), %xmm1
+	pxor	%xmm8, %xmm1	/* initial AddRoundKey: %xmm8 holds round key 0 at this point */
+	movdqu	112(%rdi), %xmm8	/* swap in the key at offset 112 for the seventh aesenc */
+	.byte 0x66,0x0f,0x38,0xdc,0xca	/* aesenc	%xmm2, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm3, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xcc	/* aesenc	%xmm4, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xcd	/* aesenc	%xmm5, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xce	/* aesenc	%xmm6, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xcf	/* aesenc	%xmm7, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc	%xmm8, %xmm1 */
+	movdqu	(%rdi), %xmm8	/* restore round key 0 for the next pass */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm9, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc	%xmm10, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm11, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcc	/* aesenc	%xmm12, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd	/* aesenc	%xmm13, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xce	/* aesenc	%xmm14, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdd,0xcf	/* aesenclast %xmm15, %xmm1 */
+	movdqu	%xmm1, (%rsi, %rax)
+	addl	$16, %eax
+	cmpl	%eax, %r9d
+	jne	4b	/* exits only when offset == inputLen exactly; presumably inputLen is a multiple of 16 (not checked here) */
+
+5:	xor	%eax, %eax	/* return 0 */
+	ret
+	.size intel_aes_encrypt_ecb_256, .-intel_aes_encrypt_ecb_256
+
+
+/* in %rdi : cx - context
+   in %rsi : output - pointer to output buffer
+   in %rdx : outputLen - pointer to variable for length of output
+             (already filled in by caller)
+   in %ecx : maxOutputLen - length of output buffer
+             (already checked by caller)
+   in %r8  : input - pointer to input buffer
+   in %r9d : inputLen - length of input buffer
+   on stack: blocksize - AES blocksize (always 16, unused)
+*/
+	.type intel_aes_decrypt_ecb_256,@function
+	.globl intel_aes_decrypt_ecb_256
+	.align	16
+intel_aes_decrypt_ecb_256:
+//	leaq	EXPANDED_KEY_OFFSET(%rdi), %rdi
+	leaq	48(%rdi), %rdi
+
+	movdqu	(%rdi), %xmm2
+	movdqu	224(%rdi), %xmm15
+	xorl	%eax, %eax
+//	cmpl	$8*16, %r9d
+	cmpl	$128, %r9d
+	jb	1f
+//	leal	-8*16(%r9), %r11d
+	leal	-128(%r9), %r11d
+2:	movdqu	(%r8, %rax), %xmm3
+	movdqu	16(%r8, %rax), %xmm4
+	movdqu	32(%r8, %rax), %xmm5
+	movdqu	48(%r8, %rax), %xmm6
+	movdqu	64(%r8, %rax), %xmm7
+	movdqu	80(%r8, %rax), %xmm8
+	movdqu	96(%r8, %rax), %xmm9
+	movdqu	112(%r8, %rax), %xmm10
+	pxor	%xmm15, %xmm3
+	pxor	%xmm15, %xmm4
+	pxor	%xmm15, %xmm5
+	pxor	%xmm15, %xmm6
+	pxor	%xmm15, %xmm7
+	pxor	%xmm15, %xmm8
+	pxor	%xmm15, %xmm9
+	pxor	%xmm15, %xmm10
+
+// complete loop unrolling
+	movdqu 208(%rdi), %xmm1
+	movdqu 192(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 176(%rdi), %xmm1
+	movdqu 160(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 144(%rdi), %xmm1
+	movdqu 128(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 112(%rdi), %xmm1
+	movdqu 96(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 80(%rdi), %xmm1
+	movdqu 64(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 48(%rdi), %xmm1
+	movdqu 32(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 16(%rdi), %xmm1
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x0f,0x38,0xdf,0xda		/* aesdeclast 	%xmm2, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdf,0xe2		/* aesdeclast 	%xmm2, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdf,0xea		/* aesdeclast 	%xmm2, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdf,0xf2		/* aesdeclast 	%xmm2, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdf,0xfa		/* aesdeclast 	%xmm2, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2	/* aesdeclast 	%xmm2, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca	/* aesdeclast 	%xmm2, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2	/* aesdeclast 	%xmm2, %xmm10 */
+
+	movdqu	%xmm3, (%rsi, %rax)
+	movdqu	%xmm4, 16(%rsi, %rax)
+	movdqu	%xmm5, 32(%rsi, %rax)
+	movdqu	%xmm6, 48(%rsi, %rax)
+	movdqu	%xmm7, 64(%rsi, %rax)
+	movdqu	%xmm8, 80(%rsi, %rax)
+	movdqu	%xmm9, 96(%rsi, %rax)
+	movdqu	%xmm10, 112(%rsi, %rax)
+//	addl	$8*16, %eax
+	addl	$128, %eax
+	cmpl	%r11d, %eax
+	jbe	2b
+1:	cmpl	%eax, %r9d
+	je	5f
+
+	movdqu	16(%rdi), %xmm2
+	movdqu	32(%rdi), %xmm3
+	movdqu	48(%rdi), %xmm4
+	movdqu	64(%rdi), %xmm5
+	movdqu	80(%rdi), %xmm6
+	movdqu	96(%rdi), %xmm7
+	movdqu	112(%rdi), %xmm8
+	movdqu	128(%rdi), %xmm9
+	movdqu	144(%rdi), %xmm10
+	movdqu	160(%rdi), %xmm11
+	movdqu	176(%rdi), %xmm12
+	movdqu	192(%rdi), %xmm13
+	movdqu	208(%rdi), %xmm14
+
+4:	movdqu	(%r8, %rax), %xmm1
+	pxor	%xmm15, %xmm1
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xce	/* aesdec	%xmm14, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xcd	/* aesdec	%xmm13, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xcc	/* aesdec	%xmm12, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xca	/* aesdec	%xmm10, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm9, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8	/* aesdec	%xmm8, %xmm1 */
+	movdqu	(%rdi), %xmm8
+	.byte 0x66,0x0f,0x38,0xde,0xcf	/* aesdec	%xmm7, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xce	/* aesdec	%xmm6, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcd	/* aesdec	%xmm5, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcc	/* aesdec	%xmm4, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm3, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xca	/* aesdec	%xmm2, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdf,0xc8	/* aesdeclast %xmm8, %xmm1 */
+	movdqu	112(%rdi), %xmm8
+	movdqu	%xmm1, (%rsi, %rax)
+	addl	$16, %eax
+	cmpl	%eax, %r9d
+	jne	4b
+
+5:	xor	%eax, %eax
+	ret
+	.size intel_aes_decrypt_ecb_256, .-intel_aes_decrypt_ecb_256
+
+
+/* intel_aes_encrypt_cbc_256 - AES CBC encryption with a 14-round (AES-256)
+   key schedule, one 16-byte block per loop iteration.
+
+   in %rdi : cx - context; the IV is read from and written back to cx+16,
+             the expanded key (15 round keys of 16 bytes) is read from cx+48
+   in %rsi : output - pointer to output buffer
+   in %rdx : outputLen - pointer to variable for length of output
+             (already filled in by caller; the register is reused below)
+   in %ecx : maxOutputLen - length of output buffer
+             (already checked by caller)
+   in %r8  : input - pointer to input buffer
+   in %r9d : inputLen - length of input buffer; the loop only stops when the
+             byte offset equals inputLen exactly, so this is presumably
+             always a multiple of 16 - confirm against the caller
+   on stack: blocksize - AES blocksize (always 16, unused)
+   returns : %eax = 0
+*/
+	.type intel_aes_encrypt_cbc_256,@function
+	.globl intel_aes_encrypt_cbc_256
+	.align	16
+intel_aes_encrypt_cbc_256:
+	/* Empty input: return straight away, the stored IV is left untouched. */
+	testl	%r9d, %r9d
+	je	2f
+
+//	leaq	IV_OFFSET(%rdi), %rdx
+//	leaq	EXPANDED_KEY_OFFSET(%rdi), %rdi
+	/* %rdx = address of the IV (cx+16), %rdi = expanded key (cx+48). */
+	leaq	16(%rdi), %rdx
+	leaq	48(%rdi), %rdi
+
+	/* %xmm0 = chaining value (initially the IV).
+	   Round keys 0-6 are kept in %xmm8, %xmm2-%xmm7 and round keys 8-14 in
+	   %xmm9-%xmm15.  Round key 7 (offset 112) has no register of its own; it
+	   shares %xmm8 with round key 0 inside the loop. */
+	movdqu	(%rdx), %xmm0
+	movdqu	(%rdi), %xmm8
+	movdqu	16(%rdi), %xmm2
+	movdqu	32(%rdi), %xmm3
+	movdqu	48(%rdi), %xmm4
+	movdqu	64(%rdi), %xmm5
+	movdqu	80(%rdi), %xmm6
+	movdqu	96(%rdi), %xmm7
+	movdqu	128(%rdi), %xmm9
+	movdqu	144(%rdi), %xmm10
+	movdqu	160(%rdi), %xmm11
+	movdqu	176(%rdi), %xmm12
+	movdqu	192(%rdi), %xmm13
+	movdqu	208(%rdi), %xmm14
+	movdqu	224(%rdi), %xmm15
+
+	/* %rax = byte offset of the block being processed. */
+	xorl	%eax, %eax
+1:	movdqu	(%r8, %rax), %xmm1
+	/* CBC: xor the plaintext block with the previous ciphertext block (or
+	   the IV), then apply round key 0. */
+	pxor	%xmm0, %xmm1
+	pxor	%xmm8, %xmm1
+	/* %xmm8 holds round key 7 until it has been consumed below. */
+	movdqu	112(%rdi), %xmm8
+	.byte 0x66,0x0f,0x38,0xdc,0xca	/* aesenc	%xmm2, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm3, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xcc	/* aesenc	%xmm4, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xcd	/* aesenc	%xmm5, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xce	/* aesenc	%xmm6, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xdc,0xcf	/* aesenc	%xmm7, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc	%xmm8, %xmm1 */
+	/* Put round key 0 back into %xmm8 for the next iteration. */
+	movdqu	(%rdi), %xmm8
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc	%xmm9, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc	%xmm10, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc	%xmm11, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcc	/* aesenc	%xmm12, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd	/* aesenc	%xmm13, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdc,0xce	/* aesenc	%xmm14, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdd,0xcf	/* aesenclast %xmm15, %xmm1 */
+	/* Store the ciphertext block; it is also the next chaining value. */
+	movdqu	%xmm1, (%rsi, %rax)
+	movdqa	%xmm1, %xmm0
+	addl	$16, %eax
+	cmpl	%eax, %r9d
+	jne	1b
+
+	/* Save the last ciphertext block as the IV for a following call. */
+	movdqu	%xmm0, (%rdx)
+
+	/* Return value is always 0. */
+2:	xor	%eax, %eax
+	ret
+	.size intel_aes_encrypt_cbc_256, .-intel_aes_encrypt_cbc_256
+
+
+/* intel_aes_decrypt_cbc_256 - AES CBC decryption with a 14-round (AES-256)
+   key schedule.  Eight blocks are decrypted per iteration while at least
+   128 bytes remain; whatever is left is handled one block at a time.
+   Ciphertext needed for chaining is always re-read before the matching
+   output is stored, so input and output may be the same buffer.
+
+   in %rdi : cx - context; the IV is read from and written back to cx+16,
+             the expanded key (15 round keys of 16 bytes) is read from cx+48.
+             The keys are applied from offset 224 down to offset 0; the
+             schedule is presumably already in the form aesdec expects -
+             confirm against the key setup code
+   in %rsi : output - pointer to output buffer
+   in %rdx : outputLen - pointer to variable for length of output
+             (already filled in by caller; the register is reused below)
+   in %ecx : maxOutputLen - length of output buffer
+             (already checked by caller)
+   in %r8  : input - pointer to input buffer
+   in %r9d : inputLen - length of input buffer; the single-block loop only
+             stops when the byte offset equals inputLen exactly, so this is
+             presumably always a multiple of 16 - confirm against the caller
+   on stack: blocksize - AES blocksize (always 16, unused)
+   returns : %eax = 0
+*/
+	.type intel_aes_decrypt_cbc_256,@function
+	.globl intel_aes_decrypt_cbc_256
+	.align	16
+intel_aes_decrypt_cbc_256:
+//	leaq	IV_OFFSET(%rdi), %rdx
+//	leaq	EXPANDED_KEY_OFFSET(%rdi), %rdi
+	/* %rdx = address of the IV (cx+16), %rdi = expanded key (cx+48). */
+	leaq	16(%rdi), %rdx
+	leaq	48(%rdi), %rdi
+
+	/* %xmm0 = chaining value (initially the IV), %xmm2 = key at offset 0
+	   (used by aesdeclast), %xmm15 = key at offset 224 (applied first).
+	   %rax = byte offset of the next block. */
+	movdqu	(%rdx), %xmm0
+	movdqu	(%rdi), %xmm2
+	movdqu	224(%rdi), %xmm15
+	xorl	%eax, %eax
+//	cmpl	$8*16, %r9d
+	/* Fewer than 8 blocks: go straight to the single-block path. */
+	cmpl	$128, %r9d
+	jb	1f
+//	leal	-8*16(%r9), %r11d
+	/* %r11d = last offset at which a full 8-block group still fits. */
+	leal	-128(%r9), %r11d
+	/* Load 8 ciphertext blocks into %xmm3-%xmm10 and apply the first key. */
+2:	movdqu  (%r8, %rax), %xmm3
+	movdqu	16(%r8, %rax), %xmm4
+	movdqu	32(%r8, %rax), %xmm5
+	movdqu	48(%r8, %rax), %xmm6
+	movdqu	64(%r8, %rax), %xmm7
+	movdqu	80(%r8, %rax), %xmm8
+	movdqu	96(%r8, %rax), %xmm9
+	movdqu	112(%r8, %rax), %xmm10
+	pxor	%xmm15, %xmm3
+	pxor	%xmm15, %xmm4
+	pxor	%xmm15, %xmm5
+	pxor	%xmm15, %xmm6
+	pxor	%xmm15, %xmm7
+	pxor	%xmm15, %xmm8
+	pxor	%xmm15, %xmm9
+	pxor	%xmm15, %xmm10
+
+// complete loop unrolling
+	/* 13 aesdec rounds on all 8 blocks.  The round keys are fetched two at a
+	   time into %xmm1 and %xmm11, walking from offset 208 down to offset 16. */
+	movdqu 208(%rdi), %xmm1
+	movdqu 192(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 176(%rdi), %xmm1
+	movdqu 160(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 144(%rdi), %xmm1
+	movdqu 128(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 112(%rdi), %xmm1
+	movdqu 96(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 80(%rdi), %xmm1
+	movdqu 64(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	movdqu 48(%rdi), %xmm1
+	movdqu 32(%rdi), %xmm11
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec	%xmm11, %xmm3 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec	%xmm11, %xmm4 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec	%xmm11, %xmm5 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec	%xmm11, %xmm6 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec	%xmm11, %xmm7 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec	%xmm11, %xmm8 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm9 */
+	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec	%xmm11, %xmm10 */
+
+	/* Last aesdec round (key at offset 16), then aesdeclast with the key at
+	   offset 0 held in %xmm2. */
+	movdqu 16(%rdi), %xmm1
+	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec	%xmm1, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec	%xmm1, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec	%xmm1, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec	%xmm1, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec	%xmm1, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec	%xmm1, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm1, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec	%xmm1, %xmm10 */
+	.byte 0x66,0x0f,0x38,0xdf,0xda		/* aesdeclast 	%xmm2, %xmm3 */
+	.byte 0x66,0x0f,0x38,0xdf,0xe2		/* aesdeclast 	%xmm2, %xmm4 */
+	.byte 0x66,0x0f,0x38,0xdf,0xea		/* aesdeclast 	%xmm2, %xmm5 */
+	.byte 0x66,0x0f,0x38,0xdf,0xf2		/* aesdeclast 	%xmm2, %xmm6 */
+	.byte 0x66,0x0f,0x38,0xdf,0xfa		/* aesdeclast 	%xmm2, %xmm7 */
+	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2	/* aesdeclast 	%xmm2, %xmm8 */
+	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca	/* aesdeclast 	%xmm2, %xmm9 */
+	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2	/* aesdeclast 	%xmm2, %xmm10 */
+
+	/* CBC un-chaining: block 0 is xored with the incoming chaining value,
+	   blocks 1-7 with the preceding ciphertext block re-read from the input.
+	   The final load leaves ciphertext block 7 in %xmm0 as the chaining value
+	   for the next group.  All of these loads come before the first store
+	   below, which keeps in-place operation (input = output) correct. */
+ 	pxor	%xmm0, %xmm3
+	movdqu	(%r8, %rax), %xmm0
+	pxor	%xmm0, %xmm4
+	movdqu	16(%r8, %rax), %xmm0
+	pxor	%xmm0, %xmm5
+	movdqu	32(%r8, %rax), %xmm0
+	pxor	%xmm0, %xmm6
+	movdqu	48(%r8, %rax), %xmm0
+	pxor	%xmm0, %xmm7
+	movdqu	64(%r8, %rax), %xmm0
+	pxor	%xmm0, %xmm8
+	movdqu	80(%r8, %rax), %xmm0
+	pxor	%xmm0, %xmm9
+	movdqu	96(%r8, %rax), %xmm0
+	pxor	%xmm0, %xmm10
+	movdqu	112(%r8, %rax), %xmm0
+	movdqu	%xmm3, (%rsi, %rax)
+	movdqu	%xmm4, 16(%rsi, %rax)
+	movdqu	%xmm5, 32(%rsi, %rax)
+	movdqu	%xmm6, 48(%rsi, %rax)
+	movdqu	%xmm7, 64(%rsi, %rax)
+	movdqu	%xmm8, 80(%rsi, %rax)
+	movdqu	%xmm9, 96(%rsi, %rax)
+	movdqu	%xmm10, 112(%rsi, %rax)
+//	addl	$8*16, %eax
+	/* Advance by 8 blocks; repeat while another full group fits
+	   (unsigned offset <= inputLen - 128). */
+	addl	$128, %eax
+	cmpl	%r11d, %eax
+	jbe	2b
+	/* Done if every byte has been consumed. */
+1:	cmpl	%eax, %r9d
+	je	5f
+
+	/* Single-block path: keep the keys at offsets 16-208 in %xmm2-%xmm14.
+	   %xmm2 no longer holds the offset-0 key; the loop fetches that key into
+	   %xmm8 just before aesdeclast.  %xmm15 still holds the offset-224 key. */
+	movdqu	16(%rdi), %xmm2
+	movdqu	32(%rdi), %xmm3
+	movdqu	48(%rdi), %xmm4
+	movdqu	64(%rdi), %xmm5
+	movdqu	80(%rdi), %xmm6
+	movdqu	96(%rdi), %xmm7
+	movdqu	112(%rdi), %xmm8
+	movdqu	128(%rdi), %xmm9
+	movdqu	144(%rdi), %xmm10
+	movdqu	160(%rdi), %xmm11
+	movdqu	176(%rdi), %xmm12
+	movdqu	192(%rdi), %xmm13
+	movdqu	208(%rdi), %xmm14
+
+4:	movdqu	(%r8, %rax), %xmm1
+	pxor	%xmm15, %xmm1
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xce	/* aesdec	%xmm14, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xcd	/* aesdec	%xmm13, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xcc	/* aesdec	%xmm12, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm11, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xca	/* aesdec	%xmm10, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9	/* aesdec	%xmm9, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8	/* aesdec	%xmm8, %xmm1 */
+	/* The offset-112 key has been used; borrow %xmm8 for the offset-0 key. */
+	movdqu	(%rdi), %xmm8
+	.byte 0x66,0x0f,0x38,0xde,0xcf	/* aesdec	%xmm7, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xce	/* aesdec	%xmm6, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcd	/* aesdec	%xmm5, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcc	/* aesdec	%xmm4, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xcb	/* aesdec	%xmm3, %xmm1 */
+	.byte 0x66,0x0f,0x38,0xde,0xca	/* aesdec	%xmm2, %xmm1 */
+	.byte 0x66,0x41,0x0f,0x38,0xdf,0xc8	/* aesdeclast %xmm8, %xmm1 */
+	/* Restore the offset-112 key in %xmm8 for the next iteration. */
+	movdqu	112(%rdi), %xmm8
+	/* Un-chain with the previous ciphertext block (or the IV). */
+	pxor	%xmm0, %xmm1
+	movdqu	(%r8, %rax), %xmm0  /* fetch the IV before we store the block */
+	movdqu	%xmm1, (%rsi, %rax) /* in case input buf = output buf */
+	addl	$16, %eax
+	cmpl	%eax, %r9d
+	jne	4b
+
+	/* Save the last ciphertext block (or the unchanged IV when inputLen was
+	   0) as the IV for a following call. */
+5:	movdqu	%xmm0, (%rdx)
+
+	/* Return value is always 0. */
+	xor	%eax, %eax
+	ret
+	.size intel_aes_decrypt_cbc_256, .-intel_aes_decrypt_cbc_256
diff --git a/security/nss/lib/freebl/intel-gcm-wrap.c b/security/nss/lib/freebl/intel-gcm-wrap.c
new file mode 100644
index 000000000..8c5eaf021
--- /dev/null
+++ b/security/nss/lib/freebl/intel-gcm-wrap.c
@@ -0,0 +1,254 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/* Copyright(c) 2013, Intel Corp. */
+
+/* Wrapper functions for Intel optimized implementation of AES-GCM */
+
+#ifdef USE_HW_AES
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapii.h"
+#include "blapit.h"
+#include "gcm.h"
+#include "ctr.h"
+#include "secerr.h"
+#include "prtypes.h"
+#include "pkcs11t.h"
+
+#include <limits.h>
+
+#include "intel-gcm.h"
+#include "rijndael.h"
+
+#include <emmintrin.h>
+#include <tmmintrin.h>
+
+/*
+ * State of one AES-GCM operation driven by the Intel assembly routines.
+ *
+ * NOTE(review): a pointer to this struct is handed to intel_aes_gcmENC and
+ * intel_aes_gcmDEC, which presumably access the members by fixed offset -
+ * confirm against the assembly before reordering or resizing anything.
+ */
+struct intel_AES_GCMContextStr {
+    /* GHASH key table, filled in by intel_aes_gcmINIT */
+    unsigned char Htbl[16 * AES_BLOCK_SIZE];
+    /* encrypted initial counter block; combined with the hash to form the tag */
+    unsigned char X0[AES_BLOCK_SIZE];
+    /* running GHASH value */
+    unsigned char T[AES_BLOCK_SIZE];
+    /* current counter block */
+    unsigned char CTR[AES_BLOCK_SIZE];
+    /* underlying AES context; not freed by intel_AES_GCM_DestroyContext */
+    AESContext *aes_context;
+    /* requested tag length in bits, taken from CK_GCM_PARAMS.ulTagBits */
+    unsigned long tagBits;
+    /* number of AAD bytes hashed so far */
+    unsigned long Alen;
+    /* number of message bytes processed so far */
+    unsigned long Mlen;
+};
+
+/*
+ * Allocate and initialise an AES-GCM context that uses the Intel assembly
+ * routines.
+ *
+ * context   - AESContext holding the expanded encryption key.  The pointer is
+ *             stored in the new context but is not owned by it.
+ * cipher    - block encryption function; called once to encrypt the initial
+ *             counter block.
+ * params    - points to a CK_GCM_PARAMS describing IV, AAD and tag length.
+ * blocksize - must be AES_BLOCK_SIZE.
+ *
+ * Returns the new context, or NULL on failure.  SEC_ERROR_LIBRARY_FAILURE is
+ * set for a wrong blocksize and SEC_ERROR_INVALID_ARGS for an empty IV or a
+ * tag length other than 128, 120, 112, 104, 96, 64 or 32 bits.
+ */
+intel_AES_GCMContext *
+intel_AES_GCM_CreateContext(void *context,
+                            freeblCipherFunc cipher,
+                            const unsigned char *params,
+                            unsigned int blocksize)
+{
+    intel_AES_GCMContext *gcm = NULL;
+    AESContext *aes = (AESContext *)context;
+    const CK_GCM_PARAMS *gcmParams = (const CK_GCM_PARAMS *)params;
+    unsigned char buff[AES_BLOCK_SIZE]; /* aux buffer */
+
+    unsigned long IV_whole_len = gcmParams->ulIvLen & (~0xful);
+    unsigned int IV_remainder_len = gcmParams->ulIvLen & 0xful;
+    unsigned long AAD_whole_len = gcmParams->ulAADLen & (~0xful);
+    unsigned int AAD_remainder_len = gcmParams->ulAADLen & 0xful;
+
+    __m128i BSWAP_MASK = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+    __m128i ONE = _mm_set_epi32(0, 0, 0, 1);
+    __m128i ctr;
+    unsigned int j;
+    SECStatus rv;
+
+    if (blocksize != AES_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return NULL;
+    }
+
+    /* GCM requires a non-empty IV. */
+    if (gcmParams->ulIvLen == 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return NULL;
+    }
+
+    /* Only accept the tag lengths GCM defines.  This also guarantees that
+     * the tag never exceeds the AES_BLOCK_SIZE-byte buffers the update
+     * functions copy it from and compare it against. */
+    switch (gcmParams->ulTagBits) {
+        case 128:
+        case 120:
+        case 112:
+        case 104:
+        case 96:
+        case 64:
+        case 32:
+            break;
+        default:
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return NULL;
+    }
+
+    gcm = PORT_ZNew(intel_AES_GCMContext);
+
+    if (gcm == NULL) {
+        return NULL;
+    }
+
+    /* initialize context fields */
+    gcm->aes_context = aes;
+    gcm->tagBits = gcmParams->ulTagBits;
+    gcm->Alen = 0;
+    gcm->Mlen = 0;
+
+    /* first prepare H and its derivatives for ghash */
+    intel_aes_gcmINIT(gcm->Htbl, (unsigned char *)aes->expandedKey, aes->Nr);
+
+    /* Initial TAG value is zero */
+    _mm_storeu_si128((__m128i *)gcm->T, _mm_setzero_si128());
+    _mm_storeu_si128((__m128i *)gcm->X0, _mm_setzero_si128());
+
+    /* Init the counter */
+    if (gcmParams->ulIvLen == 12) {
+        /* 96-bit IV: counter block is IV || 00 00 00 01.  The IV is copied
+         * bytewise because pIv is a byte pointer with no alignment
+         * guarantee; reading it through an unsigned int pointer is not
+         * valid C. */
+        PORT_Memcpy(gcm->CTR, gcmParams->pIv, 12);
+        gcm->CTR[12] = 0;
+        gcm->CTR[13] = 0;
+        gcm->CTR[14] = 0;
+        gcm->CTR[15] = 1;
+    } else {
+        /* If IV size is not 96 bits, then the initial counter value is GHASH
+         * of the IV */
+        intel_aes_gcmAAD(gcm->Htbl, gcmParams->pIv, IV_whole_len, gcm->T);
+
+        /* Partial block, zero padded to a full block */
+        if (IV_remainder_len) {
+            PORT_Memset(buff, 0, AES_BLOCK_SIZE);
+            PORT_Memcpy(buff, gcmParams->pIv + IV_whole_len, IV_remainder_len);
+            intel_aes_gcmAAD(gcm->Htbl, buff, AES_BLOCK_SIZE, gcm->T);
+        }
+
+        /* X0 is still all zero here, so this writes the plain hash of the
+         * IV and its length into CTR */
+        intel_aes_gcmTAG(
+            gcm->Htbl,
+            gcm->T,
+            gcmParams->ulIvLen,
+            0,
+            gcm->X0,
+            gcm->CTR);
+
+        /* TAG should be zero again */
+        _mm_storeu_si128((__m128i *)gcm->T, _mm_setzero_si128());
+    }
+
+    /* Encrypt the initial counter, will be used to encrypt the GHASH value,
+     * in the end */
+    rv = (*cipher)(context, gcm->X0, &j, AES_BLOCK_SIZE, gcm->CTR,
+                   AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+
+    /* Promote the counter by 1: reverse the bytes so that the big-endian
+     * counter in the last four bytes becomes the low 32-bit lane, add one,
+     * and reverse back. */
+    ctr = _mm_loadu_si128((__m128i *)gcm->CTR);
+    ctr = _mm_shuffle_epi8(ctr, BSWAP_MASK);
+    ctr = _mm_add_epi32(ONE, ctr);
+    ctr = _mm_shuffle_epi8(ctr, BSWAP_MASK);
+    _mm_storeu_si128((__m128i *)gcm->CTR, ctr);
+
+    /* Now hash AAD - it would actually make sense to separate the context
+     * creation from the AAD, because that would allow to reuse the H, which
+     * only changes when the AES key changes, and not every package, like the
+     * IV and AAD */
+    intel_aes_gcmAAD(gcm->Htbl, gcmParams->pAAD, AAD_whole_len, gcm->T);
+    if (AAD_remainder_len) {
+        PORT_Memset(buff, 0, AES_BLOCK_SIZE);
+        PORT_Memcpy(buff, gcmParams->pAAD + AAD_whole_len, AAD_remainder_len);
+        intel_aes_gcmAAD(gcm->Htbl, buff, AES_BLOCK_SIZE, gcm->T);
+    }
+    gcm->Alen += gcmParams->ulAADLen;
+    return gcm;
+
+loser:
+    /* Htbl is derived from the AES key; wipe it before releasing the memory */
+    PORT_Memset(gcm, 0, sizeof(*gcm));
+    PORT_Free(gcm);
+    return NULL;
+}
+
+/*
+ * Tear down a GCM context.
+ *
+ * The context contents are always wiped, because Htbl and X0 are derived
+ * from the AES key.  The memory itself is released only when freeit is true.
+ * The AESContext referenced by the context is not touched.  A NULL gcm is
+ * ignored.
+ */
+void
+intel_AES_GCM_DestroyContext(intel_AES_GCMContext *gcm, PRBool freeit)
+{
+    if (gcm == NULL) {
+        return;
+    }
+    PORT_Memset(gcm, 0, sizeof(*gcm));
+    if (freeit) {
+        PORT_Free(gcm);
+    }
+}
+
+/*
+ * Encrypt inlen bytes from inbuf and append the authentication tag.
+ *
+ * On success outbuf holds inlen bytes of ciphertext followed by the tag
+ * (tagBits rounded up to whole bytes) and *outlen is inlen + tag length.
+ * blocksize is not used.
+ *
+ * Returns SECFailure with
+ *   SEC_ERROR_INVALID_ARGS if the context's tag length exceeds one AES block,
+ *   SEC_ERROR_INPUT_LEN    if inlen + tag length does not fit an unsigned int,
+ *   SEC_ERROR_OUTPUT_LEN   if maxout is too small (*outlen is then set to the
+ *                          required size).
+ */
+SECStatus
+intel_AES_GCM_EncryptUpdate(intel_AES_GCMContext *gcm,
+                            unsigned char *outbuf,
+                            unsigned int *outlen, unsigned int maxout,
+                            const unsigned char *inbuf, unsigned int inlen,
+                            unsigned int blocksize)
+{
+    unsigned int tagBytes;
+    unsigned char T[AES_BLOCK_SIZE];
+
+    tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+    /* The tag is copied out of the AES_BLOCK_SIZE-byte buffer T below; a
+     * longer tag would read past the end of it. */
+    if (tagBytes > AES_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if (UINT_MAX - inlen < tagBytes) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (maxout < inlen + tagBytes) {
+        *outlen = inlen + tagBytes;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    intel_aes_gcmENC(
+        inbuf,
+        outbuf,
+        gcm,
+        inlen);
+
+    gcm->Mlen += inlen;
+
+    /* Compute the tag over everything processed so far into the local T */
+    intel_aes_gcmTAG(
+        gcm->Htbl,
+        gcm->T,
+        gcm->Mlen,
+        gcm->Alen,
+        gcm->X0,
+        T);
+
+    *outlen = inlen + tagBytes;
+
+    /* Append the (possibly truncated) tag after the ciphertext */
+    PORT_Memcpy(outbuf + inlen, T, tagBytes);
+    return SECSuccess;
+}
+
+/*
+ * Decrypt and authenticate inbuf, which holds ciphertext followed by the tag
+ * (tagBits rounded up to whole bytes).
+ *
+ * On success outbuf holds the plaintext and *outlen is inlen minus the tag
+ * length.  The plaintext is written to outbuf before the tag is checked; if
+ * the check fails those bytes are cleared again and *outlen is set to 0.
+ * blocksize is not used.
+ *
+ * Returns SECFailure with
+ *   SEC_ERROR_INVALID_ARGS if the context's tag length exceeds one AES block,
+ *   SEC_ERROR_INPUT_LEN    if inlen is shorter than the tag,
+ *   SEC_ERROR_OUTPUT_LEN   if maxout is too small (*outlen is then set to the
+ *                          required size),
+ *   SEC_ERROR_BAD_DATA     if the tag does not match.
+ */
+SECStatus
+intel_AES_GCM_DecryptUpdate(intel_AES_GCMContext *gcm,
+                            unsigned char *outbuf,
+                            unsigned int *outlen, unsigned int maxout,
+                            const unsigned char *inbuf, unsigned int inlen,
+                            unsigned int blocksize)
+{
+    unsigned int tagBytes;
+    unsigned char T[AES_BLOCK_SIZE];
+    const unsigned char *intag;
+
+    tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+    /* The received tag is compared against the AES_BLOCK_SIZE-byte buffer T
+     * below; a longer tag would make the comparison read past the end. */
+    if (tagBytes > AES_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* get the authentication block */
+    if (inlen < tagBytes) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    /* from here on inlen is the ciphertext length only */
+    inlen -= tagBytes;
+    intag = inbuf + inlen;
+
+    if (maxout < inlen) {
+        *outlen = inlen;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    intel_aes_gcmDEC(
+        inbuf,
+        outbuf,
+        gcm,
+        inlen);
+
+    gcm->Mlen += inlen;
+    intel_aes_gcmTAG(
+        gcm->Htbl,
+        gcm->T,
+        gcm->Mlen,
+        gcm->Alen,
+        gcm->X0,
+        T);
+
+    if (NSS_SecureMemcmp(T, intag, tagBytes) != 0) {
+        /* never hand unauthenticated plaintext back to the caller */
+        PORT_Memset(outbuf, 0, inlen);
+        *outlen = 0;
+        /* force a CKR_ENCRYPTED_DATA_INVALID error at in softoken */
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        return SECFailure;
+    }
+    *outlen = inlen;
+
+    return SECSuccess;
+}
+
+#endif
diff --git a/security/nss/lib/freebl/intel-gcm-x64-masm.asm b/security/nss/lib/freebl/intel-gcm-x64-masm.asm
new file mode 100644
index 000000000..8b68b76e5
--- /dev/null
+++ b/security/nss/lib/freebl/intel-gcm-x64-masm.asm
@@ -0,0 +1,1295 @@
+; LICENSE:
+; This submission to NSS is to be made available under the terms of the
+; Mozilla Public License, v. 2.0. You can obtain one at
+; http://mozilla.org/MPL/2.0/.
+;###############################################################################
+; Copyright(c) 2014, Intel Corp.
+; Developers and authors:
+; Shay Gueron and Vlad Krasnov
+; Intel Corporation, Israel Development Centre, Haifa, Israel
+; Please send feedback directly to crypto.feedback.alias@intel.com
+
+
+.DATA
+ALIGN 16
+; 128-bit constants with the value 1 and 2 in the low qword
+; (presumably counter increments - not referenced in this part of the file)
+Lone            dq 1,0
+Ltwo            dq 2,0
+; vpshufb control that reverses the order of the 16 bytes of a register
+Lbswap_mask     db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+; 0fh in every byte
+Lshuff_mask     dq 0f0f0f0f0f0f0f0fh, 0f0f0f0f0f0f0f0fh
+; constant used by the vpclmulqdq reduction steps and by the H*2 fix-up in
+; intel_aes_gcmINIT
+Lpoly           dq 01h, 0c200000000000000h
+
+.CODE
+
+
+; GFMUL DST, SRC1, SRC2, TMP1, TMP2, TMP3, TMP4
+;
+; DST = SRC1 * SRC2: a 128x128-bit carry-less multiply built from three
+; vpclmulqdq products (Karatsuba), followed by two folding steps with Lpoly
+; that bring the 256-bit product back to 128 bits.
+; TMP1-TMP4 are clobbered.  DST is written only by the last instruction, so
+; it may be the same register as SRC1 or SRC2 (the callers pass DST = SRC1).
+GFMUL MACRO DST, SRC1, SRC2, TMP1, TMP2, TMP3, TMP4
+    ; TMP1 = low qword * low qword, TMP4 = high qword * high qword
+    vpclmulqdq  TMP1, SRC2, SRC1, 0h
+    vpclmulqdq  TMP4, SRC2, SRC1, 011h
+
+    ; vpshufd 78 swaps the two qwords; after the xor both halves of TMP2/TMP3
+    ; hold (high xor low) of SRC2/SRC1
+    vpshufd     TMP2, SRC2, 78
+    vpshufd     TMP3, SRC1, 78
+    vpxor       TMP2, TMP2, SRC2
+    vpxor       TMP3, TMP3, SRC1
+
+    ; middle term = (hi1^lo1)*(hi2^lo2) ^ lo*lo ^ hi*hi
+    vpclmulqdq  TMP2, TMP2, TMP3, 0h
+    vpxor       TMP2, TMP2, TMP1
+    vpxor       TMP2, TMP2, TMP4
+
+    ; split the middle term across the low (TMP1) and high (TMP4) 128 bits
+    ; of the product
+    vpslldq     TMP3, TMP2, 8
+    vpsrldq     TMP2, TMP2, 8
+
+    vpxor       TMP1, TMP1, TMP3
+    vpxor       TMP4, TMP4, TMP2
+
+    ; first folding step of the low 128 bits
+    vpclmulqdq  TMP2, TMP1, [Lpoly], 010h
+    vpshufd     TMP3, TMP1, 78
+    vpxor       TMP1, TMP2, TMP3
+
+    ; second folding step
+    vpclmulqdq  TMP2, TMP1, [Lpoly], 010h
+    vpshufd     TMP3, TMP1, 78
+    vpxor       TMP1, TMP2, TMP3
+
+    ; combine the folded low half with the high half
+    vpxor       DST, TMP1, TMP4
+
+    ENDM
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Generates the final GCM tag
+; void intel_aes_gcmTAG(unsigned char Htbl[16*16],
+;                       unsigned char *Tp,
+;                       unsigned int Mlen,
+;                       unsigned int Alen,
+;                       unsigned char *X0,
+;                       unsigned char *TAG);
+;
+; Writes byteswap(GFMUL(T xor L, Htbl[0])) xor X0 to TAG, where T is the
+; 16-byte value at Tp and L holds Mlen*8 in its low qword and Alen*8 in its
+; high qword.  Tp and X0 are only read.
+;
+; NOTE(review): the prototype above declares Mlen and Alen as unsigned int,
+; but the code shifts and splits the full 64-bit r8 and r9 - confirm that
+; callers pass both values zero-extended to 64 bits.
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmTAG PROC
+
+Htbl    textequ <rcx>
+Tp      textequ <rdx>
+Mlen    textequ <r8>
+Alen    textequ <r9>
+X0      textequ <r10>
+TAG     textequ <r11>
+
+T       textequ <xmm0>
+TMP0    textequ <xmm1>
+
+    ; The 5th and 6th arguments are read from the stack, above the return
+    ; address (1*8) and four further 8-byte slots (4*8) - presumably the
+    ; register home area of the Windows x64 calling convention.
+    mov     X0, [rsp + 1*8 + 4*8]
+    mov     TAG, [rsp + 1*8 + 5*8]
+
+    vzeroupper
+    vmovdqu T, XMMWORD PTR[Tp]
+    vpxor   TMP0, TMP0, TMP0
+
+    ; byte counts -> bit counts
+    shl     Mlen, 3
+    shl     Alen, 3
+
+    ;vpinsrq    TMP0, TMP0, Mlen, 0
+    ;vpinsrq    TMP0, TMP0, Alen, 1
+    ; workaround the ml64.exe vpinsrq issue
+    ; Build the length block 32 bits at a time: dwords 0/1 = low/high half of
+    ; Mlen*8, dwords 2/3 = low/high half of Alen*8.
+    vpinsrd TMP0, TMP0, r8d, 0
+    vpinsrd TMP0, TMP0, r9d, 2
+    shr Mlen, 32
+    shr Alen, 32
+    vpinsrd TMP0, TMP0, r8d, 1
+    vpinsrd TMP0, TMP0, r9d, 3
+
+    ; fold the length block into the hash and multiply by the first table
+    ; entry
+    vpxor   T, T, TMP0
+    vmovdqu TMP0, XMMWORD PTR[Htbl]
+    GFMUL   T, T, TMP0, xmm2, xmm3, xmm4, xmm5
+
+    ; reverse the byte order, then xor with the block at X0
+    vpshufb T, T, [Lbswap_mask]
+    vpxor   T, T, [X0]
+    vmovdqu XMMWORD PTR[TAG], T
+    vzeroupper
+
+    ret
+
+intel_aes_gcmTAG ENDP
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Generates the H table
+; void intel_aes_gcmINIT(unsigned char Htbl[16*16], unsigned char *KS, int NR);
+;
+; KS points to the expanded AES key (16 bytes per round key) and NR is the
+; number of rounds.  With H' = 2 * byteswap(AES-ENC(0)) the table is filled as
+;   Htbl[i]     = H'^(i+1)                          for i = 0..7
+;   Htbl[8 + i] = (high qword xor low qword) of Htbl[i], in both qwords
+; The second half feeds the middle Karatsuba product of the multiply code.
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmINIT PROC
+
+Htbl    textequ <rcx>
+KS      textequ <rdx>
+NR      textequ <r8d>
+
+T       textequ <xmm0>
+TMP0    textequ <xmm1>
+
+    vzeroupper
+    ; AES-ENC(0)
+    ; The all-zero block xor round key 0 is just round key 0, so T starts
+    ; as KS[0].  NR-1 vaesenc rounds follow, then vaesenclast.
+    vmovdqu T, XMMWORD PTR[KS]
+    lea KS, [16 + KS]
+    dec NR
+Lenc_loop:
+        vaesenc T, T, [KS]
+        lea KS, [16 + KS]
+        dec NR
+        jnz Lenc_loop
+
+    vaesenclast T, T, [KS]
+    vpshufb T, T, [Lbswap_mask]
+
+    ;Calculate H` = GFMUL(H, 2)
+    ; xmm5 = Lpoly if the top bit of T is set, else 0: vpsrad turns every
+    ; dword into its sign mask and vpshufd 0ffh broadcasts the top dword.
+    vpsrad  xmm3, T, 31
+    vpshufd xmm3, xmm3, 0ffh
+    vpand   xmm5, xmm3, [Lpoly]
+    ; 128-bit shift left by one: shift each dword, then carry each dword's
+    ; former top bit into the next higher dword.
+    vpsrld  xmm3, T, 31
+    vpslld  xmm4, T, 1
+    vpslldq xmm3, xmm3, 4
+    vpxor   T, xmm4, xmm3
+    vpxor   T, T, xmm5
+
+    ; TMP0 keeps H' as the constant multiplier; store the first table entry
+    vmovdqu TMP0, T
+    vmovdqu XMMWORD PTR[Htbl + 0*16], T
+
+    ; high qword xor low qword of H', replicated in both qwords
+    vpshufd xmm2, T, 78
+    vpxor   xmm2, xmm2, T
+    vmovdqu XMMWORD PTR[Htbl + 8*16 + 0*16], xmm2
+
+    ; Assembly-time loop: emits the block below 7 times, each time
+    ; multiplying T by H' to get the next power.
+    i = 1
+    WHILE i LT 8
+        GFMUL   T, T, TMP0, xmm2, xmm3, xmm4, xmm5
+        vmovdqu XMMWORD PTR[Htbl + i*16], T
+        vpshufd xmm2, T, 78
+        vpxor   xmm2, xmm2, T
+        vmovdqu XMMWORD PTR[Htbl + 8*16 + i*16], xmm2
+        i = i+1
+        ENDM
+    vzeroupper
+    ret
+intel_aes_gcmINIT ENDP
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Authenticate only
+; void intel_aes_gcmAAD(unsigned char Htbl[16*16], unsigned char *AAD, unsigned int Alen, unsigned char *Tp);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmAAD PROC
+; Hashes len bytes at inp into the 16-byte accumulator stored at Tp (loaded on entry, written back on exit).
+Htbl    textequ <rcx>       ; 1st argument (Microsoft x64 convention): table read at [Htbl + i*16] and [Htbl + 8*16 + i*16]
+inp     textequ <rdx>       ; 2nd argument: input pointer, advanced as blocks are consumed
+len     textequ <r8>        ; 3rd argument: byte count; all 64 bits of r8 are used - NOTE(review): banner declares unsigned int, confirm upper half is zero
+Tp      textequ <r9>        ; 4th argument: address of the 16-byte accumulator
+hlp0    textequ <r10>       ; scratch: byte offset into Htbl while hashing the prefix blocks
+
+DATA    textequ <xmm0>      ; current input block
+T       textequ <xmm1>      ; accumulator, also the low half of the unreduced product
+TMP0    textequ <xmm2>      ; running xor of the lo*lo products
+TMP1    textequ <xmm3>      ; running xor of the hi*hi products
+TMP2    textequ <xmm4>      ; running xor of the Karatsuba middle products
+TMP3    textequ <xmm5>      ; scratch
+TMP4    textequ <xmm6>      ; scratch for the reduction steps
+Xhi     textequ <xmm7>      ; high half of the unreduced product
+
+KARATSUBA_AAD MACRO i
+    vpclmulqdq  TMP3, DATA, [Htbl + i*16], 0h              ; low qword of DATA times low qword of entry i
+    vpxor       TMP0, TMP0, TMP3
+    vpclmulqdq  TMP3, DATA, [Htbl + i*16], 011h            ; high qword of DATA times high qword of entry i
+    vpxor       TMP1, TMP1, TMP3
+    vpshufd     TMP3, DATA, 78                             ; 78 = 01001110b swaps the two 64-bit halves
+    vpxor       TMP3, TMP3, DATA                           ; both halves now hold hi xor lo of DATA
+    vpclmulqdq  TMP3, TMP3, [Htbl + 8*16 + i*16], 0h       ; times the low qword of entry 8+i (second half of the table)
+    vpxor       TMP2, TMP2, TMP3
+ENDM
+
+    test  len, len
+    jnz   LbeginAAD
+    ret                         ; len == 0: return before touching the stack or the accumulator
+
+LbeginAAD:
+    vzeroupper
+
+    sub rsp, 2*16               ; spill area for xmm6 and xmm7, which serve as TMP4 and Xhi below
+    vmovdqu XMMWORD PTR[rsp + 0*16], xmm6
+    vmovdqu XMMWORD PTR[rsp + 1*16], xmm7
+
+    vpxor   Xhi, Xhi, Xhi       ; no pending high half yet
+
+    vmovdqu T, XMMWORD PTR[Tp]
+    ;we hash 8 blocks each iteration; if the total number of blocks is not a multiple of 8, we hash the first n%8 blocks first
+    mov hlp0, len
+    and hlp0, 128-1             ; hlp0 = len mod 128 = bytes in the short prefix group
+    jz  Lmod_loop               ; NOTE(review): on this path T is the raw accumulator yet the loop runs both reduction stages on it; looks right only if the accumulator is zero on entry - confirm callers
+
+    and len, -128               ; len = bytes left for the 8-block loop
+    sub hlp0, 16                ; table offset paired with the first prefix block
+
+    ; Prefix block
+    vmovdqu DATA, XMMWORD PTR[inp]
+    vpshufb DATA, DATA, [Lbswap_mask]
+    vpxor   DATA, DATA, T       ; the incoming accumulator is folded into the first block
+
+    vpclmulqdq  TMP0, DATA, [Htbl + hlp0], 0h              ; first products initialise the three running sums
+    vpclmulqdq  TMP1, DATA, [Htbl + hlp0], 011h
+    vpshufd     TMP3, DATA, 78
+    vpxor       TMP3, TMP3, DATA
+    vpclmulqdq  TMP2, TMP3, [Htbl + 8*16 + hlp0], 0h
+
+    lea     inp, [inp+16]
+    test    hlp0, hlp0          ; offset 0 reached means the prefix was a single block
+    jnz     Lpre_loop
+    jmp     Lred1
+
+    ;hash remaining prefix blocks (up to 7 total prefix blocks)
+Lpre_loop:
+
+        sub hlp0, 16            ; step down one table entry per block; assumes len is a multiple of 16, otherwise hlp0 never hits zero - TODO confirm
+
+        vmovdqu DATA, XMMWORD PTR[inp]
+        vpshufb DATA, DATA, [Lbswap_mask]
+
+        vpclmulqdq  TMP3, DATA, [Htbl + hlp0], 0h
+        vpxor       TMP0, TMP0, TMP3
+        vpclmulqdq  TMP3, DATA, [Htbl + hlp0], 011h
+        vpxor       TMP1, TMP1, TMP3
+        vpshufd     TMP3, DATA, 78
+        vpxor       TMP3, TMP3, DATA
+        vpclmulqdq  TMP3, TMP3, [Htbl + 8*16 + hlp0], 0h
+        vpxor       TMP2, TMP2, TMP3
+
+        test    hlp0, hlp0
+        lea     inp, [inp+16]   ; lea leaves the flags from test intact for the jnz below
+        jnz     Lpre_loop
+
+Lred1:
+; Karatsuba recombination: middle term = TMP2 xor TMP0 xor TMP1, split across the two halves
+    vpxor       TMP2, TMP2, TMP0
+    vpxor       TMP2, TMP2, TMP1
+    vpsrldq     TMP3, TMP2, 8   ; upper 8 bytes of the middle term, moved down
+    vpslldq     TMP2, TMP2, 8   ; lower 8 bytes of the middle term, moved up
+
+    vpxor       Xhi, TMP1, TMP3 ; high 128 bits of the unreduced product
+    vpxor       T, TMP0, TMP2   ; low 128 bits of the unreduced product
+
+
+Lmod_loop:
+; Each pass hashes 8 blocks while reducing the product (Xhi:T) left by the previous pass
+        sub len, 16*8
+        jb  Ldone               ; fewer than 128 bytes left: only the final reduction remains
+        ; Block #0
+        vmovdqu DATA, XMMWORD PTR[inp + 16*7]              ; last block of the group pairs with table entry 0
+        vpshufb DATA, DATA, [Lbswap_mask]
+
+        vpclmulqdq  TMP0, DATA, [Htbl + 0*16], 0h
+        vpclmulqdq  TMP1, DATA, [Htbl + 0*16], 011h
+        vpshufd     TMP3, DATA, 78
+        vpxor       TMP3, TMP3, DATA
+        vpclmulqdq  TMP2, TMP3, [Htbl + 8*16 + 0*16], 0h
+
+        ; Block #1
+        vmovdqu DATA, XMMWORD PTR[inp + 16*6]
+        vpshufb DATA, DATA, [Lbswap_mask]
+        KARATSUBA_AAD 1
+
+        ; Block #2
+        vmovdqu DATA, XMMWORD PTR[inp + 16*5]
+        vpshufb DATA, DATA, [Lbswap_mask]
+
+        vpclmulqdq  TMP4, T, [Lpoly], 010h         ;reduction stage 1a
+        vpalignr    T, T, T, 8                     ; rotate T by 8 bytes, i.e. swap its halves
+
+        KARATSUBA_AAD 2
+
+        vpxor       T, T, TMP4                          ;reduction stage 1b
+
+        ; Block #3
+        vmovdqu DATA, XMMWORD PTR[inp + 16*4]
+        vpshufb DATA, DATA, [Lbswap_mask]
+        KARATSUBA_AAD 3
+        ; Block #4
+        vmovdqu DATA, XMMWORD PTR[inp + 16*3]
+        vpshufb DATA, DATA, [Lbswap_mask]
+
+        vpclmulqdq  TMP4, T, [Lpoly], 010h        ;reduction stage 2a
+        vpalignr    T, T, T, 8
+
+        KARATSUBA_AAD 4
+
+        vpxor       T, T, TMP4                          ;reduction stage 2b
+        ; Block #5
+        vmovdqu DATA, XMMWORD PTR[inp + 16*2]
+        vpshufb DATA, DATA, [Lbswap_mask]
+        KARATSUBA_AAD 5
+
+        vpxor   T, T, Xhi                               ;reduction finalize
+        ; Block #6
+        vmovdqu DATA, XMMWORD PTR[inp + 16*1]
+        vpshufb DATA, DATA, [Lbswap_mask]
+        KARATSUBA_AAD 6
+        ; Block #7
+        vmovdqu DATA, XMMWORD PTR[inp + 16*0]
+        vpshufb DATA, DATA, [Lbswap_mask]
+        vpxor   DATA, DATA, T                           ; first block of the group absorbs the reduced running value
+        KARATSUBA_AAD 7
+        ; Aggregated 8 blocks, now karatsuba fixup
+        vpxor   TMP2, TMP2, TMP0
+        vpxor   TMP2, TMP2, TMP1
+        vpsrldq TMP3, TMP2, 8
+        vpslldq TMP2, TMP2, 8
+
+        vpxor   Xhi, TMP1, TMP3
+        vpxor   T, TMP0, TMP2
+
+        lea inp, [inp + 16*8]
+        jmp Lmod_loop
+
+Ldone:
+    vpclmulqdq  TMP4, T, [Lpoly], 010h                  ; final reduction, first folding step
+    vpalignr    T, T, T, 8
+    vpxor       T, T, TMP4
+
+    vpclmulqdq  TMP4, T, [Lpoly], 010h                  ; second folding step
+    vpalignr    T, T, T, 8
+    vpxor       T, T, TMP4
+
+    vpxor       T, T, Xhi
+    vmovdqu     XMMWORD PTR[Tp], T                      ; write the updated accumulator back
+    vzeroupper
+
+    vmovdqu xmm6, XMMWORD PTR[rsp + 0*16]               ; restore the two spilled registers
+    vmovdqu xmm7, XMMWORD PTR[rsp + 1*16]
+    add rsp, 16*2
+
+    ret
+
+intel_aes_gcmAAD ENDP
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Encrypt and Authenticate
+; void intel_aes_gcmENC(unsigned char* PT, unsigned char* CT, void *Gctx, unsigned int len);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmENC PROC
+; Encrypts len bytes from PT to CT with AES in counter mode and folds the ciphertext into the hash value T kept in Gctx.
+PT      textequ <rcx>       ; 1st argument (Microsoft x64 convention): input, read
+CT      textequ <rdx>       ; 2nd argument: output, written
+Htbl    textequ <r8>        ; same register as Gctx: the table is addressed from offset 0 of the context
+Gctx    textequ <r8>        ; 3rd argument: context; fields used at 16*16+1*16 (T), 16*16+2*16 (counter block), 16*16+3*16 (key pointer)
+len     textequ <r9>        ; 4th argument: byte count; all 64 bits of r9 are used - NOTE(review): banner declares unsigned int
+KS      textequ <r10>       ; round-key pointer; reused as a byte index in the tail
+NR      textequ <eax>       ; value loaded from [KS + 4] and compared with 10 and 12 to pick the round count
+
+aluCTR  textequ <r11d>      ; last dword of the counter block, byte-swapped so it can be incremented with add
+aluKSl  textequ <r12d>      ; last dword of round key 0, byte-swapped
+aluTMP  textequ <r13d>      ; scratch for NEXTCTR
+
+T       textequ <xmm0>
+TMP0    textequ <xmm1>
+TMP1    textequ <xmm2>
+TMP2    textequ <xmm3>
+TMP3    textequ <xmm4>
+TMP4    textequ <xmm5>
+TMP5    textequ <xmm6>
+CTR0    textequ <xmm7>
+CTR1    textequ <xmm8>
+CTR2    textequ <xmm9>
+CTR3    textequ <xmm10>
+CTR4    textequ <xmm11>
+CTR5    textequ <xmm12>
+CTR6    textequ <xmm13>
+CTR7    textequ <xmm14>
+BSWAPMASK   textequ <xmm15>
+
+ROUND MACRO i
+    vmovdqu TMP3, XMMWORD PTR[i*16 + KS]       ; one AES round with round key i on all eight blocks; clobbers TMP3
+    vaesenc CTR0, CTR0, TMP3
+    vaesenc CTR1, CTR1, TMP3
+    vaesenc CTR2, CTR2, TMP3
+    vaesenc CTR3, CTR3, TMP3
+    vaesenc CTR4, CTR4, TMP3
+    vaesenc CTR5, CTR5, TMP3
+    vaesenc CTR6, CTR6, TMP3
+    vaesenc CTR7, CTR7, TMP3
+ENDM
+ROUNDMUL MACRO i
+    vmovdqu TMP3, XMMWORD PTR[i*16 + KS]
+; AES round i on eight blocks, interleaved with a multiply-accumulate of TMP5 by table entry i
+    vaesenc CTR0, CTR0, TMP3
+    vaesenc CTR1, CTR1, TMP3
+    vaesenc CTR2, CTR2, TMP3
+    vaesenc CTR3, CTR3, TMP3
+
+    vpshufd TMP4, TMP5, 78                     ; 78 = 01001110b swaps the two 64-bit halves
+    vpxor   TMP4, TMP4, TMP5                   ; both halves now hold hi xor lo of TMP5
+
+    vaesenc CTR4, CTR4, TMP3
+    vaesenc CTR5, CTR5, TMP3
+    vaesenc CTR6, CTR6, TMP3
+    vaesenc CTR7, CTR7, TMP3
+
+    vpclmulqdq  TMP3, TMP4, XMMWORD PTR[i*16 + 8*16 + Htbl], 000h      ; Karatsuba middle product, summed into TMP0
+    vpxor       TMP0, TMP0, TMP3
+    vmovdqu     TMP4, XMMWORD PTR[i*16 + Htbl]
+    vpclmulqdq  TMP3, TMP5, TMP4, 011h                                 ; hi*hi product, summed into TMP1
+    vpxor       TMP1, TMP1, TMP3
+    vpclmulqdq  TMP3, TMP5, TMP4, 000h                                 ; lo*lo product, summed into TMP2
+    vpxor       TMP2, TMP2, TMP3
+ENDM
+KARATSUBA MACRO i
+    vpshufd TMP4, TMP5, 78                     ; same multiply-accumulate as in ROUNDMUL, without the AES round
+    vpxor   TMP4, TMP4, TMP5
+    vpclmulqdq  TMP3, TMP4, XMMWORD PTR[i*16 + 8*16 + Htbl], 000h
+    vpxor       TMP0, TMP0, TMP3
+    vmovdqu     TMP4, XMMWORD PTR[i*16 + Htbl]
+    vpclmulqdq  TMP3, TMP5, TMP4, 011h
+    vpxor       TMP1, TMP1, TMP3
+    vpclmulqdq  TMP3, TMP5, TMP4, 000h
+    vpxor       TMP2, TMP2, TMP3
+ENDM
+NEXTCTR MACRO i
+    add aluCTR, 1                              ; advance the 32-bit counter word
+    mov aluTMP, aluCTR
+    xor aluTMP, aluKSl                         ; pre-apply round key 0 to the counter word
+    bswap   aluTMP                             ; back to memory byte order
+    mov [3*4 + 8*16 + i*16 + rsp], aluTMP      ; patch the last dword of counter slot i (slots start at rsp + 8*16)
+ENDM
+
+
+    test  len, len
+    jnz   LbeginENC
+    ret                         ; len == 0: return before anything is pushed or modified
+
+LbeginENC:
+
+    vzeroupper
+    push    r11
+    push    r12
+    push    r13
+    push    rbp
+    sub rsp, 10*16              ; spill area for xmm6..xmm15, all of which are overwritten below
+    vmovdqu XMMWORD PTR[rsp + 0*16], xmm6
+    vmovdqu XMMWORD PTR[rsp + 1*16], xmm7
+    vmovdqu XMMWORD PTR[rsp + 2*16], xmm8
+    vmovdqu XMMWORD PTR[rsp + 3*16], xmm9
+    vmovdqu XMMWORD PTR[rsp + 4*16], xmm10
+    vmovdqu XMMWORD PTR[rsp + 5*16], xmm11
+    vmovdqu XMMWORD PTR[rsp + 6*16], xmm12
+    vmovdqu XMMWORD PTR[rsp + 7*16], xmm13
+    vmovdqu XMMWORD PTR[rsp + 8*16], xmm14
+    vmovdqu XMMWORD PTR[rsp + 9*16], xmm15
+
+    mov rbp, rsp                ; rbp remembers the spill area; rsp is re-aligned next
+    sub rsp, 16*16              ; scratch: slots 0..7 hold byte-swapped ciphertext, slots 8..15 hold prepared counter blocks
+    and rsp, -16                ; 16-byte alignment so vmovdqa can be used on the scratch slots
+
+    vmovdqu T, XMMWORD PTR[16*16 + 1*16 + Gctx]
+    vmovdqu CTR0, XMMWORD PTR[16*16 + 2*16 + Gctx]
+    vmovdqu BSWAPMASK, XMMWORD PTR[Lbswap_mask]
+    mov     KS, [16*16 + 3*16 + Gctx]
+    mov     NR, [4 + KS]
+    lea     KS, [48 + KS]       ; round keys are indexed from offset 48 of the structure KS pointed to
+
+    vpshufb CTR0, CTR0, BSWAPMASK
+
+    mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx]
+    mov aluKSl, [3*4 + KS]
+    bswap   aluCTR
+    bswap   aluKSl
+
+    vmovdqu TMP0, XMMWORD PTR[0*16 + KS]
+    vpxor   TMP0, TMP0, XMMWORD PTR[16*16 + 2*16 + Gctx]       ; counter block xor round key 0
+    vmovdqu XMMWORD PTR[8*16 + 0*16 + rsp], TMP0               ; counter slot 0 starts with round 0 already applied
+
+    cmp len, 128
+    jb  LEncDataSingles         ; fewer than 8 blocks: skip the 8-wide path entirely
+; Prepare the "top" counters
+    vmovdqu XMMWORD PTR[8*16 + 1*16 + rsp], TMP0
+    vmovdqu XMMWORD PTR[8*16 + 2*16 + rsp], TMP0
+    vmovdqu XMMWORD PTR[8*16 + 3*16 + rsp], TMP0
+    vmovdqu XMMWORD PTR[8*16 + 4*16 + rsp], TMP0
+    vmovdqu XMMWORD PTR[8*16 + 5*16 + rsp], TMP0
+    vmovdqu XMMWORD PTR[8*16 + 6*16 + rsp], TMP0
+    vmovdqu XMMWORD PTR[8*16 + 7*16 + rsp], TMP0
+
+; Encrypt the initial 8 blocks
+    sub len, 128
+    vpaddd  CTR1, CTR0, XMMWORD PTR[Lone]      ; counters +1..+7 derived from the byte-swapped CTR0
+    vpaddd  CTR2, CTR0, XMMWORD PTR[Ltwo]
+    vpaddd  CTR3, CTR2, XMMWORD PTR[Lone]
+    vpaddd  CTR4, CTR2, XMMWORD PTR[Ltwo]
+    vpaddd  CTR5, CTR4, XMMWORD PTR[Lone]
+    vpaddd  CTR6, CTR4, XMMWORD PTR[Ltwo]
+    vpaddd  CTR7, CTR6, XMMWORD PTR[Lone]
+
+    vpshufb CTR0, CTR0, BSWAPMASK
+    vpshufb CTR1, CTR1, BSWAPMASK
+    vpshufb CTR2, CTR2, BSWAPMASK
+    vpshufb CTR3, CTR3, BSWAPMASK
+    vpshufb CTR4, CTR4, BSWAPMASK
+    vpshufb CTR5, CTR5, BSWAPMASK
+    vpshufb CTR6, CTR6, BSWAPMASK
+    vpshufb CTR7, CTR7, BSWAPMASK
+
+    vmovdqu TMP3, XMMWORD PTR[0*16 + KS]
+    vpxor   CTR0, CTR0, TMP3
+    vpxor   CTR1, CTR1, TMP3
+    vpxor   CTR2, CTR2, TMP3
+    vpxor   CTR3, CTR3, TMP3
+    vpxor   CTR4, CTR4, TMP3
+    vpxor   CTR5, CTR5, TMP3
+    vpxor   CTR6, CTR6, TMP3
+    vpxor   CTR7, CTR7, TMP3
+
+    ROUND   1
+
+    add aluCTR, 8               ; slot 0 gets counter +8, the first counter of the next group
+    mov aluTMP, aluCTR
+    xor aluTMP, aluKSl
+    bswap   aluTMP
+    mov [8*16 + 0*16 + 3*4 + rsp], aluTMP
+
+    ROUND   2
+    NEXTCTR 1                   ; slots 1..7 get counters +9..+15 while the AES rounds proceed
+    ROUND   3
+    NEXTCTR 2
+    ROUND   4
+    NEXTCTR 3
+    ROUND   5
+    NEXTCTR 4
+    ROUND   6
+    NEXTCTR 5
+    ROUND   7
+    NEXTCTR 6
+    ROUND   8
+    NEXTCTR 7
+    ROUND   9
+    vmovdqu TMP5, XMMWORD PTR[10*16 + KS]      ; TMP5 = last round key when NR is 10
+    cmp     NR, 10
+    je      @f
+
+    ROUND   10
+    ROUND   11
+    vmovdqu TMP5, XMMWORD PTR[12*16 + KS]      ; TMP5 = last round key when NR is 12
+    cmp     NR, 12
+    je      @f
+
+    ROUND   12
+    ROUND   13
+    vmovdqu TMP5, XMMWORD PTR[14*16 + KS]      ; otherwise 14 rounds
+@@:
+    vpxor   TMP3, TMP5, XMMWORD PTR[0*16 + PT]     ; last round key xor input block: vaesenclast then yields the output block directly
+    vaesenclast CTR0, CTR0, TMP3
+    vpxor   TMP3, TMP5, XMMWORD PTR[1*16 + PT]
+    vaesenclast CTR1, CTR1, TMP3
+    vpxor   TMP3, TMP5, XMMWORD PTR[2*16 + PT]
+    vaesenclast CTR2, CTR2, TMP3
+    vpxor   TMP3, TMP5, XMMWORD PTR[3*16 + PT]
+    vaesenclast CTR3, CTR3, TMP3
+    vpxor   TMP3, TMP5, XMMWORD PTR[4*16 + PT]
+    vaesenclast CTR4, CTR4, TMP3
+    vpxor   TMP3, TMP5, XMMWORD PTR[5*16 + PT]
+    vaesenclast CTR5, CTR5, TMP3
+    vpxor   TMP3, TMP5, XMMWORD PTR[6*16 + PT]
+    vaesenclast CTR6, CTR6, TMP3
+    vpxor   TMP3, TMP5, XMMWORD PTR[7*16 + PT]
+    vaesenclast CTR7, CTR7, TMP3
+
+    vmovdqu XMMWORD PTR[0*16 + CT], CTR0
+    vpshufb CTR0, CTR0, BSWAPMASK
+    vmovdqu XMMWORD PTR[1*16 + CT], CTR1
+    vpshufb CTR1, CTR1, BSWAPMASK
+    vmovdqu XMMWORD PTR[2*16 + CT], CTR2
+    vpshufb CTR2, CTR2, BSWAPMASK
+    vmovdqu XMMWORD PTR[3*16 + CT], CTR3
+    vpshufb CTR3, CTR3, BSWAPMASK
+    vmovdqu XMMWORD PTR[4*16 + CT], CTR4
+    vpshufb CTR4, CTR4, BSWAPMASK
+    vmovdqu XMMWORD PTR[5*16 + CT], CTR5
+    vpshufb CTR5, CTR5, BSWAPMASK
+    vmovdqu XMMWORD PTR[6*16 + CT], CTR6
+    vpshufb CTR6, CTR6, BSWAPMASK
+    vmovdqu XMMWORD PTR[7*16 + CT], CTR7
+    vpshufb TMP5, CTR7, BSWAPMASK              ; last output block stays in TMP5; it is hashed first, against table entry 0
+; Park the other seven byte-swapped output blocks in slots 1..7 so they are hashed during the next group
+    vmovdqa XMMWORD PTR[1*16 + rsp], CTR6
+    vmovdqa XMMWORD PTR[2*16 + rsp], CTR5
+    vmovdqa XMMWORD PTR[3*16 + rsp], CTR4
+    vmovdqa XMMWORD PTR[4*16 + rsp], CTR3
+    vmovdqa XMMWORD PTR[5*16 + rsp], CTR2
+    vmovdqa XMMWORD PTR[6*16 + rsp], CTR1
+    vmovdqa XMMWORD PTR[7*16 + rsp], CTR0
+
+    lea CT, [8*16 + CT]
+    lea PT, [8*16 + PT]
+    jmp LEncDataOctets          ; target is the label directly below
+
+LEncDataOctets:
+        cmp len, 128
+        jb  LEndEncOctets
+        sub len, 128
+
+        vmovdqa CTR0, XMMWORD PTR[8*16 + 0*16 + rsp]       ; prepared counters already include round key 0
+        vmovdqa CTR1, XMMWORD PTR[8*16 + 1*16 + rsp]
+        vmovdqa CTR2, XMMWORD PTR[8*16 + 2*16 + rsp]
+        vmovdqa CTR3, XMMWORD PTR[8*16 + 3*16 + rsp]
+        vmovdqa CTR4, XMMWORD PTR[8*16 + 4*16 + rsp]
+        vmovdqa CTR5, XMMWORD PTR[8*16 + 5*16 + rsp]
+        vmovdqa CTR6, XMMWORD PTR[8*16 + 6*16 + rsp]
+        vmovdqa CTR7, XMMWORD PTR[8*16 + 7*16 + rsp]
+
+        vpshufd TMP4, TMP5, 78                             ; first products of the previous group initialise TMP0, TMP1, TMP2
+        vpxor   TMP4, TMP4, TMP5
+        vpclmulqdq  TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h
+        vmovdqu     TMP4, XMMWORD PTR[0*16 + Htbl]
+        vpclmulqdq  TMP1, TMP5, TMP4, 011h
+        vpclmulqdq  TMP2, TMP5, TMP4, 000h
+
+        vmovdqu TMP5, XMMWORD PTR[1*16 + rsp]
+        ROUNDMUL 1
+        NEXTCTR 0
+        vmovdqu TMP5, XMMWORD PTR[2*16 + rsp]
+        ROUNDMUL 2
+        NEXTCTR 1
+        vmovdqu TMP5, XMMWORD PTR[3*16 + rsp]
+        ROUNDMUL 3
+        NEXTCTR 2
+        vmovdqu TMP5, XMMWORD PTR[4*16 + rsp]
+        ROUNDMUL 4
+        NEXTCTR 3
+        vmovdqu TMP5, XMMWORD PTR[5*16 + rsp]
+        ROUNDMUL 5
+        NEXTCTR 4
+        vmovdqu TMP5, XMMWORD PTR[6*16 + rsp]
+        ROUNDMUL 6
+        NEXTCTR 5
+        vpxor   TMP5, T, XMMWORD PTR[7*16 + rsp]           ; first block of the previous group absorbs the running hash T
+        ROUNDMUL 7
+        NEXTCTR 6
+
+        ROUND 8
+        NEXTCTR 7
+
+        vpxor   TMP0, TMP0, TMP1                           ; Karatsuba recombination of the three sums
+        vpxor   TMP0, TMP0, TMP2
+        vpsrldq TMP3, TMP0, 8
+        vpxor   TMP4, TMP1, TMP3                           ; TMP4 = high half of the unreduced product
+        vpslldq TMP3, TMP0, 8
+        vpxor   T, TMP2, TMP3                              ; T = low half of the unreduced product
+
+        vpclmulqdq  TMP1, T, XMMWORD PTR[Lpoly], 010h      ; first folding step of the reduction
+        vpalignr    T,T,T,8
+        vpxor       T, T, TMP1
+
+        ROUND 9
+
+        vpclmulqdq  TMP1, T, XMMWORD PTR[Lpoly], 010h      ; second folding step
+        vpalignr    T,T,T,8
+        vpxor       T, T, TMP1
+
+        vmovdqu     TMP5, XMMWORD PTR[10*16 + KS]
+        cmp         NR, 10
+        je          @f
+
+        ROUND 10
+        ROUND 11
+        vmovdqu     TMP5, XMMWORD PTR[12*16 + KS]
+        cmp         NR, 12
+        je          @f
+
+        ROUND 12
+        ROUND 13
+        vmovdqu     TMP5, XMMWORD PTR[14*16 + KS]
+@@:
+        vpxor   TMP3, TMP5, XMMWORD PTR[0*16 + PT]
+        vaesenclast CTR0, CTR0, TMP3
+        vpxor   TMP3, TMP5, XMMWORD PTR[1*16 + PT]
+        vaesenclast CTR1, CTR1, TMP3
+        vpxor   TMP3, TMP5, XMMWORD PTR[2*16 + PT]
+        vaesenclast CTR2, CTR2, TMP3
+        vpxor   TMP3, TMP5, XMMWORD PTR[3*16 + PT]
+        vaesenclast CTR3, CTR3, TMP3
+        vpxor   TMP3, TMP5, XMMWORD PTR[4*16 + PT]
+        vaesenclast CTR4, CTR4, TMP3
+        vpxor   TMP3, TMP5, XMMWORD PTR[5*16 + PT]
+        vaesenclast CTR5, CTR5, TMP3
+        vpxor   TMP3, TMP5, XMMWORD PTR[6*16 + PT]
+        vaesenclast CTR6, CTR6, TMP3
+        vpxor   TMP3, TMP5, XMMWORD PTR[7*16 + PT]
+        vaesenclast CTR7, CTR7, TMP3
+
+        vmovdqu XMMWORD PTR[0*16 + CT], CTR0
+        vpshufb CTR0, CTR0, BSWAPMASK
+        vmovdqu XMMWORD PTR[1*16 + CT], CTR1
+        vpshufb CTR1, CTR1, BSWAPMASK
+        vmovdqu XMMWORD PTR[2*16 + CT], CTR2
+        vpshufb CTR2, CTR2, BSWAPMASK
+        vmovdqu XMMWORD PTR[3*16 + CT], CTR3
+        vpshufb CTR3, CTR3, BSWAPMASK
+        vmovdqu XMMWORD PTR[4*16 + CT], CTR4
+        vpshufb CTR4, CTR4, BSWAPMASK
+        vmovdqu XMMWORD PTR[5*16 + CT], CTR5
+        vpshufb CTR5, CTR5, BSWAPMASK
+        vmovdqu XMMWORD PTR[6*16 + CT], CTR6
+        vpshufb CTR6, CTR6, BSWAPMASK
+        vmovdqu XMMWORD PTR[7*16 + CT], CTR7
+        vpshufb TMP5, CTR7, BSWAPMASK
+
+        vmovdqa XMMWORD PTR[1*16 + rsp], CTR6
+        vmovdqa XMMWORD PTR[2*16 + rsp], CTR5
+        vmovdqa XMMWORD PTR[3*16 + rsp], CTR4
+        vmovdqa XMMWORD PTR[4*16 + rsp], CTR3
+        vmovdqa XMMWORD PTR[5*16 + rsp], CTR2
+        vmovdqa XMMWORD PTR[6*16 + rsp], CTR1
+        vmovdqa XMMWORD PTR[7*16 + rsp], CTR0
+
+        vpxor   T, T, TMP4                                 ; finish the reduction by adding the high half
+
+        lea CT, [8*16 + CT]
+        lea PT, [8*16 + PT]
+        jmp LEncDataOctets
+
+LEndEncOctets:
+; Hash the last parked group of eight output blocks; no further 8-wide encryption follows
+    vpshufd TMP4, TMP5, 78
+    vpxor   TMP4, TMP4, TMP5
+    vpclmulqdq  TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h
+    vmovdqu     TMP4, XMMWORD PTR[0*16 + Htbl]
+    vpclmulqdq  TMP1, TMP5, TMP4, 011h
+    vpclmulqdq  TMP2, TMP5, TMP4, 000h
+
+    vmovdqu TMP5, XMMWORD PTR[1*16 + rsp]
+    KARATSUBA 1
+    vmovdqu TMP5, XMMWORD PTR[2*16 + rsp]
+    KARATSUBA 2
+    vmovdqu TMP5, XMMWORD PTR[3*16 + rsp]
+    KARATSUBA 3
+    vmovdqu TMP5, XMMWORD PTR[4*16 + rsp]
+    KARATSUBA 4
+    vmovdqu TMP5, XMMWORD PTR[5*16 + rsp]
+    KARATSUBA 5
+    vmovdqu TMP5, XMMWORD PTR[6*16 + rsp]
+    KARATSUBA 6
+    vpxor   TMP5, T, XMMWORD PTR[7*16 + rsp]
+    KARATSUBA 7
+
+    vpxor   TMP0, TMP0, TMP1
+    vpxor   TMP0, TMP0, TMP2
+    vpsrldq TMP3, TMP0, 8
+    vpxor   TMP4, TMP1, TMP3
+    vpslldq TMP3, TMP0, 8
+    vpxor   T, TMP2, TMP3
+
+    vpclmulqdq  TMP1, T, XMMWORD PTR[Lpoly], 010h
+    vpalignr    T,T,T,8
+    vpxor       T, T, TMP1
+
+    vpclmulqdq  TMP1, T, XMMWORD PTR[Lpoly], 010h
+    vpalignr    T,T,T,8
+    vpxor       T, T, TMP1
+
+    vpxor       T, T, TMP4
+
+    sub aluCTR, 7               ; counters were prepared 7 ahead; aluCTR now matches the counter held in slot 0
+
+LEncDataSingles:
+; One full 16-byte block per pass
+        cmp len, 16
+        jb  LEncDataTail
+        sub len, 16
+
+        vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + rsp]
+        NEXTCTR 0               ; refresh slot 0 for the following block
+
+        vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+        vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+        cmp NR, 10
+        je  @f
+        vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+        vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+        cmp NR, 12
+        je  @f
+        vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+        vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+        vaesenclast TMP1, TMP1, TMP2
+        vpxor   TMP1, TMP1, XMMWORD PTR[PT]
+        vmovdqu XMMWORD PTR[CT], TMP1
+
+        lea PT, [16+PT]
+        lea CT, [16+CT]
+
+        vpshufb TMP1, TMP1, BSWAPMASK
+        vpxor   T, T, TMP1
+        vmovdqu TMP0, XMMWORD PTR[Htbl]
+        GFMUL   T, T, TMP0, TMP1, TMP2, TMP3, TMP4         ; GFMUL is a macro defined earlier in the file, outside this procedure
+
+        jmp LEncDataSingles
+
+LEncDataTail:
+; Final partial block of 1..15 bytes. NOTE(review): aluCTR is not advanced here although the DEC tail does inc aluCTR - confirm intended
+    test    len, len
+    jz  LEncDataEnd
+
+    vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + rsp]
+
+    vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+    vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+    cmp NR, 10
+    je  @f
+    vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+    vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+    cmp NR, 12
+    je  @f
+    vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+    vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+    vaesenclast TMP1, TMP1, TMP2                ; TMP1 = keystream block
+; zero a temp location
+    vpxor   TMP2, TMP2, TMP2
+    vmovdqa XMMWORD PTR[rsp], TMP2
+; copy as many bytes as needed
+    xor KS, KS                                  ; round keys are no longer needed; KS becomes the byte index
+
+@@:
+        cmp len, KS
+        je  @f
+        mov al, [PT + KS]                       ; al overlaps NR (eax), which is not read again
+        mov [rsp + KS], al
+        inc KS
+        jmp @b
+@@:
+    vpxor   TMP1, TMP1, XMMWORD PTR[rsp]        ; first len bytes become output bytes, the rest is raw keystream
+    vmovdqa XMMWORD PTR[rsp], TMP1
+    xor KS, KS
+@@:
+        cmp len, KS
+        je  @f
+        mov al, [rsp + KS]                      ; write only len bytes to the output
+        mov [CT + KS], al
+        inc KS
+        jmp @b
+@@:
+        cmp KS, 16
+        je  @f
+        mov BYTE PTR[rsp + KS], 0               ; clear the keystream bytes past len so the hashed block is zero-padded
+        inc KS
+        jmp @b
+@@:
+BAIL:
+    vmovdqa TMP1, XMMWORD PTR[rsp]              ; BAIL is not referenced anywhere in this procedure
+    vpshufb TMP1, TMP1, BSWAPMASK
+    vpxor   T, T, TMP1
+    vmovdqu TMP0, XMMWORD PTR[Htbl]
+    GFMUL   T, T, TMP0, TMP1, TMP2, TMP3, TMP4
+
+LEncDataEnd:
+; Write the hash value and the counter word back into the context, then undo the prologue
+    vmovdqu XMMWORD PTR[16*16 + 1*16 + Gctx], T
+    bswap   aluCTR
+    mov     [16*16 + 2*16 + 3*4 + Gctx], aluCTR
+
+    mov rsp, rbp                                ; drop the aligned scratch area
+
+    vmovdqu xmm6, XMMWORD PTR[rsp + 0*16]
+    vmovdqu xmm7, XMMWORD PTR[rsp + 1*16]
+    vmovdqu xmm8, XMMWORD PTR[rsp + 2*16]
+    vmovdqu xmm9, XMMWORD PTR[rsp + 3*16]
+    vmovdqu xmm10, XMMWORD PTR[rsp + 4*16]
+    vmovdqu xmm11, XMMWORD PTR[rsp + 5*16]
+    vmovdqu xmm12, XMMWORD PTR[rsp + 6*16]
+    vmovdqu xmm13, XMMWORD PTR[rsp + 7*16]
+    vmovdqu xmm14, XMMWORD PTR[rsp + 8*16]
+    vmovdqu xmm15, XMMWORD PTR[rsp + 9*16]
+
+    add rsp, 10*16
+    pop rbp
+    pop r13
+    pop r12
+    pop r11
+
+    vzeroupper
+
+    ret
+intel_aes_gcmENC ENDP
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Decrypt and Authenticate
+; void intel_aes_gcmDEC(uint8_t* CT, uint8_t* PT, void *Gctx, unsigned int len);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmDEC PROC
+; Hashes len bytes of input at CT into T, decrypts them to PT in counter mode; other register names and ROUND/ROUNDMUL come from intel_aes_gcmENC above.
+NEXTCTR MACRO i
+    add aluCTR, 1                              ; advance the 32-bit counter word
+    mov aluTMP, aluCTR
+    xor aluTMP, aluKSl                         ; pre-apply round key 0 to the counter word
+    bswap   aluTMP
+    mov [3*4 + i*16 + rsp], aluTMP             ; counter slots start at rsp here, not at rsp + 8*16 as in the ENC version
+ENDM
+
+PT      textequ <rdx>       ; 2nd argument (Microsoft x64 convention): output, written
+CT      textequ <rcx>       ; 1st argument: input, read
+
+    test  len, len
+    jnz   LbeginDEC
+    ret                         ; len == 0: return before anything is pushed or modified
+
+LbeginDEC:
+
+    vzeroupper
+    push    r11
+    push    r12
+    push    r13
+    push    rbp
+    sub rsp, 10*16              ; spill area for xmm6..xmm15
+    vmovdqu XMMWORD PTR[rsp + 0*16], xmm6
+    vmovdqu XMMWORD PTR[rsp + 1*16], xmm7
+    vmovdqu XMMWORD PTR[rsp + 2*16], xmm8
+    vmovdqu XMMWORD PTR[rsp + 3*16], xmm9
+    vmovdqu XMMWORD PTR[rsp + 4*16], xmm10
+    vmovdqu XMMWORD PTR[rsp + 5*16], xmm11
+    vmovdqu XMMWORD PTR[rsp + 6*16], xmm12
+    vmovdqu XMMWORD PTR[rsp + 7*16], xmm13
+    vmovdqu XMMWORD PTR[rsp + 8*16], xmm14
+    vmovdqu XMMWORD PTR[rsp + 9*16], xmm15
+
+    mov rbp, rsp                ; rbp remembers the spill area; rsp is re-aligned next
+    sub rsp, 8*16               ; scratch: eight prepared counter blocks only
+    and rsp, -16                ; 16-byte alignment so vmovdqa can be used on the slots
+
+    vmovdqu T, XMMWORD PTR[16*16 + 1*16 + Gctx]
+    vmovdqu CTR0, XMMWORD PTR[16*16 + 2*16 + Gctx]
+    vmovdqu BSWAPMASK, XMMWORD PTR[Lbswap_mask]
+    mov     KS, [16*16 + 3*16 + Gctx]
+    mov     NR, [4 + KS]
+    lea     KS, [48 + KS]       ; round keys are indexed from offset 48 of the structure KS pointed to
+
+    vpshufb CTR0, CTR0, BSWAPMASK
+
+    mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx]
+    mov aluKSl, [3*4 + KS]
+    bswap   aluCTR
+    bswap   aluKSl
+
+    vmovdqu TMP0, XMMWORD PTR[0*16 + KS]
+    vpxor   TMP0, TMP0, XMMWORD PTR[16*16 + 2*16 + Gctx]       ; counter block xor round key 0
+    vmovdqu XMMWORD PTR[0*16 + rsp], TMP0                      ; slot 0 holds the current counter with round 0 applied
+
+    cmp len, 128
+    jb  LDecDataSingles         ; fewer than 8 blocks: skip the 8-wide path
+; Prepare the "top" counters
+    vmovdqu XMMWORD PTR[1*16 + rsp], TMP0
+    vmovdqu XMMWORD PTR[2*16 + rsp], TMP0
+    vmovdqu XMMWORD PTR[3*16 + rsp], TMP0
+    vmovdqu XMMWORD PTR[4*16 + rsp], TMP0
+    vmovdqu XMMWORD PTR[5*16 + rsp], TMP0
+    vmovdqu XMMWORD PTR[6*16 + rsp], TMP0
+    vmovdqu XMMWORD PTR[7*16 + rsp], TMP0
+
+    NEXTCTR 1                   ; slots 1..7 get counters +1..+7
+    NEXTCTR 2
+    NEXTCTR 3
+    NEXTCTR 4
+    NEXTCTR 5
+    NEXTCTR 6
+    NEXTCTR 7
+
+LDecDataOctets:
+        cmp len, 128
+        jb  LEndDecOctets
+        sub len, 128
+
+        vmovdqa CTR0, XMMWORD PTR[0*16 + rsp]
+        vmovdqa CTR1, XMMWORD PTR[1*16 + rsp]
+        vmovdqa CTR2, XMMWORD PTR[2*16 + rsp]
+        vmovdqa CTR3, XMMWORD PTR[3*16 + rsp]
+        vmovdqa CTR4, XMMWORD PTR[4*16 + rsp]
+        vmovdqa CTR5, XMMWORD PTR[5*16 + rsp]
+        vmovdqa CTR6, XMMWORD PTR[6*16 + rsp]
+        vmovdqa CTR7, XMMWORD PTR[7*16 + rsp]
+
+        vmovdqu TMP5, XMMWORD PTR[7*16 + CT]               ; input blocks are hashed straight from memory, last block against table entry 0
+        vpshufb TMP5, TMP5, BSWAPMASK
+        vpshufd TMP4, TMP5, 78
+        vpxor   TMP4, TMP4, TMP5
+        vpclmulqdq  TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h
+        vmovdqu     TMP4, XMMWORD PTR[0*16 + Htbl]
+        vpclmulqdq  TMP1, TMP5, TMP4, 011h
+        vpclmulqdq  TMP2, TMP5, TMP4, 000h
+
+        vmovdqu TMP5, XMMWORD PTR[6*16 + CT]
+        vpshufb TMP5, TMP5, BSWAPMASK
+        ROUNDMUL 1
+        NEXTCTR 0
+        vmovdqu TMP5, XMMWORD PTR[5*16 + CT]
+        vpshufb TMP5, TMP5, BSWAPMASK
+        ROUNDMUL 2
+        NEXTCTR 1
+        vmovdqu TMP5, XMMWORD PTR[4*16 + CT]
+        vpshufb TMP5, TMP5, BSWAPMASK
+        ROUNDMUL 3
+        NEXTCTR 2
+        vmovdqu TMP5, XMMWORD PTR[3*16 + CT]
+        vpshufb TMP5, TMP5, BSWAPMASK
+        ROUNDMUL 4
+        NEXTCTR 3
+        vmovdqu TMP5, XMMWORD PTR[2*16 + CT]
+        vpshufb TMP5, TMP5, BSWAPMASK
+        ROUNDMUL 5
+        NEXTCTR 4
+        vmovdqu TMP5, XMMWORD PTR[1*16 + CT]
+        vpshufb TMP5, TMP5, BSWAPMASK
+        ROUNDMUL 6
+        NEXTCTR 5
+        vmovdqu TMP5, XMMWORD PTR[0*16 + CT]
+        vpshufb TMP5, TMP5, BSWAPMASK
+        vpxor   TMP5, TMP5, T                              ; first block of the group absorbs the running hash T
+        ROUNDMUL 7
+        NEXTCTR 6
+
+        ROUND 8
+        NEXTCTR 7
+
+        vpxor   TMP0, TMP0, TMP1                           ; Karatsuba recombination of the three sums
+        vpxor   TMP0, TMP0, TMP2
+        vpsrldq TMP3, TMP0, 8
+        vpxor   TMP4, TMP1, TMP3                           ; TMP4 = high half of the unreduced product
+        vpslldq TMP3, TMP0, 8
+        vpxor   T, TMP2, TMP3                              ; T = low half of the unreduced product
+
+        vpclmulqdq  TMP1, T, XMMWORD PTR[Lpoly], 010h      ; first folding step of the reduction
+        vpalignr    T,T,T,8
+        vpxor       T, T, TMP1
+
+        ROUND 9
+
+        vpclmulqdq  TMP1, T, XMMWORD PTR[Lpoly], 010h      ; second folding step
+        vpalignr    T,T,T,8
+        vpxor       T, T, TMP1
+
+        vmovdqu     TMP5, XMMWORD PTR[10*16 + KS]
+        cmp         NR, 10
+        je          @f
+
+        ROUND 10
+        ROUND 11
+        vmovdqu     TMP5, XMMWORD PTR[12*16 + KS]
+        cmp         NR, 12
+        je          @f
+
+        ROUND 12
+        ROUND 13
+        vmovdqu     TMP5, XMMWORD PTR[14*16 + KS]
+@@:
+        vpxor   TMP3, TMP5, XMMWORD PTR[0*16 + CT]         ; last round key xor input block: vaesenclast then yields the output block directly
+        vaesenclast CTR0, CTR0, TMP3
+        vpxor   TMP3, TMP5, XMMWORD PTR[1*16 + CT]
+        vaesenclast CTR1, CTR1, TMP3
+        vpxor   TMP3, TMP5, XMMWORD PTR[2*16 + CT]
+        vaesenclast CTR2, CTR2, TMP3
+        vpxor   TMP3, TMP5, XMMWORD PTR[3*16 + CT]
+        vaesenclast CTR3, CTR3, TMP3
+        vpxor   TMP3, TMP5, XMMWORD PTR[4*16 + CT]
+        vaesenclast CTR4, CTR4, TMP3
+        vpxor   TMP3, TMP5, XMMWORD PTR[5*16 + CT]
+        vaesenclast CTR5, CTR5, TMP3
+        vpxor   TMP3, TMP5, XMMWORD PTR[6*16 + CT]
+        vaesenclast CTR6, CTR6, TMP3
+        vpxor   TMP3, TMP5, XMMWORD PTR[7*16 + CT]
+        vaesenclast CTR7, CTR7, TMP3
+
+        vmovdqu XMMWORD PTR[0*16 + PT], CTR0
+        vmovdqu XMMWORD PTR[1*16 + PT], CTR1
+        vmovdqu XMMWORD PTR[2*16 + PT], CTR2
+        vmovdqu XMMWORD PTR[3*16 + PT], CTR3
+        vmovdqu XMMWORD PTR[4*16 + PT], CTR4
+        vmovdqu XMMWORD PTR[5*16 + PT], CTR5
+        vmovdqu XMMWORD PTR[6*16 + PT], CTR6
+        vmovdqu XMMWORD PTR[7*16 + PT], CTR7
+
+        vpxor   T, T, TMP4                                 ; finish the reduction by adding the high half
+
+        lea CT, [8*16 + CT]
+        lea PT, [8*16 + PT]
+        jmp LDecDataOctets
+
+LEndDecOctets:
+
+    sub aluCTR, 7               ; counters were prepared 7 ahead; aluCTR now matches the counter held in slot 0
+
+LDecDataSingles:
+; One full 16-byte block per pass
+        cmp len, 16
+        jb  LDecDataTail
+        sub len, 16
+
+        vmovdqa TMP1, XMMWORD PTR[0*16 + rsp]
+        NEXTCTR 0               ; refresh slot 0 for the following block
+
+        vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+        vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+        cmp NR, 10
+        je  @f
+        vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+        vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+        cmp NR, 12
+        je  @f
+        vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+        vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+        vaesenclast TMP1, TMP1, TMP2
+
+        vmovdqu TMP2, XMMWORD PTR[CT]          ; keep the input block in TMP2: it is hashed after the output is stored
+        vpxor   TMP1, TMP1, TMP2
+        vmovdqu XMMWORD PTR[PT], TMP1
+
+        lea PT, [16+PT]
+        lea CT, [16+CT]
+
+        vpshufb TMP2, TMP2, BSWAPMASK
+        vpxor   T, T, TMP2
+        vmovdqu TMP0, XMMWORD PTR[Htbl]
+        GFMUL   T, T, TMP0, TMP1, TMP2, TMP3, TMP4         ; GFMUL is a macro defined earlier in the file, outside this procedure
+
+        jmp LDecDataSingles
+
+LDecDataTail:
+; Final partial block of 1..15 bytes
+    test    len, len
+    jz      LDecDataEnd
+
+    vmovdqa TMP1, XMMWORD PTR[0*16 + rsp]
+    inc aluCTR                  ; counter value written back to Gctx is advanced here; the ENC tail has no matching increment
+    vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+    vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+    cmp NR, 10
+    je  @f
+    vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+    vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+    cmp NR, 12
+    je  @f
+    vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+    vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+    vaesenclast TMP1, TMP1, TMP2                ; TMP1 = keystream block
+; copy as many bytes as needed
+    xor KS, KS                                  ; round keys are no longer needed; KS becomes the byte index
+@@:
+        cmp len, KS
+        je  @f
+        mov al, [CT + KS]                       ; al overlaps NR (eax), which is not read again
+        mov [rsp + KS], al                      ; slot 0 doubles as the temp buffer
+        inc KS
+        jmp @b
+@@:
+        cmp KS, 16
+        je  @f
+        mov BYTE PTR[rsp + KS], 0               ; zero-pad the temp block to 16 bytes
+        inc KS
+        jmp @b
+@@:
+    vmovdqa TMP2, XMMWORD PTR[rsp]
+    vpshufb TMP2, TMP2, BSWAPMASK
+    vpxor   T, T, TMP2
+    vmovdqu TMP0, XMMWORD PTR[Htbl]
+    GFMUL   T, T, TMP0, TMP5, TMP2, TMP3, TMP4  ; TMP5 is the scratch here because TMP1 still holds the keystream
+
+
+    vpxor   TMP1, TMP1, XMMWORD PTR[rsp]
+    vmovdqa XMMWORD PTR[rsp], TMP1
+    xor KS, KS
+@@:
+        cmp len, KS
+        je  @f
+        mov al, [rsp + KS]                      ; write only len bytes to the output
+        mov [PT + KS], al
+        inc KS
+        jmp @b
+@@:
+
+LDecDataEnd:
+; Write the hash value and the counter word back into the context, then undo the prologue
+    vmovdqu XMMWORD PTR[16*16 + 1*16 + Gctx], T
+    bswap   aluCTR
+    mov     [16*16 + 2*16 + 3*4 + Gctx], aluCTR
+
+    mov rsp, rbp                                ; drop the aligned scratch area
+
+    vmovdqu xmm6, XMMWORD PTR[rsp + 0*16]
+    vmovdqu xmm7, XMMWORD PTR[rsp + 1*16]
+    vmovdqu xmm8, XMMWORD PTR[rsp + 2*16]
+    vmovdqu xmm9, XMMWORD PTR[rsp + 3*16]
+    vmovdqu xmm10, XMMWORD PTR[rsp + 4*16]
+    vmovdqu xmm11, XMMWORD PTR[rsp + 5*16]
+    vmovdqu xmm12, XMMWORD PTR[rsp + 6*16]
+    vmovdqu xmm13, XMMWORD PTR[rsp + 7*16]
+    vmovdqu xmm14, XMMWORD PTR[rsp + 8*16]
+    vmovdqu xmm15, XMMWORD PTR[rsp + 9*16]
+
+    add rsp, 10*16
+    pop rbp
+    pop r13
+    pop r12
+    pop r11
+
+    vzeroupper
+
+    ret
+ret                             ; unreachable: duplicates the ret on the previous line
+intel_aes_gcmDEC ENDP
+
+
+END
diff --git a/security/nss/lib/freebl/intel-gcm-x86-masm.asm b/security/nss/lib/freebl/intel-gcm-x86-masm.asm
new file mode 100644
index 000000000..6362ad859
--- /dev/null
+++ b/security/nss/lib/freebl/intel-gcm-x86-masm.asm
@@ -0,0 +1,1209 @@
+; LICENSE:
+; This submission to NSS is to be made available under the terms of the
+; Mozilla Public License, v. 2.0. You can obtain one at
+; http://mozilla.org/MPL/2.0/.
+;###############################################################################
+; Copyright(c) 2014, Intel Corp.
+; Developers and authors:
+; Shay Gueron and Vlad Krasnov
+; Intel Corporation, Israel Development Centre, Haifa, Israel
+; Please send feedback directly to crypto.feedback.alias@intel.com
+
+
+.MODEL FLAT, C
+.XMM
+
+.DATA
+ALIGN 16
+Lone            dq 1,0                                      ; added with vpaddd to step a counter block by 1
+Ltwo            dq 2,0                                      ; added with vpaddd to step a counter block by 2
+Lbswap_mask     db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0    ; vpshufb control that reverses the 16 bytes of a register
+Lshuff_mask     dq 0f0f0f0f0f0f0f0fh, 0f0f0f0f0f0f0f0fh     ; not referenced by any procedure in this file
+Lpoly           dq 01h, 0c200000000000000h                  ; reduction constant; the code multiplies by its high qword (vpclmulqdq imm 010h)
+
+.CODE
+
+; GFMUL: DST = carry-less product of SRC1 and SRC2 folded back to 128 bits with Lpoly. TMP1-TMP4 are clobbered; DST is written last, so it may name the same register as SRC1.
+GFMUL MACRO DST, SRC1, SRC2, TMP1, TMP2, TMP3, TMP4
+    vpclmulqdq  TMP1, SRC2, SRC1, 0h
+    vpclmulqdq  TMP4, SRC2, SRC1, 011h
+    ; TMP1 = low qword x low qword, TMP4 = high qword x high qword; next build (lo xor hi) of each operand (vpshufd 78 swaps the qwords)
+    vpshufd     TMP2, SRC2, 78
+    vpshufd     TMP3, SRC1, 78
+    vpxor       TMP2, TMP2, SRC2
+    vpxor       TMP3, TMP3, SRC1
+    ; Karatsuba middle term: product of the two sums, with both outer products XORed back out
+    vpclmulqdq  TMP2, TMP2, TMP3, 0h
+    vpxor       TMP2, TMP2, TMP1
+    vpxor       TMP2, TMP2, TMP4
+    ; split the middle term: its low qword goes to the top of the low half, its high qword to the bottom of the high half
+    vpslldq     TMP3, TMP2, 8
+    vpsrldq     TMP2, TMP2, 8
+
+    vpxor       TMP1, TMP1, TMP3
+    vpxor       TMP4, TMP4, TMP2
+    ; TMP4:TMP1 is now the 256-bit product; fold the low half twice (multiply its low qword by the high qword of Lpoly, XOR with the qword-swapped value)
+    vpclmulqdq  TMP2, TMP1, [Lpoly], 010h
+    vpshufd     TMP3, TMP1, 78
+    vpxor       TMP1, TMP2, TMP3
+
+    vpclmulqdq  TMP2, TMP1, [Lpoly], 010h
+    vpshufd     TMP3, TMP1, 78
+    vpxor       TMP1, TMP2, TMP3
+    ; the folded low half XOR the high half is the result
+    vpxor       DST, TMP1, TMP4
+
+    ENDM
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Generates the final GCM tag
+; void intel_aes_gcmTAG(unsigned char Htbl[16*16],
+;                       unsigned char *Tp,
+;                       unsigned int Mlen,
+;                       unsigned int Alen,
+;                       unsigned char* X0,
+;                       unsigned char* TAG);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmTAG PROC
+; XORs the two length words (as bit counts) into the hash at Tp, multiplies by the first Htbl entry, byte-reverses, XORs the block at X0 and writes 16 bytes to TAG.
+Htbl    textequ <eax>
+Tp      textequ <ecx>
+X0      textequ <edx>
+TAG     textequ <ebx>
+
+T       textequ <xmm0>
+TMP0    textequ <xmm1>
+
+    push    ebx
+    ; after the push, esp + 2*4 skips the saved ebx and the return address and addresses the first stack argument
+    mov     Htbl,   [esp + 2*4 + 0*4]
+    mov     Tp,     [esp + 2*4 + 1*4]
+    mov     X0,     [esp + 2*4 + 4*4]
+    mov     TAG,    [esp + 2*4 + 5*4]
+
+    vzeroupper
+    vmovdqu T, XMMWORD PTR[Tp]
+    ; TMP0 = Mlen in dword 0 and Alen in dword 2, each qword then shifted left by 3 (byte count times 8)
+    vpxor   TMP0, TMP0, TMP0
+    vpinsrd TMP0, TMP0, DWORD PTR[esp + 2*4 + 2*4], 0
+    vpinsrd TMP0, TMP0, DWORD PTR[esp + 2*4 + 3*4], 2
+    vpsllq  TMP0, TMP0, 3
+
+    vpxor   T, T, TMP0
+    vmovdqu TMP0, XMMWORD PTR[Htbl]
+    GFMUL   T, T, TMP0, xmm2, xmm3, xmm4, xmm5
+    ; reverse the byte order of the hash, mask it with the block at X0 and store the tag
+    vpshufb T, T, [Lbswap_mask]
+    vpxor   T, T, [X0]
+    vmovdqu XMMWORD PTR[TAG], T
+    vzeroupper
+
+    pop ebx
+
+    ret
+
+intel_aes_gcmTAG ENDP
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Generates the H table
+; void intel_aes_gcmINIT(unsigned char Htbl[16*16], unsigned char *KS, int NR);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmINIT PROC
+; Fills Htbl: eight 16-byte values at Htbl + i*16 (successive GFMUL products) and, at Htbl + 8*16 + i*16, each value XORed with its qword-swapped self.
+Htbl    textequ <eax>
+KS      textequ <ecx>
+NR      textequ <edx>
+
+T       textequ <xmm0>
+TMP0    textequ <xmm1>
+    ; no registers are pushed here, so esp + 4*1 skips only the return address
+    mov     Htbl,   [esp + 4*1 + 0*4]
+    mov     KS,     [esp + 4*1 + 1*4]
+    mov     NR,     [esp + 4*1 + 2*4]
+
+    vzeroupper
+    ; AES-ENC(0)
+    vmovdqu T, XMMWORD PTR[KS]                  ; an all-zero block XOR round key 0 is just round key 0
+    lea KS, [16 + KS]
+    dec NR                                      ; NR-1 vaesenc rounds run in the loop, the last round is vaesenclast
+Lenc_loop:
+        vaesenc T, T, [KS]
+        lea KS, [16 + KS]
+        dec NR
+        jnz Lenc_loop
+
+    vaesenclast T, T, [KS]
+    vpshufb T, T, [Lbswap_mask]
+
+    ;Calculate H` = GFMUL(H, 2)
+    vpsrad  xmm3, T, 31                         ; sign-fill every dword ...
+    vpshufd xmm3, xmm3, 0ffh                    ; ... and broadcast the top one: all ones when the top bit of T is set
+    vpand   xmm5, xmm3, [Lpoly]                 ; Lpoly if the top bit is set, else zero
+    vpsrld  xmm3, T, 31                         ; bit shifted out of each dword
+    vpslld  xmm4, T, 1
+    vpslldq xmm3, xmm3, 4                       ; carry each shifted-out bit into the next dword
+    vpxor   T, xmm4, xmm3                       ; T shifted left by one bit across all 128 bits
+    vpxor   T, T, xmm5
+
+    vmovdqu TMP0, T
+    vmovdqu XMMWORD PTR[Htbl + 0*16], T
+
+    vpshufd xmm2, T, 78
+    vpxor   xmm2, xmm2, T
+    vmovdqu XMMWORD PTR[Htbl + 8*16 + 0*16], xmm2
+    ; assemble-time loop: entries 1..7 are the previous entry multiplied by entry 0 (kept in TMP0)
+    i = 1
+    WHILE i LT 8
+        GFMUL   T, T, TMP0, xmm2, xmm3, xmm4, xmm5
+        vmovdqu XMMWORD PTR[Htbl + i*16], T
+        vpshufd xmm2, T, 78
+        vpxor   xmm2, xmm2, T
+        vmovdqu XMMWORD PTR[Htbl + 8*16 + i*16], xmm2
+        i = i+1
+        ENDM
+    vzeroupper
+    ret
+intel_aes_gcmINIT ENDP
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Authenticate only
+; void intel_aes_gcmAAD(unsigned char Htbl[16*16], unsigned char *AAD, unsigned int Alen, unsigned char *Tp);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmAAD PROC
+; Hashes Alen bytes at AAD into the 16-byte running hash at Tp using the table at Htbl; returns immediately when Alen is 0.
+Htbl    textequ <eax>
+inp     textequ <ecx>
+len     textequ <edx>
+Tp      textequ <ebx>
+hlp0    textequ <esi>
+; NOTE(review): the prefix loop steps hlp0 down by 16 until it is exactly 0, so Alen is assumed to be a multiple of 16 - confirm with callers.
+DATA    textequ <xmm0>
+T       textequ <xmm1>
+TMP0    textequ <xmm2>
+TMP1    textequ <xmm3>
+TMP2    textequ <xmm4>
+TMP3    textequ <xmm5>
+TMP4    textequ <xmm6>
+Xhi     textequ <xmm7>
+; KARATSUBA_AAD i: accumulate DATA times table entry i into TMP0 (low product), TMP1 (high product) and TMP2 (middle product, using the entry at Htbl + 8*16 + i*16)
+KARATSUBA_AAD MACRO i
+    vpclmulqdq  TMP3, DATA, [Htbl + i*16], 0h
+    vpxor       TMP0, TMP0, TMP3
+    vpclmulqdq  TMP3, DATA, [Htbl + i*16], 011h
+    vpxor       TMP1, TMP1, TMP3
+    vpshufd     TMP3, DATA, 78
+    vpxor       TMP3, TMP3, DATA
+    vpclmulqdq  TMP3, TMP3, [Htbl + 8*16 + i*16], 0h
+    vpxor       TMP2, TMP2, TMP3
+ENDM
+; Nothing has been pushed yet: the return address is at esp, so the third argument (Alen) is at esp + 1*4 + 2*4.
+    cmp   DWORD PTR[esp + 1*4 + 2*4], 0
+    jnz   LbeginAAD
+    ret
+
+LbeginAAD:
+    push    ebx
+    push    esi
+    ; two saved registers plus the return address precede the arguments
+    mov     Htbl,   [esp + 4*3 + 0*4]
+    mov     inp,    [esp + 4*3 + 1*4]
+    mov     len,    [esp + 4*3 + 2*4]
+    mov     Tp,     [esp + 4*3 + 3*4]
+
+    vzeroupper
+    ; Xhi:T holds the hash with its reduction deferred (fold T twice, then XOR Xhi). Entering with Xhi = hash and T = 0 makes that reduction return the hash unchanged.
+    vpxor   T, T, T
+
+    vmovdqu Xhi, XMMWORD PTR[Tp]
+    ;we hash 8 blocks each iteration; if the total number of blocks is not a multiple of 8, we hash the first n%8 blocks first
+    mov hlp0, len
+    and hlp0, 128-1
+    jz  Lmod_loop
+
+    and len, -128
+    sub hlp0, 16
+
+    ; Prefix block
+    vmovdqu DATA, XMMWORD PTR[inp]
+    vpshufb DATA, DATA, [Lbswap_mask]
+    vpxor   DATA, DATA, Xhi                     ; fold the incoming hash into the first block
+
+    vpclmulqdq  TMP0, DATA, XMMWORD PTR[Htbl + hlp0], 0h
+    vpclmulqdq  TMP1, DATA, XMMWORD PTR[Htbl + hlp0], 011h
+    vpshufd     TMP3, DATA, 78
+    vpxor       TMP3, TMP3, DATA
+    vpclmulqdq  TMP2, TMP3, XMMWORD PTR[Htbl + 8*16 + hlp0], 0h
+
+    lea     inp, [inp+16]
+    test    hlp0, hlp0
+    jnz     Lpre_loop
+    jmp     Lred1
+
+    ;hash remaining prefix blocks (up to 7 total prefix blocks)
+Lpre_loop:
+
+        sub hlp0, 16
+
+        vmovdqu DATA, XMMWORD PTR[inp]
+        vpshufb DATA, DATA, [Lbswap_mask]
+
+        vpclmulqdq  TMP3, DATA, XMMWORD PTR[Htbl + hlp0], 0h
+        vpxor       TMP0, TMP0, TMP3
+        vpclmulqdq  TMP3, DATA, XMMWORD PTR[Htbl + hlp0], 011h
+        vpxor       TMP1, TMP1, TMP3
+        vpshufd     TMP3, DATA, 78
+        vpxor       TMP3, TMP3, DATA
+        vpclmulqdq  TMP3, TMP3, XMMWORD PTR[Htbl + 8*16 + hlp0], 0h
+        vpxor       TMP2, TMP2, TMP3
+
+        test    hlp0, hlp0
+        lea     inp, [inp+16]                   ; lea leaves the flags from the test intact for the jnz
+        jnz     Lpre_loop
+
+Lred1:
+    ; Karatsuba fixup of the prefix products; the reduction itself is deferred (Xhi = high half, T = low half)
+    vpxor       TMP2, TMP2, TMP0
+    vpxor       TMP2, TMP2, TMP1
+    vpsrldq     TMP3, TMP2, 8
+    vpslldq     TMP2, TMP2, 8
+
+    vpxor       Xhi, TMP1, TMP3
+    vpxor       T, TMP0, TMP2
+
+Lmod_loop:
+
+        sub len, 16*8
+        jb  Ldone
+        ; Block #0
+        vmovdqu DATA, XMMWORD PTR[inp + 16*7]
+        vpshufb DATA, DATA, XMMWORD PTR[Lbswap_mask]
+
+        vpclmulqdq  TMP0, DATA, XMMWORD PTR[Htbl + 0*16], 0h
+        vpclmulqdq  TMP1, DATA, XMMWORD PTR[Htbl + 0*16], 011h
+        vpshufd     TMP3, DATA, 78
+        vpxor       TMP3, TMP3, DATA
+        vpclmulqdq  TMP2, TMP3, XMMWORD PTR[Htbl + 8*16 + 0*16], 0h
+
+        ; Block #1
+        vmovdqu DATA, XMMWORD PTR[inp + 16*6]
+        vpshufb DATA, DATA, [Lbswap_mask]
+        KARATSUBA_AAD 1
+
+        ; Block #2
+        vmovdqu DATA, XMMWORD PTR[inp + 16*5]
+        vpshufb DATA, DATA, [Lbswap_mask]
+
+        vpclmulqdq  TMP4, T, [Lpoly], 010h         ;reduction stage 1a
+        vpalignr    T, T, T, 8
+
+        KARATSUBA_AAD 2
+
+        vpxor       T, T, TMP4                          ;reduction stage 1b
+
+        ; Block #3
+        vmovdqu DATA, XMMWORD PTR[inp + 16*4]
+        vpshufb DATA, DATA, [Lbswap_mask]
+        KARATSUBA_AAD 3
+        ; Block #4
+        vmovdqu DATA, XMMWORD PTR[inp + 16*3]
+        vpshufb DATA, DATA, [Lbswap_mask]
+
+        vpclmulqdq  TMP4, T, [Lpoly], 010h        ;reduction stage 2a
+        vpalignr    T, T, T, 8
+
+        KARATSUBA_AAD 4
+
+        vpxor       T, T, TMP4                          ;reduction stage 2b
+        ; Block #5
+        vmovdqu DATA, XMMWORD PTR[inp + 16*2]
+        vpshufb DATA, DATA, [Lbswap_mask]
+        KARATSUBA_AAD 5
+
+        vpxor   T, T, Xhi                               ;reduction finalize
+        ; Block #6
+        vmovdqu DATA, XMMWORD PTR[inp + 16*1]
+        vpshufb DATA, DATA, [Lbswap_mask]
+        KARATSUBA_AAD 6
+        ; Block #7
+        vmovdqu DATA, XMMWORD PTR[inp + 16*0]
+        vpshufb DATA, DATA, [Lbswap_mask]
+        vpxor   DATA, DATA, T
+        KARATSUBA_AAD 7
+        ; Aggregated 8 blocks, now karatsuba fixup
+        vpxor   TMP2, TMP2, TMP0
+        vpxor   TMP2, TMP2, TMP1
+        vpsrldq TMP3, TMP2, 8
+        vpslldq TMP2, TMP2, 8
+
+        vpxor   Xhi, TMP1, TMP3
+        vpxor   T, TMP0, TMP2
+
+        lea inp, [inp + 16*8]
+        jmp Lmod_loop
+
+Ldone:
+    vpclmulqdq  TMP4, T, [Lpoly], 010h              ; final reduction of Xhi:T, same two folds as inside the loop
+    vpalignr    T, T, T, 8
+    vpxor       T, T, TMP4
+
+    vpclmulqdq  TMP4, T, [Lpoly], 010h
+    vpalignr    T, T, T, 8
+    vpxor       T, T, TMP4
+
+    vpxor       T, T, Xhi
+    vmovdqu     XMMWORD PTR[Tp], T
+    vzeroupper
+
+    pop esi
+    pop ebx
+    ret
+
+intel_aes_gcmAAD ENDP
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Encrypt and Authenticate
+; void intel_aes_gcmENC(unsigned char* PT, unsigned char* CT, void *Gctx, unsigned int len);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmENC PROC
+; Encrypts len bytes from PT to CT in counter mode and folds the produced ciphertext into the hash kept in the context; returns immediately when len is 0.
+PT      textequ <eax>
+CT      textequ <ecx>
+Htbl    textequ <edx>
+Gctx    textequ <edx>
+len     textequ <DWORD PTR[ebp + 5*4 + 3*4]>
+KS      textequ <esi>
+NR      textequ <DWORD PTR[-40 + KS]>
+; len is the caller's stack argument and is counted down in place; Htbl and Gctx share edx because the table sits at offset 0 of the context.
+aluCTR  textequ <ebx>
+aluTMP  textequ <edi>
+; T is a memory operand: the running hash stored in the context at offset 16*16 + 1*16.
+T       textequ <XMMWORD PTR[16*16 + 1*16 + Gctx]>
+TMP0    textequ <xmm1>
+TMP1    textequ <xmm2>
+TMP2    textequ <xmm3>
+TMP3    textequ <xmm4>
+TMP4    textequ <xmm5>
+TMP5    textequ <xmm6>
+; The CTRn names overlap the TMPn registers (CTR1..CTR6 are xmm1..xmm6); the code never needs both sets live at once.
+CTR0    textequ <xmm0>
+CTR1    textequ <xmm1>
+CTR2    textequ <xmm2>
+CTR3    textequ <xmm3>
+CTR4    textequ <xmm4>
+CTR5    textequ <xmm5>
+CTR6    textequ <xmm6>
+; ROUND i: one vaesenc round with round key i on all seven counter blocks; clobbers xmm7.
+ROUND MACRO i
+    vmovdqu xmm7, XMMWORD PTR[i*16 + KS]
+    vaesenc CTR0, CTR0, xmm7
+    vaesenc CTR1, CTR1, xmm7
+    vaesenc CTR2, CTR2, xmm7
+    vaesenc CTR3, CTR3, xmm7
+    vaesenc CTR4, CTR4, xmm7
+    vaesenc CTR5, CTR5, xmm7
+    vaesenc CTR6, CTR6, xmm7
+ENDM
+; KARATSUBA i: accumulate TMP5 times table entry i into TMP0 (middle product), TMP1 (high product) and TMP2 (low product); clobbers TMP3 and TMP4.
+KARATSUBA MACRO i
+    vpshufd TMP4, TMP5, 78
+    vpxor   TMP4, TMP4, TMP5
+    vpclmulqdq  TMP3, TMP4, XMMWORD PTR[i*16 + 8*16 + Htbl], 000h
+    vpxor       TMP0, TMP0, TMP3
+    vmovdqu     TMP4, XMMWORD PTR[i*16 + Htbl]
+    vpclmulqdq  TMP3, TMP5, TMP4, 011h
+    vpxor       TMP1, TMP1, TMP3
+    vpclmulqdq  TMP3, TMP5, TMP4, 000h
+    vpxor       TMP2, TMP2, TMP3
+ENDM
+; NEXTCTR i: advance aluCTR and write it (byte-swapped, XORed with the last dword of round key 0) into the last dword of counter slot i at esp + 8*16 + i*16.
+NEXTCTR MACRO i
+    add     aluCTR, 1
+    mov     aluTMP, aluCTR
+    bswap   aluTMP
+    xor     aluTMP, [3*4 + KS]
+    mov     [3*4 + 8*16 + i*16 + esp], aluTMP
+ENDM
+; Nothing is pushed yet, so the fourth argument (len) is at esp + 1*4 + 3*4.
+    cmp DWORD PTR[1*4 + 3*4 + esp], 0
+    jne LbeginENC
+    ret
+
+LbeginENC:
+
+    vzeroupper
+    push    ebp
+    push    ebx
+    push    esi
+    push    edi
+    ; Frame: 16*16 bytes aligned to 16. esp+0 is tail scratch, esp+1*16..6*16 hold ciphertext awaiting the hash, esp+8*16.. hold the counter slots.
+    mov ebp, esp
+    sub esp, 16*16
+    and esp, -16
+
+    mov PT, [ebp + 5*4 + 0*4]
+    mov CT, [ebp + 5*4 + 1*4]
+    mov Gctx, [ebp + 5*4 + 2*4]
+    ; KS = pointer stored in the context, advanced by 44 to the round keys; NR is read 40 bytes below that (presumably the AES context layout - confirm)
+    mov     KS, [16*16 + 3*16 + Gctx]
+    lea     KS, [44 + KS]
+
+    mov     aluCTR, [16*16 + 2*16 + 3*4 + Gctx]
+    bswap   aluCTR
+
+    ; slot 0 = stored counter block XOR round key 0, i.e. the AES state after the initial key addition
+    vmovdqu TMP0, XMMWORD PTR[0*16 + KS]
+    vpxor   TMP0, TMP0, XMMWORD PTR[16*16 + 2*16 + Gctx]
+    vmovdqu XMMWORD PTR[8*16 + 0*16 + esp], TMP0
+
+    cmp len, 16*7
+    jb  LEncDataSingles
+; Prepare the "top" counters
+    vmovdqu XMMWORD PTR[8*16 + 1*16 + esp], TMP0
+    vmovdqu XMMWORD PTR[8*16 + 2*16 + esp], TMP0
+    vmovdqu XMMWORD PTR[8*16 + 3*16 + esp], TMP0
+    vmovdqu XMMWORD PTR[8*16 + 4*16 + esp], TMP0
+    vmovdqu XMMWORD PTR[8*16 + 5*16 + esp], TMP0
+    vmovdqu XMMWORD PTR[8*16 + 6*16 + esp], TMP0
+
+    vmovdqu CTR0, XMMWORD PTR[16*16 + 2*16 + Gctx]
+    vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask]
+; Encrypt the initial 7 blocks
+    sub len, 16*7
+    vpaddd  CTR1, CTR0, XMMWORD PTR[Lone]
+    vpaddd  CTR2, CTR0, XMMWORD PTR[Ltwo]
+    vpaddd  CTR3, CTR2, XMMWORD PTR[Lone]
+    vpaddd  CTR4, CTR2, XMMWORD PTR[Ltwo]
+    vpaddd  CTR5, CTR4, XMMWORD PTR[Lone]
+    vpaddd  CTR6, CTR4, XMMWORD PTR[Ltwo]
+
+    vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask]
+    vpshufb CTR1, CTR1, XMMWORD PTR[Lbswap_mask]
+    vpshufb CTR2, CTR2, XMMWORD PTR[Lbswap_mask]
+    vpshufb CTR3, CTR3, XMMWORD PTR[Lbswap_mask]
+    vpshufb CTR4, CTR4, XMMWORD PTR[Lbswap_mask]
+    vpshufb CTR5, CTR5, XMMWORD PTR[Lbswap_mask]
+    vpshufb CTR6, CTR6, XMMWORD PTR[Lbswap_mask]
+
+    vmovdqu xmm7, XMMWORD PTR[0*16 + KS]
+    vpxor   CTR0, CTR0, xmm7
+    vpxor   CTR1, CTR1, xmm7
+    vpxor   CTR2, CTR2, xmm7
+    vpxor   CTR3, CTR3, xmm7
+    vpxor   CTR4, CTR4, xmm7
+    vpxor   CTR5, CTR5, xmm7
+    vpxor   CTR6, CTR6, xmm7
+
+    ROUND   1
+    ; the counter slots for the NEXT group of seven are prepared between the AES rounds, starting with slot 0 = counter + 7
+    add aluCTR, 7
+    mov aluTMP, aluCTR
+    bswap   aluTMP
+    xor aluTMP, [KS + 3*4]
+    mov [8*16 + 0*16 + 3*4 + esp], aluTMP
+
+    ROUND   2
+    NEXTCTR 1
+    ROUND   3
+    NEXTCTR 2
+    ROUND   4
+    NEXTCTR 3
+    ROUND   5
+    NEXTCTR 4
+    ROUND   6
+    NEXTCTR 5
+    ROUND   7
+    NEXTCTR 6
+    ROUND   8
+    ROUND   9
+    vmovdqu xmm7, XMMWORD PTR[10*16 + KS]
+    cmp     NR, 10
+    je      @f
+
+    ROUND   10
+    ROUND   11
+    vmovdqu xmm7, XMMWORD PTR[12*16 + KS]
+    cmp     NR, 12
+    je      @f
+
+    ROUND   12
+    ROUND   13
+    vmovdqu xmm7, XMMWORD PTR[14*16 + KS]
+@@:
+    vaesenclast CTR0, CTR0, xmm7
+    vaesenclast CTR1, CTR1, xmm7
+    vaesenclast CTR2, CTR2, xmm7
+    vaesenclast CTR3, CTR3, xmm7
+    vaesenclast CTR4, CTR4, xmm7
+    vaesenclast CTR5, CTR5, xmm7
+    vaesenclast CTR6, CTR6, xmm7
+
+    vpxor   CTR0, CTR0, XMMWORD PTR[0*16 + PT]
+    vpxor   CTR1, CTR1, XMMWORD PTR[1*16 + PT]
+    vpxor   CTR2, CTR2, XMMWORD PTR[2*16 + PT]
+    vpxor   CTR3, CTR3, XMMWORD PTR[3*16 + PT]
+    vpxor   CTR4, CTR4, XMMWORD PTR[4*16 + PT]
+    vpxor   CTR5, CTR5, XMMWORD PTR[5*16 + PT]
+    vpxor   CTR6, CTR6, XMMWORD PTR[6*16 + PT]
+
+    vmovdqu XMMWORD PTR[0*16 + CT], CTR0
+    vmovdqu XMMWORD PTR[1*16 + CT], CTR1
+    vmovdqu XMMWORD PTR[2*16 + CT], CTR2
+    vmovdqu XMMWORD PTR[3*16 + CT], CTR3
+    vmovdqu XMMWORD PTR[4*16 + CT], CTR4
+    vmovdqu XMMWORD PTR[5*16 + CT], CTR5
+    vmovdqu XMMWORD PTR[6*16 + CT], CTR6
+
+    vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask]
+    vpshufb CTR1, CTR1, XMMWORD PTR[Lbswap_mask]
+    vpshufb CTR2, CTR2, XMMWORD PTR[Lbswap_mask]
+    vpshufb CTR3, CTR3, XMMWORD PTR[Lbswap_mask]
+    vpshufb CTR4, CTR4, XMMWORD PTR[Lbswap_mask]
+    vpshufb CTR5, CTR5, XMMWORD PTR[Lbswap_mask]
+    vpshufb TMP5, CTR6, XMMWORD PTR[Lbswap_mask]
+    ; park the byte-reversed ciphertext for hashing: the newest block stays in TMP5, the oldest lands at esp + 6*16
+    vmovdqa XMMWORD PTR[1*16 + esp], CTR5
+    vmovdqa XMMWORD PTR[2*16 + esp], CTR4
+    vmovdqa XMMWORD PTR[3*16 + esp], CTR3
+    vmovdqa XMMWORD PTR[4*16 + esp], CTR2
+    vmovdqa XMMWORD PTR[5*16 + esp], CTR1
+    vmovdqa XMMWORD PTR[6*16 + esp], CTR0
+
+    lea CT, [7*16 + CT]
+    lea PT, [7*16 + PT]
+    jmp LEncData7
+    ; each pass hashes the seven ciphertext blocks of the previous pass, then encrypts the next seven
+LEncData7:
+        cmp len, 16*7
+        jb  LEndEnc7
+        sub len, 16*7
+
+        vpshufd TMP4, TMP5, 78
+        vpxor   TMP4, TMP4, TMP5
+        vpclmulqdq  TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h
+        vmovdqu     TMP4, XMMWORD PTR[0*16 + Htbl]
+        vpclmulqdq  TMP1, TMP5, TMP4, 011h
+        vpclmulqdq  TMP2, TMP5, TMP4, 000h
+
+        vmovdqu TMP5, XMMWORD PTR[1*16 + esp]
+        KARATSUBA 1
+        vmovdqu TMP5, XMMWORD PTR[2*16 + esp]
+        KARATSUBA 2
+        vmovdqu TMP5, XMMWORD PTR[3*16 + esp]
+        KARATSUBA 3
+        vmovdqu TMP5, XMMWORD PTR[4*16 + esp]
+        KARATSUBA 4
+        vmovdqu TMP5, XMMWORD PTR[5*16 + esp]
+        KARATSUBA 5
+        vmovdqu TMP5, XMMWORD PTR[6*16 + esp]
+        vpxor   TMP5, TMP5, T
+        KARATSUBA 6
+        ; Karatsuba fixup: TMP4 = high half, TMP5 = low half of the summed products
+        vpxor   TMP0, TMP0, TMP1
+        vpxor   TMP0, TMP0, TMP2
+        vpsrldq TMP3, TMP0, 8
+        vpxor   TMP4, TMP1, TMP3
+        vpslldq TMP3, TMP0, 8
+        vpxor   TMP5, TMP2, TMP3
+        ; two folds of the low half with Lpoly, then XOR the high half
+        vpclmulqdq  TMP1, TMP5, XMMWORD PTR[Lpoly], 010h
+        vpalignr    TMP5,TMP5,TMP5,8
+        vpxor       TMP5, TMP5, TMP1
+
+        vpclmulqdq  TMP1, TMP5, XMMWORD PTR[Lpoly], 010h
+        vpalignr    TMP5,TMP5,TMP5,8
+        vpxor       TMP5, TMP5, TMP1
+
+        vpxor       TMP5, TMP5, TMP4
+        vmovdqu     T, TMP5
+
+        vmovdqa CTR0, XMMWORD PTR[8*16 + 0*16 + esp]
+        vmovdqa CTR1, XMMWORD PTR[8*16 + 1*16 + esp]
+        vmovdqa CTR2, XMMWORD PTR[8*16 + 2*16 + esp]
+        vmovdqa CTR3, XMMWORD PTR[8*16 + 3*16 + esp]
+        vmovdqa CTR4, XMMWORD PTR[8*16 + 4*16 + esp]
+        vmovdqa CTR5, XMMWORD PTR[8*16 + 5*16 + esp]
+        vmovdqa CTR6, XMMWORD PTR[8*16 + 6*16 + esp]
+
+        ROUND 1
+        NEXTCTR 0
+        ROUND 2
+        NEXTCTR 1
+        ROUND 3
+        NEXTCTR 2
+        ROUND 4
+        NEXTCTR 3
+        ROUND 5
+        NEXTCTR 4
+        ROUND 6
+        NEXTCTR 5
+        ROUND 7
+        NEXTCTR 6
+
+        ROUND 8
+        ROUND 9
+
+        vmovdqu     xmm7, XMMWORD PTR[10*16 + KS]
+        cmp         NR, 10
+        je          @f
+
+        ROUND 10
+        ROUND 11
+        vmovdqu     xmm7, XMMWORD PTR[12*16 + KS]
+        cmp         NR, 12
+        je          @f
+
+        ROUND 12
+        ROUND 13
+        vmovdqu     xmm7, XMMWORD PTR[14*16 + KS]
+@@:
+        vaesenclast CTR0, CTR0, xmm7
+        vaesenclast CTR1, CTR1, xmm7
+        vaesenclast CTR2, CTR2, xmm7
+        vaesenclast CTR3, CTR3, xmm7
+        vaesenclast CTR4, CTR4, xmm7
+        vaesenclast CTR5, CTR5, xmm7
+        vaesenclast CTR6, CTR6, xmm7
+
+        vpxor   CTR0, CTR0, XMMWORD PTR[0*16 + PT]
+        vpxor   CTR1, CTR1, XMMWORD PTR[1*16 + PT]
+        vpxor   CTR2, CTR2, XMMWORD PTR[2*16 + PT]
+        vpxor   CTR3, CTR3, XMMWORD PTR[3*16 + PT]
+        vpxor   CTR4, CTR4, XMMWORD PTR[4*16 + PT]
+        vpxor   CTR5, CTR5, XMMWORD PTR[5*16 + PT]
+        vpxor   CTR6, CTR6, XMMWORD PTR[6*16 + PT]
+
+        vmovdqu XMMWORD PTR[0*16 + CT], CTR0
+        vmovdqu XMMWORD PTR[1*16 + CT], CTR1
+        vmovdqu XMMWORD PTR[2*16 + CT], CTR2
+        vmovdqu XMMWORD PTR[3*16 + CT], CTR3
+        vmovdqu XMMWORD PTR[4*16 + CT], CTR4
+        vmovdqu XMMWORD PTR[5*16 + CT], CTR5
+        vmovdqu XMMWORD PTR[6*16 + CT], CTR6
+
+        vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask]
+        vpshufb CTR1, CTR1, XMMWORD PTR[Lbswap_mask]
+        vpshufb CTR2, CTR2, XMMWORD PTR[Lbswap_mask]
+        vpshufb CTR3, CTR3, XMMWORD PTR[Lbswap_mask]
+        vpshufb CTR4, CTR4, XMMWORD PTR[Lbswap_mask]
+        vpshufb CTR5, CTR5, XMMWORD PTR[Lbswap_mask]
+        vpshufb TMP5, CTR6, XMMWORD PTR[Lbswap_mask]
+
+        vmovdqa XMMWORD PTR[1*16 + esp], CTR5
+        vmovdqa XMMWORD PTR[2*16 + esp], CTR4
+        vmovdqa XMMWORD PTR[3*16 + esp], CTR3
+        vmovdqa XMMWORD PTR[4*16 + esp], CTR2
+        vmovdqa XMMWORD PTR[5*16 + esp], CTR1
+        vmovdqa XMMWORD PTR[6*16 + esp], CTR0
+
+        lea CT, [7*16 + CT]
+        lea PT, [7*16 + PT]
+        jmp LEncData7
+
+LEndEnc7:
+    ; hash the last group of seven ciphertext blocks, which has no following encryption pass to overlap with
+    vpshufd TMP4, TMP5, 78
+    vpxor   TMP4, TMP4, TMP5
+    vpclmulqdq  TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h
+    vmovdqu     TMP4, XMMWORD PTR[0*16 + Htbl]
+    vpclmulqdq  TMP1, TMP5, TMP4, 011h
+    vpclmulqdq  TMP2, TMP5, TMP4, 000h
+
+    vmovdqu TMP5, XMMWORD PTR[1*16 + esp]
+    KARATSUBA 1
+    vmovdqu TMP5, XMMWORD PTR[2*16 + esp]
+    KARATSUBA 2
+    vmovdqu TMP5, XMMWORD PTR[3*16 + esp]
+    KARATSUBA 3
+    vmovdqu TMP5, XMMWORD PTR[4*16 + esp]
+    KARATSUBA 4
+    vmovdqu TMP5, XMMWORD PTR[5*16 + esp]
+    KARATSUBA 5
+    vmovdqu TMP5, XMMWORD PTR[6*16 + esp]
+    vpxor   TMP5, TMP5, T
+    KARATSUBA 6
+
+    vpxor   TMP0, TMP0, TMP1
+    vpxor   TMP0, TMP0, TMP2
+    vpsrldq TMP3, TMP0, 8
+    vpxor   TMP4, TMP1, TMP3
+    vpslldq TMP3, TMP0, 8
+    vpxor   TMP5, TMP2, TMP3
+
+    vpclmulqdq  TMP1, TMP5, XMMWORD PTR[Lpoly], 010h
+    vpalignr    TMP5,TMP5,TMP5,8
+    vpxor       TMP5, TMP5, TMP1
+
+    vpclmulqdq  TMP1, TMP5, XMMWORD PTR[Lpoly], 010h
+    vpalignr    TMP5,TMP5,TMP5,8
+    vpxor       TMP5, TMP5, TMP1
+
+    vpxor       TMP5, TMP5, TMP4
+    vmovdqu     T, TMP5
+
+    sub aluCTR, 6                               ; aluCTR ran ahead to slot 6; bring it back to the counter held in slot 0
+    ; from here on slot 0 always holds the block for counter value aluCTR
+LEncDataSingles:
+
+        cmp len, 16
+        jb  LEncDataTail
+        sub len, 16
+
+        vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + esp]
+        NEXTCTR 0
+
+        vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+        vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+        cmp NR, 10
+        je  @f
+        vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+        vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+        cmp NR, 12
+        je  @f
+        vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+        vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+        vaesenclast TMP1, TMP1, TMP2
+        vpxor   TMP1, TMP1, XMMWORD PTR[PT]
+        vmovdqu XMMWORD PTR[CT], TMP1
+
+        lea PT, [16+PT]
+        lea CT, [16+CT]
+
+        vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask]
+        vpxor   TMP1, TMP1, T
+
+        vmovdqu TMP0, XMMWORD PTR[Htbl]
+        GFMUL   TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4
+        vmovdqu T, TMP1
+
+        jmp LEncDataSingles
+
+LEncDataTail:
+
+    cmp len, 0
+    je  LEncDataEnd
+
+    vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + esp]
+    inc aluCTR                                  ; the partial block consumes one counter value; advance only on this path
+    vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+    vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+    cmp NR, 10
+    je  @f
+    vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+    vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+    cmp NR, 12
+    je  @f
+    vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+    vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+    vaesenclast TMP1, TMP1, TMP2
+; zero a temp location
+    vpxor   TMP2, TMP2, TMP2
+    vmovdqa XMMWORD PTR[esp], TMP2
+; copy as many bytes as needed
+    xor KS, KS                                  ; KS is reused as the byte index; the round keys are not needed again
+    mov aluTMP, edx                             ; dl is the byte scratch below, so park edx (Gctx/Htbl) in aluTMP
+@@:
+        cmp len, KS
+        je  @f
+        mov dl, BYTE PTR[PT + KS]
+        mov BYTE PTR[esp + KS], dl
+        inc KS
+        jmp @b
+@@:
+    vpxor   TMP1, TMP1, XMMWORD PTR[esp]
+    vmovdqa XMMWORD PTR[esp], TMP1
+    xor KS, KS
+@@:
+        cmp len, KS
+        je  @f
+        mov dl, BYTE PTR[esp + KS]
+        mov BYTE PTR[CT + KS], dl
+        inc KS
+        jmp @b
+@@:
+        cmp KS, 16                              ; zero the bytes past len so the hash sees a zero-padded ciphertext block
+        je  @f
+        mov BYTE PTR[esp + KS], 0
+        inc KS
+        jmp @b
+@@:
+    mov edx, aluTMP
+    vmovdqa TMP1, XMMWORD PTR[esp]
+    vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask]
+    vpxor   TMP1, TMP1, T
+
+    vmovdqu TMP0, XMMWORD PTR[Htbl]
+    GFMUL   TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4
+    vmovdqu T, TMP1
+
+LEncDataEnd:
+    ; aluCTR is the next unused counter value on every path that reaches this point; store it back unchanged
+    bswap   aluCTR
+    mov     [16*16 + 2*16 + 3*4 + Gctx], aluCTR
+
+    mov esp, ebp
+    pop edi
+    pop esi
+    pop ebx
+    pop ebp
+
+
+    vzeroupper
+
+    ret
+intel_aes_gcmENC ENDP
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Decrypt and Authenticate
+; void intel_aes_gcmDEC(uint8_t* CT, uint8_t* PT, void *Gctx, unsigned int len);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+NEXTCTR MACRO i
+    add     aluCTR, 1                           ; redefinition for the decrypt routine: counter slot i is at esp + i*16 here (no 8*16 offset)
+    mov     aluTMP, aluCTR
+    bswap   aluTMP                              ; the stored counter dword is byte-swapped relative to aluCTR
+    xor     aluTMP, [3*4 + KS]                  ; pre-apply the last dword of round key 0, as the slots hold counter XOR key 0
+    mov     [3*4 + i*16 + esp], aluTMP          ; only the last dword of the slot changes
+ENDM
+
+intel_aes_gcmDEC PROC
+; Hashes len bytes of ciphertext (first argument) into the context hash and writes the decrypted bytes to the second argument. Reuses the names and ROUND/KARATSUBA macros set up for intel_aes_gcmENC.
+    cmp DWORD PTR[1*4 + 3*4 + esp], 0
+    jne LbeginDEC
+    ret
+
+LbeginDEC:
+
+    vzeroupper
+    push    ebp
+    push    ebx
+    push    esi
+    push    edi
+    ; Frame: 8*16 bytes aligned to 16; slots 0..6 at esp + i*16 hold counter blocks, and slot 0 doubles as tail scratch
+    mov ebp, esp
+    sub esp, 8*16
+    and esp, -16
+    ; note the order: CT is loaded from the first stack argument, PT from the second
+    mov CT, [ebp + 5*4 + 0*4]
+    mov PT, [ebp + 5*4 + 1*4]
+    mov Gctx, [ebp + 5*4 + 2*4]
+
+    mov     KS, [16*16 + 3*16 + Gctx]
+    lea     KS, [44 + KS]
+
+    mov     aluCTR, [16*16 + 2*16 + 3*4 + Gctx]
+    bswap   aluCTR
+
+    ; slot 0 = stored counter block XOR round key 0
+    vmovdqu TMP0, XMMWORD PTR[0*16 + KS]
+    vpxor   TMP0, TMP0, XMMWORD PTR[16*16 + 2*16 + Gctx]
+    vmovdqu XMMWORD PTR[0*16 + esp], TMP0
+
+    cmp len, 16*7
+    jb  LDecDataSingles
+    vmovdqu XMMWORD PTR[1*16 + esp], TMP0
+    vmovdqu XMMWORD PTR[2*16 + esp], TMP0
+    vmovdqu XMMWORD PTR[3*16 + esp], TMP0
+    vmovdqu XMMWORD PTR[4*16 + esp], TMP0
+    vmovdqu XMMWORD PTR[5*16 + esp], TMP0
+    vmovdqu XMMWORD PTR[6*16 + esp], TMP0
+    dec aluCTR                                  ; the loop starts with NEXTCTR 0, so step back one to make it rewrite slot 0 with the current counter
+
+LDecData7:
+    cmp len, 16*7
+    jb  LDecData7End
+    sub len, 16*7
+    ; hash the seven ciphertext blocks first (table entries 6 down to 0), refreshing the counter slots in between
+    vmovdqu TMP5, XMMWORD PTR[0*16 + CT]
+    vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask]
+    vpxor   TMP5, TMP5, T
+    vpshufd TMP4, TMP5, 78
+    vpxor   TMP4, TMP4, TMP5
+    vpclmulqdq  TMP0, TMP4, XMMWORD PTR[6*16 + 8*16 + Htbl], 000h
+    vmovdqu     TMP4, XMMWORD PTR[6*16 + Htbl]
+    vpclmulqdq  TMP1, TMP5, TMP4, 011h
+    vpclmulqdq  TMP2, TMP5, TMP4, 000h
+
+    NEXTCTR 0
+    vmovdqu TMP5, XMMWORD PTR[1*16 + CT]
+    vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask]
+    KARATSUBA 5
+    NEXTCTR 1
+    vmovdqu TMP5, XMMWORD PTR[2*16 + CT]
+    vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask]
+    KARATSUBA 4
+    NEXTCTR 2
+    vmovdqu TMP5, XMMWORD PTR[3*16 + CT]
+    vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask]
+    KARATSUBA 3
+    NEXTCTR 3
+    vmovdqu TMP5, XMMWORD PTR[4*16 + CT]
+    vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask]
+    KARATSUBA 2
+    NEXTCTR 4
+    vmovdqu TMP5, XMMWORD PTR[5*16 + CT]
+    vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask]
+    KARATSUBA 1
+    NEXTCTR 5
+    vmovdqu TMP5, XMMWORD PTR[6*16 + CT]
+    vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask]
+    KARATSUBA 0
+    NEXTCTR 6
+    ; Karatsuba fixup: TMP4 = high half, TMP5 = low half of the summed products
+    vpxor   TMP0, TMP0, TMP1
+    vpxor   TMP0, TMP0, TMP2
+    vpsrldq TMP3, TMP0, 8
+    vpxor   TMP4, TMP1, TMP3
+    vpslldq TMP3, TMP0, 8
+    vpxor   TMP5, TMP2, TMP3
+    ; two folds of the low half with Lpoly, then XOR the high half
+    vpclmulqdq  TMP1, TMP5, XMMWORD PTR[Lpoly], 010h
+    vpalignr    TMP5,TMP5,TMP5,8
+    vpxor       TMP5, TMP5, TMP1
+
+    vpclmulqdq  TMP1, TMP5, XMMWORD PTR[Lpoly], 010h
+    vpalignr    TMP5,TMP5,TMP5,8
+    vpxor       TMP5, TMP5, TMP1
+
+    vpxor       TMP5, TMP5, TMP4
+    vmovdqu     T, TMP5
+    ; now run AES over the seven counter blocks prepared above
+    vmovdqa CTR0, XMMWORD PTR[0*16 + esp]
+    vmovdqa CTR1, XMMWORD PTR[1*16 + esp]
+    vmovdqa CTR2, XMMWORD PTR[2*16 + esp]
+    vmovdqa CTR3, XMMWORD PTR[3*16 + esp]
+    vmovdqa CTR4, XMMWORD PTR[4*16 + esp]
+    vmovdqa CTR5, XMMWORD PTR[5*16 + esp]
+    vmovdqa CTR6, XMMWORD PTR[6*16 + esp]
+
+    ROUND   1
+    ROUND   2
+    ROUND   3
+    ROUND   4
+    ROUND   5
+    ROUND   6
+    ROUND   7
+    ROUND   8
+    ROUND   9
+    vmovdqu xmm7, XMMWORD PTR[10*16 + KS]
+    cmp     NR, 10
+    je      @f
+
+    ROUND   10
+    ROUND   11
+    vmovdqu xmm7, XMMWORD PTR[12*16 + KS]
+    cmp     NR, 12
+    je      @f
+
+    ROUND   12
+    ROUND   13
+    vmovdqu xmm7, XMMWORD PTR[14*16 + KS]
+@@:
+    vaesenclast CTR0, CTR0, xmm7
+    vaesenclast CTR1, CTR1, xmm7
+    vaesenclast CTR2, CTR2, xmm7
+    vaesenclast CTR3, CTR3, xmm7
+    vaesenclast CTR4, CTR4, xmm7
+    vaesenclast CTR5, CTR5, xmm7
+    vaesenclast CTR6, CTR6, xmm7
+
+    vpxor   CTR0, CTR0, XMMWORD PTR[0*16 + CT]
+    vpxor   CTR1, CTR1, XMMWORD PTR[1*16 + CT]
+    vpxor   CTR2, CTR2, XMMWORD PTR[2*16 + CT]
+    vpxor   CTR3, CTR3, XMMWORD PTR[3*16 + CT]
+    vpxor   CTR4, CTR4, XMMWORD PTR[4*16 + CT]
+    vpxor   CTR5, CTR5, XMMWORD PTR[5*16 + CT]
+    vpxor   CTR6, CTR6, XMMWORD PTR[6*16 + CT]
+
+    vmovdqu XMMWORD PTR[0*16 + PT], CTR0
+    vmovdqu XMMWORD PTR[1*16 + PT], CTR1
+    vmovdqu XMMWORD PTR[2*16 + PT], CTR2
+    vmovdqu XMMWORD PTR[3*16 + PT], CTR3
+    vmovdqu XMMWORD PTR[4*16 + PT], CTR4
+    vmovdqu XMMWORD PTR[5*16 + PT], CTR5
+    vmovdqu XMMWORD PTR[6*16 + PT], CTR6
+
+    lea CT, [7*16 + CT]
+    lea PT, [7*16 + PT]
+    jmp LDecData7
+
+LDecData7End:
+    ; put the next unused counter into slot 0 for the single-block and tail paths
+    NEXTCTR 0
+
+LDecDataSingles:
+
+        cmp len, 16
+        jb  LDecDataTail
+        sub len, 16
+        ; hash the ciphertext block before decrypting it
+        vmovdqu TMP1, XMMWORD PTR[CT]
+        vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask]
+        vpxor   TMP1, TMP1, T
+
+        vmovdqu TMP0, XMMWORD PTR[Htbl]
+        GFMUL   TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4
+        vmovdqu T, TMP1
+
+        vmovdqa TMP1, XMMWORD PTR[0*16 + esp]
+        NEXTCTR 0
+
+        vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+        vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+        cmp NR, 10
+        je  @f
+        vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+        vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+        cmp NR, 12
+        je  @f
+        vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+        vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+        vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+        vaesenclast TMP1, TMP1, TMP2
+        vpxor   TMP1, TMP1, XMMWORD PTR[CT]
+        vmovdqu XMMWORD PTR[PT], TMP1
+
+        lea PT, [16+PT]
+        lea CT, [16+CT]
+        jmp LDecDataSingles
+
+LDecDataTail:
+
+    cmp len, 0
+    je  LDecDataEnd
+
+    vmovdqa TMP1, XMMWORD PTR[0*16 + esp]
+    inc aluCTR                                  ; the partial block consumes one counter value
+    vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+    vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+    cmp NR, 10
+    je  @f
+    vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+    vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+    cmp NR, 12
+    je  @f
+    vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+    vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+    vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+    vaesenclast xmm7, TMP1, TMP2                ; keystream block kept in xmm7 because GFMUL below clobbers the TMP registers
+
+; copy as many bytes as needed
+    xor KS, KS                                  ; KS is reused as the byte index; the round keys are not needed again
+    mov aluTMP, edx                             ; dl is the byte scratch below, so park edx (Gctx/Htbl) in aluTMP
+@@:
+        cmp len, KS
+        je  @f
+        mov dl, BYTE PTR[CT + KS]
+        mov BYTE PTR[esp + KS], dl
+        inc KS
+        jmp @b
+@@:
+        cmp KS, 16                              ; zero-pad the copied ciphertext up to 16 bytes
+        je  @f
+        mov BYTE PTR[esp + KS], 0
+        inc KS
+        jmp @b
+@@:
+    mov edx, aluTMP
+    vmovdqa TMP1, XMMWORD PTR[esp]
+    vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask]
+    vpxor   TMP1, TMP1, T
+
+    vmovdqu TMP0, XMMWORD PTR[Htbl]
+    GFMUL   TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4
+    vmovdqu T, TMP1
+    ; decrypt the padded block in place on the stack, then copy only len bytes out
+    vpxor   xmm7, xmm7, XMMWORD PTR[esp]
+    vmovdqa XMMWORD PTR[esp], xmm7
+    xor     KS, KS
+    mov aluTMP, edx
+@@:
+        cmp len, KS
+        je  @f
+        mov dl, BYTE PTR[esp + KS]
+        mov BYTE PTR[PT + KS], dl
+        inc KS
+        jmp @b
+@@:
+    mov edx, aluTMP
+
+LDecDataEnd:
+    ; write the next unused counter value back into the last dword of the context counter block
+    bswap   aluCTR
+    mov     [16*16 + 2*16 + 3*4 + Gctx], aluCTR
+
+    mov esp, ebp
+    pop edi
+    pop esi
+    pop ebx
+    pop ebp
+
+    vzeroupper
+
+    ret
+intel_aes_gcmDEC ENDP
+
+
+END
diff --git a/security/nss/lib/freebl/intel-gcm.h b/security/nss/lib/freebl/intel-gcm.h
new file mode 100644
index 000000000..566e544d8
--- /dev/null
+++ b/security/nss/lib/freebl/intel-gcm.h
@@ -0,0 +1,83 @@
+/******************************************************************************/
+/* LICENSE:                                                                   */
+/* This submission to NSS is to be made available under the terms of the      */
+/* Mozilla Public License, v. 2.0. You can obtain one at http:                */
+/* //mozilla.org/MPL/2.0/.                                                    */
+/******************************************************************************/
+/* Copyright(c) 2013, Intel Corp.                                             */
+/******************************************************************************/
+/* Reference:                                                                 */
+/* [1] Shay Gueron, Michael E. Kounavis: Intel(R) Carry-Less Multiplication   */
+/*     Instruction and its Usage for Computing the GCM Mode (Rev. 2.01)       */
+/*     http://software.intel.com/sites/default/files/article/165685/clmul-wp-r*/
+/*ev-2.01-2012-09-21.pdf                                                      */
+/* [2] S. Gueron, M. E. Kounavis: Efficient Implementation of the Galois      */
+/*     Counter Mode Using a Carry-less Multiplier and a Fast Reduction        */
+/*     Algorithm. Information Processing Letters 110: 549-553 (2010).         */
+/* [3] S. Gueron: AES Performance on the 2nd Generation Intel(R) Core(TM)     */
+/*     Processor Family (to be posted) (2012).                                */
+/* [4] S. Gueron: Fast GHASH computations for speeding up AES-GCM (to be      */
+/*     published) (2012).                                                     */
+
+#ifndef INTEL_GCM_H
+#define INTEL_GCM_H 1
+
+#include "blapii.h"
+
+typedef struct intel_AES_GCMContextStr intel_AES_GCMContext;
+
+intel_AES_GCMContext *intel_AES_GCM_CreateContext(void *context, freeblCipherFunc cipher,
+                                                  const unsigned char *params, unsigned int blocksize);
+
+void intel_AES_GCM_DestroyContext(intel_AES_GCMContext *gcm, PRBool freeit);
+
+SECStatus intel_AES_GCM_EncryptUpdate(intel_AES_GCMContext *gcm, unsigned char *outbuf,
+                                      unsigned int *outlen, unsigned int maxout,
+                                      const unsigned char *inbuf, unsigned int inlen,
+                                      unsigned int blocksize);
+
+SECStatus intel_AES_GCM_DecryptUpdate(intel_AES_GCMContext *gcm, unsigned char *outbuf,
+                                      unsigned int *outlen, unsigned int maxout,
+                                      const unsigned char *inbuf, unsigned int inlen,
+                                      unsigned int blocksize);
+
+/* Prototypes of functions in the assembler file for fast AES-GCM, using
+   Intel AES-NI and CLMUL-NI, as described in [1]
+   [1] Shay Gueron, Michael E. Kounavis: Intel(R) Carry-Less Multiplication
+       Instruction and its Usage for Computing the GCM Mode                */
+
+/* Prepares the constants used in the aggregated reduction method */
+void intel_aes_gcmINIT(unsigned char Htbl[16 * 16],
+                       unsigned char *KS,
+                       int NR);
+
+/* Produces the final GCM tag: the completed GHASH value XORed with X0 */
+void intel_aes_gcmTAG(unsigned char Htbl[16 * 16],
+                      unsigned char *Tp,
+                      unsigned long Mlen,
+                      unsigned long Alen,
+                      unsigned char *X0,
+                      unsigned char *TAG);
+
+/* Hashes the Additional Authenticated Data, should be used before enc/dec.
+   Operates on whole blocks only. Partial blocks should be padded externally. */
+void intel_aes_gcmAAD(unsigned char Htbl[16 * 16],
+                      unsigned char *AAD,
+                      unsigned long Alen,
+                      unsigned char *Tp);
+
+/* Encrypts and hashes the Plaintext.
+   Operates on any length of data, however partial block should only be encrypted
+   at the last call, otherwise the result will be incorrect. */
+void intel_aes_gcmENC(const unsigned char *PT,
+                      unsigned char *CT,
+                      void *Gctx,
+                      unsigned long len);
+
+/* Similar to ENC, but decrypts the Ciphertext. */
+void intel_aes_gcmDEC(const unsigned char *CT,
+                      unsigned char *PT,
+                      void *Gctx,
+                      unsigned long len);
+
+#endif
diff --git a/security/nss/lib/freebl/intel-gcm.s b/security/nss/lib/freebl/intel-gcm.s
new file mode 100644
index 000000000..1a3106091
--- /dev/null
+++ b/security/nss/lib/freebl/intel-gcm.s
@@ -0,0 +1,1340 @@
+# LICENSE:                                                                  
+# This submission to NSS is to be made available under the terms of the
+# Mozilla Public License, v. 2.0. You can obtain one at http:         
+# //mozilla.org/MPL/2.0/. 
+################################################################################
+# Copyright(c) 2012, Intel Corp.
+
+.align  16                  # the tables below are also read with aligned loads (vmovdqa)
+.Lone:                      # added with vpaddd to step the counter block by 1
+.quad 1,0
+.Ltwo:                      # added with vpaddd to step the counter block by 2
+.quad 2,0
+.Lbswap_mask:               # vpshufb control that reverses the 16 bytes of a register
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+.Lshuff_mask:               # vpshufb control that copies byte 15 into every byte
+.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+.Lpoly:                     # constant fed to the vpclmulqdq reduction steps
+.quad 0x1, 0xc200000000000000 
+
+
+################################################################################
+# Generates the final GCM tag
+# void intel_aes_gcmTAG(uint8_t Htbl[16*16], uint8_t *Tp, uint64_t Mlen, uint64_t Alen, uint8_t* X0, uint8_t* TAG);
+.type intel_aes_gcmTAG,@function
+.globl intel_aes_gcmTAG
+.align 16
+intel_aes_gcmTAG:
+# Folds both lengths into the hash state read from Tp, multiplies once more, XORs in X0, stores to TAG.
+.set  Htbl, %rdi
+.set  Tp, %rsi
+.set  Mlen, %rdx
+.set  Alen, %rcx
+.set  X0, %r8
+.set  TAG, %r9
+
+.set T,%xmm0
+.set TMP0,%xmm1
+
+   vmovdqu  (Tp), T                       # T = 16-byte hash state at Tp
+   vpshufb  .Lbswap_mask(%rip), T, T      # reverse its byte order
+   vpxor    TMP0, TMP0, TMP0              # TMP0 = 0
+   shl      $3, Mlen                      # Mlen *= 8 (presumably bytes -> bits)
+   shl      $3, Alen                      # Alen *= 8 (presumably bytes -> bits)
+   vpinsrq  $0, Mlen, TMP0, TMP0          # low quadword  = scaled Mlen
+   vpinsrq  $1, Alen, TMP0, TMP0          # high quadword = scaled Alen
+   vpxor    TMP0, T, T                    # XOR the length block into the state
+   vmovdqu  (Htbl), TMP0                  # first Htbl entry
+   call     GFMUL                         # GFMUL is defined outside this view; presumably T = T * TMP0
+   vpshufb  .Lbswap_mask(%rip), T, T      # back to memory byte order
+   vpxor    (X0), T, T                    # XOR with the 16 bytes at X0
+   vmovdqu  T, (TAG)                      # write the 16-byte result to TAG
+   
+ret
+.size intel_aes_gcmTAG, .-intel_aes_gcmTAG
+################################################################################
+# Generates the H table
+# void intel_aes_gcmINIT(uint8_t Htbl[16*16], uint8_t *KS, int NR);
+.type intel_aes_gcmINIT,@function
+.globl intel_aes_gcmINIT
+.align 16
+intel_aes_gcmINIT:
+   
+.set  Htbl, %rdi
+.set  KS, %rsi
+.set  NR, %edx
+
+.set T,%xmm0
+.set TMP0,%xmm1
+# Step 1: encrypt the all-zero block. Starting from round key 0 equals whitening a zero block.
+CALCULATE_POWERS_OF_H:
+    vmovdqu      16*0(KS), T
+    vaesenc      16*1(KS), T, T
+    vaesenc      16*2(KS), T, T
+    vaesenc      16*3(KS), T, T
+    vaesenc      16*4(KS), T, T
+    vaesenc      16*5(KS), T, T
+    vaesenc      16*6(KS), T, T
+    vaesenc      16*7(KS), T, T
+    vaesenc      16*8(KS), T, T
+    vaesenc      16*9(KS), T, T
+    vmovdqu      16*10(KS), TMP0          # last round key if NR == 10
+    cmp          $10, NR
+    je           .LH0done
+    vaesenc      16*10(KS), T, T
+    vaesenc      16*11(KS), T, T
+    vmovdqu      16*12(KS), TMP0          # last round key if NR == 12
+    cmp          $12, NR
+    je           .LH0done
+    vaesenc      16*12(KS), T, T
+    vaesenc      16*13(KS), T, T
+    vmovdqu      16*14(KS), TMP0          # otherwise 14 rounds are assumed
+  
+.LH0done:
+    vaesenclast  TMP0, T, T               # T = H, the encrypted zero block
+
+    vpshufb      .Lbswap_mask(%rip), T, T  # reverse the byte order of H
+
+    vmovdqu	T, TMP0
+    # Calculate H` = GFMUL(H, 2)
+    vpsrld	$7 , T , %xmm3            # top byte of the top dword becomes 0 or 1 (the top bit of H)
+    vmovdqu	.Lshuff_mask(%rip), %xmm4
+    vpshufb	%xmm4, %xmm3 , %xmm3      # copy that byte into all 16 bytes: all 0 or all 1
+    movq	$0xff00 , %rax
+    vmovq	%rax, %xmm4               # byte 0 = 0x00, byte 1 = 0xff
+    vpshufb	%xmm3, %xmm4 , %xmm4      # index 0 or 1 picks 0x00 or 0xff: branch-free mask of the top bit
+    vmovdqu	.Lpoly(%rip), %xmm5
+    vpand	%xmm4, %xmm5, %xmm5       # xmm5 = .Lpoly if the top bit of H was set, else 0
+    vpsrld	$31, T, %xmm3             # carry bit of every dword
+    vpslld	$1, T, %xmm4              # every dword shifted left by one
+    vpslldq	$4, %xmm3, %xmm3          # move each carry up to the next dword
+    vpxor	%xmm3, %xmm4, T  # T (xmm0) now holds p(x)<<1
+
+    # add the masked polynomial in xmm5 to p(x)<<1
+    vpxor     %xmm5, T , T
+    vmovdqu   T, TMP0
+    vmovdqu   T, (Htbl)     # H * 2
+    call  GFMUL             # GFMUL is defined outside this view; presumably T = T * TMP0
+    vmovdqu  T, 16(Htbl)    # H^2 * 2
+    call  GFMUL
+    vmovdqu  T, 32(Htbl)    # H^3 * 2
+    call  GFMUL
+    vmovdqu  T, 48(Htbl)    # H^4 * 2
+    call  GFMUL
+    vmovdqu  T, 64(Htbl)    # H^5 * 2
+    call  GFMUL
+    vmovdqu  T, 80(Htbl)    # H^6 * 2
+    call  GFMUL
+    vmovdqu  T, 96(Htbl)    # H^7 * 2
+    call  GFMUL
+    vmovdqu  T, 112(Htbl)   # H^8 * 2  
+
+    # Precompute (high half XOR low half) of each entry: the Karatsuba middle-term operand
+    vpshufd  $78, (Htbl), %xmm8           # $78 swaps the two quadwords
+    vpshufd  $78, 16(Htbl), %xmm9
+    vpshufd  $78, 32(Htbl), %xmm10
+    vpshufd  $78, 48(Htbl), %xmm11
+    vpshufd  $78, 64(Htbl), %xmm12
+    vpshufd  $78, 80(Htbl), %xmm13
+    vpshufd  $78, 96(Htbl), %xmm14
+    vpshufd  $78, 112(Htbl), %xmm15
+
+    vpxor  (Htbl), %xmm8, %xmm8           # swapped XOR original = hi^lo in both halves
+    vpxor  16(Htbl), %xmm9, %xmm9
+    vpxor  32(Htbl), %xmm10, %xmm10
+    vpxor  48(Htbl), %xmm11, %xmm11
+    vpxor  64(Htbl), %xmm12, %xmm12
+    vpxor  80(Htbl), %xmm13, %xmm13
+    vpxor  96(Htbl), %xmm14, %xmm14
+    vpxor  112(Htbl), %xmm15, %xmm15
+
+    vmovdqu   %xmm8, 128(Htbl)            # second half of the table: entries 8..15
+    vmovdqu   %xmm9, 144(Htbl)
+    vmovdqu   %xmm10, 160(Htbl)
+    vmovdqu   %xmm11, 176(Htbl)
+    vmovdqu   %xmm12, 192(Htbl)
+    vmovdqu   %xmm13, 208(Htbl)
+    vmovdqu   %xmm14, 224(Htbl)
+    vmovdqu   %xmm15, 240(Htbl)
+
+    ret
+.size intel_aes_gcmINIT, .-intel_aes_gcmINIT
+################################################################################
+# Authenticate only
+# void intel_aes_gcmAAD(uint8_t Htbl[16*16], uint8_t *AAD, uint64_t Alen, uint8_t *Tp);
+# The state at Tp is read, updated with Alen bytes from AAD and written back; Alen == 0 returns at once.
+.globl  intel_aes_gcmAAD
+.type   intel_aes_gcmAAD,@function
+.align  16
+intel_aes_gcmAAD:
+
+.set DATA, %xmm0
+.set T, %xmm1
+.set BSWAP_MASK, %xmm2
+.set TMP0, %xmm3
+.set TMP1, %xmm4
+.set TMP2, %xmm5
+.set TMP3, %xmm6
+.set TMP4, %xmm7
+.set Xhi, %xmm9
+
+.set Htbl, %rdi
+.set inp, %rsi
+.set len, %rdx
+.set Tp, %rcx
+
+.set hlp0, %r11
+
+.macro KARATSUBA_AAD i
+    vpclmulqdq  $0x00, 16*\i(Htbl), DATA, TMP3       # low halves:  DATA.lo * Htbl[i].lo
+    vpxor       TMP3, TMP0, TMP0                     #   accumulated in TMP0
+    vpclmulqdq  $0x11, 16*\i(Htbl), DATA, TMP3       # high halves: DATA.hi * Htbl[i].hi
+    vpxor       TMP3, TMP1, TMP1                     #   accumulated in TMP1
+    vpshufd     $78,  DATA, TMP3                     # swap the two quadwords of DATA
+    vpxor       DATA, TMP3, TMP3                     # TMP3 = DATA.hi ^ DATA.lo in both halves
+    vpclmulqdq  $0x00, 16*(\i+8)(Htbl), TMP3, TMP3   # middle term against Htbl[i+8]
+    vpxor       TMP3, TMP2, TMP2                     #   accumulated in TMP2
+.endm
+
+    test  len, len
+    jnz   .LbeginAAD
+    ret
+
+.LbeginAAD:
+
+   push  hlp0
+   vzeroupper
+   
+   vmovdqa  .Lbswap_mask(%rip), BSWAP_MASK
+   
+   vpxor    Xhi, Xhi, Xhi
+   
+   vmovdqu  (Tp),T                        # T = incoming hash state
+   vpshufb  BSWAP_MASK,T,T
+
+   # 8 blocks are hashed per iteration; if the block count is not a multiple of 8, the first n%8 blocks are hashed first
+    mov     len, hlp0
+    and	    $~-128, hlp0                  # hlp0 = len mod 128 (~-128 == 127)
+    # NOTE(review): on this jz the loop first reduces the incoming T with Xhi = 0, which looks right only if *Tp was zero - confirm callers
+    jz      .Lmod_loop
+    # len must be a multiple of 16: the prefix loop below only stops when hlp0 reaches exactly 0
+    sub     hlp0, len                     # len is now a multiple of 128
+    sub     $16, hlp0                     # byte offset of the Htbl entry used for the first prefix block
+
+   #hash first prefix block
+	vmovdqu (inp), DATA
+	vpshufb  BSWAP_MASK, DATA, DATA
+	vpxor    T, DATA, DATA                # the first block absorbs the running state
+	
+	vpclmulqdq  $0x00, (Htbl, hlp0), DATA, TMP0
+	vpclmulqdq  $0x11, (Htbl, hlp0), DATA, TMP1
+	vpshufd     $78, DATA, TMP2
+	vpxor       DATA, TMP2, TMP2
+	vpclmulqdq  $0x00, 16*8(Htbl, hlp0), TMP2, TMP2
+	
+	lea	    16(inp), inp
+	test    hlp0, hlp0
+	jnz	    .Lpre_loop
+	jmp	    .Lred1
+
+    #hash remaining prefix blocks (up to 7 total prefix blocks)
+.align 64
+.Lpre_loop:
+
+    sub	$16, hlp0                         # step down to the next table entry
+
+    vmovdqu     (inp),DATA           # next data block
+    vpshufb     BSWAP_MASK,DATA,DATA
+
+    vpclmulqdq  $0x00, (Htbl,hlp0), DATA, TMP3
+    vpxor       TMP3, TMP0, TMP0
+    vpclmulqdq  $0x11, (Htbl,hlp0), DATA, TMP3
+    vpxor       TMP3, TMP1, TMP1
+    vpshufd	    $78, DATA, TMP3
+    vpxor       DATA, TMP3, TMP3
+    vpclmulqdq  $0x00, 16*8(Htbl,hlp0), TMP3, TMP3
+    vpxor       TMP3, TMP2, TMP2
+
+    test	hlp0, hlp0                        # flags survive the lea below and feed the jnz
+
+    lea	16(inp), inp
+
+    jnz	.Lpre_loop
+	
+.Lred1:
+    vpxor       TMP0, TMP2, TMP2          # karatsuba fixup: middle ^= low ^ high
+    vpxor       TMP1, TMP2, TMP2
+    vpsrldq     $8, TMP2, TMP3
+    vpslldq     $8, TMP2, TMP2
+
+    vpxor       TMP3, TMP1, Xhi           # Xhi = upper 128 bits of the unreduced sum
+    vpxor       TMP2, TMP0, T             # T   = lower 128 bits, reduced later
+	
+.align 64
+.Lmod_loop:
+    sub	$0x80, len
+    jb	.Ldone                            # fewer than 128 bytes were left
+
+    vmovdqu     16*7(inp),DATA		# Ii
+    vpshufb     BSWAP_MASK,DATA,DATA
+
+    vpclmulqdq  $0x00, (Htbl), DATA, TMP0
+    vpclmulqdq  $0x11, (Htbl), DATA, TMP1
+    vpshufd     $78, DATA, TMP2
+    vpxor       DATA, TMP2, TMP2
+    vpclmulqdq  $0x00, 16*8(Htbl), TMP2, TMP2
+    #########################################################
+    vmovdqu     16*6(inp),DATA
+    vpshufb     BSWAP_MASK,DATA,DATA
+    KARATSUBA_AAD 1
+    #########################################################
+    vmovdqu     16*5(inp),DATA
+    vpshufb     BSWAP_MASK,DATA,DATA
+
+    vpclmulqdq  $0x10, .Lpoly(%rip), T, TMP4         #reduction stage 1a
+    vpalignr    $8, T, T, T
+
+    KARATSUBA_AAD 2
+
+    vpxor       TMP4, T, T                 #reduction stage 1b
+    #########################################################
+    vmovdqu		16*4(inp),DATA
+    vpshufb	    BSWAP_MASK,DATA,DATA
+
+    KARATSUBA_AAD 3
+    #########################################################
+    vmovdqu     16*3(inp),DATA
+    vpshufb     BSWAP_MASK,DATA,DATA
+
+    vpclmulqdq  $0x10, .Lpoly(%rip), T, TMP4         #reduction stage 2a
+    vpalignr    $8, T, T, T
+
+    KARATSUBA_AAD 4
+
+    vpxor       TMP4, T, T                 #reduction stage 2b
+    #########################################################
+    vmovdqu     16*2(inp),DATA
+    vpshufb     BSWAP_MASK,DATA,DATA
+
+    KARATSUBA_AAD 5
+
+    vpxor       Xhi, T, T                  #reduction finalize
+    #########################################################
+    vmovdqu     16*1(inp),DATA
+    vpshufb     BSWAP_MASK,DATA,DATA
+
+    KARATSUBA_AAD 6
+    #########################################################
+    vmovdqu     16*0(inp),DATA
+    vpshufb     BSWAP_MASK,DATA,DATA
+    vpxor       T,DATA,DATA                # first block of the group absorbs the reduced state
+
+    KARATSUBA_AAD 7
+    #########################################################
+    vpxor       TMP0, TMP2, TMP2              # karatsuba fixup
+    vpxor       TMP1, TMP2, TMP2
+    vpsrldq     $8, TMP2, TMP3
+    vpslldq     $8, TMP2, TMP2
+
+    vpxor       TMP3, TMP1, Xhi
+    vpxor       TMP2, TMP0, T
+
+    lea	16*8(inp), inp
+    jmp .Lmod_loop
+    #########################################################
+
+.Ldone:
+    vpclmulqdq  $0x10, .Lpoly(%rip), T, TMP3          # first reduction step on the pending T
+    vpalignr    $8, T, T, T
+    vpxor       TMP3, T, T
+
+    vpclmulqdq  $0x10, .Lpoly(%rip), T, TMP3          # second reduction step
+    vpalignr    $8, T, T, T
+    vpxor       TMP3, T, T
+
+    vpxor       Xhi, T, T                             # fold in the upper half
+   
+.Lsave:
+    vpshufb     BSWAP_MASK,T, T
+    vmovdqu     T,(Tp)                                # write the updated state back
+    vzeroupper
+
+    pop hlp0
+    ret
+.size   intel_aes_gcmAAD,.-intel_aes_gcmAAD
+
+################################################################################
+# Encrypt and Authenticate
+# void intel_aes_gcmENC(uint8_t* PT, uint8_t* CT, void *Gctx,uint64_t len);
+.type intel_aes_gcmENC,@function
+.globl intel_aes_gcmENC
+.align 16
+intel_aes_gcmENC:
+# Offsets read from Gctx below: H table at +0, hash state at +272, counter block at +288, pointer at +304.
+.set PT,%rdi
+.set CT,%rsi
+.set Htbl, %rdx
+.set len, %rcx
+.set KS,%r9
+.set NR,%r10d
+# Gctx and Htbl name the same register: the H table is addressed at offset 0 of Gctx.
+.set Gctx, %rdx
+
+.set T,%xmm0
+.set TMP0,%xmm1
+.set TMP1,%xmm2
+.set TMP2,%xmm3
+.set TMP3,%xmm4
+.set TMP4,%xmm5
+.set TMP5,%xmm6
+.set CTR0,%xmm7
+.set CTR1,%xmm8
+.set CTR2,%xmm9
+.set CTR3,%xmm10
+.set CTR4,%xmm11
+.set CTR5,%xmm12
+.set CTR6,%xmm13
+.set CTR7,%xmm14
+.set CTR,%xmm15
+# ROUND i: one AES round with round key i on all eight counter blocks (clobbers TMP3).
+.macro ROUND i
+    vmovdqu \i*16(KS), TMP3
+    vaesenc TMP3, CTR0, CTR0
+    vaesenc TMP3, CTR1, CTR1
+    vaesenc TMP3, CTR2, CTR2
+    vaesenc TMP3, CTR3, CTR3
+    vaesenc TMP3, CTR4, CTR4
+    vaesenc TMP3, CTR5, CTR5
+    vaesenc TMP3, CTR6, CTR6
+    vaesenc TMP3, CTR7, CTR7
+.endm
+# ROUNDMUL i: AES round i on the eight counter blocks, interleaved with one multiply of stack slot i by Htbl[i].
+.macro ROUNDMUL i
+
+    vmovdqu \i*16(%rsp), TMP5
+    vmovdqu \i*16(KS), TMP3
+
+    vaesenc TMP3, CTR0, CTR0
+    vaesenc TMP3, CTR1, CTR1
+    vaesenc TMP3, CTR2, CTR2
+    vaesenc TMP3, CTR3, CTR3
+
+    vpshufd $78, TMP5, TMP4
+    vpxor   TMP5, TMP4, TMP4
+
+    vaesenc TMP3, CTR4, CTR4
+    vaesenc TMP3, CTR5, CTR5
+    vaesenc TMP3, CTR6, CTR6
+    vaesenc TMP3, CTR7, CTR7
+
+    vpclmulqdq  $0x00, 128+\i*16(Htbl), TMP4, TMP3   # middle term, accumulated in TMP0
+    vpxor       TMP3, TMP0, TMP0
+    vmovdqa     \i*16(Htbl), TMP4                    # aligned load: faults unless Htbl (Gctx) is 16-byte aligned
+    vpclmulqdq  $0x11, TMP4, TMP5, TMP3              # high halves, accumulated in TMP1
+    vpxor       TMP3, TMP1, TMP1
+    vpclmulqdq  $0x00, TMP4, TMP5, TMP3              # low halves, accumulated in TMP2
+    vpxor       TMP3, TMP2, TMP2
+  
+.endm
+# KARATSUBA i: the multiply-accumulate part of ROUNDMUL alone: stack slot i times Htbl[i].
+.macro KARATSUBA i
+    vmovdqu \i*16(%rsp), TMP5
+
+    vpclmulqdq  $0x11, 16*\i(Htbl), TMP5, TMP3
+    vpxor       TMP3, TMP1, TMP1
+    vpclmulqdq  $0x00, 16*\i(Htbl), TMP5, TMP3
+    vpxor       TMP3, TMP2, TMP2
+    vpshufd     $78, TMP5, TMP3
+    vpxor       TMP5, TMP3, TMP5
+    vpclmulqdq  $0x00, 128+\i*16(Htbl), TMP5, TMP3
+    vpxor       TMP3, TMP0, TMP0
+.endm
+
+    test len, len                 # nothing to do for len == 0
+    jnz  .Lbegin
+    ret
+   
+.Lbegin:
+
+    vzeroupper
+    push %rbp
+    push %rbx
+
+    movq %rsp, %rbp               # rbp remembers the stack pointer for the epilogue
+    sub  $128, %rsp               # eight 16-byte scratch slots
+    andq $-16, %rsp               # 16-byte aligned: the slots are accessed with vmovdqa
+
+    vmovdqu  288(Gctx), CTR       # counter block
+    vmovdqu  272(Gctx), T         # running hash state
+    mov  304(Gctx), KS            # pointer stored at Gctx+304 (presumably the AES context - confirm)
+    mov  4(KS), NR                # 32-bit value at offset 4, compared below against 10/12/14 as the round count
+    lea  48(KS), KS               # KS now points 48 bytes in; used from here on as the round-key array
+
+    vpshufb  .Lbswap_mask(%rip), CTR, CTR
+    vpshufb  .Lbswap_mask(%rip), T, T
+
+    cmp  $128, len                # fewer than eight blocks: go block by block
+    jb   .LDataSingles
+   
+# Encrypt the first eight blocks
+    sub     $128, len
+    vmovdqa CTR, CTR0             # CTR0..CTR7 = next eight counter values, CTR advances by 8
+    vpaddd  .Lone(%rip), CTR0, CTR1
+    vpaddd  .Ltwo(%rip), CTR0, CTR2
+    vpaddd  .Lone(%rip), CTR2, CTR3
+    vpaddd  .Ltwo(%rip), CTR2, CTR4
+    vpaddd  .Lone(%rip), CTR4, CTR5
+    vpaddd  .Ltwo(%rip), CTR4, CTR6
+    vpaddd  .Lone(%rip), CTR6, CTR7
+    vpaddd  .Ltwo(%rip), CTR6, CTR
+
+    vpshufb .Lbswap_mask(%rip), CTR0, CTR0
+    vpshufb .Lbswap_mask(%rip), CTR1, CTR1
+    vpshufb .Lbswap_mask(%rip), CTR2, CTR2
+    vpshufb .Lbswap_mask(%rip), CTR3, CTR3
+    vpshufb .Lbswap_mask(%rip), CTR4, CTR4
+    vpshufb .Lbswap_mask(%rip), CTR5, CTR5
+    vpshufb .Lbswap_mask(%rip), CTR6, CTR6
+    vpshufb .Lbswap_mask(%rip), CTR7, CTR7
+
+    vpxor   (KS), CTR0, CTR0      # round 0: XOR with round key 0
+    vpxor   (KS), CTR1, CTR1
+    vpxor   (KS), CTR2, CTR2
+    vpxor   (KS), CTR3, CTR3
+    vpxor   (KS), CTR4, CTR4
+    vpxor   (KS), CTR5, CTR5
+    vpxor   (KS), CTR6, CTR6
+    vpxor   (KS), CTR7, CTR7
+
+    ROUND 1
+    ROUND 2
+    ROUND 3
+    ROUND 4
+    ROUND 5
+    ROUND 6
+    ROUND 7
+    ROUND 8
+    ROUND 9
+
+    vmovdqu 160(KS), TMP5         # round key 10: the last key when NR < 12
+    cmp $12, NR
+    jb  .LLast1
+
+    ROUND 10
+    ROUND 11
+
+    vmovdqu 192(KS), TMP5         # round key 12: the last key when NR < 14
+    cmp $14, NR
+    jb  .LLast1
+
+    ROUND 12
+    ROUND 13
+
+    vmovdqu 224(KS), TMP5         # round key 14
+  
+.LLast1:
+# The last round key is XORed with the plaintext first, so vaesenclast yields ciphertext directly.
+    vpxor       (PT), TMP5, TMP3
+    vaesenclast TMP3, CTR0, CTR0
+    vpxor       16(PT), TMP5, TMP3
+    vaesenclast TMP3, CTR1, CTR1
+    vpxor       32(PT), TMP5, TMP3
+    vaesenclast TMP3, CTR2, CTR2
+    vpxor       48(PT), TMP5, TMP3
+    vaesenclast TMP3, CTR3, CTR3
+    vpxor       64(PT), TMP5, TMP3
+    vaesenclast TMP3, CTR4, CTR4
+    vpxor       80(PT), TMP5, TMP3
+    vaesenclast TMP3, CTR5, CTR5
+    vpxor       96(PT), TMP5, TMP3
+    vaesenclast TMP3, CTR6, CTR6
+    vpxor       112(PT), TMP5, TMP3
+    vaesenclast TMP3, CTR7, CTR7
+    
+    vmovdqu     .Lbswap_mask(%rip), TMP3   # TMP3 keeps the byte-swap mask; .LDataOctets relies on it at entry
+   
+    vmovdqu CTR0, (CT)            # store ciphertext, then byte-reverse the register copy for hashing
+    vpshufb TMP3, CTR0, CTR0
+    vmovdqu CTR1, 16(CT)
+    vpshufb TMP3, CTR1, CTR1
+    vmovdqu CTR2, 32(CT)
+    vpshufb TMP3, CTR2, CTR2
+    vmovdqu CTR3, 48(CT)
+    vpshufb TMP3, CTR3, CTR3
+    vmovdqu CTR4, 64(CT)
+    vpshufb TMP3, CTR4, CTR4
+    vmovdqu CTR5, 80(CT)
+    vpshufb TMP3, CTR5, CTR5
+    vmovdqu CTR6, 96(CT)
+    vpshufb TMP3, CTR6, CTR6
+    vmovdqu CTR7, 112(CT)
+    vpshufb TMP3, CTR7, CTR7
+
+    lea 128(CT), CT
+    lea 128(PT), PT
+    jmp .LDataOctets
+
+# Encrypt 8 blocks each time while hashing previous 8 blocks
+.align 64
+.LDataOctets:
+        cmp $128, len
+        jb  .LEndOctets
+        sub $128, len
+
+        vmovdqa CTR7, TMP5        # newest of the previous eight blocks stays in a register
+        vmovdqa CTR6, 1*16(%rsp)  # the other seven go to stack slots 1..7 (slot 7 = oldest)
+        vmovdqa CTR5, 2*16(%rsp)
+        vmovdqa CTR4, 3*16(%rsp)
+        vmovdqa CTR3, 4*16(%rsp)
+        vmovdqa CTR2, 5*16(%rsp)
+        vmovdqa CTR1, 6*16(%rsp)
+        vmovdqa CTR0, 7*16(%rsp)
+
+        vmovdqa CTR, CTR0         # next eight counter values
+        vpaddd  .Lone(%rip), CTR0, CTR1
+        vpaddd  .Ltwo(%rip), CTR0, CTR2
+        vpaddd  .Lone(%rip), CTR2, CTR3
+        vpaddd  .Ltwo(%rip), CTR2, CTR4
+        vpaddd  .Lone(%rip), CTR4, CTR5
+        vpaddd  .Ltwo(%rip), CTR4, CTR6
+        vpaddd  .Lone(%rip), CTR6, CTR7
+        vpaddd  .Ltwo(%rip), CTR6, CTR
+
+        vmovdqu (KS), TMP4        # round key 0
+        vpshufb TMP3, CTR0, CTR0  # TMP3 still holds the byte-swap mask here
+        vpxor   TMP4, CTR0, CTR0
+        vpshufb TMP3, CTR1, CTR1
+        vpxor   TMP4, CTR1, CTR1
+        vpshufb TMP3, CTR2, CTR2
+        vpxor   TMP4, CTR2, CTR2
+        vpshufb TMP3, CTR3, CTR3
+        vpxor   TMP4, CTR3, CTR3
+        vpshufb TMP3, CTR4, CTR4
+        vpxor   TMP4, CTR4, CTR4
+        vpshufb TMP3, CTR5, CTR5
+        vpxor   TMP4, CTR5, CTR5
+        vpshufb TMP3, CTR6, CTR6
+        vpxor   TMP4, CTR6, CTR6
+        vpshufb TMP3, CTR7, CTR7
+        vpxor   TMP4, CTR7, CTR7
+
+        vmovdqu     16*0(Htbl), TMP3               # start the hash: newest previous block times Htbl[0]
+        vpclmulqdq  $0x11, TMP3, TMP5, TMP1
+        vpclmulqdq  $0x00, TMP3, TMP5, TMP2      
+        vpshufd     $78, TMP5, TMP3
+        vpxor       TMP5, TMP3, TMP5
+        vmovdqu     128+0*16(Htbl), TMP3      
+        vpclmulqdq  $0x00, TMP3, TMP5, TMP0
+
+        ROUNDMUL 1
+
+        ROUNDMUL 2
+
+        ROUNDMUL 3
+
+        ROUNDMUL 4
+
+        ROUNDMUL 5
+
+        ROUNDMUL 6
+
+        vpxor   7*16(%rsp), T, TMP5   # the oldest saved block absorbs the running hash T
+        vmovdqu 7*16(KS), TMP3
+
+        vaesenc TMP3, CTR0, CTR0
+        vaesenc TMP3, CTR1, CTR1
+        vaesenc TMP3, CTR2, CTR2
+        vaesenc TMP3, CTR3, CTR3
+
+        vpshufd $78, TMP5, TMP4
+        vpxor   TMP5, TMP4, TMP4
+
+        vaesenc TMP3, CTR4, CTR4
+        vaesenc TMP3, CTR5, CTR5
+        vaesenc TMP3, CTR6, CTR6
+        vaesenc TMP3, CTR7, CTR7
+
+        vpclmulqdq  $0x11, 7*16(Htbl), TMP5, TMP3
+        vpxor       TMP3, TMP1, TMP1
+        vpclmulqdq  $0x00, 7*16(Htbl), TMP5, TMP3
+        vpxor       TMP3, TMP2, TMP2
+        vpclmulqdq  $0x00, 128+7*16(Htbl), TMP4, TMP3
+        vpxor       TMP3, TMP0, TMP0
+
+        ROUND 8    
+        vmovdqa .Lpoly(%rip), TMP5
+
+        vpxor   TMP1, TMP0, TMP0      # karatsuba fixup: middle ^= high ^ low
+        vpxor   TMP2, TMP0, TMP0
+        vpsrldq $8, TMP0, TMP3
+        vpxor   TMP3, TMP1, TMP4      # TMP4 = upper half, added to T at the end of the iteration
+        vpslldq $8, TMP0, TMP3
+        vpxor   TMP3, TMP2, T         # T = lower half, reduced in two steps below
+
+        vpclmulqdq  $0x10, TMP5, T, TMP1   # first reduction step
+        vpalignr    $8, T, T, T
+        vpxor       T, TMP1, T
+
+        ROUND 9
+
+        vpclmulqdq  $0x10, TMP5, T, TMP1   # second reduction step
+        vpalignr    $8, T, T, T
+        vpxor       T, TMP1, T
+
+        vmovdqu 160(KS), TMP5
+        cmp     $10, NR
+        jbe     .LLast2
+
+        ROUND 10
+        ROUND 11
+
+        vmovdqu 192(KS), TMP5
+        cmp     $12, NR
+        jbe     .LLast2
+
+        ROUND 12
+        ROUND 13
+
+        vmovdqu 224(KS), TMP5
+
+.LLast2:
+      
+        vpxor       (PT), TMP5, TMP3
+        vaesenclast TMP3, CTR0, CTR0
+        vpxor       16(PT), TMP5, TMP3
+        vaesenclast TMP3, CTR1, CTR1
+        vpxor       32(PT), TMP5, TMP3
+        vaesenclast TMP3, CTR2, CTR2
+        vpxor       48(PT), TMP5, TMP3
+        vaesenclast TMP3, CTR3, CTR3
+        vpxor       64(PT), TMP5, TMP3
+        vaesenclast TMP3, CTR4, CTR4
+        vpxor       80(PT), TMP5, TMP3
+        vaesenclast TMP3, CTR5, CTR5
+        vpxor       96(PT), TMP5, TMP3
+        vaesenclast TMP3, CTR6, CTR6
+        vpxor       112(PT), TMP5, TMP3
+        vaesenclast TMP3, CTR7, CTR7
+
+        vmovdqu .Lbswap_mask(%rip), TMP3   # reload the mask for the next iteration
+
+        vmovdqu CTR0, (CT)
+        vpshufb TMP3, CTR0, CTR0
+        vmovdqu CTR1, 16(CT)
+        vpshufb TMP3, CTR1, CTR1
+        vmovdqu CTR2, 32(CT)
+        vpshufb TMP3, CTR2, CTR2
+        vmovdqu CTR3, 48(CT)
+        vpshufb TMP3, CTR3, CTR3
+        vmovdqu CTR4, 64(CT)
+        vpshufb TMP3, CTR4, CTR4
+        vmovdqu CTR5, 80(CT)
+        vpshufb TMP3, CTR5, CTR5
+        vmovdqu CTR6, 96(CT)
+        vpshufb TMP3, CTR6, CTR6
+        vmovdqu CTR7,112(CT)
+        vpshufb TMP3, CTR7, CTR7
+
+        vpxor   TMP4, T, T            # fold in the upper half: T is the new hash state
+
+        lea 128(CT), CT
+        lea 128(PT), PT
+    jmp  .LDataOctets
+
+.LEndOctets:
+    # Hash the last eight ciphertext blocks; no encryption is left to interleave with.
+    vmovdqa CTR7, TMP5
+    vmovdqa CTR6, 1*16(%rsp)
+    vmovdqa CTR5, 2*16(%rsp)
+    vmovdqa CTR4, 3*16(%rsp)
+    vmovdqa CTR3, 4*16(%rsp)
+    vmovdqa CTR2, 5*16(%rsp)
+    vmovdqa CTR1, 6*16(%rsp)
+    vmovdqa CTR0, 7*16(%rsp)
+
+    vmovdqu     16*0(Htbl), TMP3
+    vpclmulqdq  $0x11, TMP3, TMP5, TMP1
+    vpclmulqdq  $0x00, TMP3, TMP5, TMP2      
+    vpshufd     $78, TMP5, TMP3
+    vpxor       TMP5, TMP3, TMP5
+    vmovdqu     128+0*16(Htbl), TMP3      
+    vpclmulqdq  $0x00, TMP3, TMP5, TMP0
+
+    KARATSUBA 1
+    KARATSUBA 2
+    KARATSUBA 3      
+    KARATSUBA 4
+    KARATSUBA 5
+    KARATSUBA 6
+
+    vmovdqu     7*16(%rsp), TMP5
+    vpxor       T, TMP5, TMP5         # the oldest block absorbs the running hash T
+    vmovdqu     16*7(Htbl), TMP4            
+    vpclmulqdq  $0x11, TMP4, TMP5, TMP3
+    vpxor       TMP3, TMP1, TMP1
+    vpclmulqdq  $0x00, TMP4, TMP5, TMP3
+    vpxor       TMP3, TMP2, TMP2      
+    vpshufd     $78, TMP5, TMP3
+    vpxor       TMP5, TMP3, TMP5
+    vmovdqu     128+7*16(Htbl), TMP4      
+    vpclmulqdq  $0x00, TMP4, TMP5, TMP3
+    vpxor       TMP3, TMP0, TMP0
+
+    vpxor       TMP1, TMP0, TMP0      # karatsuba fixup
+    vpxor       TMP2, TMP0, TMP0
+
+    vpsrldq     $8, TMP0, TMP3
+    vpxor       TMP3, TMP1, TMP4
+    vpslldq     $8, TMP0, TMP3
+    vpxor       TMP3, TMP2, T
+
+    vmovdqa     .Lpoly(%rip), TMP2
+
+    vpalignr    $8, T, T, TMP1        # first reduction step
+    vpclmulqdq  $0x10, TMP2, T, T
+    vpxor       T, TMP1, T
+
+    vpalignr    $8, T, T, TMP1        # second reduction step
+    vpclmulqdq  $0x10, TMP2, T, T
+    vpxor       T, TMP1, T
+
+    vpxor       TMP4, T, T            # fold in the upper half
+
+# Encrypt and hash the remaining whole blocks one at a time
+.LDataSingles:
+
+    cmp $16, len
+    jb  .LDataTail
+    sub $16, len
+
+    vpshufb .Lbswap_mask(%rip), CTR, TMP1   # TMP1 = current counter in memory byte order
+    vpaddd  .Lone(%rip), CTR, CTR           # advance the counter for the next block
+
+    vpxor   (KS), TMP1, TMP1
+    vaesenc 16*1(KS), TMP1, TMP1
+    vaesenc 16*2(KS), TMP1, TMP1
+    vaesenc 16*3(KS), TMP1, TMP1
+    vaesenc 16*4(KS), TMP1, TMP1
+    vaesenc 16*5(KS), TMP1, TMP1
+    vaesenc 16*6(KS), TMP1, TMP1
+    vaesenc 16*7(KS), TMP1, TMP1
+    vaesenc 16*8(KS), TMP1, TMP1
+    vaesenc 16*9(KS), TMP1, TMP1
+    vmovdqu 16*10(KS), TMP2
+    cmp     $10, NR
+    je      .LLast3
+    vaesenc 16*10(KS), TMP1, TMP1
+    vaesenc 16*11(KS), TMP1, TMP1
+    vmovdqu 16*12(KS), TMP2
+    cmp     $12, NR
+    je      .LLast3
+    vaesenc 16*12(KS), TMP1, TMP1
+    vaesenc 16*13(KS), TMP1, TMP1
+    vmovdqu 16*14(KS), TMP2
+
+.LLast3:
+    vaesenclast TMP2, TMP1, TMP1      # TMP1 = keystream block
+
+    vpxor   (PT), TMP1, TMP1          # ciphertext = keystream XOR plaintext
+    vmovdqu TMP1, (CT)
+    addq    $16, CT
+    addq    $16, PT
+
+    vpshufb .Lbswap_mask(%rip), TMP1, TMP1
+    vpxor   TMP1, T, T                # absorb the ciphertext block into the hash state
+    vmovdqu (Htbl), TMP0
+    call    GFMUL                     # GFMUL is defined outside this view; presumably T = T * TMP0
+
+    jmp .LDataSingles
+
+#Here we encrypt the final partial block, if there is one
+.LDataTail:
+
+    test    len, len
+    jz      DATA_END
+# First prepare the counter block
+    vpshufb .Lbswap_mask(%rip), CTR, TMP1
+    vpaddd  .Lone(%rip), CTR, CTR
+
+    vpxor   (KS), TMP1, TMP1
+    vaesenc 16*1(KS), TMP1, TMP1
+    vaesenc 16*2(KS), TMP1, TMP1
+    vaesenc 16*3(KS), TMP1, TMP1
+    vaesenc 16*4(KS), TMP1, TMP1
+    vaesenc 16*5(KS), TMP1, TMP1
+    vaesenc 16*6(KS), TMP1, TMP1
+    vaesenc 16*7(KS), TMP1, TMP1
+    vaesenc 16*8(KS), TMP1, TMP1
+    vaesenc 16*9(KS), TMP1, TMP1
+    vmovdqu 16*10(KS), TMP2
+    cmp     $10, NR
+    je      .LLast4
+    vaesenc 16*10(KS), TMP1, TMP1
+    vaesenc 16*11(KS), TMP1, TMP1
+    vmovdqu 16*12(KS), TMP2
+    cmp     $12, NR
+    je      .LLast4
+    vaesenc 16*12(KS), TMP1, TMP1
+    vaesenc 16*13(KS), TMP1, TMP1
+    vmovdqu 16*14(KS), TMP2
+  
+.LLast4:
+    vaesenclast TMP2, TMP1, TMP1
+#Zero a temp location
+    vpxor   TMP2, TMP2, TMP2
+    vmovdqa TMP2, (%rsp)
+    
+# Copy the required bytes only (could probably use rep movsb)
+    xor KS, KS                        # KS is reused as the byte index from here on
+.LEncCpy:
+        cmp     KS, len
+        je      .LEncCpyEnd
+        movb    (PT, KS, 1), %r8b
+        movb    %r8b, (%rsp, KS, 1)
+        inc     KS
+        jmp .LEncCpy
+.LEncCpyEnd:
+# Xor with the counter block
+    vpxor   (%rsp), TMP1, TMP0
+# Again, store at temp location
+    vmovdqa TMP0, (%rsp)
+# Copy only the required bytes to CT, and zero the rest for the hash
+    xor KS, KS
+.LEncCpy2:
+    cmp     KS, len
+    je      .LEncCpy3
+    movb    (%rsp, KS, 1), %r8b
+    movb    %r8b, (CT, KS, 1)
+    inc     KS
+    jmp .LEncCpy2
+.LEncCpy3:
+    cmp     $16, KS
+    je      .LEndCpy3
+    movb    $0, (%rsp, KS, 1)         # clear the bytes past len so only real ciphertext is hashed
+    inc     KS
+    jmp .LEncCpy3
+.LEndCpy3:
+   vmovdqa  (%rsp), TMP0
+
+   vpshufb  .Lbswap_mask(%rip), TMP0, TMP0
+   vpxor    TMP0, T, T
+   vmovdqu  (Htbl), TMP0
+   call     GFMUL
+
+DATA_END:
+# Write the hash state and counter back to Gctx in memory byte order, then unwind the stack.
+   vpshufb  .Lbswap_mask(%rip), T, T
+   vpshufb  .Lbswap_mask(%rip), CTR, CTR
+   vmovdqu  T, 272(Gctx)
+   vmovdqu  CTR, 288(Gctx)
+
+   movq   %rbp, %rsp
+
+   popq   %rbx
+   popq   %rbp
+   ret
+   .size intel_aes_gcmENC, .-intel_aes_gcmENC
+  
+#########################
+# Decrypt and Authenticate
+# void intel_aes_gcmDEC(uint8_t* CT, uint8_t* PT, void *Gctx,uint64_t len);
+.type intel_aes_gcmDEC,@function
+.globl intel_aes_gcmDEC
+.align 16
+intel_aes_gcmDEC:
+# parameter 1: CT    # input
+# parameter 2: PT    # output
+# parameter 3: %rdx  # Gctx
+# parameter 4: %rcx  # len
+
+.macro DEC_KARATSUBA i
+    vmovdqu     (7-\i)*16(CT), TMP5
+    vpshufb     .Lbswap_mask(%rip), TMP5, TMP5
+
+    vpclmulqdq  $0x11, 16*\i(Htbl), TMP5, TMP3
+    vpxor       TMP3, TMP1, TMP1
+    vpclmulqdq  $0x00, 16*\i(Htbl), TMP5, TMP3
+    vpxor       TMP3, TMP2, TMP2
+    vpshufd     $78, TMP5, TMP3
+    vpxor       TMP5, TMP3, TMP5
+    vpclmulqdq  $0x00, 128+\i*16(Htbl), TMP5, TMP3
+    vpxor       TMP3, TMP0, TMP0
+.endm
+
+.set PT,%rsi
+.set CT,%rdi
+.set Htbl, %rdx
+.set len, %rcx
+.set KS,%r9
+.set NR,%r10d
+
+.set Gctx, %rdx
+
+.set T,%xmm0
+.set TMP0,%xmm1
+.set TMP1,%xmm2
+.set TMP2,%xmm3
+.set TMP3,%xmm4
+.set TMP4,%xmm5
+.set TMP5,%xmm6
+.set CTR0,%xmm7
+.set CTR1,%xmm8
+.set CTR2,%xmm9
+.set CTR3,%xmm10
+.set CTR4,%xmm11
+.set CTR5,%xmm12
+.set CTR6,%xmm13
+.set CTR7,%xmm14
+.set CTR,%xmm15
+
+    test  len, len
+    jnz   .LbeginDec
+    ret
+   
+.LbeginDec:
+
+    pushq   %rbp
+    pushq   %rbx
+    movq    %rsp, %rbp   
+    sub     $128, %rsp
+    andq    $-16, %rsp
+    vmovdqu 288(Gctx), CTR
+    vmovdqu 272(Gctx), T
+    mov     304(Gctx), KS
+    mov     4(KS), NR
+    lea     48(KS), KS
+
+    vpshufb .Lbswap_mask(%rip), CTR, CTR
+    vpshufb .Lbswap_mask(%rip), T, T
+     
+    vmovdqu .Lbswap_mask(%rip), TMP3
+    jmp     .LDECOctets
+      
+# Decrypt 8 blocks each time while hashing them at the same time
+.align 64
+.LDECOctets:
+   
+        cmp $128, len
+        jb  .LDECSingles
+        sub $128, len
+
+        vmovdqa CTR, CTR0
+        vpaddd  .Lone(%rip), CTR0, CTR1
+        vpaddd  .Ltwo(%rip), CTR0, CTR2
+        vpaddd  .Lone(%rip), CTR2, CTR3
+        vpaddd  .Ltwo(%rip), CTR2, CTR4
+        vpaddd  .Lone(%rip), CTR4, CTR5
+        vpaddd  .Ltwo(%rip), CTR4, CTR6
+        vpaddd  .Lone(%rip), CTR6, CTR7
+        vpaddd  .Ltwo(%rip), CTR6, CTR
+
+        vpshufb TMP3, CTR0, CTR0
+        vpshufb TMP3, CTR1, CTR1
+        vpshufb TMP3, CTR2, CTR2
+        vpshufb TMP3, CTR3, CTR3
+        vpshufb TMP3, CTR4, CTR4
+        vpshufb TMP3, CTR5, CTR5
+        vpshufb TMP3, CTR6, CTR6
+        vpshufb TMP3, CTR7, CTR7
+
+        vmovdqu (KS), TMP3
+        vpxor  TMP3, CTR0, CTR0
+        vpxor  TMP3, CTR1, CTR1
+        vpxor  TMP3, CTR2, CTR2
+        vpxor  TMP3, CTR3, CTR3
+        vpxor  TMP3, CTR4, CTR4
+        vpxor  TMP3, CTR5, CTR5
+        vpxor  TMP3, CTR6, CTR6
+        vpxor  TMP3, CTR7, CTR7
+
+        vmovdqu     7*16(CT), TMP5
+        vpshufb     .Lbswap_mask(%rip), TMP5, TMP5
+        vmovdqu     16*0(Htbl), TMP3
+        vpclmulqdq  $0x11, TMP3, TMP5, TMP1
+        vpclmulqdq  $0x00, TMP3, TMP5, TMP2      
+        vpshufd     $78, TMP5, TMP3
+        vpxor       TMP5, TMP3, TMP5
+        vmovdqu     128+0*16(Htbl), TMP3      
+        vpclmulqdq  $0x00, TMP3, TMP5, TMP0
+
+        ROUND 1
+        DEC_KARATSUBA 1
+
+        ROUND 2
+        DEC_KARATSUBA 2
+
+        ROUND 3
+        DEC_KARATSUBA 3
+
+        ROUND 4
+        DEC_KARATSUBA 4
+
+        ROUND 5
+        DEC_KARATSUBA 5
+
+        ROUND 6
+        DEC_KARATSUBA 6
+
+        ROUND 7
+
+        vmovdqu     0*16(CT), TMP5
+        vpshufb     .Lbswap_mask(%rip), TMP5, TMP5
+        vpxor       T, TMP5, TMP5
+        vmovdqu     16*7(Htbl), TMP4
+            
+        vpclmulqdq  $0x11, TMP4, TMP5, TMP3
+        vpxor       TMP3, TMP1, TMP1
+        vpclmulqdq  $0x00, TMP4, TMP5, TMP3
+        vpxor       TMP3, TMP2, TMP2
+
+        vpshufd     $78, TMP5, TMP3
+        vpxor       TMP5, TMP3, TMP5
+        vmovdqu     128+7*16(Htbl), TMP4
+
+        vpclmulqdq  $0x00, TMP4, TMP5, TMP3
+        vpxor       TMP3, TMP0, TMP0
+
+        ROUND 8      
+
+        vpxor       TMP1, TMP0, TMP0
+        vpxor       TMP2, TMP0, TMP0
+
+        vpsrldq     $8, TMP0, TMP3
+        vpxor       TMP3, TMP1, TMP4
+        vpslldq     $8, TMP0, TMP3
+        vpxor       TMP3, TMP2, T
+        vmovdqa	  .Lpoly(%rip), TMP2
+
+        vpalignr    $8, T, T, TMP1
+        vpclmulqdq  $0x10, TMP2, T, T
+        vpxor       T, TMP1, T
+
+        ROUND 9
+
+        vpalignr    $8, T, T, TMP1
+        vpclmulqdq  $0x10, TMP2, T, T
+        vpxor       T, TMP1, T
+
+        vmovdqu     160(KS), TMP5
+        cmp         $10, NR
+
+        jbe  .LDECLast1
+
+        ROUND 10
+        ROUND 11
+
+        vmovdqu     192(KS), TMP5
+        cmp         $12, NR       
+
+        jbe  .LDECLast1
+
+        ROUND 12
+        ROUND 13
+
+        vmovdqu  224(KS), TMP5
+
+.LDECLast1:      
+      
+        vpxor   (CT), TMP5, TMP3
+        vaesenclast TMP3, CTR0, CTR0
+        vpxor   16(CT), TMP5, TMP3
+        vaesenclast TMP3, CTR1, CTR1
+        vpxor   32(CT), TMP5, TMP3
+        vaesenclast TMP3, CTR2, CTR2
+        vpxor   48(CT), TMP5, TMP3
+        vaesenclast TMP3, CTR3, CTR3
+        vpxor   64(CT), TMP5, TMP3
+        vaesenclast TMP3, CTR4, CTR4
+        vpxor   80(CT), TMP5, TMP3
+        vaesenclast TMP3, CTR5, CTR5
+        vpxor   96(CT), TMP5, TMP3
+        vaesenclast TMP3, CTR6, CTR6
+        vpxor   112(CT), TMP5, TMP3
+        vaesenclast TMP3, CTR7, CTR7
+
+        vmovdqu .Lbswap_mask(%rip), TMP3
+
+        vmovdqu CTR0, (PT)
+        vmovdqu CTR1, 16(PT)
+        vmovdqu CTR2, 32(PT)
+        vmovdqu CTR3, 48(PT)
+        vmovdqu CTR4, 64(PT)
+        vmovdqu CTR5, 80(PT)
+        vmovdqu CTR6, 96(PT)
+        vmovdqu CTR7,112(PT)
+
+        vpxor   TMP4, T, T
+
+        lea 128(CT), CT
+        lea 128(PT), PT
+   jmp  .LDECOctets
+   
+#Here we decrypt and hash any remaining whole block
+.LDECSingles:
+
+    cmp   $16, len
+    jb    .LDECTail
+    sub   $16, len
+
+    vmovdqu  (CT), TMP1
+    vpshufb  .Lbswap_mask(%rip), TMP1, TMP1
+    vpxor    TMP1, T, T
+    vmovdqu  (Htbl), TMP0
+    call     GFMUL
+
+
+    vpshufb  .Lbswap_mask(%rip), CTR, TMP1
+    vpaddd   .Lone(%rip), CTR, CTR
+
+    vpxor    (KS), TMP1, TMP1
+    vaesenc  16*1(KS), TMP1, TMP1
+    vaesenc  16*2(KS), TMP1, TMP1
+    vaesenc  16*3(KS), TMP1, TMP1
+    vaesenc  16*4(KS), TMP1, TMP1
+    vaesenc  16*5(KS), TMP1, TMP1
+    vaesenc  16*6(KS), TMP1, TMP1
+    vaesenc  16*7(KS), TMP1, TMP1
+    vaesenc  16*8(KS), TMP1, TMP1
+    vaesenc  16*9(KS), TMP1, TMP1
+    vmovdqu  16*10(KS), TMP2
+    cmp      $10, NR
+    je       .LDECLast2
+    vaesenc  16*10(KS), TMP1, TMP1
+    vaesenc  16*11(KS), TMP1, TMP1
+    vmovdqu  16*12(KS), TMP2
+    cmp      $12, NR
+    je       .LDECLast2
+    vaesenc  16*12(KS), TMP1, TMP1
+    vaesenc  16*13(KS), TMP1, TMP1
+    vmovdqu  16*14(KS), TMP2
+.LDECLast2:
+    vaesenclast TMP2, TMP1, TMP1
+
+    vpxor    (CT), TMP1, TMP1
+    vmovdqu  TMP1, (PT)
+    addq     $16, CT
+    addq     $16, PT  
+    jmp   .LDECSingles
+
+#Here we decrypt the final partial block, if there is one
+.LDECTail:
+   test   len, len
+   jz     .LDEC_END
+
+   vpshufb  .Lbswap_mask(%rip), CTR, TMP1
+   vpaddd .Lone(%rip), CTR, CTR
+
+   vpxor  (KS), TMP1, TMP1
+   vaesenc  16*1(KS), TMP1, TMP1
+   vaesenc  16*2(KS), TMP1, TMP1
+   vaesenc  16*3(KS), TMP1, TMP1
+   vaesenc  16*4(KS), TMP1, TMP1
+   vaesenc  16*5(KS), TMP1, TMP1
+   vaesenc  16*6(KS), TMP1, TMP1
+   vaesenc  16*7(KS), TMP1, TMP1
+   vaesenc  16*8(KS), TMP1, TMP1
+   vaesenc  16*9(KS), TMP1, TMP1
+   vmovdqu  16*10(KS), TMP2
+   cmp      $10, NR
+   je       .LDECLast3
+   vaesenc  16*10(KS), TMP1, TMP1
+   vaesenc  16*11(KS), TMP1, TMP1
+   vmovdqu  16*12(KS), TMP2
+   cmp      $12, NR
+   je       .LDECLast3
+   vaesenc  16*12(KS), TMP1, TMP1
+   vaesenc  16*13(KS), TMP1, TMP1
+   vmovdqu  16*14(KS), TMP2
+
+.LDECLast3:
+   vaesenclast TMP2, TMP1, TMP1
+  
+   vpxor   TMP2, TMP2, TMP2
+   vmovdqa TMP2, (%rsp) 
+# Copy the required bytes only (could probably use rep movsb)
+    xor KS, KS  
+.LDecCpy:
+        cmp     KS, len
+        je      .LDecCpy2
+        movb    (CT, KS, 1), %r8b
+        movb    %r8b, (%rsp, KS, 1)
+        inc     KS
+        jmp     .LDecCpy
+.LDecCpy2:
+        cmp     $16, KS
+        je      .LDecCpyEnd
+        movb    $0, (%rsp, KS, 1)
+        inc     KS
+        jmp     .LDecCpy2
+.LDecCpyEnd:
+# Xor with the counter block
+    vmovdqa (%rsp), TMP0
+    vpxor   TMP0, TMP1, TMP1
+# Again, store at temp location
+    vmovdqa TMP1, (%rsp)
+# Copy only the required bytes to PT, and zero the rest for the hash
+    xor KS, KS
+.LDecCpy3:
+    cmp     KS, len
+    je      .LDecCpyEnd3
+    movb    (%rsp, KS, 1), %r8b
+    movb    %r8b, (PT, KS, 1)
+    inc     KS
+    jmp     .LDecCpy3
+.LDecCpyEnd3:
+   vpshufb  .Lbswap_mask(%rip), TMP0, TMP0
+   vpxor    TMP0, T, T
+   vmovdqu  (Htbl), TMP0
+   call     GFMUL
+.LDEC_END:
+
+   vpshufb  .Lbswap_mask(%rip), T, T
+   vpshufb  .Lbswap_mask(%rip), CTR, CTR
+   vmovdqu  T, 272(Gctx)
+   vmovdqu  CTR, 288(Gctx)
+
+   movq   %rbp, %rsp
+
+   popq   %rbx
+   popq   %rbp
+   ret
+  .size intel_aes_gcmDEC, .-intel_aes_gcmDEC
+#########################
+# GFMUL: carry-less product a*b, folded to 128 bits with the constant .Lpoly
+# a = T, b = TMP0 - remains unchanged
+# res = T
+# uses also TMP1,TMP2,TMP3,TMP4
+# __m128i GFMUL(__m128i A, __m128i B);
+.type GFMUL,@function
+.globl GFMUL
+GFMUL:  
+    vpclmulqdq  $0x00, TMP0, T, TMP1
+    vpclmulqdq  $0x11, TMP0, T, TMP4
+# Above: TMP1 = a.lo*b.lo, TMP4 = a.hi*b.hi. Below: $78 swaps 64-bit halves.
+    vpshufd     $78, T, TMP2
+    vpshufd     $78, TMP0, TMP3
+    vpxor       T, TMP2, TMP2
+    vpxor       TMP0, TMP3, TMP3
+# Karatsuba middle term: (a.lo^a.hi)*(b.lo^b.hi) ^ lo*lo ^ hi*hi
+    vpclmulqdq  $0x00, TMP3, TMP2, TMP2
+    vpxor       TMP1, TMP2, TMP2
+    vpxor       TMP4, TMP2, TMP2
+# Split the middle term into its low half shifted up and its high half ...
+    vpslldq     $8, TMP2, TMP3
+    vpsrldq     $8, TMP2, TMP2
+# ... and add them in, so TMP4:TMP1 holds the 256-bit product
+    vpxor       TMP3, TMP1, TMP1
+    vpxor       TMP2, TMP4, TMP4
+# Two identical folding steps of the low 128 bits against .Lpoly
+    vpclmulqdq  $0x10, .Lpoly(%rip), TMP1, TMP2
+    vpshufd     $78, TMP1, TMP3
+    vpxor       TMP3, TMP2, TMP1
+
+    vpclmulqdq  $0x10, .Lpoly(%rip), TMP1, TMP2
+    vpshufd     $78, TMP1, TMP3
+    vpxor       TMP3, TMP2, TMP1
+# Combine the folded low half with the high half to form the result
+    vpxor       TMP4, TMP1, T
+    ret
+.size GFMUL, .-GFMUL
+
diff --git a/security/nss/lib/freebl/jpake.c b/security/nss/lib/freebl/jpake.c
new file mode 100644
index 000000000..741c7a876
--- /dev/null
+++ b/security/nss/lib/freebl/jpake.c
@@ -0,0 +1,495 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "secerr.h"
+#include "secitem.h"
+#include "secmpi.h"
+
+/* Feed one length-prefixed item into the hash: a 2-byte length in network
+ * byte order, followed by the item's bytes. Items of 2^16 bytes or more are
+ * rejected with MP_BADARG before anything is hashed. This is designed to
+ * match the OpenSSL J-PAKE implementation. */
+static mp_err
+hashSECItem(HASHContext *hash, const SECItem *it)
+{
+    unsigned char prefix[2];
+
+    if (it->len > 0xffff) {
+        return MP_BADARG;
+    }
+    prefix[0] = (unsigned char)(it->len >> 8);
+    prefix[1] = (unsigned char)(it->len);
+    hash->hashobj->update(hash->hash_context, prefix, sizeof prefix);
+    hash->hashobj->update(hash->hash_context, it->data, it->len);
+    return MP_OKAY;
+}
+
+/* Compute h = H(g, gv, gx, signerID): each item is hashed behind its 2-byte
+   length prefix (via hashSECItem) and the digest is then read into *h. */
+static mp_err
+hashPublicParams(HASH_HashType hashType, const SECItem *g,
+                 const SECItem *gv, const SECItem *gx,
+                 const SECItem *signerID, mp_int *h)
+{
+    mp_err err;
+    unsigned char hBuf[HASH_LENGTH_MAX];
+    SECItem hItem;
+    HASHContext hash;
+
+    hash.hashobj = HASH_GetRawHashObject(hashType);
+    if (hash.hashobj == NULL || hash.hashobj->length > sizeof hBuf) {
+        return MP_BADARG; /* unknown hash, or its digest would not fit hBuf */
+    }
+    hash.hash_context = hash.hashobj->create();
+    if (hash.hash_context == NULL) {
+        return MP_MEM;
+    }
+    /* hItem describes hBuf; end() below is handed &hItem.len as well. */
+    hItem.data = hBuf;
+    hItem.len = hash.hashobj->length;
+
+    hash.hashobj->begin(hash.hash_context);
+    CHECK_MPI_OK(hashSECItem(&hash, g)); /* a failure leaves via cleanup */
+    CHECK_MPI_OK(hashSECItem(&hash, gv));
+    CHECK_MPI_OK(hashSECItem(&hash, gx));
+    CHECK_MPI_OK(hashSECItem(&hash, signerID));
+    hash.hashobj->end(hash.hash_context, hItem.data, &hItem.len,
+                      sizeof hBuf);
+    SECITEM_TO_MPINT(hItem, h);
+    /* Shared exit for success and failure: the hash context is destroyed. */
+cleanup:
+    if (hash.hash_context != NULL) {
+        hash.hashobj->destroy(hash.hash_context, PR_TRUE);
+    }
+
+    return err;
+}
+
+/* Generate a Schnorr signature for round 1 or round 2: outputs gv and r. */
+SECStatus
+JPAKE_Sign(PLArenaPool *arena, const PQGParams *pqg, HASH_HashType hashType,
+           const SECItem *signerID, const SECItem *x,
+           const SECItem *testRandom, const SECItem *gxIn, SECItem *gxOut,
+           SECItem *gv, SECItem *r)
+{
+    SECStatus rv = SECSuccess;
+    mp_err err;
+    mp_int p;
+    mp_int q;
+    mp_int g;
+    mp_int X;
+    mp_int GX;
+    mp_int V;
+    mp_int GV;
+    mp_int h;
+    mp_int tmp;
+    mp_int R;
+    SECItem v;
+    /* gxIn and gxOut are mutually exclusive; gxOut, gv and r are outputs. */
+    if (!arena ||
+        !pqg || !pqg->prime.data || pqg->prime.len == 0 ||
+        !pqg->subPrime.data || pqg->subPrime.len == 0 ||
+        !pqg->base.data || pqg->base.len == 0 ||
+        !signerID || !signerID->data || signerID->len == 0 ||
+        !x || !x->data || x->len == 0 ||
+        (testRandom && (!testRandom->data || testRandom->len == 0)) ||
+        (gxIn == NULL && (!gxOut || gxOut->data != NULL)) ||
+        (gxIn != NULL && (!gxIn->data || gxIn->len == 0 || gxOut != NULL)) ||
+        !gv || gv->data != NULL ||
+        !r || r->data != NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* Presumably zeroed so mp_clear in cleanup is safe if an mp_init is skipped */
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&g) = 0;
+    MP_DIGITS(&X) = 0;
+    MP_DIGITS(&GX) = 0;
+    MP_DIGITS(&V) = 0;
+    MP_DIGITS(&GV) = 0;
+    MP_DIGITS(&h) = 0;
+    MP_DIGITS(&tmp) = 0;
+    MP_DIGITS(&R) = 0;
+
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&g));
+    CHECK_MPI_OK(mp_init(&X));
+    CHECK_MPI_OK(mp_init(&GX));
+    CHECK_MPI_OK(mp_init(&V));
+    CHECK_MPI_OK(mp_init(&GV));
+    CHECK_MPI_OK(mp_init(&h));
+    CHECK_MPI_OK(mp_init(&tmp));
+    CHECK_MPI_OK(mp_init(&R));
+
+    SECITEM_TO_MPINT(pqg->prime, &p);
+    SECITEM_TO_MPINT(pqg->subPrime, &q);
+    SECITEM_TO_MPINT(pqg->base, &g);
+    SECITEM_TO_MPINT(*x, &X);
+
+    /* gx = g^x (mod p): computed into gxOut unless the caller supplied gxIn */
+    if (gxIn == NULL) {
+        CHECK_MPI_OK(mp_exptmod(&g, &X, &p, &GX));
+        MPINT_TO_SECITEM(&GX, gxOut, arena);
+        gxIn = gxOut;
+    } else {
+        SECITEM_TO_MPINT(*gxIn, &GX);
+    }
+
+    /* v is the random exponent: testRandom if given, else from DSA_NewRandom */
+    if (testRandom == NULL) {
+        v.data = NULL;
+        rv = DSA_NewRandom(arena, &pqg->subPrime, &v);
+        if (rv != SECSuccess) {
+            goto cleanup;
+        }
+    } else {
+        v.data = testRandom->data;
+        v.len = testRandom->len;
+    }
+    SECITEM_TO_MPINT(v, &V);
+
+    /* gv = g^v (mod p), random v, 1 <= v < q */
+    CHECK_MPI_OK(mp_exptmod(&g, &V, &p, &GV));
+    MPINT_TO_SECITEM(&GV, gv, arena);
+
+    /* h = H(g, gv, gx, signerID) */
+    CHECK_MPI_OK(hashPublicParams(hashType, &pqg->base, gv, gxIn, signerID,
+                                  &h));
+
+    /* r = v - x*h (mod q) */
+    CHECK_MPI_OK(mp_mulmod(&X, &h, &q, &tmp));
+    CHECK_MPI_OK(mp_submod(&V, &tmp, &q, &R));
+    MPINT_TO_SECITEM(&R, r, arena);
+
+cleanup:
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&g);
+    mp_clear(&X);
+    mp_clear(&GX);
+    mp_clear(&V);
+    mp_clear(&GV);
+    mp_clear(&h);
+    mp_clear(&tmp);
+    mp_clear(&R);
+    /* An MPI error is reported only if no SECStatus failure was recorded. */
+    if (rv == SECSuccess && err != MP_OKAY) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/* Verify a Schnorr signature generated by the peer in round 1 or round 2. */
+SECStatus
+JPAKE_Verify(PLArenaPool *arena, const PQGParams *pqg, HASH_HashType hashType,
+             const SECItem *signerID, const SECItem *peerID,
+             const SECItem *gx, const SECItem *gv, const SECItem *r)
+{
+    SECStatus rv = SECSuccess;
+    mp_err err;
+    mp_int p;
+    mp_int q;
+    mp_int g;
+    mp_int p_minus_1;
+    mp_int GX;
+    mp_int h;
+    mp_int one;
+    mp_int R;
+    mp_int gr;
+    mp_int gxh;
+    mp_int gr_gxh;
+    SECItem calculated;
+    /* signerID is only compared against peerID; the hash below uses peerID. */
+    if (!arena ||
+        !pqg || !pqg->prime.data || pqg->prime.len == 0 ||
+        !pqg->subPrime.data || pqg->subPrime.len == 0 ||
+        !pqg->base.data || pqg->base.len == 0 ||
+        !signerID || !signerID->data || signerID->len == 0 ||
+        !peerID || !peerID->data || peerID->len == 0 ||
+        !gx || !gx->data || gx->len == 0 ||
+        !gv || !gv->data || gv->len == 0 ||
+        !r || !r->data || r->len == 0 ||
+        SECITEM_CompareItem(signerID, peerID) == SECEqual) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* Presumably zeroed so mp_clear in cleanup is safe if an mp_init is skipped */
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&g) = 0;
+    MP_DIGITS(&p_minus_1) = 0;
+    MP_DIGITS(&GX) = 0;
+    MP_DIGITS(&h) = 0;
+    MP_DIGITS(&one) = 0;
+    MP_DIGITS(&R) = 0;
+    MP_DIGITS(&gr) = 0;
+    MP_DIGITS(&gxh) = 0;
+    MP_DIGITS(&gr_gxh) = 0;
+    calculated.data = NULL;
+
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&g));
+    CHECK_MPI_OK(mp_init(&p_minus_1));
+    CHECK_MPI_OK(mp_init(&GX));
+    CHECK_MPI_OK(mp_init(&h));
+    CHECK_MPI_OK(mp_init(&one));
+    CHECK_MPI_OK(mp_init(&R));
+    CHECK_MPI_OK(mp_init(&gr));
+    CHECK_MPI_OK(mp_init(&gxh));
+    CHECK_MPI_OK(mp_init(&gr_gxh));
+
+    SECITEM_TO_MPINT(pqg->prime, &p);
+    SECITEM_TO_MPINT(pqg->subPrime, &q);
+    SECITEM_TO_MPINT(pqg->base, &g);
+    SECITEM_TO_MPINT(*gx, &GX);
+    SECITEM_TO_MPINT(*r, &R);
+    /* "one" receives (g^x)^q mod p, which the check below expects to be 1. */
+    CHECK_MPI_OK(mp_sub_d(&p, 1, &p_minus_1));
+    CHECK_MPI_OK(mp_exptmod(&GX, &q, &p, &one));
+    /* Check g^x is in [1, p-2], R is in [0, q-1], and (g^x)^q mod p == 1 */
+    if (!(mp_cmp_z(&GX) > 0 &&
+          mp_cmp(&GX, &p_minus_1) < 0 &&
+          mp_cmp(&R, &q) < 0 &&
+          mp_cmp_d(&one, 1) == 0)) {
+        goto badSig;
+    }
+    /* h = H(g, gv, gx, peerID) */
+    CHECK_MPI_OK(hashPublicParams(hashType, &pqg->base, gv, gx, peerID,
+                                  &h));
+
+    /* Calculate g^v = g^r * (g^x)^h (mod p) */
+    CHECK_MPI_OK(mp_exptmod(&g, &R, &p, &gr));
+    CHECK_MPI_OK(mp_exptmod(&GX, &h, &p, &gxh));
+    CHECK_MPI_OK(mp_mulmod(&gr, &gxh, &p, &gr_gxh));
+
+    /* Compare calculated g^v to given g^v */
+    MPINT_TO_SECITEM(&gr_gxh, &calculated, arena);
+    if (calculated.len == gv->len &&
+        NSS_SecureMemcmp(calculated.data, gv->data, calculated.len) == 0) {
+        rv = SECSuccess;
+    } else {
+    badSig: /* also the target of the range-check failure above */
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        rv = SECFailure;
+    }
+
+cleanup:
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&g);
+    mp_clear(&p_minus_1);
+    mp_clear(&GX);
+    mp_clear(&h);
+    mp_clear(&one);
+    mp_clear(&R);
+    mp_clear(&gr);
+    mp_clear(&gxh);
+    mp_clear(&gr_gxh);
+    /* An MPI error is reported only if no SECStatus failure was recorded. */
+    if (rv == SECSuccess && err != MP_OKAY) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/* Calculate base = gx1*gx3*gx4 (mod p), i.e. g^(x1+x3+x4) (mod p).
+ * Returns MP_OKAY, MP_BADARG if gx3 == gx4, or the first failing MPI error. */
+static mp_err
+jpake_Round2Base(const SECItem *gx1, const SECItem *gx3,
+                 const SECItem *gx4, const mp_int *p, mp_int *base)
+{
+    mp_err err;
+    mp_int GX1;
+    mp_int GX3;
+    mp_int GX4;
+    mp_int tmp;
+
+    MP_DIGITS(&GX1) = 0;
+    MP_DIGITS(&GX3) = 0;
+    MP_DIGITS(&GX4) = 0;
+    MP_DIGITS(&tmp) = 0;
+
+    CHECK_MPI_OK(mp_init(&GX1));
+    CHECK_MPI_OK(mp_init(&GX3));
+    CHECK_MPI_OK(mp_init(&GX4));
+    CHECK_MPI_OK(mp_init(&tmp));
+
+    SECITEM_TO_MPINT(*gx1, &GX1);
+    SECITEM_TO_MPINT(*gx3, &GX3);
+    SECITEM_TO_MPINT(*gx4, &GX4);
+
+    /* Round 2 requires the peer-supplied g^x3 and g^x4 to be distinct. */
+    if (mp_cmp(&GX3, &GX4) == 0) {
+        err = MP_BADARG;
+        goto cleanup; /* not a bare return: the mp_ints must be cleared */
+    }
+    CHECK_MPI_OK(mp_mul(&GX1, &GX3, &tmp));
+    CHECK_MPI_OK(mp_mul(&tmp, &GX4, &tmp));
+    CHECK_MPI_OK(mp_mod(&tmp, p, base));
+
+cleanup:
+    mp_clear(&GX1);
+    mp_clear(&GX3);
+    mp_clear(&GX4);
+    mp_clear(&tmp);
+    return err;
+}
+
+SECStatus
+JPAKE_Round2(PLArenaPool *arena,
+             const SECItem *p, const SECItem *q, const SECItem *gx1,
+             const SECItem *gx3, const SECItem *gx4, SECItem *base,
+             const SECItem *x2, const SECItem *s, SECItem *x2s)
+{
+    mp_err err;
+    mp_int P;
+    mp_int Q;
+    mp_int X2;
+    mp_int S;
+    mp_int result;
+    /* base = gx1*gx3*gx4 (mod p); if x2s != NULL, also x2s = x2*s (mod q). */
+    if (!arena ||
+        !p || !p->data || p->len == 0 ||
+        !q || !q->data || q->len == 0 ||
+        !gx1 || !gx1->data || gx1->len == 0 ||
+        !gx3 || !gx3->data || gx3->len == 0 ||
+        !gx4 || !gx4->data || gx4->len == 0 ||
+        !base || base->data != NULL ||
+        (x2s != NULL && (x2s->data != NULL ||
+                         !x2 || !x2->data || x2->len == 0 ||
+                         !s || !s->data || s->len == 0))) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* Presumably zeroed so mp_clear in cleanup is safe if an mp_init is skipped */
+    MP_DIGITS(&P) = 0;
+    MP_DIGITS(&Q) = 0;
+    MP_DIGITS(&X2) = 0;
+    MP_DIGITS(&S) = 0;
+    MP_DIGITS(&result) = 0;
+
+    CHECK_MPI_OK(mp_init(&P));
+    CHECK_MPI_OK(mp_init(&Q));
+    CHECK_MPI_OK(mp_init(&result));
+    /* Optional part: X2, S and the value of Q are only set up on this path. */
+    if (x2s != NULL) {
+        CHECK_MPI_OK(mp_init(&X2));
+        CHECK_MPI_OK(mp_init(&S));
+
+        SECITEM_TO_MPINT(*q, &Q);
+        SECITEM_TO_MPINT(*x2, &X2);
+
+        SECITEM_TO_MPINT(*s, &S);
+        /* S must be in [1, Q-1] */
+        if (mp_cmp_z(&S) <= 0 || mp_cmp(&S, &Q) >= 0) {
+            err = MP_BADARG;
+            goto cleanup;
+        }
+
+        CHECK_MPI_OK(mp_mulmod(&X2, &S, &Q, &result));
+        MPINT_TO_SECITEM(&result, x2s, arena);
+    }
+    /* result is reused here: it now receives the round-2 base. */
+    SECITEM_TO_MPINT(*p, &P);
+    CHECK_MPI_OK(jpake_Round2Base(gx1, gx3, gx4, &P, &result));
+    MPINT_TO_SECITEM(&result, base, arena);
+
+cleanup:
+    mp_clear(&P);
+    mp_clear(&Q);
+    mp_clear(&X2);
+    mp_clear(&S);
+    mp_clear(&result);
+
+    if (err != MP_OKAY) {
+        MP_TO_SEC_ERROR(err);
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+SECStatus
+JPAKE_Final(PLArenaPool *arena, const SECItem *p, const SECItem *q,
+            const SECItem *x2, const SECItem *gx4, const SECItem *x2s,
+            const SECItem *B, SECItem *K)
+{
+    mp_err err;
+    mp_int P;
+    mp_int Q;
+    mp_int tmp;
+    mp_int exponent;
+    mp_int divisor;
+    mp_int base;
+    /* Computes K = (B / gx4^x2s)^x2 (mod p), following the steps below. */
+    if (!arena ||
+        !p || !p->data || p->len == 0 ||
+        !q || !q->data || q->len == 0 ||
+        !x2 || !x2->data || x2->len == 0 ||
+        !gx4 || !gx4->data || gx4->len == 0 ||
+        !x2s || !x2s->data || x2s->len == 0 ||
+        !B || !B->data || B->len == 0 ||
+        !K || K->data != NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* Presumably zeroed so mp_clear in cleanup is safe if an mp_init is skipped */
+    MP_DIGITS(&P) = 0;
+    MP_DIGITS(&Q) = 0;
+    MP_DIGITS(&tmp) = 0;
+    MP_DIGITS(&exponent) = 0;
+    MP_DIGITS(&divisor) = 0;
+    MP_DIGITS(&base) = 0;
+
+    CHECK_MPI_OK(mp_init(&P));
+    CHECK_MPI_OK(mp_init(&Q));
+    CHECK_MPI_OK(mp_init(&tmp));
+    CHECK_MPI_OK(mp_init(&exponent));
+    CHECK_MPI_OK(mp_init(&divisor));
+    CHECK_MPI_OK(mp_init(&base));
+
+    /* exponent = -x2s (mod q) */
+    SECITEM_TO_MPINT(*q, &Q);
+    SECITEM_TO_MPINT(*x2s, &tmp);
+    /*  q == 0 (mod q), so q - x2s == -x2s (mod q) */
+    CHECK_MPI_OK(mp_sub(&Q, &tmp, &exponent));
+    /* NOTE(review): x2s > q makes exponent negative; confirm that is rejected */
+    /* divisor = gx4^-x2s = 1/(gx4^x2s) (mod p) */
+    SECITEM_TO_MPINT(*p, &P);
+    SECITEM_TO_MPINT(*gx4, &tmp);
+    CHECK_MPI_OK(mp_exptmod(&tmp, &exponent, &P, &divisor));
+
+    /* base = B*divisor = B/(gx4^x2s) (mod p) */
+    SECITEM_TO_MPINT(*B, &tmp);
+    CHECK_MPI_OK(mp_mulmod(&divisor, &tmp, &P, &base));
+
+    /* tmp = base^x2 (mod p); exponent is reused to hold x2 */
+    SECITEM_TO_MPINT(*x2, &exponent);
+    CHECK_MPI_OK(mp_exptmod(&base, &exponent, &P, &tmp));
+
+    MPINT_TO_SECITEM(&tmp, K, arena);
+
+cleanup:
+    mp_clear(&P);
+    mp_clear(&Q);
+    mp_clear(&tmp);
+    mp_clear(&exponent);
+    mp_clear(&divisor);
+    mp_clear(&base);
+
+    if (err != MP_OKAY) {
+        MP_TO_SEC_ERROR(err);
+        return SECFailure;
+    }
+    return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/ldvector.c b/security/nss/lib/freebl/ldvector.c
new file mode 100644
index 000000000..2447a0c9f
--- /dev/null
+++ b/security/nss/lib/freebl/ldvector.c
@@ -0,0 +1,353 @@
+/*
+ * ldvector.c - platform dependent DSO containing freebl implementation.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+extern int FREEBL_InitStubs(void);
+#endif
+
+#include "loader.h"
+#include "alghmac.h"
+#include "hmacct.h"
+#include "blapii.h"
+
+static const struct FREEBLVectorStr vector =
+    {
+      /* Positional: order must match the members of FREEBLVectorStr. */
+      sizeof vector,
+      FREEBL_VERSION,
+
+      RSA_NewKey,
+      RSA_PublicKeyOp,
+      RSA_PrivateKeyOp,
+      DSA_NewKey,
+      DSA_SignDigest,
+      DSA_VerifyDigest,
+      DSA_NewKeyFromSeed,
+      DSA_SignDigestWithSeed,
+      DH_GenParam,
+      DH_NewKey,
+      DH_Derive,
+      KEA_Derive,
+      KEA_Verify,
+      RC4_CreateContext,
+      RC4_DestroyContext,
+      RC4_Encrypt,
+      RC4_Decrypt,
+      RC2_CreateContext,
+      RC2_DestroyContext,
+      RC2_Encrypt,
+      RC2_Decrypt,
+      RC5_CreateContext,
+      RC5_DestroyContext,
+      RC5_Encrypt,
+      RC5_Decrypt,
+      DES_CreateContext,
+      DES_DestroyContext,
+      DES_Encrypt,
+      DES_Decrypt,
+      AES_CreateContext,
+      AES_DestroyContext,
+      AES_Encrypt,
+      AES_Decrypt,
+      MD5_Hash,
+      MD5_HashBuf,
+      MD5_NewContext,
+      MD5_DestroyContext,
+      MD5_Begin,
+      MD5_Update,
+      MD5_End,
+      MD5_FlattenSize,
+      MD5_Flatten,
+      MD5_Resurrect,
+      MD5_TraceState,
+      MD2_Hash,
+      MD2_NewContext,
+      MD2_DestroyContext,
+      MD2_Begin,
+      MD2_Update,
+      MD2_End,
+      MD2_FlattenSize,
+      MD2_Flatten,
+      MD2_Resurrect,
+      SHA1_Hash,
+      SHA1_HashBuf,
+      SHA1_NewContext,
+      SHA1_DestroyContext,
+      SHA1_Begin,
+      SHA1_Update,
+      SHA1_End,
+      SHA1_TraceState,
+      SHA1_FlattenSize,
+      SHA1_Flatten,
+      SHA1_Resurrect,
+      RNG_RNGInit,
+      RNG_RandomUpdate,
+      RNG_GenerateGlobalRandomBytes,
+      RNG_RNGShutdown,
+      PQG_ParamGen,
+      PQG_ParamGenSeedLen,
+      PQG_VerifyParams,
+
+      /* End of Version 3.001. */
+
+      RSA_PrivateKeyOpDoubleChecked,
+      RSA_PrivateKeyCheck,
+      BL_Cleanup,
+
+      /* End of Version 3.002. */
+
+      SHA256_NewContext,
+      SHA256_DestroyContext,
+      SHA256_Begin,
+      SHA256_Update,
+      SHA256_End,
+      SHA256_HashBuf,
+      SHA256_Hash,
+      SHA256_TraceState,
+      SHA256_FlattenSize,
+      SHA256_Flatten,
+      SHA256_Resurrect,
+
+      SHA512_NewContext,
+      SHA512_DestroyContext,
+      SHA512_Begin,
+      SHA512_Update,
+      SHA512_End,
+      SHA512_HashBuf,
+      SHA512_Hash,
+      SHA512_TraceState,
+      SHA512_FlattenSize,
+      SHA512_Flatten,
+      SHA512_Resurrect,
+
+      SHA384_NewContext,
+      SHA384_DestroyContext,
+      SHA384_Begin,
+      SHA384_Update,
+      SHA384_End,
+      SHA384_HashBuf,
+      SHA384_Hash,
+      SHA384_TraceState,
+      SHA384_FlattenSize,
+      SHA384_Flatten,
+      SHA384_Resurrect,
+
+      /* End of Version 3.003. */
+
+      AESKeyWrap_CreateContext,
+      AESKeyWrap_DestroyContext,
+      AESKeyWrap_Encrypt,
+      AESKeyWrap_Decrypt,
+
+      /* End of Version 3.004. */
+
+      BLAPI_SHVerify,
+      BLAPI_VerifySelf,
+
+      /* End of Version 3.005. */
+
+      EC_NewKey,
+      EC_NewKeyFromSeed,
+      EC_ValidatePublicKey,
+      ECDH_Derive,
+      ECDSA_SignDigest,
+      ECDSA_VerifyDigest,
+      ECDSA_SignDigestWithSeed,
+
+      /* End of Version 3.006. */
+      /* End of Version 3.007. */
+
+      AES_InitContext,
+      AESKeyWrap_InitContext,
+      DES_InitContext,
+      RC2_InitContext,
+      RC4_InitContext,
+
+      AES_AllocateContext,
+      AESKeyWrap_AllocateContext,
+      DES_AllocateContext,
+      RC2_AllocateContext,
+      RC4_AllocateContext,
+
+      MD2_Clone,
+      MD5_Clone,
+      SHA1_Clone,
+      SHA256_Clone,
+      SHA384_Clone,
+      SHA512_Clone,
+
+      TLS_PRF,
+      HASH_GetRawHashObject,
+
+      HMAC_Create,
+      HMAC_Init,
+      HMAC_Begin,
+      HMAC_Update,
+      HMAC_Clone,
+      HMAC_Finish,
+      HMAC_Destroy,
+
+      RNG_SystemInfoForRNG,
+
+      /* End of Version 3.008. */
+
+      FIPS186Change_GenerateX,
+      FIPS186Change_ReduceModQForDSA,
+
+      /* End of Version 3.009. */
+      Camellia_InitContext,
+      Camellia_AllocateContext,
+      Camellia_CreateContext,
+      Camellia_DestroyContext,
+      Camellia_Encrypt,
+      Camellia_Decrypt,
+
+      PQG_DestroyParams,
+      PQG_DestroyVerify,
+
+      /* End of Version 3.010. */
+
+      SEED_InitContext,
+      SEED_AllocateContext,
+      SEED_CreateContext,
+      SEED_DestroyContext,
+      SEED_Encrypt,
+      SEED_Decrypt,
+
+      BL_Init,
+      BL_SetForkState,
+
+      PRNGTEST_Instantiate,
+      PRNGTEST_Reseed,
+      PRNGTEST_Generate,
+
+      PRNGTEST_Uninstantiate,
+
+      /* End of Version 3.011. */
+
+      RSA_PopulatePrivateKey,
+
+      DSA_NewRandom,
+
+      JPAKE_Sign,
+      JPAKE_Verify,
+      JPAKE_Round2,
+      JPAKE_Final,
+
+      /* End of Version 3.012. */
+
+      TLS_P_hash,
+      SHA224_NewContext,
+      SHA224_DestroyContext,
+      SHA224_Begin,
+      SHA224_Update,
+      SHA224_End,
+      SHA224_HashBuf,
+      SHA224_Hash,
+      SHA224_TraceState,
+      SHA224_FlattenSize,
+      SHA224_Flatten,
+      SHA224_Resurrect,
+      SHA224_Clone,
+      BLAPI_SHVerifyFile,
+
+      /* End of Version 3.013. */
+
+      PQG_ParamGenV2,
+      PRNGTEST_RunHealthTests,
+
+      /* End of Version 3.014. */
+
+      HMAC_ConstantTime,
+      SSLv3_MAC_ConstantTime,
+
+      /* End of Version 3.015. */
+
+      RSA_SignRaw,
+      RSA_CheckSignRaw,
+      RSA_CheckSignRecoverRaw,
+      RSA_EncryptRaw,
+      RSA_DecryptRaw,
+      RSA_EncryptOAEP,
+      RSA_DecryptOAEP,
+      RSA_EncryptBlock,
+      RSA_DecryptBlock,
+      RSA_SignPSS,
+      RSA_CheckSignPSS,
+      RSA_Sign,
+      RSA_CheckSign,
+      RSA_CheckSignRecover,
+
+      /* End of Version 3.016. */
+
+      EC_FillParams,
+      EC_DecodeParams,
+      EC_CopyParams,
+
+      /* End of Version 3.017. */
+
+      ChaCha20Poly1305_InitContext,
+      ChaCha20Poly1305_CreateContext,
+      ChaCha20Poly1305_DestroyContext,
+      ChaCha20Poly1305_Seal,
+      ChaCha20Poly1305_Open,
+
+      /* End of Version 3.018. */
+
+      EC_GetPointSize
+
+      /* End of Version 3.019. */
+    };
+
+const FREEBLVector*
+FREEBL_GetVector(void)
+{
+#ifdef FREEBL_NO_DEPEND
+    SECStatus rv;
+#endif
+    /* Returns the freebl function table, or NULL if FREEBL_InitStubs fails. */
+#define NSS_VERSION_VARIABLE __nss_freebl_version
+#include "verref.h"
+
+#ifdef FREEBL_NO_DEPEND
+    /* this entry point is only valid if nspr and nss-util have been loaded */
+    rv = FREEBL_InitStubs();
+    if (rv != SECSuccess) {
+        return NULL;
+    }
+#endif
+    /* make sure the full self tests have been run before continuing */
+    BL_POSTRan(PR_FALSE); /* NOTE(review): any result is discarded here */
+
+    return &vector;
+}
+
+#ifdef FREEBL_LOWHASH
+static const struct NSSLOWVectorStr nssvector =
+    { /* positional: order must match the members of NSSLOWVectorStr */
+      sizeof nssvector,
+      NSSLOW_VERSION,
+      FREEBL_GetVector,
+      NSSLOW_Init,
+      NSSLOW_Shutdown,
+      NSSLOW_Reset,
+      NSSLOWHASH_NewContext,
+      NSSLOWHASH_Begin,
+      NSSLOWHASH_Update,
+      NSSLOWHASH_End,
+      NSSLOWHASH_Destroy,
+      NSSLOWHASH_Length
+    };
+/* Returns the address of the static NSSLOW table defined above. */
+const NSSLOWVector*
+NSSLOW_GetVector(void)
+{
+    /* Nothing to set up here: the POST check happens in FREEBL_GetVector()
+     * and the stub init happens in NSSLOW_Init(). */
+    return &nssvector;
+}
+#endif
diff --git a/security/nss/lib/freebl/loader.c b/security/nss/lib/freebl/loader.c
new file mode 100644
index 000000000..792171b08
--- /dev/null
+++ b/security/nss/lib/freebl/loader.c
@@ -0,0 +1,2126 @@
+/*
+ * loader.c - load platform dependent DSO containing freebl implementation.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "loader.h"
+#include "prmem.h"
+#include "prerror.h"
+#include "prinit.h"
+#include "prenv.h"
+#include "blname.c"
+
+#include "prio.h"
+#include "prprf.h"
+#include <stdio.h>
+#include "prsystem.h"
+
+static const char *NameOfThisSharedLib =
+    SHLIB_PREFIX "softokn" SOFTOKEN_SHLIB_VERSION "." SHLIB_SUFFIX;
+
+static PRLibrary *blLib = NULL; /* handle of the loaded freebl DSO, if any */
+
+#define LSB(x) ((x)&0xff) /* low byte, compared as the minor version */
+#define MSB(x) ((x) >> 8) /* remaining high bits, compared as the major */
+
+static const FREEBLVector *vector; /* function table obtained from the DSO */
+static const char *libraryName = NULL; /* name the DSO was loaded under */
+
+#include "genload.c"
+
+/* Resolve the freebl DSO name, load it and adopt its FREEBLVector. */
+/* This function must be run only once (see freebl_RunLoaderOnce). */
+static PRStatus
+freebl_LoadDSO(void)
+{
+    PRLibrary *handle;
+    const char *name = getLibName();
+
+    if (!name) {
+        PR_SetError(PR_LOAD_LIBRARY_ERROR, 0);
+        return PR_FAILURE;
+    }
+
+    handle = loader_LoadLibrary(name);
+    if (handle) {
+        PRFuncPtr address = PR_FindFunctionSymbol(handle, "FREEBL_GetVector");
+        if (address) {
+            FREEBLGetVectorFn *getVector = (FREEBLGetVectorFn *)address;
+            const FREEBLVector *dsoVector = getVector();
+            if (dsoVector) {
+                unsigned short dsoVersion = dsoVector->version;
+                unsigned short myVersion = FREEBL_VERSION;
+                if (MSB(dsoVersion) == MSB(myVersion) && /* same high byte */
+                    LSB(dsoVersion) >= LSB(myVersion) && /* low byte >= ours */
+                    dsoVector->length >= sizeof(FREEBLVector)) {
+                    vector = dsoVector; /* compatible table: adopt it */
+                    libraryName = name;
+                    blLib = handle;
+                    return PR_SUCCESS;
+                }
+            }
+        }
+        /* Unusable DSO: unload handle; blLib is only set on success above. */
+#ifdef DEBUG
+        {
+            PRStatus status = PR_UnloadLibrary(handle);
+            PORT_Assert(PR_SUCCESS == status);
+        }
+#else
+        PR_UnloadLibrary(handle);
+#endif
+    }
+    return PR_FAILURE;
+}
+
+static const PRCallOnceType pristineCallOnce; /* copied in by BL_Unload */
+static PRCallOnceType loadFreeBLOnce; /* call-once state for freebl_LoadDSO */
+
+static PRStatus
+freebl_RunLoaderOnce(void)
+{
+    /* PR_CallOnce runs freebl_LoadDSO for the first caller only and hands
+     * its recorded status to every caller, so the DSO is loaded at most
+     * once until BL_Unload resets loadFreeBLOnce. */
+    return PR_CallOnce(&loadFreeBLOnce, &freebl_LoadDSO);
+}
+
+SECStatus
+BL_Init(void)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_BL_Init();
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+RSAPrivateKey *
+RSA_NewKey(int keySizeInBits, SECItem *publicExponent)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_RSA_NewKey(keySizeInBits, publicExponent);
+    return NULL; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+RSA_PublicKeyOp(RSAPublicKey *key,
+                unsigned char *output,
+                const unsigned char *input)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_RSA_PublicKeyOp(key, output, input);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+RSA_PrivateKeyOp(RSAPrivateKey *key,
+                 unsigned char *output,
+                 const unsigned char *input)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_RSA_PrivateKeyOp(key, output, input);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+RSA_PrivateKeyOpDoubleChecked(RSAPrivateKey *key,
+                              unsigned char *output,
+                              const unsigned char *input)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_RSA_PrivateKeyOpDoubleChecked(key, output, input);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+RSA_PrivateKeyCheck(const RSAPrivateKey *key)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_RSA_PrivateKeyCheck(key);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+DSA_NewKey(const PQGParams *params, DSAPrivateKey **privKey)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_DSA_NewKey(params, privKey);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+DSA_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_DSA_SignDigest(key, signature, digest);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+DSA_VerifyDigest(DSAPublicKey *key, const SECItem *signature,
+                 const SECItem *digest)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_DSA_VerifyDigest(key, signature, digest);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+DSA_NewKeyFromSeed(const PQGParams *params, const unsigned char *seed,
+                   DSAPrivateKey **privKey)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_DSA_NewKeyFromSeed(params, seed, privKey);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+DSA_SignDigestWithSeed(DSAPrivateKey *key, SECItem *signature,
+                       const SECItem *digest, const unsigned char *seed)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_DSA_SignDigestWithSeed(key, signature, digest, seed);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+DSA_NewRandom(PLArenaPool *arena, const SECItem *q, SECItem *seed)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_DSA_NewRandom(arena, q, seed);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+DH_GenParam(int primeLen, DHParams **params)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_DH_GenParam(primeLen, params);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+DH_NewKey(DHParams *params, DHPrivateKey **privKey)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_DH_NewKey(params, privKey);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+DH_Derive(SECItem *publicValue, SECItem *prime, SECItem *privateValue,
+          SECItem *derivedSecret, unsigned int maxOutBytes)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_DH_Derive(publicValue, prime, privateValue,
+                                   derivedSecret, maxOutBytes);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+KEA_Derive(SECItem *prime, SECItem *public1, SECItem *public2,
+           SECItem *private1, SECItem *private2, SECItem *derivedSecret)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_KEA_Derive(prime, public1, public2, private1,
+                                    private2, derivedSecret);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+PRBool
+KEA_Verify(SECItem *Y, SECItem *prime, SECItem *subPrime)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_KEA_Verify(Y, prime, subPrime);
+    return PR_FALSE; /* the freebl DSO could not be loaded */
+}
+
+RC4Context *
+RC4_CreateContext(const unsigned char *key, int len)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_RC4_CreateContext(key, len);
+    return NULL; /* the freebl DSO could not be loaded */
+}
+
+void
+RC4_DestroyContext(RC4Context *cx, PRBool freeit)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_RC4_DestroyContext(cx, freeit);
+}
+
+SECStatus
+RC4_Encrypt(RC4Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_RC4_Encrypt(cx, output, outputLen, maxOutputLen,
+                                     input, inputLen);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+RC4_Decrypt(RC4Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_RC4_Decrypt(cx, output, outputLen, maxOutputLen,
+                                     input, inputLen);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+RC2Context *
+RC2_CreateContext(const unsigned char *key, unsigned int len,
+                  const unsigned char *iv, int mode, unsigned effectiveKeyLen)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_RC2_CreateContext(key, len, iv, mode, effectiveKeyLen);
+    return NULL; /* the freebl DSO could not be loaded */
+}
+
+void
+RC2_DestroyContext(RC2Context *cx, PRBool freeit)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_RC2_DestroyContext(cx, freeit);
+}
+
+SECStatus
+RC2_Encrypt(RC2Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_RC2_Encrypt(cx, output, outputLen, maxOutputLen,
+                                     input, inputLen);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+RC2_Decrypt(RC2Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_RC2_Decrypt(cx, output, outputLen, maxOutputLen,
+                                     input, inputLen);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+RC5Context *
+RC5_CreateContext(const SECItem *key, unsigned int rounds,
+                  unsigned int wordSize, const unsigned char *iv, int mode)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_RC5_CreateContext(key, rounds, wordSize, iv, mode);
+    return NULL; /* the freebl DSO could not be loaded */
+}
+
+void
+RC5_DestroyContext(RC5Context *cx, PRBool freeit)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_RC5_DestroyContext(cx, freeit);
+}
+
+SECStatus
+RC5_Encrypt(RC5Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_RC5_Encrypt(cx, output, outputLen, maxOutputLen,
+                                     input, inputLen);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+RC5_Decrypt(RC5Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_RC5_Decrypt(cx, output, outputLen, maxOutputLen,
+                                     input, inputLen);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+DESContext *
+DES_CreateContext(const unsigned char *key, const unsigned char *iv,
+                  int mode, PRBool encrypt)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_DES_CreateContext(key, iv, mode, encrypt);
+    return NULL; /* the freebl DSO could not be loaded */
+}
+
+void
+DES_DestroyContext(DESContext *cx, PRBool freeit)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_DES_DestroyContext(cx, freeit);
+}
+
+SECStatus
+DES_Encrypt(DESContext *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_DES_Encrypt(cx, output, outputLen, maxOutputLen,
+                                     input, inputLen);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+DES_Decrypt(DESContext *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_DES_Decrypt(cx, output, outputLen, maxOutputLen,
+                                     input, inputLen);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+SEEDContext *
+SEED_CreateContext(const unsigned char *key, const unsigned char *iv,
+                   int mode, PRBool encrypt)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SEED_CreateContext(key, iv, mode, encrypt);
+    return NULL; /* the freebl DSO could not be loaded */
+}
+
+void
+SEED_DestroyContext(SEEDContext *cx, PRBool freeit)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_SEED_DestroyContext(cx, freeit);
+}
+
+SECStatus
+SEED_Encrypt(SEEDContext *cx, unsigned char *output, unsigned int *outputLen,
+             unsigned int maxOutputLen, const unsigned char *input,
+             unsigned int inputLen)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SEED_Encrypt(cx, output, outputLen, maxOutputLen,
+                                      input, inputLen);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+SEED_Decrypt(SEEDContext *cx, unsigned char *output, unsigned int *outputLen,
+             unsigned int maxOutputLen, const unsigned char *input,
+             unsigned int inputLen)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SEED_Decrypt(cx, output, outputLen, maxOutputLen,
+                                      input, inputLen);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+AESContext *
+AES_CreateContext(const unsigned char *key, const unsigned char *iv,
+                  int mode, int encrypt,
+                  unsigned int keylen, unsigned int blocklen)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_AES_CreateContext(key, iv, mode, encrypt,
+                                           keylen, blocklen);
+    return NULL; /* the freebl DSO could not be loaded */
+}
+
+void
+AES_DestroyContext(AESContext *cx, PRBool freeit)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_AES_DestroyContext(cx, freeit);
+}
+
+SECStatus
+AES_Encrypt(AESContext *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_AES_Encrypt(cx, output, outputLen, maxOutputLen,
+                                     input, inputLen);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+AES_Decrypt(AESContext *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_AES_Decrypt(cx, output, outputLen, maxOutputLen,
+                                     input, inputLen);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+MD5_Hash(unsigned char *dest, const char *src)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_MD5_Hash(dest, src);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+MD5_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_MD5_HashBuf(dest, src, src_length);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+MD5Context *
+MD5_NewContext(void)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_MD5_NewContext();
+    return NULL; /* the freebl DSO could not be loaded */
+}
+
+void
+MD5_DestroyContext(MD5Context *cx, PRBool freeit)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_MD5_DestroyContext(cx, freeit);
+}
+
+void
+MD5_Begin(MD5Context *cx)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_MD5_Begin(cx);
+}
+
+void
+MD5_Update(MD5Context *cx, const unsigned char *input, unsigned int inputLen)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_MD5_Update(cx, input, inputLen);
+}
+
+void
+MD5_End(MD5Context *cx, unsigned char *digest,
+        unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_MD5_End(cx, digest, digestLen, maxDigestLen);
+}
+
+unsigned int
+MD5_FlattenSize(MD5Context *cx)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_MD5_FlattenSize(cx);
+    return 0; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+MD5_Flatten(MD5Context *cx, unsigned char *space)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_MD5_Flatten(cx, space);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+MD5Context *
+MD5_Resurrect(unsigned char *space, void *arg)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_MD5_Resurrect(space, arg);
+    return NULL; /* the freebl DSO could not be loaded */
+}
+
+void
+MD5_TraceState(MD5Context *cx)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_MD5_TraceState(cx);
+}
+
+SECStatus
+MD2_Hash(unsigned char *dest, const char *src)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_MD2_Hash(dest, src);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+MD2Context *
+MD2_NewContext(void)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_MD2_NewContext();
+    return NULL; /* the freebl DSO could not be loaded */
+}
+
+void
+MD2_DestroyContext(MD2Context *cx, PRBool freeit)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_MD2_DestroyContext(cx, freeit);
+}
+
+void
+MD2_Begin(MD2Context *cx)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_MD2_Begin(cx);
+}
+
+void
+MD2_Update(MD2Context *cx, const unsigned char *input, unsigned int inputLen)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_MD2_Update(cx, input, inputLen);
+}
+
+void
+MD2_End(MD2Context *cx, unsigned char *digest,
+        unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_MD2_End(cx, digest, digestLen, maxDigestLen);
+}
+
+unsigned int
+MD2_FlattenSize(MD2Context *cx)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_MD2_FlattenSize(cx);
+    return 0; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+MD2_Flatten(MD2Context *cx, unsigned char *space)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_MD2_Flatten(cx, space);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+MD2Context *
+MD2_Resurrect(unsigned char *space, void *arg)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_MD2_Resurrect(space, arg);
+    return NULL; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+SHA1_Hash(unsigned char *dest, const char *src)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA1_Hash(dest, src);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+SHA1_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA1_HashBuf(dest, src, src_length);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SHA1Context *
+SHA1_NewContext(void)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA1_NewContext();
+    return NULL; /* the freebl DSO could not be loaded */
+}
+
+void
+SHA1_DestroyContext(SHA1Context *cx, PRBool freeit)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_SHA1_DestroyContext(cx, freeit);
+}
+
+void
+SHA1_Begin(SHA1Context *cx)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_SHA1_Begin(cx);
+}
+
+void
+SHA1_Update(SHA1Context *cx, const unsigned char *input,
+            unsigned int inputLen)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_SHA1_Update(cx, input, inputLen);
+}
+
+void
+SHA1_End(SHA1Context *cx, unsigned char *digest,
+         unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_SHA1_End(cx, digest, digestLen, maxDigestLen);
+}
+
+void
+SHA1_TraceState(SHA1Context *cx)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_SHA1_TraceState(cx);
+}
+
+unsigned int
+SHA1_FlattenSize(SHA1Context *cx)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA1_FlattenSize(cx);
+    return 0; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+SHA1_Flatten(SHA1Context *cx, unsigned char *space)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA1_Flatten(cx, space);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SHA1Context *
+SHA1_Resurrect(unsigned char *space, void *arg)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA1_Resurrect(space, arg);
+    return NULL; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+RNG_RNGInit(void)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_RNG_RNGInit();
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+RNG_RandomUpdate(const void *data, size_t bytes)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_RNG_RandomUpdate(data, bytes);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+RNG_GenerateGlobalRandomBytes(void *dest, size_t len)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_RNG_GenerateGlobalRandomBytes(dest, len);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+void
+RNG_RNGShutdown(void)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_RNG_RNGShutdown();
+}
+
+SECStatus
+PQG_ParamGen(unsigned int j, PQGParams **pParams, PQGVerify **pVfy)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_PQG_ParamGen(j, pParams, pVfy);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+PQG_ParamGenSeedLen(unsigned int j, unsigned int seedBytes,
+                    PQGParams **pParams, PQGVerify **pVfy)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_PQG_ParamGenSeedLen(j, seedBytes, pParams, pVfy);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+PQG_VerifyParams(const PQGParams *params, const PQGVerify *vfy,
+                 SECStatus *result)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_PQG_VerifyParams(params, vfy, result);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+void
+PQG_DestroyParams(PQGParams *params)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_PQG_DestroyParams(params);
+}
+
+void
+PQG_DestroyVerify(PQGVerify *vfy)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_PQG_DestroyVerify(vfy);
+}
+
+void
+BL_Cleanup(void)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_BL_Cleanup();
+}
+
+void
+BL_Unload(void)
+{
+    /* Not thread-safe, and it need not be: per the original note it is only
+     * called from functions that are themselves not thread-safe (C_Finalize
+     * in softoken, and the SSL bypass shutdown callback from NSS_Shutdown). */
+    char *disableUnload;
+    vector = NULL;
+    disableUnload = PR_GetEnvSecure("NSS_DISABLE_UNLOAD");
+    if (blLib != NULL && disableUnload == NULL) {
+#ifdef DEBUG
+        PRStatus status = PR_UnloadLibrary(blLib);
+        PORT_Assert(PR_SUCCESS == status);
+#else
+        PR_UnloadLibrary(blLib);
+#endif
+    }
+    /* drop the handle and re-arm the call-once so a later call can reload */
+    blLib = NULL;
+    loadFreeBLOnce = pristineCallOnce;
+}
+
+/* ============== New for 3.003 =============================== */
+
+SECStatus
+SHA256_Hash(unsigned char *dest, const char *src)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA256_Hash(dest, src);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+SHA256_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA256_HashBuf(dest, src, src_length);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SHA256Context *
+SHA256_NewContext(void)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA256_NewContext();
+    return NULL; /* the freebl DSO could not be loaded */
+}
+
+void
+SHA256_DestroyContext(SHA256Context *cx, PRBool freeit)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_SHA256_DestroyContext(cx, freeit);
+}
+
+void
+SHA256_Begin(SHA256Context *cx)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_SHA256_Begin(cx);
+}
+
+void
+SHA256_Update(SHA256Context *cx, const unsigned char *input,
+              unsigned int inputLen)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_SHA256_Update(cx, input, inputLen);
+}
+
+void
+SHA256_End(SHA256Context *cx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_SHA256_End(cx, digest, digestLen, maxDigestLen);
+}
+
+void
+SHA256_TraceState(SHA256Context *cx)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_SHA256_TraceState(cx);
+}
+
+unsigned int
+SHA256_FlattenSize(SHA256Context *cx)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA256_FlattenSize(cx);
+    return 0; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+SHA256_Flatten(SHA256Context *cx, unsigned char *space)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA256_Flatten(cx, space);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SHA256Context *
+SHA256_Resurrect(unsigned char *space, void *arg)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA256_Resurrect(space, arg);
+    return NULL; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+SHA512_Hash(unsigned char *dest, const char *src)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA512_Hash(dest, src);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+SHA512_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA512_HashBuf(dest, src, src_length);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SHA512Context *
+SHA512_NewContext(void)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA512_NewContext();
+    return NULL; /* the freebl DSO could not be loaded */
+}
+
+void
+SHA512_DestroyContext(SHA512Context *cx, PRBool freeit)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_SHA512_DestroyContext(cx, freeit);
+}
+
+void
+SHA512_Begin(SHA512Context *cx)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_SHA512_Begin(cx);
+}
+
+void
+SHA512_Update(SHA512Context *cx, const unsigned char *input,
+              unsigned int inputLen)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_SHA512_Update(cx, input, inputLen);
+}
+
+void
+SHA512_End(SHA512Context *cx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_SHA512_End(cx, digest, digestLen, maxDigestLen);
+}
+
+void
+SHA512_TraceState(SHA512Context *cx)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_SHA512_TraceState(cx);
+}
+
+unsigned int
+SHA512_FlattenSize(SHA512Context *cx)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA512_FlattenSize(cx);
+    return 0; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+SHA512_Flatten(SHA512Context *cx, unsigned char *space)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA512_Flatten(cx, space);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SHA512Context *
+SHA512_Resurrect(unsigned char *space, void *arg)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA512_Resurrect(space, arg);
+    return NULL; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+SHA384_Hash(unsigned char *dest, const char *src)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA384_Hash(dest, src);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+SHA384_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA384_HashBuf(dest, src, src_length);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SHA384Context *
+SHA384_NewContext(void)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA384_NewContext();
+    return NULL; /* the freebl DSO could not be loaded */
+}
+
+void
+SHA384_DestroyContext(SHA384Context *cx, PRBool freeit)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_SHA384_DestroyContext(cx, freeit);
+}
+
+void
+SHA384_Begin(SHA384Context *cx)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_SHA384_Begin(cx);
+}
+
+void
+SHA384_Update(SHA384Context *cx, const unsigned char *input,
+              unsigned int inputLen)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_SHA384_Update(cx, input, inputLen);
+}
+
+void
+SHA384_End(SHA384Context *cx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_SHA384_End(cx, digest, digestLen, maxDigestLen);
+}
+
+void
+SHA384_TraceState(SHA384Context *cx)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        vector->p_SHA384_TraceState(cx);
+}
+
+unsigned int
+SHA384_FlattenSize(SHA384Context *cx)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA384_FlattenSize(cx);
+    return 0; /* the freebl DSO could not be loaded */
+}
+
+SECStatus
+SHA384_Flatten(SHA384Context *cx, unsigned char *space)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA384_Flatten(cx, space);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+SHA384Context *
+SHA384_Resurrect(unsigned char *space, void *arg)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return vector->p_SHA384_Resurrect(space, arg);
+    return NULL; /* the freebl DSO could not be loaded */
+}
+
+AESKeyWrapContext *
+AESKeyWrap_CreateContext(const unsigned char *key, const unsigned char *iv,
+                         int encrypt, unsigned int keylen)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return (vector->p_AESKeyWrap_CreateContext)(key, iv, encrypt, keylen);
+    return NULL; /* the freebl DSO could not be loaded */
+}
+
+void
+AESKeyWrap_DestroyContext(AESKeyWrapContext *cx, PRBool freeit)
+{
+    /* Load freebl on demand; do nothing if the DSO cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        (vector->p_AESKeyWrap_DestroyContext)(cx, freeit);
+}
+
+SECStatus
+AESKeyWrap_Encrypt(AESKeyWrapContext *cx, unsigned char *output,
+                   unsigned int *outputLen, unsigned int maxOutputLen,
+                   const unsigned char *input, unsigned int inputLen)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return (vector->p_AESKeyWrap_Encrypt)(cx, output, outputLen,
+                                              maxOutputLen, input, inputLen);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+SECStatus
+AESKeyWrap_Decrypt(AESKeyWrapContext *cx, unsigned char *output,
+                   unsigned int *outputLen, unsigned int maxOutputLen,
+                   const unsigned char *input, unsigned int inputLen)
+{
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce()) /* loaded on demand */
+        return (vector->p_AESKeyWrap_Decrypt)(cx, output, outputLen,
+                                              maxOutputLen, input, inputLen);
+    return SECFailure; /* the freebl DSO could not be loaded */
+}
+
+PRBool
+BLAPI_SHVerify(const char *name, PRFuncPtr addr)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return PR_FALSE; /* freebl loader failed */
+    return vector->p_BLAPI_SHVerify(name, addr);
+}
+
+/*
+ * The caller is expected to pass NULL as the name. Forwarding that NULL
+ * would trigger p_BLAPI_VerifySelf() to return 'TRUE', so instead pass
+ * the real name of the shared library we loaded (the static libraryName
+ * set in freebl_LoadDSO) to p_BLAPI_VerifySelf.
+ */
+PRBool
+BLAPI_VerifySelf(const char *name)
+{
+    PORT_Assert(!name);
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return PR_FALSE; /* freebl loader failed */
+    return vector->p_BLAPI_VerifySelf(libraryName); /* name is not forwarded */
+}
+
+/* ============== New for 3.006 =============================== */
+
+SECStatus
+EC_NewKey(ECParams *params, ECPrivateKey **privKey)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_EC_NewKey(params, privKey);
+}
+
+SECStatus
+EC_NewKeyFromSeed(ECParams *params, ECPrivateKey **privKey,
+                  const unsigned char *seed, int seedlen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_EC_NewKeyFromSeed(params, privKey, seed, seedlen);
+}
+
+SECStatus
+EC_ValidatePublicKey(ECParams *params, SECItem *publicValue)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_EC_ValidatePublicKey(params, publicValue);
+}
+
+SECStatus
+ECDH_Derive(SECItem *publicValue, ECParams *params, SECItem *privateValue,
+            PRBool withCofactor, SECItem *derivedSecret)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_ECDH_Derive(publicValue, params, privateValue,
+                                 withCofactor, derivedSecret);
+}
+
+SECStatus
+ECDSA_SignDigest(ECPrivateKey *key, SECItem *signature,
+                 const SECItem *digest)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_ECDSA_SignDigest(key, signature, digest);
+}
+
+SECStatus
+ECDSA_VerifyDigest(ECPublicKey *key, const SECItem *signature,
+                   const SECItem *digest)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_ECDSA_VerifyDigest(key, signature, digest);
+}
+
+SECStatus
+ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature,
+                         const SECItem *digest, const unsigned char *seed, const int seedlen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_ECDSA_SignDigestWithSeed(key, signature, digest,
+                                              seed, seedlen);
+}
+
+/* ============== New for 3.008 =============================== */
+
+AESContext *
+AES_AllocateContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return NULL; /* freebl loader failed */
+    return vector->p_AES_AllocateContext();
+}
+
+AESKeyWrapContext *
+AESKeyWrap_AllocateContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return NULL; /* freebl loader failed */
+    return vector->p_AESKeyWrap_AllocateContext();
+}
+
+DESContext *
+DES_AllocateContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return NULL; /* freebl loader failed */
+    return vector->p_DES_AllocateContext();
+}
+
+RC2Context *
+RC2_AllocateContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return NULL; /* freebl loader failed */
+    return vector->p_RC2_AllocateContext();
+}
+
+RC4Context *
+RC4_AllocateContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return NULL; /* freebl loader failed */
+    return vector->p_RC4_AllocateContext();
+}
+
+SECStatus
+AES_InitContext(AESContext *cx, const unsigned char *key,
+                unsigned int keylen, const unsigned char *iv, int mode,
+                unsigned int encrypt, unsigned int blocklen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_AES_InitContext(cx, key, keylen, iv, mode, encrypt,
+                                     blocklen);
+}
+
+SECStatus
+AESKeyWrap_InitContext(AESKeyWrapContext *cx, const unsigned char *key,
+                       unsigned int keylen, const unsigned char *iv, int mode,
+                       unsigned int encrypt, unsigned int blocklen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_AESKeyWrap_InitContext(cx, key, keylen, iv, mode,
+                                            encrypt, blocklen);
+}
+
+SECStatus
+DES_InitContext(DESContext *cx, const unsigned char *key,
+                unsigned int keylen, const unsigned char *iv, int mode,
+                unsigned int encrypt, unsigned int xtra)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_DES_InitContext(cx, key, keylen, iv, mode, encrypt, xtra);
+}
+
+SECStatus
+SEED_InitContext(SEEDContext *cx, const unsigned char *key,
+                 unsigned int keylen, const unsigned char *iv, int mode,
+                 unsigned int encrypt, unsigned int xtra)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_SEED_InitContext(cx, key, keylen, iv, mode, encrypt, xtra);
+}
+
+SECStatus
+RC2_InitContext(RC2Context *cx, const unsigned char *key,
+                unsigned int keylen, const unsigned char *iv, int mode,
+                unsigned int effectiveKeyLen, unsigned int xtra)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_RC2_InitContext(cx, key, keylen, iv, mode,
+                                     effectiveKeyLen, xtra);
+}
+
+SECStatus
+RC4_InitContext(RC4Context *cx, const unsigned char *key,
+                unsigned int keylen, const unsigned char *x1, int x2,
+                unsigned int x3, unsigned int x4)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_RC4_InitContext(cx, key, keylen, x1, x2, x3, x4);
+}
+
+void
+MD2_Clone(MD2Context *dest, MD2Context *src)
+{
+    /* Forward only when the vector is set or the loader succeeds. */
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS)
+        vector->p_MD2_Clone(dest, src);
+}
+
+void
+MD5_Clone(MD5Context *dest, MD5Context *src)
+{
+    /* Forward only when the vector is set or the loader succeeds. */
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS)
+        vector->p_MD5_Clone(dest, src);
+}
+
+void
+SHA1_Clone(SHA1Context *dest, SHA1Context *src)
+{
+    /* Forward only when the vector is set or the loader succeeds. */
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS)
+        vector->p_SHA1_Clone(dest, src);
+}
+
+void
+SHA256_Clone(SHA256Context *dest, SHA256Context *src)
+{
+    /* Forward only when the vector is set or the loader succeeds. */
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS)
+        vector->p_SHA256_Clone(dest, src);
+}
+
+void
+SHA384_Clone(SHA384Context *dest, SHA384Context *src)
+{
+    /* Forward only when the vector is set or the loader succeeds. */
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS)
+        vector->p_SHA384_Clone(dest, src);
+}
+
+void
+SHA512_Clone(SHA512Context *dest, SHA512Context *src)
+{
+    /* Forward only when the vector is set or the loader succeeds. */
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS)
+        vector->p_SHA512_Clone(dest, src);
+}
+
+SECStatus
+TLS_PRF(const SECItem *secret, const char *label,
+        SECItem *seed, SECItem *result, PRBool isFIPS)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_TLS_PRF(secret, label, seed, result, isFIPS);
+}
+
+const SECHashObject *
+HASH_GetRawHashObject(HASH_HashType hashType)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return NULL; /* freebl loader failed */
+    return vector->p_HASH_GetRawHashObject(hashType);
+}
+
+void
+HMAC_Destroy(HMACContext *cx, PRBool freeit)
+{
+    /* Forward only when the vector is set or the loader succeeds. */
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS)
+        vector->p_HMAC_Destroy(cx, freeit);
+}
+
+HMACContext *
+HMAC_Create(const SECHashObject *hashObj, const unsigned char *secret,
+            unsigned int secret_len, PRBool isFIPS)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return NULL; /* freebl loader failed */
+    return vector->p_HMAC_Create(hashObj, secret, secret_len, isFIPS);
+}
+
+SECStatus
+HMAC_Init(HMACContext *cx, const SECHashObject *hashObj,
+          const unsigned char *secret, unsigned int secret_len, PRBool isFIPS)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_HMAC_Init(cx, hashObj, secret, secret_len, isFIPS);
+}
+
+void
+HMAC_Begin(HMACContext *cx)
+{
+    /* Forward only when the vector is set or the loader succeeds. */
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS)
+        vector->p_HMAC_Begin(cx);
+}
+
+void
+HMAC_Update(HMACContext *cx, const unsigned char *data, unsigned int data_len)
+{
+    /* Forward only when the vector is set or the loader succeeds. */
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS)
+        vector->p_HMAC_Update(cx, data, data_len);
+}
+
+SECStatus
+HMAC_Finish(HMACContext *cx, unsigned char *result, unsigned int *result_len,
+            unsigned int max_result_len)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_HMAC_Finish(cx, result, result_len, max_result_len);
+}
+
+HMACContext *
+HMAC_Clone(HMACContext *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return NULL; /* freebl loader failed */
+    return vector->p_HMAC_Clone(cx);
+}
+
+void
+RNG_SystemInfoForRNG(void)
+{
+    /* Forward only when the vector is set or the loader succeeds. */
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS)
+        vector->p_RNG_SystemInfoForRNG();
+}
+
+SECStatus
+FIPS186Change_GenerateX(unsigned char *XKEY, const unsigned char *XSEEDj,
+                        unsigned char *x_j)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_FIPS186Change_GenerateX(XKEY, XSEEDj, x_j);
+}
+
+SECStatus
+FIPS186Change_ReduceModQForDSA(const unsigned char *w,
+                               const unsigned char *q,
+                               unsigned char *xj)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_FIPS186Change_ReduceModQForDSA(w, q, xj);
+}
+
+/* === new for Camellia === */
+SECStatus
+Camellia_InitContext(CamelliaContext *cx, const unsigned char *key,
+                     unsigned int keylen, const unsigned char *iv, int mode,
+                     unsigned int encrypt, unsigned int unused)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_Camellia_InitContext(cx, key, keylen, iv, mode, encrypt,
+                                          unused);
+}
+
+CamelliaContext *
+Camellia_AllocateContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return NULL; /* freebl loader failed */
+    return vector->p_Camellia_AllocateContext();
+}
+
+CamelliaContext *
+Camellia_CreateContext(const unsigned char *key, const unsigned char *iv,
+                       int mode, int encrypt,
+                       unsigned int keylen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return NULL; /* freebl loader failed */
+    return vector->p_Camellia_CreateContext(key, iv, mode, encrypt, keylen);
+}
+
+void
+Camellia_DestroyContext(CamelliaContext *cx, PRBool freeit)
+{
+    /* Forward only when the vector is set or the loader succeeds. */
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS)
+        vector->p_Camellia_DestroyContext(cx, freeit);
+}
+
+SECStatus
+Camellia_Encrypt(CamelliaContext *cx, unsigned char *output,
+                 unsigned int *outputLen, unsigned int maxOutputLen,
+                 const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_Camellia_Encrypt(cx, output, outputLen, maxOutputLen,
+                                      input, inputLen);
+}
+
+SECStatus
+Camellia_Decrypt(CamelliaContext *cx, unsigned char *output,
+                 unsigned int *outputLen, unsigned int maxOutputLen,
+                 const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_Camellia_Decrypt(cx, output, outputLen, maxOutputLen,
+                                      input, inputLen);
+}
+
+void
+BL_SetForkState(PRBool forked)
+{
+    /* Forward only when the vector is set or the loader succeeds. */
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS)
+        vector->p_BL_SetForkState(forked);
+}
+
+SECStatus
+PRNGTEST_Instantiate(const PRUint8 *entropy, unsigned int entropy_len,
+                     const PRUint8 *nonce, unsigned int nonce_len,
+                     const PRUint8 *personal_string, unsigned int ps_len)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_PRNGTEST_Instantiate(entropy, entropy_len,
+                                          nonce, nonce_len,
+                                          personal_string, ps_len);
+}
+
+SECStatus
+PRNGTEST_Reseed(const PRUint8 *entropy, unsigned int entropy_len,
+                const PRUint8 *additional, unsigned int additional_len)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_PRNGTEST_Reseed(entropy, entropy_len,
+                                     additional, additional_len);
+}
+
+SECStatus
+PRNGTEST_Generate(PRUint8 *bytes, unsigned int bytes_len,
+                  const PRUint8 *additional, unsigned int additional_len)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_PRNGTEST_Generate(bytes, bytes_len,
+                                       additional, additional_len);
+}
+
+SECStatus
+PRNGTEST_Uninstantiate(void) /* (void): a prototype, not an old-style () */
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_PRNGTEST_Uninstantiate();
+}
+
+SECStatus
+RSA_PopulatePrivateKey(RSAPrivateKey *key)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_RSA_PopulatePrivateKey(key);
+}
+
+SECStatus
+JPAKE_Sign(PLArenaPool *arena, const PQGParams *pqg, HASH_HashType hashType,
+           const SECItem *signerID, const SECItem *x,
+           const SECItem *testRandom, const SECItem *gxIn, SECItem *gxOut,
+           SECItem *gv, SECItem *r)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_JPAKE_Sign(arena, pqg, hashType, signerID, x,
+                                testRandom, gxIn, gxOut, gv, r);
+}
+
+SECStatus
+JPAKE_Verify(PLArenaPool *arena, const PQGParams *pqg,
+             HASH_HashType hashType, const SECItem *signerID,
+             const SECItem *peerID, const SECItem *gx,
+             const SECItem *gv, const SECItem *r)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_JPAKE_Verify(arena, pqg, hashType, signerID, peerID,
+                                  gx, gv, r);
+}
+
+SECStatus
+JPAKE_Round2(PLArenaPool *arena, const SECItem *p, const SECItem *q,
+             const SECItem *gx1, const SECItem *gx3, const SECItem *gx4,
+             SECItem *base, const SECItem *x2, const SECItem *s, SECItem *x2s)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_JPAKE_Round2(arena, p, q, gx1, gx3, gx4, base, x2, s, x2s);
+}
+
+SECStatus
+JPAKE_Final(PLArenaPool *arena, const SECItem *p, const SECItem *q,
+            const SECItem *x2, const SECItem *gx4, const SECItem *x2s,
+            const SECItem *B, SECItem *K)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_JPAKE_Final(arena, p, q, x2, gx4, x2s, B, K);
+}
+
+SECStatus
+TLS_P_hash(HASH_HashType hashAlg, const SECItem *secret, const char *label,
+           SECItem *seed, SECItem *result, PRBool isFIPS)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_TLS_P_hash(hashAlg, secret, label, seed, result, isFIPS);
+}
+
+SECStatus
+SHA224_Hash(unsigned char *dest, const char *src)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_SHA224_Hash(dest, src);
+}
+
+SECStatus
+SHA224_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_SHA224_HashBuf(dest, src, src_length);
+}
+
+SHA224Context *
+SHA224_NewContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return NULL; /* freebl loader failed */
+    return vector->p_SHA224_NewContext();
+}
+
+void
+SHA224_DestroyContext(SHA224Context *cx, PRBool freeit)
+{
+    /* Forward only when the vector is set or the loader succeeds. */
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS)
+        vector->p_SHA224_DestroyContext(cx, freeit);
+}
+
+void
+SHA224_Begin(SHA224Context *cx)
+{
+    /* Forward only when the vector is set or the loader succeeds. */
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS)
+        vector->p_SHA224_Begin(cx);
+}
+
+void
+SHA224_Update(SHA224Context *cx, const unsigned char *input,
+              unsigned int inputLen)
+{
+    /* Forward only when the vector is set or the loader succeeds. */
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS)
+        vector->p_SHA224_Update(cx, input, inputLen);
+}
+
+void
+SHA224_End(SHA224Context *cx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    /* Forward only when the vector is set or the loader succeeds. */
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS)
+        vector->p_SHA224_End(cx, digest, digestLen, maxDigestLen);
+}
+
+void
+SHA224_TraceState(SHA224Context *cx)
+{
+    /* Forward only when the vector is set or the loader succeeds. */
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS)
+        vector->p_SHA224_TraceState(cx);
+}
+
+unsigned int
+SHA224_FlattenSize(SHA224Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return 0; /* freebl loader failed */
+    return vector->p_SHA224_FlattenSize(cx);
+}
+
+SECStatus
+SHA224_Flatten(SHA224Context *cx, unsigned char *space)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_SHA224_Flatten(cx, space);
+}
+
+SHA224Context *
+SHA224_Resurrect(unsigned char *space, void *arg)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return NULL; /* freebl loader failed */
+    return vector->p_SHA224_Resurrect(space, arg);
+}
+
+void
+SHA224_Clone(SHA224Context *dest, SHA224Context *src)
+{
+    /* Forward only when the vector is set or the loader succeeds. */
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS)
+        vector->p_SHA224_Clone(dest, src);
+}
+
+PRBool
+BLAPI_SHVerifyFile(const char *name)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return PR_FALSE; /* freebl loader failed */
+    return vector->p_BLAPI_SHVerifyFile(name);
+}
+
+/* === new for DSA-2 === */
+SECStatus
+PQG_ParamGenV2(unsigned int L, unsigned int N, unsigned int seedBytes,
+               PQGParams **pParams, PQGVerify **pVfy)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_PQG_ParamGenV2(L, N, seedBytes, pParams, pVfy);
+}
+
+SECStatus
+PRNGTEST_RunHealthTests(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_PRNGTEST_RunHealthTests();
+}
+
+SECStatus
+SSLv3_MAC_ConstantTime(
+    unsigned char *result,
+    unsigned int *resultLen,
+    unsigned int maxResultLen,
+    const SECHashObject *hashObj,
+    const unsigned char *secret,
+    unsigned int secretLen,
+    const unsigned char *header,
+    unsigned int headerLen,
+    const unsigned char *body,
+    unsigned int bodyLen,
+    unsigned int bodyTotalLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_SSLv3_MAC_ConstantTime(
+        result, resultLen, maxResultLen,
+        hashObj,
+        secret, secretLen,
+        header, headerLen,
+        body, bodyLen, bodyTotalLen);
+}
+
+SECStatus
+HMAC_ConstantTime(
+    unsigned char *result,
+    unsigned int *resultLen,
+    unsigned int maxResultLen,
+    const SECHashObject *hashObj,
+    const unsigned char *secret,
+    unsigned int secretLen,
+    const unsigned char *header,
+    unsigned int headerLen,
+    const unsigned char *body,
+    unsigned int bodyLen,
+    unsigned int bodyTotalLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_HMAC_ConstantTime(
+        result, resultLen, maxResultLen,
+        hashObj,
+        secret, secretLen,
+        header, headerLen,
+        body, bodyLen, bodyTotalLen);
+}
+
+SECStatus
+RSA_SignRaw(RSAPrivateKey *key,
+            unsigned char *output,
+            unsigned int *outputLen,
+            unsigned int maxOutputLen,
+            const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_RSA_SignRaw(key, output, outputLen, maxOutputLen, input,
+                                 inputLen);
+}
+
+SECStatus
+RSA_CheckSignRaw(RSAPublicKey *key,
+                 const unsigned char *sig,
+                 unsigned int sigLen,
+                 const unsigned char *hash,
+                 unsigned int hashLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_RSA_CheckSignRaw(key, sig, sigLen, hash, hashLen);
+}
+
+SECStatus
+RSA_CheckSignRecoverRaw(RSAPublicKey *key,
+                        unsigned char *data,
+                        unsigned int *dataLen,
+                        unsigned int maxDataLen,
+                        const unsigned char *sig,
+                        unsigned int sigLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_RSA_CheckSignRecoverRaw(key, data, dataLen, maxDataLen,
+                                             sig, sigLen);
+}
+
+SECStatus
+RSA_EncryptRaw(RSAPublicKey *key,
+               unsigned char *output,
+               unsigned int *outputLen,
+               unsigned int maxOutputLen,
+               const unsigned char *input,
+               unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_RSA_EncryptRaw(key, output, outputLen, maxOutputLen,
+                                    input, inputLen);
+}
+
+SECStatus
+RSA_DecryptRaw(RSAPrivateKey *key,
+               unsigned char *output,
+               unsigned int *outputLen,
+               unsigned int maxOutputLen,
+               const unsigned char *input,
+               unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_RSA_DecryptRaw(key, output, outputLen, maxOutputLen,
+                                    input, inputLen);
+}
+
+SECStatus
+RSA_EncryptOAEP(RSAPublicKey *key,
+                HASH_HashType hashAlg,
+                HASH_HashType maskHashAlg,
+                const unsigned char *label,
+                unsigned int labelLen,
+                const unsigned char *seed,
+                unsigned int seedLen,
+                unsigned char *output,
+                unsigned int *outputLen,
+                unsigned int maxOutputLen,
+                const unsigned char *input,
+                unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_RSA_EncryptOAEP(key, hashAlg, maskHashAlg, label,
+                                     labelLen, seed, seedLen, output,
+                                     outputLen, maxOutputLen, input, inputLen);
+}
+
+SECStatus
+RSA_DecryptOAEP(RSAPrivateKey *key,
+                HASH_HashType hashAlg,
+                HASH_HashType maskHashAlg,
+                const unsigned char *label,
+                unsigned int labelLen,
+                unsigned char *output,
+                unsigned int *outputLen,
+                unsigned int maxOutputLen,
+                const unsigned char *input,
+                unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_RSA_DecryptOAEP(key, hashAlg, maskHashAlg, label,
+                                     labelLen, output, outputLen,
+                                     maxOutputLen, input, inputLen);
+}
+
+SECStatus
+RSA_EncryptBlock(RSAPublicKey *key,
+                 unsigned char *output,
+                 unsigned int *outputLen,
+                 unsigned int maxOutputLen,
+                 const unsigned char *input,
+                 unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_RSA_EncryptBlock(key, output, outputLen, maxOutputLen,
+                                      input, inputLen);
+}
+
+SECStatus
+RSA_DecryptBlock(RSAPrivateKey *key,
+                 unsigned char *output,
+                 unsigned int *outputLen,
+                 unsigned int maxOutputLen,
+                 const unsigned char *input,
+                 unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_RSA_DecryptBlock(key, output, outputLen, maxOutputLen,
+                                      input, inputLen);
+}
+
+SECStatus
+RSA_SignPSS(RSAPrivateKey *key,
+            HASH_HashType hashAlg,
+            HASH_HashType maskHashAlg,
+            const unsigned char *salt,
+            unsigned int saltLen,
+            unsigned char *output,
+            unsigned int *outputLen,
+            unsigned int maxOutputLen,
+            const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_RSA_SignPSS(key, hashAlg, maskHashAlg, salt, saltLen,
+                                 output, outputLen, maxOutputLen, input,
+                                 inputLen);
+}
+
+SECStatus
+RSA_CheckSignPSS(RSAPublicKey *key,
+                 HASH_HashType hashAlg,
+                 HASH_HashType maskHashAlg,
+                 unsigned int saltLen,
+                 const unsigned char *sig,
+                 unsigned int sigLen,
+                 const unsigned char *hash,
+                 unsigned int hashLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_RSA_CheckSignPSS(key, hashAlg, maskHashAlg, saltLen,
+                                      sig, sigLen, hash, hashLen);
+}
+
+SECStatus
+RSA_Sign(RSAPrivateKey *key,
+         unsigned char *output,
+         unsigned int *outputLen,
+         unsigned int maxOutputLen,
+         const unsigned char *input,
+         unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_RSA_Sign(key, output, outputLen, maxOutputLen, input,
+                              inputLen);
+}
+
+SECStatus
+RSA_CheckSign(RSAPublicKey *key,
+              const unsigned char *sig,
+              unsigned int sigLen,
+              const unsigned char *data,
+              unsigned int dataLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_RSA_CheckSign(key, sig, sigLen, data, dataLen);
+}
+
+SECStatus
+RSA_CheckSignRecover(RSAPublicKey *key,
+                     unsigned char *output,
+                     unsigned int *outputLen,
+                     unsigned int maxOutputLen,
+                     const unsigned char *sig,
+                     unsigned int sigLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_RSA_CheckSignRecover(key, output, outputLen, maxOutputLen,
+                                          sig, sigLen);
+}
+
+SECStatus
+EC_FillParams(PLArenaPool *arena,
+              const SECItem *encodedParams,
+              ECParams *params)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_EC_FillParams(arena, encodedParams, params);
+}
+
+SECStatus
+EC_DecodeParams(const SECItem *encodedParams,
+                ECParams **ecparams)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_EC_DecodeParams(encodedParams, ecparams);
+}
+
+SECStatus
+EC_CopyParams(PLArenaPool *arena, ECParams *dstParams,
+              const ECParams *srcParams)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_EC_CopyParams(arena, dstParams, srcParams);
+}
+
+SECStatus
+ChaCha20Poly1305_InitContext(ChaCha20Poly1305Context *ctx,
+                             const unsigned char *key, unsigned int keyLen,
+                             unsigned int tagLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_ChaCha20Poly1305_InitContext(ctx, key, keyLen, tagLen);
+}
+
+ChaCha20Poly1305Context *
+ChaCha20Poly1305_CreateContext(const unsigned char *key, unsigned int keyLen,
+                               unsigned int tagLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return NULL; /* freebl loader failed */
+    return vector->p_ChaCha20Poly1305_CreateContext(key, keyLen, tagLen);
+}
+
+void
+ChaCha20Poly1305_DestroyContext(ChaCha20Poly1305Context *ctx, PRBool freeit)
+{
+    /* Forward only when the vector is set or the loader succeeds. */
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS)
+        vector->p_ChaCha20Poly1305_DestroyContext(ctx, freeit);
+}
+
+SECStatus
+ChaCha20Poly1305_Seal(const ChaCha20Poly1305Context *ctx,
+                      unsigned char *output, unsigned int *outputLen,
+                      unsigned int maxOutputLen,
+                      const unsigned char *input, unsigned int inputLen,
+                      const unsigned char *nonce, unsigned int nonceLen,
+                      const unsigned char *ad, unsigned int adLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_ChaCha20Poly1305_Seal(
+        ctx, output, outputLen, maxOutputLen, input, inputLen,
+        nonce, nonceLen, ad, adLen);
+}
+
+SECStatus
+ChaCha20Poly1305_Open(const ChaCha20Poly1305Context *ctx,
+                      unsigned char *output, unsigned int *outputLen,
+                      unsigned int maxOutputLen,
+                      const unsigned char *input, unsigned int inputLen,
+                      const unsigned char *nonce, unsigned int nonceLen,
+                      const unsigned char *ad, unsigned int adLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* freebl loader failed */
+    return vector->p_ChaCha20Poly1305_Open(
+        ctx, output, outputLen, maxOutputLen, input, inputLen,
+        nonce, nonceLen, ad, adLen);
+}
+
+int
+EC_GetPointSize(const ECParams *params)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS)
+        return SECFailure; /* NOTE(review): SECStatus value from an int API */
+    return vector->p_EC_GetPointSize(params);
+}
diff --git a/security/nss/lib/freebl/loader.h b/security/nss/lib/freebl/loader.h
new file mode 100644
index 000000000..ed392cc47
--- /dev/null
+++ b/security/nss/lib/freebl/loader.h
@@ -0,0 +1,788 @@
+/*
+ * loader.h - load platform dependent DSO containing freebl implementation.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _LOADER_H_
+#define _LOADER_H_ 1
+
+#include "blapi.h"
+
+#define FREEBL_VERSION 0x0313
+
+struct FREEBLVectorStr {
+
+    unsigned short length;  /* of this struct in bytes */
+    unsigned short version; /* of this struct. */
+
+    RSAPrivateKey *(*p_RSA_NewKey)(int keySizeInBits,
+                                   SECItem *publicExponent);
+
+    SECStatus (*p_RSA_PublicKeyOp)(RSAPublicKey *key,
+                                   unsigned char *output,
+                                   const unsigned char *input);
+
+    SECStatus (*p_RSA_PrivateKeyOp)(RSAPrivateKey *key,
+                                    unsigned char *output,
+                                    const unsigned char *input);
+
+    SECStatus (*p_DSA_NewKey)(const PQGParams *params,
+                              DSAPrivateKey **privKey);
+
+    SECStatus (*p_DSA_SignDigest)(DSAPrivateKey *key,
+                                  SECItem *signature,
+                                  const SECItem *digest);
+
+    SECStatus (*p_DSA_VerifyDigest)(DSAPublicKey *key,
+                                    const SECItem *signature,
+                                    const SECItem *digest);
+
+    SECStatus (*p_DSA_NewKeyFromSeed)(const PQGParams *params,
+                                      const unsigned char *seed,
+                                      DSAPrivateKey **privKey);
+
+    SECStatus (*p_DSA_SignDigestWithSeed)(DSAPrivateKey *key,
+                                          SECItem *signature,
+                                          const SECItem *digest,
+                                          const unsigned char *seed);
+
+    SECStatus (*p_DH_GenParam)(int primeLen, DHParams **params);
+
+    SECStatus (*p_DH_NewKey)(DHParams *params,
+                             DHPrivateKey **privKey);
+
+    SECStatus (*p_DH_Derive)(SECItem *publicValue,
+                             SECItem *prime,
+                             SECItem *privateValue,
+                             SECItem *derivedSecret,
+                             unsigned int maxOutBytes);
+
+    SECStatus (*p_KEA_Derive)(SECItem *prime,
+                              SECItem *public1,
+                              SECItem *public2,
+                              SECItem *private1,
+                              SECItem *private2,
+                              SECItem *derivedSecret);
+
+    PRBool (*p_KEA_Verify)(SECItem *Y, SECItem *prime, SECItem *subPrime);
+
+    RC4Context *(*p_RC4_CreateContext)(const unsigned char *key, int len);
+
+    void (*p_RC4_DestroyContext)(RC4Context *cx, PRBool freeit);
+
+    SECStatus (*p_RC4_Encrypt)(RC4Context *cx, unsigned char *output,
+                               unsigned int *outputLen, unsigned int maxOutputLen,
+                               const unsigned char *input, unsigned int inputLen);
+
+    SECStatus (*p_RC4_Decrypt)(RC4Context *cx, unsigned char *output,
+                               unsigned int *outputLen, unsigned int maxOutputLen,
+                               const unsigned char *input, unsigned int inputLen);
+
+    RC2Context *(*p_RC2_CreateContext)(const unsigned char *key,
+                                       unsigned int len, const unsigned char *iv,
+                                       int mode, unsigned effectiveKeyLen);
+
+    void (*p_RC2_DestroyContext)(RC2Context *cx, PRBool freeit);
+
+    SECStatus (*p_RC2_Encrypt)(RC2Context *cx, unsigned char *output,
+                               unsigned int *outputLen, unsigned int maxOutputLen,
+                               const unsigned char *input, unsigned int inputLen);
+
+    SECStatus (*p_RC2_Decrypt)(RC2Context *cx, unsigned char *output,
+                               unsigned int *outputLen, unsigned int maxOutputLen,
+                               const unsigned char *input, unsigned int inputLen);
+
+    RC5Context *(*p_RC5_CreateContext)(const SECItem *key, unsigned int rounds,
+                                       unsigned int wordSize, const unsigned char *iv, int mode);
+
+    void (*p_RC5_DestroyContext)(RC5Context *cx, PRBool freeit);
+
+    SECStatus (*p_RC5_Encrypt)(RC5Context *cx, unsigned char *output,
+                               unsigned int *outputLen, unsigned int maxOutputLen,
+                               const unsigned char *input, unsigned int inputLen);
+
+    SECStatus (*p_RC5_Decrypt)(RC5Context *cx, unsigned char *output,
+                               unsigned int *outputLen, unsigned int maxOutputLen,
+                               const unsigned char *input, unsigned int inputLen);
+
+    DESContext *(*p_DES_CreateContext)(const unsigned char *key,
+                                       const unsigned char *iv,
+                                       int mode, PRBool encrypt);
+
+    void (*p_DES_DestroyContext)(DESContext *cx, PRBool freeit);
+
+    SECStatus (*p_DES_Encrypt)(DESContext *cx, unsigned char *output,
+                               unsigned int *outputLen, unsigned int maxOutputLen,
+                               const unsigned char *input, unsigned int inputLen);
+
+    SECStatus (*p_DES_Decrypt)(DESContext *cx, unsigned char *output,
+                               unsigned int *outputLen, unsigned int maxOutputLen,
+                               const unsigned char *input, unsigned int inputLen);
+
+    AESContext *(*p_AES_CreateContext)(const unsigned char *key,
+                                       const unsigned char *iv,
+                                       int mode, int encrypt, unsigned int keylen,
+                                       unsigned int blocklen);
+
+    void (*p_AES_DestroyContext)(AESContext *cx, PRBool freeit);
+
+    SECStatus (*p_AES_Encrypt)(AESContext *cx, unsigned char *output,
+                               unsigned int *outputLen, unsigned int maxOutputLen,
+                               const unsigned char *input, unsigned int inputLen);
+
+    SECStatus (*p_AES_Decrypt)(AESContext *cx, unsigned char *output,
+                               unsigned int *outputLen, unsigned int maxOutputLen,
+                               const unsigned char *input, unsigned int inputLen);
+
+    SECStatus (*p_MD5_Hash)(unsigned char *dest, const char *src);
+
+    SECStatus (*p_MD5_HashBuf)(unsigned char *dest, const unsigned char *src,
+                               PRUint32 src_length);
+
+    MD5Context *(*p_MD5_NewContext)(void);
+
+    void (*p_MD5_DestroyContext)(MD5Context *cx, PRBool freeit);
+
+    void (*p_MD5_Begin)(MD5Context *cx);
+
+    void (*p_MD5_Update)(MD5Context *cx,
+                         const unsigned char *input, unsigned int inputLen);
+
+    void (*p_MD5_End)(MD5Context *cx, unsigned char *digest,
+                      unsigned int *digestLen, unsigned int maxDigestLen);
+
+    unsigned int (*p_MD5_FlattenSize)(MD5Context *cx);
+
+    SECStatus (*p_MD5_Flatten)(MD5Context *cx, unsigned char *space);
+
+    MD5Context *(*p_MD5_Resurrect)(unsigned char *space, void *arg);
+
+    void (*p_MD5_TraceState)(MD5Context *cx);
+
+    SECStatus (*p_MD2_Hash)(unsigned char *dest, const char *src);
+
+    MD2Context *(*p_MD2_NewContext)(void);
+
+    void (*p_MD2_DestroyContext)(MD2Context *cx, PRBool freeit);
+
+    void (*p_MD2_Begin)(MD2Context *cx);
+
+    void (*p_MD2_Update)(MD2Context *cx,
+                         const unsigned char *input, unsigned int inputLen);
+
+    void (*p_MD2_End)(MD2Context *cx, unsigned char *digest,
+                      unsigned int *digestLen, unsigned int maxDigestLen);
+
+    unsigned int (*p_MD2_FlattenSize)(MD2Context *cx);
+
+    SECStatus (*p_MD2_Flatten)(MD2Context *cx, unsigned char *space);
+
+    MD2Context *(*p_MD2_Resurrect)(unsigned char *space, void *arg);
+
+    SECStatus (*p_SHA1_Hash)(unsigned char *dest, const char *src);
+
+    SECStatus (*p_SHA1_HashBuf)(unsigned char *dest, const unsigned char *src,
+                                PRUint32 src_length);
+
+    SHA1Context *(*p_SHA1_NewContext)(void);
+
+    void (*p_SHA1_DestroyContext)(SHA1Context *cx, PRBool freeit);
+
+    void (*p_SHA1_Begin)(SHA1Context *cx);
+
+    void (*p_SHA1_Update)(SHA1Context *cx, const unsigned char *input,
+                          unsigned int inputLen);
+
+    void (*p_SHA1_End)(SHA1Context *cx, unsigned char *digest,
+                       unsigned int *digestLen, unsigned int maxDigestLen);
+
+    void (*p_SHA1_TraceState)(SHA1Context *cx);
+
+    unsigned int (*p_SHA1_FlattenSize)(SHA1Context *cx);
+
+    SECStatus (*p_SHA1_Flatten)(SHA1Context *cx, unsigned char *space);
+
+    SHA1Context *(*p_SHA1_Resurrect)(unsigned char *space, void *arg);
+
+    SECStatus (*p_RNG_RNGInit)(void);
+
+    SECStatus (*p_RNG_RandomUpdate)(const void *data, size_t bytes);
+
+    SECStatus (*p_RNG_GenerateGlobalRandomBytes)(void *dest, size_t len);
+
+    void (*p_RNG_RNGShutdown)(void);
+
+    SECStatus (*p_PQG_ParamGen)(unsigned int j, PQGParams **pParams,
+                                PQGVerify **pVfy);
+
+    SECStatus (*p_PQG_ParamGenSeedLen)(unsigned int j, unsigned int seedBytes,
+                                       PQGParams **pParams, PQGVerify **pVfy);
+
+    SECStatus (*p_PQG_VerifyParams)(const PQGParams *params,
+                                    const PQGVerify *vfy, SECStatus *result);
+
+    /* Version 3.001 came to here */
+
+    SECStatus (*p_RSA_PrivateKeyOpDoubleChecked)(RSAPrivateKey *key,
+                                                 unsigned char *output,
+                                                 const unsigned char *input);
+
+    SECStatus (*p_RSA_PrivateKeyCheck)(const RSAPrivateKey *key);
+
+    void (*p_BL_Cleanup)(void);
+
+    /* Version 3.002 came to here */
+
+    SHA256Context *(*p_SHA256_NewContext)(void);
+    void (*p_SHA256_DestroyContext)(SHA256Context *cx, PRBool freeit);
+    void (*p_SHA256_Begin)(SHA256Context *cx);
+    void (*p_SHA256_Update)(SHA256Context *cx, const unsigned char *input,
+                            unsigned int inputLen);
+    void (*p_SHA256_End)(SHA256Context *cx, unsigned char *digest,
+                         unsigned int *digestLen, unsigned int maxDigestLen);
+    SECStatus (*p_SHA256_HashBuf)(unsigned char *dest, const unsigned char *src,
+                                  PRUint32 src_length);
+    SECStatus (*p_SHA256_Hash)(unsigned char *dest, const char *src);
+    void (*p_SHA256_TraceState)(SHA256Context *cx);
+    unsigned int (*p_SHA256_FlattenSize)(SHA256Context *cx);
+    SECStatus (*p_SHA256_Flatten)(SHA256Context *cx, unsigned char *space);
+    SHA256Context *(*p_SHA256_Resurrect)(unsigned char *space, void *arg);
+
+    SHA512Context *(*p_SHA512_NewContext)(void);
+    void (*p_SHA512_DestroyContext)(SHA512Context *cx, PRBool freeit);
+    void (*p_SHA512_Begin)(SHA512Context *cx);
+    void (*p_SHA512_Update)(SHA512Context *cx, const unsigned char *input,
+                            unsigned int inputLen);
+    void (*p_SHA512_End)(SHA512Context *cx, unsigned char *digest,
+                         unsigned int *digestLen, unsigned int maxDigestLen);
+    SECStatus (*p_SHA512_HashBuf)(unsigned char *dest, const unsigned char *src,
+                                  PRUint32 src_length);
+    SECStatus (*p_SHA512_Hash)(unsigned char *dest, const char *src);
+    void (*p_SHA512_TraceState)(SHA512Context *cx);
+    unsigned int (*p_SHA512_FlattenSize)(SHA512Context *cx);
+    SECStatus (*p_SHA512_Flatten)(SHA512Context *cx, unsigned char *space);
+    SHA512Context *(*p_SHA512_Resurrect)(unsigned char *space, void *arg);
+
+    SHA384Context *(*p_SHA384_NewContext)(void);
+    void (*p_SHA384_DestroyContext)(SHA384Context *cx, PRBool freeit);
+    void (*p_SHA384_Begin)(SHA384Context *cx);
+    void (*p_SHA384_Update)(SHA384Context *cx, const unsigned char *input,
+                            unsigned int inputLen);
+    void (*p_SHA384_End)(SHA384Context *cx, unsigned char *digest,
+                         unsigned int *digestLen, unsigned int maxDigestLen);
+    SECStatus (*p_SHA384_HashBuf)(unsigned char *dest, const unsigned char *src,
+                                  PRUint32 src_length);
+    SECStatus (*p_SHA384_Hash)(unsigned char *dest, const char *src);
+    void (*p_SHA384_TraceState)(SHA384Context *cx);
+    unsigned int (*p_SHA384_FlattenSize)(SHA384Context *cx);
+    SECStatus (*p_SHA384_Flatten)(SHA384Context *cx, unsigned char *space);
+    SHA384Context *(*p_SHA384_Resurrect)(unsigned char *space, void *arg);
+
+    /* Version 3.003 came to here */
+
+    AESKeyWrapContext *(*p_AESKeyWrap_CreateContext)(const unsigned char *key,
+                                                     const unsigned char *iv, int encrypt, unsigned int keylen);
+
+    void (*p_AESKeyWrap_DestroyContext)(AESKeyWrapContext *cx, PRBool freeit);
+
+    SECStatus (*p_AESKeyWrap_Encrypt)(AESKeyWrapContext *cx,
+                                      unsigned char *output,
+                                      unsigned int *outputLen, unsigned int maxOutputLen,
+                                      const unsigned char *input, unsigned int inputLen);
+
+    SECStatus (*p_AESKeyWrap_Decrypt)(AESKeyWrapContext *cx,
+                                      unsigned char *output,
+                                      unsigned int *outputLen, unsigned int maxOutputLen,
+                                      const unsigned char *input, unsigned int inputLen);
+
+    /* Version 3.004 came to here */
+
+    PRBool (*p_BLAPI_SHVerify)(const char *name, PRFuncPtr addr);
+    PRBool (*p_BLAPI_VerifySelf)(const char *name);
+
+    /* Version 3.005 came to here */
+
+    SECStatus (*p_EC_NewKey)(ECParams *params,
+                             ECPrivateKey **privKey);
+
+    SECStatus (*p_EC_NewKeyFromSeed)(ECParams *params,
+                                     ECPrivateKey **privKey,
+                                     const unsigned char *seed,
+                                     int seedlen);
+
+    SECStatus (*p_EC_ValidatePublicKey)(ECParams *params,
+                                        SECItem *publicValue);
+
+    SECStatus (*p_ECDH_Derive)(SECItem *publicValue,
+                               ECParams *params,
+                               SECItem *privateValue,
+                               PRBool withCofactor,
+                               SECItem *derivedSecret);
+
+    SECStatus (*p_ECDSA_SignDigest)(ECPrivateKey *key,
+                                    SECItem *signature,
+                                    const SECItem *digest);
+
+    SECStatus (*p_ECDSA_VerifyDigest)(ECPublicKey *key,
+                                      const SECItem *signature,
+                                      const SECItem *digest);
+
+    SECStatus (*p_ECDSA_SignDigestWithSeed)(ECPrivateKey *key,
+                                            SECItem *signature,
+                                            const SECItem *digest,
+                                            const unsigned char *seed,
+                                            const int seedlen);
+
+    /* Version 3.006 came to here */
+
+    /* no modification to FREEBLVectorStr itself
+     * but ECParamStr was modified
+     */
+
+    /* Version 3.007 came to here */
+
+    SECStatus (*p_AES_InitContext)(AESContext *cx,
+                                   const unsigned char *key,
+                                   unsigned int keylen,
+                                   const unsigned char *iv,
+                                   int mode,
+                                   unsigned int encrypt,
+                                   unsigned int blocklen);
+    SECStatus (*p_AESKeyWrap_InitContext)(AESKeyWrapContext *cx,
+                                          const unsigned char *key,
+                                          unsigned int keylen,
+                                          const unsigned char *iv,
+                                          int mode,
+                                          unsigned int encrypt,
+                                          unsigned int blocklen);
+    SECStatus (*p_DES_InitContext)(DESContext *cx,
+                                   const unsigned char *key,
+                                   unsigned int keylen,
+                                   const unsigned char *iv,
+                                   int mode,
+                                   unsigned int encrypt,
+                                   unsigned int);
+    SECStatus (*p_RC2_InitContext)(RC2Context *cx,
+                                   const unsigned char *key,
+                                   unsigned int keylen,
+                                   const unsigned char *iv,
+                                   int mode,
+                                   unsigned int effectiveKeyLen,
+                                   unsigned int);
+    SECStatus (*p_RC4_InitContext)(RC4Context *cx,
+                                   const unsigned char *key,
+                                   unsigned int keylen,
+                                   const unsigned char *,
+                                   int,
+                                   unsigned int,
+                                   unsigned int);
+
+    AESContext *(*p_AES_AllocateContext)(void);
+    AESKeyWrapContext *(*p_AESKeyWrap_AllocateContext)(void);
+    DESContext *(*p_DES_AllocateContext)(void);
+    RC2Context *(*p_RC2_AllocateContext)(void);
+    RC4Context *(*p_RC4_AllocateContext)(void);
+
+    void (*p_MD2_Clone)(MD2Context *dest, MD2Context *src);
+    void (*p_MD5_Clone)(MD5Context *dest, MD5Context *src);
+    void (*p_SHA1_Clone)(SHA1Context *dest, SHA1Context *src);
+    void (*p_SHA256_Clone)(SHA256Context *dest, SHA256Context *src);
+    void (*p_SHA384_Clone)(SHA384Context *dest, SHA384Context *src);
+    void (*p_SHA512_Clone)(SHA512Context *dest, SHA512Context *src);
+
+    SECStatus (*p_TLS_PRF)(const SECItem *secret, const char *label,
+                           SECItem *seed, SECItem *result, PRBool isFIPS);
+
+    const SECHashObject *(*p_HASH_GetRawHashObject)(HASH_HashType hashType);
+
+    HMACContext *(*p_HMAC_Create)(const SECHashObject *hashObj,
+                                  const unsigned char *secret,
+                                  unsigned int secret_len, PRBool isFIPS);
+    SECStatus (*p_HMAC_Init)(HMACContext *cx, const SECHashObject *hash_obj,
+                             const unsigned char *secret,
+                             unsigned int secret_len, PRBool isFIPS);
+    void (*p_HMAC_Begin)(HMACContext *cx);
+    void (*p_HMAC_Update)(HMACContext *cx, const unsigned char *data,
+                          unsigned int data_len);
+    HMACContext *(*p_HMAC_Clone)(HMACContext *cx);
+    SECStatus (*p_HMAC_Finish)(HMACContext *cx, unsigned char *result,
+                               unsigned int *result_len,
+                               unsigned int max_result_len);
+    void (*p_HMAC_Destroy)(HMACContext *cx, PRBool freeit);
+
+    void (*p_RNG_SystemInfoForRNG)(void);
+
+    /* Version 3.008 came to here */
+
+    SECStatus (*p_FIPS186Change_GenerateX)(unsigned char *XKEY,
+                                           const unsigned char *XSEEDj,
+                                           unsigned char *x_j);
+    SECStatus (*p_FIPS186Change_ReduceModQForDSA)(const unsigned char *w,
+                                                  const unsigned char *q,
+                                                  unsigned char *xj);
+
+    /* Version 3.009 came to here */
+
+    SECStatus (*p_Camellia_InitContext)(CamelliaContext *cx,
+                                        const unsigned char *key,
+                                        unsigned int keylen,
+                                        const unsigned char *iv,
+                                        int mode,
+                                        unsigned int encrypt,
+                                        unsigned int unused);
+
+    CamelliaContext *(*p_Camellia_AllocateContext)(void);
+    CamelliaContext *(*p_Camellia_CreateContext)(const unsigned char *key,
+                                                 const unsigned char *iv,
+                                                 int mode, int encrypt,
+                                                 unsigned int keylen);
+    void (*p_Camellia_DestroyContext)(CamelliaContext *cx, PRBool freeit);
+
+    SECStatus (*p_Camellia_Encrypt)(CamelliaContext *cx, unsigned char *output,
+                                    unsigned int *outputLen,
+                                    unsigned int maxOutputLen,
+                                    const unsigned char *input,
+                                    unsigned int inputLen);
+
+    SECStatus (*p_Camellia_Decrypt)(CamelliaContext *cx, unsigned char *output,
+                                    unsigned int *outputLen,
+                                    unsigned int maxOutputLen,
+                                    const unsigned char *input,
+                                    unsigned int inputLen);
+
+    void (*p_PQG_DestroyParams)(PQGParams *params);
+
+    void (*p_PQG_DestroyVerify)(PQGVerify *vfy);
+
+    /* Version 3.010 came to here */
+
+    SECStatus (*p_SEED_InitContext)(SEEDContext *cx,
+                                    const unsigned char *key,
+                                    unsigned int keylen,
+                                    const unsigned char *iv,
+                                    int mode,
+                                    unsigned int encrypt,
+                                    unsigned int);
+
+    SEEDContext *(*p_SEED_AllocateContext)(void);
+
+    SEEDContext *(*p_SEED_CreateContext)(const unsigned char *key,
+                                         const unsigned char *iv,
+                                         int mode, PRBool encrypt);
+
+    void (*p_SEED_DestroyContext)(SEEDContext *cx, PRBool freeit);
+
+    SECStatus (*p_SEED_Encrypt)(SEEDContext *cx, unsigned char *output,
+                                unsigned int *outputLen, unsigned int maxOutputLen,
+                                const unsigned char *input, unsigned int inputLen);
+
+    SECStatus (*p_SEED_Decrypt)(SEEDContext *cx, unsigned char *output,
+                                unsigned int *outputLen, unsigned int maxOutputLen,
+                                const unsigned char *input, unsigned int inputLen);
+
+    SECStatus (*p_BL_Init)(void);
+    void (*p_BL_SetForkState)(PRBool);
+
+    SECStatus (*p_PRNGTEST_Instantiate)(const PRUint8 *entropy,
+                                        unsigned int entropy_len,
+                                        const PRUint8 *nonce,
+                                        unsigned int nonce_len,
+                                        const PRUint8 *personal_string,
+                                        unsigned int ps_len);
+
+    SECStatus (*p_PRNGTEST_Reseed)(const PRUint8 *entropy,
+                                   unsigned int entropy_len,
+                                   const PRUint8 *additional,
+                                   unsigned int additional_len);
+
+    SECStatus (*p_PRNGTEST_Generate)(PRUint8 *bytes,
+                                     unsigned int bytes_len,
+                                     const PRUint8 *additional,
+                                     unsigned int additional_len);
+
+    SECStatus (*p_PRNGTEST_Uninstantiate)(void);
+    /* Version 3.011 came to here */
+
+    SECStatus (*p_RSA_PopulatePrivateKey)(RSAPrivateKey *key);
+
+    SECStatus (*p_DSA_NewRandom)(PLArenaPool *arena, const SECItem *q,
+                                 SECItem *seed);
+
+    SECStatus (*p_JPAKE_Sign)(PLArenaPool *arena, const PQGParams *pqg,
+                              HASH_HashType hashType, const SECItem *signerID,
+                              const SECItem *x, const SECItem *testRandom,
+                              const SECItem *gxIn, SECItem *gxOut,
+                              SECItem *gv, SECItem *r);
+
+    SECStatus (*p_JPAKE_Verify)(PLArenaPool *arena, const PQGParams *pqg,
+                                HASH_HashType hashType, const SECItem *signerID,
+                                const SECItem *peerID, const SECItem *gx,
+                                const SECItem *gv, const SECItem *r);
+
+    SECStatus (*p_JPAKE_Round2)(PLArenaPool *arena, const SECItem *p,
+                                const SECItem *q, const SECItem *gx1,
+                                const SECItem *gx3, const SECItem *gx4,
+                                SECItem *base, const SECItem *x2,
+                                const SECItem *s, SECItem *x2s);
+
+    SECStatus (*p_JPAKE_Final)(PLArenaPool *arena, const SECItem *p,
+                               const SECItem *q, const SECItem *x2,
+                               const SECItem *gx4, const SECItem *x2s,
+                               const SECItem *B, SECItem *K);
+
+    /* Version 3.012 came to here */
+
+    SECStatus (*p_TLS_P_hash)(HASH_HashType hashAlg,
+                              const SECItem *secret,
+                              const char *label,
+                              SECItem *seed,
+                              SECItem *result,
+                              PRBool isFIPS);
+
+    SHA224Context *(*p_SHA224_NewContext)(void);
+    void (*p_SHA224_DestroyContext)(SHA224Context *cx, PRBool freeit);
+    void (*p_SHA224_Begin)(SHA224Context *cx);
+    void (*p_SHA224_Update)(SHA224Context *cx, const unsigned char *input,
+                            unsigned int inputLen);
+    void (*p_SHA224_End)(SHA224Context *cx, unsigned char *digest,
+                         unsigned int *digestLen, unsigned int maxDigestLen);
+    SECStatus (*p_SHA224_HashBuf)(unsigned char *dest, const unsigned char *src,
+                                  PRUint32 src_length);
+    SECStatus (*p_SHA224_Hash)(unsigned char *dest, const char *src);
+    void (*p_SHA224_TraceState)(SHA224Context *cx);
+    unsigned int (*p_SHA224_FlattenSize)(SHA224Context *cx);
+    SECStatus (*p_SHA224_Flatten)(SHA224Context *cx, unsigned char *space);
+    SHA224Context *(*p_SHA224_Resurrect)(unsigned char *space, void *arg);
+    void (*p_SHA224_Clone)(SHA224Context *dest, SHA224Context *src);
+    PRBool (*p_BLAPI_SHVerifyFile)(const char *name);
+
+    /* Version 3.013 came to here */
+
+    SECStatus (*p_PQG_ParamGenV2)(unsigned int L, unsigned int N,
+                                  unsigned int seedBytes,
+                                  PQGParams **pParams, PQGVerify **pVfy);
+    SECStatus (*p_PRNGTEST_RunHealthTests)(void);
+
+    /* Version 3.014 came to here */
+
+    SECStatus (*p_HMAC_ConstantTime)(
+        unsigned char *result,
+        unsigned int *resultLen,
+        unsigned int maxResultLen,
+        const SECHashObject *hashObj,
+        const unsigned char *secret,
+        unsigned int secretLen,
+        const unsigned char *header,
+        unsigned int headerLen,
+        const unsigned char *body,
+        unsigned int bodyLen,
+        unsigned int bodyTotalLen);
+
+    SECStatus (*p_SSLv3_MAC_ConstantTime)(
+        unsigned char *result,
+        unsigned int *resultLen,
+        unsigned int maxResultLen,
+        const SECHashObject *hashObj,
+        const unsigned char *secret,
+        unsigned int secretLen,
+        const unsigned char *header,
+        unsigned int headerLen,
+        const unsigned char *body,
+        unsigned int bodyLen,
+        unsigned int bodyTotalLen);
+
+    /* Version 3.015 came to here */
+
+    SECStatus (*p_RSA_SignRaw)(RSAPrivateKey *key,
+                               unsigned char *output,
+                               unsigned int *outputLen,
+                               unsigned int maxOutputLen,
+                               const unsigned char *input,
+                               unsigned int inputLen);
+    SECStatus (*p_RSA_CheckSignRaw)(RSAPublicKey *key,
+                                    const unsigned char *sig,
+                                    unsigned int sigLen,
+                                    const unsigned char *hash,
+                                    unsigned int hashLen);
+    SECStatus (*p_RSA_CheckSignRecoverRaw)(RSAPublicKey *key,
+                                           unsigned char *data,
+                                           unsigned int *dataLen,
+                                           unsigned int maxDataLen,
+                                           const unsigned char *sig,
+                                           unsigned int sigLen);
+    SECStatus (*p_RSA_EncryptRaw)(RSAPublicKey *key,
+                                  unsigned char *output,
+                                  unsigned int *outputLen,
+                                  unsigned int maxOutputLen,
+                                  const unsigned char *input,
+                                  unsigned int inputLen);
+    SECStatus (*p_RSA_DecryptRaw)(RSAPrivateKey *key,
+                                  unsigned char *output,
+                                  unsigned int *outputLen,
+                                  unsigned int maxOutputLen,
+                                  const unsigned char *input,
+                                  unsigned int inputLen);
+    SECStatus (*p_RSA_EncryptOAEP)(RSAPublicKey *key,
+                                   HASH_HashType hashAlg,
+                                   HASH_HashType maskHashAlg,
+                                   const unsigned char *label,
+                                   unsigned int labelLen,
+                                   const unsigned char *seed,
+                                   unsigned int seedLen,
+                                   unsigned char *output,
+                                   unsigned int *outputLen,
+                                   unsigned int maxOutputLen,
+                                   const unsigned char *input,
+                                   unsigned int inputLen);
+    SECStatus (*p_RSA_DecryptOAEP)(RSAPrivateKey *key,
+                                   HASH_HashType hashAlg,
+                                   HASH_HashType maskHashAlg,
+                                   const unsigned char *label,
+                                   unsigned int labelLen,
+                                   unsigned char *output,
+                                   unsigned int *outputLen,
+                                   unsigned int maxOutputLen,
+                                   const unsigned char *input,
+                                   unsigned int inputLen);
+    SECStatus (*p_RSA_EncryptBlock)(RSAPublicKey *key,
+                                    unsigned char *output,
+                                    unsigned int *outputLen,
+                                    unsigned int maxOutputLen,
+                                    const unsigned char *input,
+                                    unsigned int inputLen);
+    SECStatus (*p_RSA_DecryptBlock)(RSAPrivateKey *key,
+                                    unsigned char *output,
+                                    unsigned int *outputLen,
+                                    unsigned int maxOutputLen,
+                                    const unsigned char *input,
+                                    unsigned int inputLen);
+    SECStatus (*p_RSA_SignPSS)(RSAPrivateKey *key,
+                               HASH_HashType hashAlg,
+                               HASH_HashType maskHashAlg,
+                               const unsigned char *salt,
+                               unsigned int saltLen,
+                               unsigned char *output,
+                               unsigned int *outputLen,
+                               unsigned int maxOutputLen,
+                               const unsigned char *input,
+                               unsigned int inputLen);
+    SECStatus (*p_RSA_CheckSignPSS)(RSAPublicKey *key,
+                                    HASH_HashType hashAlg,
+                                    HASH_HashType maskHashAlg,
+                                    unsigned int saltLen,
+                                    const unsigned char *sig,
+                                    unsigned int sigLen,
+                                    const unsigned char *hash,
+                                    unsigned int hashLen);
+    SECStatus (*p_RSA_Sign)(RSAPrivateKey *key,
+                            unsigned char *output,
+                            unsigned int *outputLen,
+                            unsigned int maxOutputLen,
+                            const unsigned char *input,
+                            unsigned int inputLen);
+    SECStatus (*p_RSA_CheckSign)(RSAPublicKey *key,
+                                 const unsigned char *sig,
+                                 unsigned int sigLen,
+                                 const unsigned char *data,
+                                 unsigned int dataLen);
+    SECStatus (*p_RSA_CheckSignRecover)(RSAPublicKey *key,
+                                        unsigned char *output,
+                                        unsigned int *outputLen,
+                                        unsigned int maxOutputLen,
+                                        const unsigned char *sig,
+                                        unsigned int sigLen);
+
+    /* Version 3.016 came to here */
+
+    SECStatus (*p_EC_FillParams)(PLArenaPool *arena,
+                                 const SECItem *encodedParams, ECParams *params);
+    SECStatus (*p_EC_DecodeParams)(const SECItem *encodedParams,
+                                   ECParams **ecparams);
+    SECStatus (*p_EC_CopyParams)(PLArenaPool *arena, ECParams *dstParams,
+                                 const ECParams *srcParams);
+
+    /* Version 3.017 came to here */
+
+    SECStatus (*p_ChaCha20Poly1305_InitContext)(ChaCha20Poly1305Context *ctx,
+                                                const unsigned char *key,
+                                                unsigned int keyLen,
+                                                unsigned int tagLen);
+
+    ChaCha20Poly1305Context *(*p_ChaCha20Poly1305_CreateContext)(
+        const unsigned char *key, unsigned int keyLen, unsigned int tagLen);
+
+    void (*p_ChaCha20Poly1305_DestroyContext)(ChaCha20Poly1305Context *ctx,
+                                              PRBool freeit);
+
+    SECStatus (*p_ChaCha20Poly1305_Seal)(
+        const ChaCha20Poly1305Context *ctx, unsigned char *output,
+        unsigned int *outputLen, unsigned int maxOutputLen,
+        const unsigned char *input, unsigned int inputLen,
+        const unsigned char *nonce, unsigned int nonceLen,
+        const unsigned char *ad, unsigned int adLen);
+
+    SECStatus (*p_ChaCha20Poly1305_Open)(
+        const ChaCha20Poly1305Context *ctx, unsigned char *output,
+        unsigned int *outputLen, unsigned int maxOutputLen,
+        const unsigned char *input, unsigned int inputLen,
+        const unsigned char *nonce, unsigned int nonceLen,
+        const unsigned char *ad, unsigned int adLen);
+
+    /* Version 3.018 came to here */
+
+    int (*p_EC_GetPointSize)(const ECParams *);
+
+    /* Version 3.019 came to here */
+
+    /* Add new function pointers at the end of this struct and bump
+     * FREEBL_VERSION at the beginning of this file. */
+};
+
+typedef struct FREEBLVectorStr FREEBLVector;
+
+#ifdef FREEBL_LOWHASH
+#include "nsslowhash.h"
+
+#define NSSLOW_VERSION 0x0300
+
+struct NSSLOWVectorStr { /* function table handed out by NSSLOW_GetVector for the NSSLOW hash API */
+    unsigned short length;  /* of this struct in bytes; the lowhash loader requires >= sizeof(NSSLOWVector) */
+    unsigned short version; /* of this struct; the lowhash loader compares its high and low bytes with NSSLOW_VERSION */
+    const FREEBLVector *(*p_FREEBL_GetVector)(void); /* accessor for the full freebl vector */
+    NSSLOWInitContext *(*p_NSSLOW_Init)(void); /* target of NSSLOW_Init() */
+    void (*p_NSSLOW_Shutdown)(NSSLOWInitContext *context); /* target of NSSLOW_Shutdown() */
+    void (*p_NSSLOW_Reset)(NSSLOWInitContext *context); /* target of NSSLOW_Reset() */
+    NSSLOWHASHContext *(*p_NSSLOWHASH_NewContext)(
+        NSSLOWInitContext *initContext,
+        HASH_HashType hashType); /* target of NSSLOWHASH_NewContext() */
+    void (*p_NSSLOWHASH_Begin)(NSSLOWHASHContext *context); /* target of NSSLOWHASH_Begin() */
+    void (*p_NSSLOWHASH_Update)(NSSLOWHASHContext *context,
+                                const unsigned char *buf,
+                                unsigned int len); /* target of NSSLOWHASH_Update() */
+    void (*p_NSSLOWHASH_End)(NSSLOWHASHContext *context,
+                             unsigned char *buf,
+                             unsigned int *ret, unsigned int len); /* target of NSSLOWHASH_End() */
+    void (*p_NSSLOWHASH_Destroy)(NSSLOWHASHContext *context); /* target of NSSLOWHASH_Destroy() */
+    unsigned int (*p_NSSLOWHASH_Length)(NSSLOWHASHContext *context); /* target of NSSLOWHASH_Length() */
+};
+
+typedef struct NSSLOWVectorStr NSSLOWVector;
+#endif
+
+SEC_BEGIN_PROTOS
+
+#ifdef FREEBL_LOWHASH
+typedef const NSSLOWVector *NSSLOWGetVectorFn(void); /* signature of the NSSLOW vector accessor */
+
+extern NSSLOWGetVectorFn NSSLOW_GetVector; /* lowhash_vector.c resolves this symbol by name with dlsym() */
+#endif /* FREEBL_LOWHASH */
+
+typedef const FREEBLVector *FREEBLGetVectorFn(void); /* signature of the freebl vector accessor */
+
+extern FREEBLGetVectorFn FREEBL_GetVector; /* returns the FREEBLVector function table */
+
+SEC_END_PROTOS
+
+#endif
diff --git a/security/nss/lib/freebl/lowhash_vector.c b/security/nss/lib/freebl/lowhash_vector.c
new file mode 100644
index 000000000..7690c98da
--- /dev/null
+++ b/security/nss/lib/freebl/lowhash_vector.c
@@ -0,0 +1,217 @@
+/*
+ * lowhash_vector.c - load the freebl DSO and forward the NSSLOW API to it.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define _GNU_SOURCE 1
+#include "loader.h"
+#include "prmem.h"
+#include "prerror.h"
+#include "prinit.h"
+#include "prenv.h"
+#include "blname.c"
+
+#include "prio.h"
+#include "prprf.h"
+#include <stdio.h>
+#include "prsystem.h"
+#include "nsslowhash.h"
+#include <dlfcn.h>
+#include "pratom.h"
+
+static PRLibrary *blLib;
+
+#define LSB(x) ((x)&0xff)
+#define MSB(x) ((x) >> 8)
+
+static const NSSLOWVector *vector;
+static const char *libraryName = NULL;
+
+/* Pretty much only glibc uses this, so make sure we don't have any
+ * dependencies on NSPR. */
+#undef PORT_Alloc
+#undef PORT_Free
+#define PORT_Alloc malloc
+#define PR_Malloc malloc
+#define PORT_Free free
+#define PR_Free free
+#define PR_GetDirectorySeparator() '/'
+#define PR_LoadLibraryWithFlags(libSpec, flags) \
+    ((PRLibrary *)dlopen((libSpec).value.pathname, RTLD_NOW | RTLD_LOCAL)) /* NSPR-free stand-in; 'flags' is ignored */
+#define PR_GetLibraryFilePathname(name, addr) \
+    freebl_lowhash_getLibraryFilePath(addr)
+
+static char *
+freebl_lowhash_getLibraryFilePath(void *addr)
+{
+    /* Stand-in for PR_GetLibraryFilePathname: ask the dynamic linker which
+     * object contains addr and hand back a strdup()ed copy of its path
+     * (the caller owns it), or NULL when dladdr() cannot resolve addr. */
+    Dl_info info;
+    return dladdr(addr, &info) != 0 ? strdup(info.dli_fname) : NULL;
+}
+
+/*
+ * The PR_LoadLibraryWithFlags call above defines this variable away, so we
+ * don't need it..
+ */
+#ifdef nodef
+static const char *NameOfThisSharedLib =
+    SHLIB_PREFIX "freebl" SHLIB_VERSION "." SHLIB_SUFFIX;
+#endif
+
+#include "genload.c"
+
+/* Load the freebl DSO and adopt the NSSLOW vector it exports. */
+/* This function must be run only once. */
+static PRStatus
+freebl_LoadDSO(void)
+{
+    const char *name = getLibName();
+    PRLibrary *handle;
+    NSSLOWGetVectorFn *getVector;
+    const NSSLOWVector *dsoVector;
+
+    if (!name) {
+        /*PR_SetError(PR_LOAD_LIBRARY_ERROR,0); */
+        return PR_FAILURE;
+    }
+    handle = loader_LoadLibrary(name);
+    if (!handle) {
+        return PR_FAILURE;
+    }
+
+    getVector = (NSSLOWGetVectorFn *)dlsym(handle, "NSSLOW_GetVector");
+    dsoVector = getVector ? getVector() : NULL;
+    /* Adopt the table only when the major version matches ours, the minor
+     * version is not older than ours, and the table is not shorter. */
+    if (dsoVector &&
+        MSB(dsoVector->version) == MSB(NSSLOW_VERSION) &&
+        LSB(dsoVector->version) >= LSB(NSSLOW_VERSION) &&
+        dsoVector->length >= sizeof(NSSLOWVector)) {
+        vector = dsoVector;
+        libraryName = name;
+        blLib = handle;
+        return PR_SUCCESS;
+    }
+    (void)dlclose(handle); /* unusable DSO: drop our reference */
+    return PR_FAILURE;
+}
+
+static PRCallOnceType loadFreeBLOnce;
+
+static PRStatus
+freebl_RunLoaderOnce(void)
+{
+    /* Don't have NSPR, so can't use the real PR_CallOnce; implement a
+     * stripped down version. Returns the status of the single load. */
+    if (!loadFreeBLOnce.initialized) {
+        if (__sync_lock_test_and_set(&loadFreeBLOnce.inProgress, 1) == 0) {
+            loadFreeBLOnce.status = freebl_LoadDSO();
+            /* Publish status (and vector) before raising 'initialized'. */
+            __sync_synchronize();
+            loadFreeBLOnce.initialized = 1;
+        } else {
+            /* Lost the race. No condition variables, so poll;
+             * 'initialized' only ever gets set, never cleared. */
+            while (!loadFreeBLOnce.initialized) {
+                sleep(1); /* just give up the CPU */
+            }
+        }
+    }
+    /* Pairs with the publisher's barrier before status is read. */
+    __sync_synchronize();
+    return loadFreeBLOnce.status;
+}
+
+const FREEBLVector *
+FREEBL_GetVector(void)
+{
+    /* Load the DSO on first use, then ask it for the full freebl vector.
+     * The second test of 'vector' guards the call should the loader report
+     * success without a table; NULL is returned when none is available. */
+    if ((vector || PR_SUCCESS == freebl_RunLoaderOnce()) && vector) {
+        return (vector->p_FREEBL_GetVector)();
+    }
+    return NULL;
+}
+
+NSSLOWInitContext *
+NSSLOW_Init(void)
+{
+    /* Forward to the DSO, loading it on first use; NULL if it won't load. */
+    return (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+               ? (vector->p_NSSLOW_Init)() : NULL;
+}
+
+void
+NSSLOW_Shutdown(NSSLOWInitContext *context)
+{
+    /* Forward to the DSO; silently does nothing if it cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        (vector->p_NSSLOW_Shutdown)(context);
+}
+
+void
+NSSLOW_Reset(NSSLOWInitContext *context)
+{
+    /* Forward to the DSO; silently does nothing if it cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        (vector->p_NSSLOW_Reset)(context);
+}
+
+NSSLOWHASHContext *
+NSSLOWHASH_NewContext(
+    NSSLOWInitContext *initContext,
+    HASH_HashType hashType)
+{
+    return (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+               ? (vector->p_NSSLOWHASH_NewContext)(initContext, hashType)
+               : NULL; /* the DSO could not be loaded */
+}
+
+void
+NSSLOWHASH_Begin(NSSLOWHASHContext *context)
+{
+    /* Forward to the DSO; silently does nothing if it cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        (vector->p_NSSLOWHASH_Begin)(context);
+}
+
+void
+NSSLOWHASH_Update(NSSLOWHASHContext *context,
+                  const unsigned char *buf,
+                  unsigned int len)
+{
+    /* Forward to the DSO; silently does nothing if it cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        (vector->p_NSSLOWHASH_Update)(context, buf, len);
+}
+
+void
+NSSLOWHASH_End(NSSLOWHASHContext *context,
+               unsigned char *buf,
+               unsigned int *ret, unsigned int len)
+{
+    /* Forward to the DSO; buf and *ret are left untouched if it cannot load. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        (vector->p_NSSLOWHASH_End)(context, buf, ret, len);
+}
+
+void
+NSSLOWHASH_Destroy(NSSLOWHASHContext *context)
+{
+    /* Forward to the DSO; silently does nothing if it cannot be loaded. */
+    if (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+        (vector->p_NSSLOWHASH_Destroy)(context);
+}
+
+unsigned int
+NSSLOWHASH_Length(NSSLOWHASHContext *context)
+{
+    /* Forward to the DSO; (unsigned int)-1 reports that it cannot be loaded. */
+    return (vector || PR_SUCCESS == freebl_RunLoaderOnce())
+               ? (vector->p_NSSLOWHASH_Length)(context) : (unsigned int)-1;
+}
diff --git a/security/nss/lib/freebl/manifest.mn b/security/nss/lib/freebl/manifest.mn
new file mode 100644
index 000000000..1ef983907
--- /dev/null
+++ b/security/nss/lib/freebl/manifest.mn
@@ -0,0 +1,195 @@
+# 
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# NOTE: any ifdefs in this file must be defined on the gmake command line
+# (if anywhere).  They cannot come from Makefile or config.mk 
+
+CORE_DEPTH = ../..
+
+MODULE = nss
+
+# Copied from Linux.mk. We have a chicken-and-egg issue here: we need to set
+# the library name before we call the platform code in coreconf, but we need
+# to pick up the automatic setting of FREEBL_LOWHASH before we can set the
+# library name. So for now we mimic the code in Linux.mk to get the
+# automatic setting early.
+#
+# On Linux 2.6 or later, build libfreebl3.so with no NSPR and libnssutil3.so
+# dependencies by default.  Set FREEBL_NO_DEPEND to 0 in the environment to
+# override this.
+#
+#
+include $(CORE_DEPTH)/coreconf/arch.mk
+ifeq ($(OS_ARCH),Linux)
+ifneq ($(OS_TARGET),Android)
+ifeq (2.6,$(firstword $(sort 2.6 $(OS_RELEASE))))
+ifndef FREEBL_NO_DEPEND
+FREEBL_NO_DEPEND = 1
+FREEBL_LOWHASH = 1
+endif # FREEBL_NO_DEPEND was not already defined
+endif # 2.6 sorts first against OS_RELEASE (string sort)
+endif # OS_TARGET is not Android
+endif # OS_ARCH is Linux
+
+
+LIBRARY_NAME = freebl
+LIBRARY_VERSION = 3
+
+ifdef FREEBL_CHILD_BUILD
+  ifdef USE_ABI32_INT32
+    LIBRARY_NAME = freebl_32int
+  endif
+  ifdef USE_ABI32_INT64
+    LIBRARY_NAME = freebl_32int64
+  endif
+  ifdef USE_ABI32_FPU
+    LIBRARY_NAME = freebl_32fpu
+  endif
+  ifdef USE_ABI64_INT
+    LIBRARY_NAME = freebl_64int
+  endif
+  ifdef USE_ABI64_FPU
+    LIBRARY_NAME = freebl_64fpu
+  endif
+  ifdef FREEBL_LOWHASH
+    LIBRARY_NAME = freeblpriv
+  endif
+  ifdef USE_STUB_BUILD
+    # for the stub build, reset name to the default (from freeblpriv)
+    LIBRARY_NAME = freebl
+  endif
+endif
+
+# if the library name contains _, we prefix the version with _
+ifneq (,$(findstring _,$(LIBRARY_NAME)))
+  LIBRARY_VERSION := _$(LIBRARY_VERSION)
+endif
+
+MAPFILE = $(OBJDIR)/$(LIBRARY_NAME).def
+
+SOFTOKEN_LIBRARY_VERSION = 3
+
+DEFINES += -DSHLIB_SUFFIX=\"$(DLL_SUFFIX)\" -DSHLIB_PREFIX=\"$(DLL_PREFIX)\" \
+	-DSHLIB_VERSION=\"$(LIBRARY_VERSION)\" \
+	-DSOFTOKEN_SHLIB_VERSION=\"$(SOFTOKEN_LIBRARY_VERSION)\"
+
+REQUIRES = 
+
+EXPORTS = \
+	blapit.h \
+	shsign.h \
+	ecl-exp.h \
+	$(LOWHASH_EXPORTS) \
+	$(NULL)
+
+PRIVATE_EXPORTS = \
+	alghmac.h \
+	blapi.h \
+	chacha20poly1305.h \
+	hmacct.h \
+	secmpi.h \
+	secrng.h \
+	ec.h \
+	ecl.h \
+	ecl-curve.h \
+	$(NULL)
+
+MPI_HDRS = mpi-config.h mpi.h mpi-priv.h mplogic.h mpprime.h logtab.h mp_gf2m.h
+MPI_SRCS = mpprime.c mpmontg.c mplogic.c mpi.c mp_gf2m.c
+
+
+ECL_HDRS = ecl-exp.h ecl.h ecp.h ecl-priv.h
+ifndef NSS_DISABLE_ECC
+ECL_SRCS = ecl.c ecl_curve.c ecl_mult.c ecl_gf.c \
+	ecp_aff.c ecp_jac.c ecp_mont.c \
+	ec_naf.c ecp_jm.c ecp_256.c ecp_384.c ecp_521.c \
+	ecp_256_32.c ecp_25519.c
+else
+ECL_SRCS = $(NULL)
+endif
+SHA_SRCS = sha_fast.c
+MPCPU_SRCS = mpcpucache.c
+
+CSRCS = \
+	freeblver.c \
+	ldvector.c \
+	sysrand.c \
+	$(SHA_SRCS) \
+	md2.c \
+	md5.c \
+	sha512.c \
+	alghmac.c \
+	rawhash.c \
+	alg2268.c \
+	arcfour.c \
+	arcfive.c \
+	desblapi.c \
+	des.c \
+	drbg.c \
+	chacha20poly1305.c \
+	cts.c \
+	ctr.c \
+	fipsfreebl.c \
+	gcm.c \
+	hmacct.c \
+	rijndael.c \
+	aeskeywrap.c \
+	camellia.c \
+	dh.c \
+	ec.c \
+	ecdecode.c \
+	pqg.c \
+	dsa.c \
+	rsa.c \
+	rsapkcs.c \
+	shvfy.c \
+	tlsprfalg.c \
+	seed.c \
+	jpake.c \
+	$(MPI_SRCS) \
+	$(MPCPU_SRCS) \
+	$(ECL_SRCS) \
+	$(STUBS_SRCS) \
+	$(LOWHASH_SRCS) \
+	$(EXTRA_SRCS) \
+	$(NULL)
+
+ALL_CSRCS := $(CSRCS)
+
+ALL_HDRS =  \
+	alghmac.h \
+	blapi.h \
+	blapit.h \
+	des.h \
+	ec.h \
+	loader.h \
+	rijndael.h \
+	camellia.h \
+	secmpi.h \
+	sha_fast.h \
+	sha256.h \
+	shsign.h \
+	vis_proto.h \
+	seed.h \
+	$(NULL)
+
+
+ifdef AES_GEN_TBL
+DEFINES += -DRIJNDAEL_GENERATE_TABLES
+else # exactly one RIJNDAEL_* define is added; the first AES_GEN_* that is set wins
+ifdef AES_GEN_TBL_M
+DEFINES += -DRIJNDAEL_GENERATE_TABLES_MACRO
+else
+ifdef AES_GEN_VAL
+DEFINES += -DRIJNDAEL_GENERATE_VALUES
+else
+ifdef AES_GEN_VAL_M
+DEFINES += -DRIJNDAEL_GENERATE_VALUES_MACRO
+else
+DEFINES += -DRIJNDAEL_INCLUDE_TABLES
+endif # AES_GEN_VAL_M
+endif # AES_GEN_VAL
+endif # AES_GEN_TBL_M
+endif # AES_GEN_TBL
diff --git a/security/nss/lib/freebl/md2.c b/security/nss/lib/freebl/md2.c
new file mode 100644
index 000000000..cb3d3d82b
--- /dev/null
+++ b/security/nss/lib/freebl/md2.c
@@ -0,0 +1,269 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+
+#include "blapi.h"
+
+#define MD2_DIGEST_LEN 16
+#define MD2_BUFSIZE 16
+#define MD2_X_SIZE 48  /* The X array, [CV | INPUT | TMP VARS] */
+#define MD2_CV 0       /* index into X for chaining variables */
+#define MD2_INPUT 16   /* index into X for input */
+#define MD2_TMPVARS 32 /* index into X for temporary variables */
+#define MD2_CHECKSUM_SIZE 16
+
+struct MD2ContextStr {
+    unsigned char checksum[MD2_BUFSIZE]; /* running checksum; MD2_End feeds it in as the final block */
+    unsigned char X[MD2_X_SIZE]; /* [CV | INPUT | TMP VARS], at offsets MD2_CV, MD2_INPUT, MD2_TMPVARS */
+    PRUint8 unusedBuffer; /* number of bytes still free in the INPUT part of X */
+};
+
+static const PRUint8 MD2S[256] = { /* byte substitution table indexed by md2_compress(); entries are octal */
+    0051, 0056, 0103, 0311, 0242, 0330, 0174, 0001,
+    0075, 0066, 0124, 0241, 0354, 0360, 0006, 0023,
+    0142, 0247, 0005, 0363, 0300, 0307, 0163, 0214,
+    0230, 0223, 0053, 0331, 0274, 0114, 0202, 0312,
+    0036, 0233, 0127, 0074, 0375, 0324, 0340, 0026,
+    0147, 0102, 0157, 0030, 0212, 0027, 0345, 0022,
+    0276, 0116, 0304, 0326, 0332, 0236, 0336, 0111,
+    0240, 0373, 0365, 0216, 0273, 0057, 0356, 0172,
+    0251, 0150, 0171, 0221, 0025, 0262, 0007, 0077,
+    0224, 0302, 0020, 0211, 0013, 0042, 0137, 0041,
+    0200, 0177, 0135, 0232, 0132, 0220, 0062, 0047,
+    0065, 0076, 0314, 0347, 0277, 0367, 0227, 0003,
+    0377, 0031, 0060, 0263, 0110, 0245, 0265, 0321,
+    0327, 0136, 0222, 0052, 0254, 0126, 0252, 0306,
+    0117, 0270, 0070, 0322, 0226, 0244, 0175, 0266,
+    0166, 0374, 0153, 0342, 0234, 0164, 0004, 0361,
+    0105, 0235, 0160, 0131, 0144, 0161, 0207, 0040,
+    0206, 0133, 0317, 0145, 0346, 0055, 0250, 0002,
+    0033, 0140, 0045, 0255, 0256, 0260, 0271, 0366,
+    0034, 0106, 0141, 0151, 0064, 0100, 0176, 0017,
+    0125, 0107, 0243, 0043, 0335, 0121, 0257, 0072,
+    0303, 0134, 0371, 0316, 0272, 0305, 0352, 0046,
+    0054, 0123, 0015, 0156, 0205, 0050, 0204, 0011,
+    0323, 0337, 0315, 0364, 0101, 0201, 0115, 0122,
+    0152, 0334, 0067, 0310, 0154, 0301, 0253, 0372,
+    0044, 0341, 0173, 0010, 0014, 0275, 0261, 0112,
+    0170, 0210, 0225, 0213, 0343, 0143, 0350, 0155,
+    0351, 0313, 0325, 0376, 0073, 0000, 0035, 0071,
+    0362, 0357, 0267, 0016, 0146, 0130, 0320, 0344,
+    0246, 0167, 0162, 0370, 0353, 0165, 0113, 0012,
+    0061, 0104, 0120, 0264, 0217, 0355, 0037, 0032,
+    0333, 0231, 0215, 0063, 0237, 0021, 0203, 0024
+};
+
+SECStatus
+MD2_Hash(unsigned char *dest, const char *src)
+{
+    unsigned int written; /* filled in by MD2_End; not otherwise used */
+    MD2Context *ctx = MD2_NewContext(); /* one-shot digest of the string src */
+    if (ctx == NULL) {
+        PORT_SetError(PR_OUT_OF_MEMORY_ERROR);
+        return SECFailure;
+    }
+    MD2_Begin(ctx);
+    MD2_Update(ctx, (const unsigned char *)src, PORT_Strlen(src));
+    MD2_End(ctx, dest, &written, MD2_DIGEST_LEN);
+    MD2_DestroyContext(ctx, PR_TRUE);
+    return SECSuccess;
+}
+
+MD2Context *
+MD2_NewContext(void)
+{
+    /* Zero-filled allocation; sets PR_OUT_OF_MEMORY_ERROR and yields NULL on failure. */
+    MD2Context *fresh = (MD2Context *)PORT_ZAlloc(sizeof(MD2Context));
+    if (!fresh) {
+        PORT_SetError(PR_OUT_OF_MEMORY_ERROR);
+    }
+    return fresh;
+}
+
+void
+MD2_DestroyContext(MD2Context *cx, PRBool freeit)
+{
+    if (freeit) /* NOTE(review): with freeit false nothing is wiped, unlike MD5_DestroyContext */
+        PORT_ZFree(cx, sizeof(*cx)); /* the context and its size are handed to PORT_ZFree */
+}
+
+void
+MD2_Begin(MD2Context *cx)
+{
+    memset(cx, 0, sizeof(*cx)); /* clears the checksum and every part of X */
+    cx->unusedBuffer = MD2_BUFSIZE; /* the input buffer starts out empty */
+}
+
+static void
+md2_compress(MD2Context *cx)
+{
+    int j; /* pass counter for the 18 passes below */
+    unsigned char P; /* byte chained from each step into the next */
+    P = cx->checksum[MD2_CHECKSUM_SIZE - 1]; /* the checksum chain starts from its last byte */
+/* Compute the running checksum, and set the tmp variables to be
+     * CV[i] XOR input[i]
+     */
+#define CKSUMFN(n)                                        \
+    P = cx->checksum[n] ^ MD2S[cx->X[MD2_INPUT + n] ^ P]; \
+    cx->checksum[n] = P;                                  \
+    cx->X[MD2_TMPVARS + n] = cx->X[n] ^ cx->X[MD2_INPUT + n];
+    CKSUMFN(0);
+    CKSUMFN(1);
+    CKSUMFN(2);
+    CKSUMFN(3);
+    CKSUMFN(4);
+    CKSUMFN(5);
+    CKSUMFN(6);
+    CKSUMFN(7);
+    CKSUMFN(8);
+    CKSUMFN(9);
+    CKSUMFN(10);
+    CKSUMFN(11);
+    CKSUMFN(12);
+    CKSUMFN(13);
+    CKSUMFN(14);
+    CKSUMFN(15);
+/* The compression function: one substitution step over byte n of X. */
+#define COMPRESS(n)         \
+    P = cx->X[n] ^ MD2S[P]; \
+    cx->X[n] = P;
+    P = 0x00; /* the chain restarts from zero for the passes over X */
+    for (j = 0; j < 18; j++) { /* 18 passes over all 48 bytes of X */
+        COMPRESS(0);
+        COMPRESS(1);
+        COMPRESS(2);
+        COMPRESS(3);
+        COMPRESS(4);
+        COMPRESS(5);
+        COMPRESS(6);
+        COMPRESS(7);
+        COMPRESS(8);
+        COMPRESS(9);
+        COMPRESS(10);
+        COMPRESS(11);
+        COMPRESS(12);
+        COMPRESS(13);
+        COMPRESS(14);
+        COMPRESS(15);
+        COMPRESS(16);
+        COMPRESS(17);
+        COMPRESS(18);
+        COMPRESS(19);
+        COMPRESS(20);
+        COMPRESS(21);
+        COMPRESS(22);
+        COMPRESS(23);
+        COMPRESS(24);
+        COMPRESS(25);
+        COMPRESS(26);
+        COMPRESS(27);
+        COMPRESS(28);
+        COMPRESS(29);
+        COMPRESS(30);
+        COMPRESS(31);
+        COMPRESS(32);
+        COMPRESS(33);
+        COMPRESS(34);
+        COMPRESS(35);
+        COMPRESS(36);
+        COMPRESS(37);
+        COMPRESS(38);
+        COMPRESS(39);
+        COMPRESS(40);
+        COMPRESS(41);
+        COMPRESS(42);
+        COMPRESS(43);
+        COMPRESS(44);
+        COMPRESS(45);
+        COMPRESS(46);
+        COMPRESS(47);
+        P = (P + j) % 256; /* add the pass number, modulo 256, before the next pass */
+    }
+    cx->unusedBuffer = MD2_BUFSIZE; /* the input block has been consumed */
+}
+
+void
+MD2_Update(MD2Context *cx, const unsigned char *input, unsigned int inputLen)
+{
+    PRUint32 bytesToConsume;
+    /* Top up a block left partially filled by an earlier call. */
+    if (cx->unusedBuffer != MD2_BUFSIZE) {
+        bytesToConsume = PR_MIN(inputLen, cx->unusedBuffer);
+        memcpy(&cx->X[MD2_INPUT + (MD2_BUFSIZE - cx->unusedBuffer)],
+               input, bytesToConsume);
+        if (bytesToConsume < cx->unusedBuffer) {
+            cx->unusedBuffer -= bytesToConsume;
+            return; /* block still incomplete: everything stays buffered */
+        }
+        md2_compress(cx); /* the block is now exactly full */
+        inputLen -= bytesToConsume;
+        input += bytesToConsume;
+    }
+    /* Iterate over 16-byte chunks of the input. */
+    while (inputLen >= MD2_BUFSIZE) {
+        memcpy(&cx->X[MD2_INPUT], input, MD2_BUFSIZE);
+        md2_compress(cx);
+        inputLen -= MD2_BUFSIZE;
+        input += MD2_BUFSIZE;
+    }
+    /* Copy any input that remains into the buffer. */
+    if (inputLen)
+        memcpy(&cx->X[MD2_INPUT], input, inputLen);
+    cx->unusedBuffer = MD2_BUFSIZE - inputLen;
+}
+
+void
+MD2_End(MD2Context *cx, unsigned char *digest,
+        unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    PRUint8 padLen; /* count of pad bytes, and also the value of each one */
+    if (maxDigestLen < MD2_BUFSIZE) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return;
+    }
+    padLen = cx->unusedBuffer;
+    memset(&cx->X[MD2_INPUT + (PRUint8)(MD2_BUFSIZE - padLen)], padLen, padLen);
+    md2_compress(cx);
+    /* The running checksum is then hashed as one extra block. */
+    memcpy(&cx->X[MD2_INPUT], cx->checksum, MD2_BUFSIZE);
+    md2_compress(cx);
+    *digestLen = MD2_DIGEST_LEN;
+    memcpy(digest, &cx->X[MD2_CV], MD2_DIGEST_LEN);
+}
+
+unsigned int
+MD2_FlattenSize(MD2Context *cx)
+{
+    return sizeof(*cx); /* MD2_Flatten and MD2_Resurrect copy the raw struct, so this is its size */
+}
+
+SECStatus
+MD2_Flatten(MD2Context *cx, unsigned char *space)
+{
+    memcpy(space, cx, sizeof(*cx)); /* raw copy; space must have room for sizeof(*cx) bytes */
+    return SECSuccess; /* there is no failure path */
+}
+
+MD2Context *
+MD2_Resurrect(unsigned char *space, void *arg)
+{
+    MD2Context *revived = MD2_NewContext(); /* 'arg' is not used */
+    if (revived == NULL)
+        return NULL;
+    return (MD2Context *)memcpy(revived, space, sizeof(MD2Context));
+}
+
+void
+MD2_Clone(MD2Context *dest, MD2Context *src)
+{
+    *dest = *src; /* whole-struct copy: checksum, X and unusedBuffer */
+}
diff --git a/security/nss/lib/freebl/md5.c b/security/nss/lib/freebl/md5.c
new file mode 100644
index 000000000..bdd36a61b
--- /dev/null
+++ b/security/nss/lib/freebl/md5.c
@@ -0,0 +1,598 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "prlong.h"
+
+#include "blapi.h"
+#include "blapii.h"
+
+#define MD5_HASH_LEN 16 /* digest length in bytes */
+#define MD5_BUFFER_SIZE 64 /* bytes per input block; same size as u.b in MD5ContextStr */
+#define MD5_END_BUFFER (MD5_BUFFER_SIZE - 8) /* NOTE(review): presumably where the 8-byte length field starts -- confirm in MD5_End */
+
+#define CV0_1 0x67452301 /* CV0_1..CV0_4: initial chaining values stored into cv[0..3] by MD5_Begin */
+#define CV0_2 0xefcdab89
+#define CV0_3 0x98badcfe
+#define CV0_4 0x10325476
+
+#define T1_0 0xd76aa478
+#define T1_1 0xe8c7b756
+#define T1_2 0x242070db
+#define T1_3 0xc1bdceee
+#define T1_4 0xf57c0faf
+#define T1_5 0x4787c62a
+#define T1_6 0xa8304613
+#define T1_7 0xfd469501
+#define T1_8 0x698098d8
+#define T1_9 0x8b44f7af
+#define T1_10 0xffff5bb1
+#define T1_11 0x895cd7be
+#define T1_12 0x6b901122
+#define T1_13 0xfd987193
+#define T1_14 0xa679438e
+#define T1_15 0x49b40821
+
+#define T2_0 0xf61e2562
+#define T2_1 0xc040b340
+#define T2_2 0x265e5a51
+#define T2_3 0xe9b6c7aa
+#define T2_4 0xd62f105d
+#define T2_5 0x02441453
+#define T2_6 0xd8a1e681
+#define T2_7 0xe7d3fbc8
+#define T2_8 0x21e1cde6
+#define T2_9 0xc33707d6
+#define T2_10 0xf4d50d87
+#define T2_11 0x455a14ed
+#define T2_12 0xa9e3e905
+#define T2_13 0xfcefa3f8
+#define T2_14 0x676f02d9
+#define T2_15 0x8d2a4c8a
+
+#define T3_0 0xfffa3942
+#define T3_1 0x8771f681
+#define T3_2 0x6d9d6122
+#define T3_3 0xfde5380c
+#define T3_4 0xa4beea44
+#define T3_5 0x4bdecfa9
+#define T3_6 0xf6bb4b60
+#define T3_7 0xbebfbc70
+#define T3_8 0x289b7ec6
+#define T3_9 0xeaa127fa
+#define T3_10 0xd4ef3085
+#define T3_11 0x04881d05
+#define T3_12 0xd9d4d039
+#define T3_13 0xe6db99e5
+#define T3_14 0x1fa27cf8
+#define T3_15 0xc4ac5665
+
+#define T4_0 0xf4292244
+#define T4_1 0x432aff97
+#define T4_2 0xab9423a7
+#define T4_3 0xfc93a039
+#define T4_4 0x655b59c3
+#define T4_5 0x8f0ccc92
+#define T4_6 0xffeff47d
+#define T4_7 0x85845dd1
+#define T4_8 0x6fa87e4f
+#define T4_9 0xfe2ce6e0
+#define T4_10 0xa3014314
+#define T4_11 0x4e0811a1
+#define T4_12 0xf7537e82
+#define T4_13 0xbd3af235
+#define T4_14 0x2ad7d2bb
+#define T4_15 0xeb86d391
+
+#define R1B0 0
+#define R1B1 1
+#define R1B2 2
+#define R1B3 3
+#define R1B4 4
+#define R1B5 5
+#define R1B6 6
+#define R1B7 7
+#define R1B8 8
+#define R1B9 9
+#define R1B10 10
+#define R1B11 11
+#define R1B12 12
+#define R1B13 13
+#define R1B14 14
+#define R1B15 15
+
+#define R2B0 1
+#define R2B1 6
+#define R2B2 11
+#define R2B3 0
+#define R2B4 5
+#define R2B5 10
+#define R2B6 15
+#define R2B7 4
+#define R2B8 9
+#define R2B9 14
+#define R2B10 3
+#define R2B11 8
+#define R2B12 13
+#define R2B13 2
+#define R2B14 7
+#define R2B15 12
+
+#define R3B0 5
+#define R3B1 8
+#define R3B2 11
+#define R3B3 14
+#define R3B4 1
+#define R3B5 4
+#define R3B6 7
+#define R3B7 10
+#define R3B8 13
+#define R3B9 0
+#define R3B10 3
+#define R3B11 6
+#define R3B12 9
+#define R3B13 12
+#define R3B14 15
+#define R3B15 2
+
+#define R4B0 0
+#define R4B1 7
+#define R4B2 14
+#define R4B3 5
+#define R4B4 12
+#define R4B5 3
+#define R4B6 10
+#define R4B7 1
+#define R4B8 8
+#define R4B9 15
+#define R4B10 6
+#define R4B11 13
+#define R4B12 4
+#define R4B13 11
+#define R4B14 2
+#define R4B15 9
+
+#define S1_0 7 /* S1_*: shift counts given to FF (and so to cls()) in md5_compress */
+#define S1_1 12
+#define S1_2 17
+#define S1_3 22
+
+#define S2_0 5 /* S2_*: shift counts given to GG */
+#define S2_1 9
+#define S2_2 14
+#define S2_3 20
+
+#define S3_0 4 /* S3_*: shift counts given to HH */
+#define S3_1 11
+#define S3_2 16
+#define S3_3 23
+
+#define S4_0 6 /* S4_*: shift counts given to II */
+#define S4_1 10
+#define S4_2 15
+#define S4_3 21
+
+struct MD5ContextStr {
+    PRUint32 lsbInput; /* presumably the low half of a 64-bit input length (cf. addto64) -- TODO confirm */
+    PRUint32 msbInput; /* presumably the high half of that length -- TODO confirm */
+    PRUint32 cv[4]; /* chaining values; seeded by MD5_Begin, read by md5_compress */
+    union {
+        PRUint8 b[64]; /* the input block viewed as bytes (aliased as inBuf) */
+        PRUint32 w[16]; /* the same block viewed as 32-bit words */
+    } u;
+};
+
+#define inBuf u.b
+
+SECStatus
+MD5_Hash(unsigned char *dest, const char *src)
+{
+    return MD5_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src)); /* hashes PORT_Strlen(src) bytes of src */
+}
+
+SECStatus
+MD5_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    MD5Context scratch;       /* lives on the stack; wiped before returning */
+    unsigned int digestBytes; /* set by MD5_End, not examined here */
+
+    MD5_Begin(&scratch);
+    MD5_Update(&scratch, src, src_length);
+    MD5_End(&scratch, dest, &digestBytes, MD5_HASH_LEN);
+    memset(&scratch, 0, sizeof(MD5Context));
+    return SECSuccess;
+}
+
+MD5Context *
+MD5_NewContext(void)
+{
+    /* Plain (non-zeroing) allocation: MD5_Begin initialises the context.
+     * On failure PR_OUT_OF_MEMORY_ERROR is set and NULL is returned. */
+    MD5Context *fresh = (MD5Context *)PORT_Alloc(sizeof(MD5Context));
+    if (!fresh) {
+        PORT_SetError(PR_OUT_OF_MEMORY_ERROR);
+    }
+    return fresh;
+}
+
+void
+MD5_DestroyContext(MD5Context *cx, PRBool freeit)
+{
+    memset(cx, 0, sizeof(MD5Context)); /* the state is always scrubbed */
+    if (!freeit)
+        return; /* the caller keeps the storage */
+    PORT_Free(cx);
+}
+
+void
+MD5_Begin(MD5Context *cx)
+{
+    /* Zero lsbInput/msbInput and load the initial chaining values.
+     * The input buffer is deliberately left as it is. */
+    cx->msbInput = cx->lsbInput = 0;
+    cx->cv[0] = CV0_1;
+    cx->cv[1] = CV0_2;
+    cx->cv[2] = CV0_3;
+    cx->cv[3] = CV0_4;
+}
+
+#define cls(i32, s) (tmp = (i32), tmp << (s) | tmp >> (32 - (s))) /* rotate left by s; needs an unsigned 32-bit 'tmp' in scope */
+
+#if defined(SOLARIS) || defined(HPUX) /* carry is added as the value of the comparison, without a branch */
+#define addto64(sumhigh, sumlow, addend) \
+    sumlow += addend;                    \
+    sumhigh += (sumlow < addend);
+#else /* NOTE(review): expands to bare statements (no do/while(0)); use only as a full statement */
+#define addto64(sumhigh, sumlow, addend) \
+    sumlow += addend;                    \
+    if (sumlow < addend)                 \
+        ++sumhigh;
+#endif /* SOLARIS || HPUX */
+
+#define MASK 0x00ff00ff /* selects bytes 0 and 2 of a 32-bit word */
+#ifdef IS_LITTLE_ENDIAN
+#define lendian(i32) \
+    (i32)
+#else /* byte-swap a 32-bit word; needs an unsigned 32-bit 'tmp' in scope */
+#define lendian(i32) \
+    (tmp = ((i32) >> 16) | ((i32) << 16), ((tmp & MASK) << 8) | ((tmp >> 8) & MASK))
+#endif /* IS_LITTLE_ENDIAN */
+
+#ifndef IS_LITTLE_ENDIAN
+
+#define lebytes(b4) /* build a 32-bit word from 4 bytes, lowest address = low byte; widened first so << 24 cannot overflow int */ \
+    ((PRUint32)(b4)[3] << 24 | (PRUint32)(b4)[2] << 16 | (PRUint32)(b4)[1] << 8 | (PRUint32)(b4)[0])
+
+static void
+md5_prep_state_le(MD5Context *cx)
+{
+    PRUint32 tmp, *w = cx->u.w; /* tmp: scratch for lendian(); w: the 16-word input block, converted in place */
+    w[0] = lendian(w[0]);
+    w[1] = lendian(w[1]);
+    w[2] = lendian(w[2]);
+    w[3] = lendian(w[3]);
+    w[4] = lendian(w[4]);
+    w[5] = lendian(w[5]);
+    w[6] = lendian(w[6]);
+    w[7] = lendian(w[7]);
+    w[8] = lendian(w[8]);
+    w[9] = lendian(w[9]);
+    w[10] = lendian(w[10]);
+    w[11] = lendian(w[11]);
+    w[12] = lendian(w[12]);
+    w[13] = lendian(w[13]);
+    w[14] = lendian(w[14]);
+    w[15] = lendian(w[15]);
+}
+
+static void
+md5_prep_buffer_le(MD5Context *cx, const PRUint8 *beBuf)
+{
+    cx->u.w[0] = lebytes(beBuf + 0); /* every word is assembled from the four bytes at its offset */
+    cx->u.w[1] = lebytes(beBuf + 4);
+    cx->u.w[2] = lebytes(beBuf + 8);
+    cx->u.w[3] = lebytes(beBuf + 12);
+    cx->u.w[4] = lebytes(beBuf + 16);
+    cx->u.w[5] = lebytes(beBuf + 20);
+    cx->u.w[6] = lebytes(beBuf + 24);
+    cx->u.w[7] = lebytes(beBuf + 28);
+    cx->u.w[8] = lebytes(beBuf + 32);
+    cx->u.w[9] = lebytes(beBuf + 36);
+    cx->u.w[10] = lebytes(beBuf + 40);
+    cx->u.w[11] = lebytes(beBuf + 44);
+    cx->u.w[12] = lebytes(beBuf + 48);
+    cx->u.w[13] = lebytes(beBuf + 52);
+    cx->u.w[14] = lebytes(beBuf + 56);
+    cx->u.w[15] = lebytes(beBuf + 60);
+}
+#endif
+
+#define F(X, Y, Z) /* bitwise mixing function used by FF */ \
+    (((X) & (Y)) | ((~(X)) & (Z)))
+
+#define G(X, Y, Z) /* bitwise mixing function used by GG */ \
+    (((X) & (Z)) | ((Y) & (~(Z))))
+
+#define H(X, Y, Z) /* bitwise mixing function used by HH */ \
+    ((X) ^ (Y) ^ (Z))
+
+#define I(X, Y, Z) /* bitwise mixing function used by II */ \
+    ((Y) ^ ((X) | (~(Z))))
+
+#define FF(a, b, c, d, bufint, s, ti) /* one step: assigns to a; relies on cls() and its 'tmp' */ \
+    a = b + cls(a + F(b, c, d) + bufint + ti, s)
+
+#define GG(a, b, c, d, bufint, s, ti) /* as FF, with G as the mixing function */ \
+    a = b + cls(a + G(b, c, d) + bufint + ti, s)
+
+#define HH(a, b, c, d, bufint, s, ti) /* as FF, with H as the mixing function */ \
+    a = b + cls(a + H(b, c, d) + bufint + ti, s)
+
+#define II(a, b, c, d, bufint, s, ti) /* as FF, with I as the mixing function */ \
+    a = b + cls(a + I(b, c, d) + bufint + ti, s)
+
+static void NO_SANITIZE_ALIGNMENT
+md5_compress(MD5Context *cx, const PRUint32 *wBuf)
+{ /* Runs 64 FF/GG/HH/II steps over the words of wBuf and adds the result into cx->cv. */
+    PRUint32 a, b, c, d;
+    PRUint32 tmp; /* NOTE(review): never named below; presumably scratch for cls() - confirm */
+    a = cx->cv[0]; /* work on local copies of the four chaining words */
+    b = cx->cv[1];
+    c = cx->cv[2];
+    d = cx->cv[3];
+    FF(a, b, c, d, wBuf[R1B0], S1_0, T1_0); /* 16 FF steps; target rotates a, d, c, b */
+    FF(d, a, b, c, wBuf[R1B1], S1_1, T1_1);
+    FF(c, d, a, b, wBuf[R1B2], S1_2, T1_2);
+    FF(b, c, d, a, wBuf[R1B3], S1_3, T1_3);
+    FF(a, b, c, d, wBuf[R1B4], S1_0, T1_4);
+    FF(d, a, b, c, wBuf[R1B5], S1_1, T1_5);
+    FF(c, d, a, b, wBuf[R1B6], S1_2, T1_6);
+    FF(b, c, d, a, wBuf[R1B7], S1_3, T1_7);
+    FF(a, b, c, d, wBuf[R1B8], S1_0, T1_8);
+    FF(d, a, b, c, wBuf[R1B9], S1_1, T1_9);
+    FF(c, d, a, b, wBuf[R1B10], S1_2, T1_10);
+    FF(b, c, d, a, wBuf[R1B11], S1_3, T1_11);
+    FF(a, b, c, d, wBuf[R1B12], S1_0, T1_12);
+    FF(d, a, b, c, wBuf[R1B13], S1_1, T1_13);
+    FF(c, d, a, b, wBuf[R1B14], S1_2, T1_14);
+    FF(b, c, d, a, wBuf[R1B15], S1_3, T1_15);
+    GG(a, b, c, d, wBuf[R2B0], S2_0, T2_0); /* 16 GG steps */
+    GG(d, a, b, c, wBuf[R2B1], S2_1, T2_1);
+    GG(c, d, a, b, wBuf[R2B2], S2_2, T2_2);
+    GG(b, c, d, a, wBuf[R2B3], S2_3, T2_3);
+    GG(a, b, c, d, wBuf[R2B4], S2_0, T2_4);
+    GG(d, a, b, c, wBuf[R2B5], S2_1, T2_5);
+    GG(c, d, a, b, wBuf[R2B6], S2_2, T2_6);
+    GG(b, c, d, a, wBuf[R2B7], S2_3, T2_7);
+    GG(a, b, c, d, wBuf[R2B8], S2_0, T2_8);
+    GG(d, a, b, c, wBuf[R2B9], S2_1, T2_9);
+    GG(c, d, a, b, wBuf[R2B10], S2_2, T2_10);
+    GG(b, c, d, a, wBuf[R2B11], S2_3, T2_11);
+    GG(a, b, c, d, wBuf[R2B12], S2_0, T2_12);
+    GG(d, a, b, c, wBuf[R2B13], S2_1, T2_13);
+    GG(c, d, a, b, wBuf[R2B14], S2_2, T2_14);
+    GG(b, c, d, a, wBuf[R2B15], S2_3, T2_15);
+    HH(a, b, c, d, wBuf[R3B0], S3_0, T3_0); /* 16 HH steps */
+    HH(d, a, b, c, wBuf[R3B1], S3_1, T3_1);
+    HH(c, d, a, b, wBuf[R3B2], S3_2, T3_2);
+    HH(b, c, d, a, wBuf[R3B3], S3_3, T3_3);
+    HH(a, b, c, d, wBuf[R3B4], S3_0, T3_4);
+    HH(d, a, b, c, wBuf[R3B5], S3_1, T3_5);
+    HH(c, d, a, b, wBuf[R3B6], S3_2, T3_6);
+    HH(b, c, d, a, wBuf[R3B7], S3_3, T3_7);
+    HH(a, b, c, d, wBuf[R3B8], S3_0, T3_8);
+    HH(d, a, b, c, wBuf[R3B9], S3_1, T3_9);
+    HH(c, d, a, b, wBuf[R3B10], S3_2, T3_10);
+    HH(b, c, d, a, wBuf[R3B11], S3_3, T3_11);
+    HH(a, b, c, d, wBuf[R3B12], S3_0, T3_12);
+    HH(d, a, b, c, wBuf[R3B13], S3_1, T3_13);
+    HH(c, d, a, b, wBuf[R3B14], S3_2, T3_14);
+    HH(b, c, d, a, wBuf[R3B15], S3_3, T3_15);
+    II(a, b, c, d, wBuf[R4B0], S4_0, T4_0); /* 16 II steps */
+    II(d, a, b, c, wBuf[R4B1], S4_1, T4_1);
+    II(c, d, a, b, wBuf[R4B2], S4_2, T4_2);
+    II(b, c, d, a, wBuf[R4B3], S4_3, T4_3);
+    II(a, b, c, d, wBuf[R4B4], S4_0, T4_4);
+    II(d, a, b, c, wBuf[R4B5], S4_1, T4_5);
+    II(c, d, a, b, wBuf[R4B6], S4_2, T4_6);
+    II(b, c, d, a, wBuf[R4B7], S4_3, T4_7);
+    II(a, b, c, d, wBuf[R4B8], S4_0, T4_8);
+    II(d, a, b, c, wBuf[R4B9], S4_1, T4_9);
+    II(c, d, a, b, wBuf[R4B10], S4_2, T4_10);
+    II(b, c, d, a, wBuf[R4B11], S4_3, T4_11);
+    II(a, b, c, d, wBuf[R4B12], S4_0, T4_12);
+    II(d, a, b, c, wBuf[R4B13], S4_1, T4_13);
+    II(c, d, a, b, wBuf[R4B14], S4_2, T4_14);
+    II(b, c, d, a, wBuf[R4B15], S4_3, T4_15);
+    cx->cv[0] += a; /* fold the step results back into the chaining state */
+    cx->cv[1] += b;
+    cx->cv[2] += c;
+    cx->cv[3] += d;
+}
+
+void
+MD5_Update(MD5Context *cx, const unsigned char *input, unsigned int inputLen)
+{ /* Absorbs inputLen bytes: compresses every completed 64-byte block, buffers the rest. */
+    PRUint32 bytesToConsume;
+    PRUint32 inBufIndex = cx->lsbInput & 63; /* fill level of cx->inBuf, read before the counter moves */
+    const PRUint32 *wBuf; /* block handed to md5_compress in the main loop */
+
+    /* Add the number of input bytes to the 64-bit input counter. */
+    addto64(cx->msbInput, cx->lsbInput, inputLen);
+    if (inBufIndex) {
+        /* There is already data in the buffer.  Top it up from the input. */
+        bytesToConsume = PR_MIN(inputLen, MD5_BUFFER_SIZE - inBufIndex);
+        memcpy(&cx->inBuf[inBufIndex], input, bytesToConsume);
+        if (inBufIndex + bytesToConsume >= MD5_BUFFER_SIZE) {
+            /* The buffer is filled.  Run the compression function. */
+#ifndef IS_LITTLE_ENDIAN
+            md5_prep_state_le(cx);
+#endif
+            md5_compress(cx, cx->u.w);
+        }
+        /* Skip past the bytes that went into the buffer. */
+        inputLen -= bytesToConsume;
+        input += bytesToConsume;
+    }
+
+    /* Iterate over 64-byte chunks of the message. */
+    while (inputLen >= MD5_BUFFER_SIZE) {
+#ifdef IS_LITTLE_ENDIAN
+#ifdef HAVE_UNALIGNED_ACCESS
+        /* x86 can handle arithmetic on non-word-aligned buffers */
+        wBuf = (PRUint32 *)input;
+#else
+        if ((ptrdiff_t)input & 0x3) {
+            /* buffer not aligned, copy it to force alignment */
+            memcpy(cx->inBuf, input, MD5_BUFFER_SIZE);
+            wBuf = cx->u.w; /* NOTE(review): relies on u.w overlaying inBuf - confirm in MD5Context */
+        } else {
+            /* buffer is aligned: compress straight from the caller's memory */
+            wBuf = (PRUint32 *)input;
+        }
+#endif
+#else
+        md5_prep_buffer_le(cx, input); /* assemble the words byte by byte into cx->u.w */
+        wBuf = cx->u.w;
+#endif
+        md5_compress(cx, wBuf);
+        inputLen -= MD5_BUFFER_SIZE;
+        input += MD5_BUFFER_SIZE;
+    }
+
+    /* Tail of message (message bytes mod 64): kept at the start of inBuf. */
+    if (inputLen)
+        memcpy(cx->inBuf, input, inputLen);
+}
+
+static const unsigned char padbytes[] = {
+    0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+}; /* 120 bytes: a single 0x80 then 119 zeros; MD5_End feeds a prefix of this as padding */
+
+void
+MD5_End(MD5Context *cx, unsigned char *digest,
+        unsigned int *digestLen, unsigned int maxDigestLen)
+{ /* Pads the message, appends its bit length, compresses once more, writes MD5_HASH_LEN bytes to digest. */
+#ifndef IS_LITTLE_ENDIAN
+    PRUint32 tmp; /* NOTE(review): presumably scratch for the lendian() macro - confirm */
+#endif
+    PRUint32 lowInput, highInput;
+    PRUint32 inBufIndex = cx->lsbInput & 63; /* bytes currently waiting in the block buffer */
+
+    if (maxDigestLen < MD5_HASH_LEN) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return; /* digest and *digestLen are left untouched on this path */
+    }
+
+    /* Capture the message length in bits (byte count * 8) before padding. */
+    lowInput = cx->lsbInput;
+    highInput = (cx->msbInput << 3) | (lowInput >> 29);
+    lowInput <<= 3;
+
+    if (inBufIndex < MD5_END_BUFFER) { /* pad up to offset MD5_END_BUFFER of this block */
+        MD5_Update(cx, padbytes, MD5_END_BUFFER - inBufIndex);
+    } else { /* already at or past that offset: pad through to it in the next block */
+        MD5_Update(cx, padbytes,
+                   MD5_END_BUFFER + MD5_BUFFER_SIZE - inBufIndex);
+    }
+
+    /* Store the number of bits input (before padding) in the final 64 bits. */
+    cx->u.w[14] = lendian(lowInput);
+    cx->u.w[15] = lendian(highInput);
+
+    /* Final call to compress. */
+#ifndef IS_LITTLE_ENDIAN
+    md5_prep_state_le(cx);
+#endif
+    md5_compress(cx, cx->u.w);
+
+    /* Copy the resulting values out of the chain variables into return buf. */
+    if (digestLen)
+        *digestLen = MD5_HASH_LEN;
+#ifndef IS_LITTLE_ENDIAN
+    cx->cv[0] = lendian(cx->cv[0]); /* converted in place: cx->cv is modified by this call */
+    cx->cv[1] = lendian(cx->cv[1]);
+    cx->cv[2] = lendian(cx->cv[2]);
+    cx->cv[3] = lendian(cx->cv[3]);
+#endif
+    memcpy(digest, cx->cv, MD5_HASH_LEN);
+}
+
+void
+MD5_EndRaw(MD5Context *cx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{ /* Writes the current chaining words to digest; no padding, no compress call, cx is not modified. */
+#ifndef IS_LITTLE_ENDIAN
+    PRUint32 tmp; /* NOTE(review): presumably scratch for the lendian() macro - confirm */
+#endif
+    PRUint32 cv[4]; /* private copy so the context's own words stay as they are */
+
+    if (maxDigestLen < MD5_HASH_LEN) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return; /* digest and *digestLen are left untouched on this path */
+    }
+
+    memcpy(cv, cx->cv, sizeof(cv));
+#ifndef IS_LITTLE_ENDIAN
+    cv[0] = lendian(cv[0]); /* only the copy is converted */
+    cv[1] = lendian(cv[1]);
+    cv[2] = lendian(cv[2]);
+    cv[3] = lendian(cv[3]);
+#endif
+    memcpy(digest, cv, MD5_HASH_LEN);
+    if (digestLen)
+        *digestLen = MD5_HASH_LEN; /* digestLen is optional */
+}
+
+unsigned int
+MD5_FlattenSize(MD5Context *cx)
+{
+    return (unsigned int)sizeof *cx; /* same byte count that MD5_Flatten() copies */
+}
+
+SECStatus
+MD5_Flatten(MD5Context *cx, unsigned char *space)
+{
+    (void)memcpy(space, cx, sizeof(MD5Context)); /* raw byte image of the context */
+    return SECSuccess;                           /* no failure path exists here */
+}
+
+MD5Context *
+MD5_Resurrect(unsigned char *space, void *arg)
+{
+    MD5Context *fresh = MD5_NewContext(); /* arg is not used */
+    if (fresh == NULL)
+        return NULL; /* no context obtained; nothing is copied */
+    return memcpy(fresh, space, sizeof *fresh); /* memcpy hands back fresh */
+}
+
+void
+MD5_Clone(MD5Context *dest, MD5Context *src)
+{
+    (void)memcpy(dest, src, sizeof(MD5Context)); /* byte-for-byte copy of src into dest */
+}
+
+void
+MD5_TraceState(MD5Context *cx)
+{
+    PORT_SetError(PR_NOT_IMPLEMENTED_ERROR); /* stub: cx is ignored, only the error code is set */
+}
diff --git a/security/nss/lib/freebl/mknewpc2.c b/security/nss/lib/freebl/mknewpc2.c
new file mode 100644
index 000000000..6b2968816
--- /dev/null
+++ b/security/nss/lib/freebl/mknewpc2.c
@@ -0,0 +1,208 @@
+/*
+ *  mknewpc2.c
+ *
+ *  Generate PC-2 tables for DES-150 library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include <stdio.h>
+typedef unsigned char BYTE;
+typedef unsigned int HALF;
+
+#define DES_ENCRYPT 0
+#define DES_DECRYPT 1
+
+/* two 28-bit registers defined in key schedule production process */
+static HALF C0, D0;
+
+static HALF L0, R0;
+
+/* key schedule, 16 internal keys, each with 8 6-bit parts */
+static BYTE KS[8][16];
+
+/*
+ * This table takes the 56 bits in C0 and D0 and shows how they are
+ * permuted into the 8 6-bit parts of the key in the key schedule.
+ * The bits of C0 are numbered left to right, 1-28.
+ * The bits of D0 are numbered left to right, 29-56.
+ * Zeros in this table represent bits that are always zero.
+ * Note that all the bits in the first  4 rows come from C0,
+ *       and all the bits in the second 4 rows come from D0.
+ */
+static const BYTE PC2[64] = {
+    14, 17, 11, 24, 1, 5, 0, 0, /* S1 */
+    3, 28, 15, 6, 21, 10, 0, 0, /* S2 */
+    23, 19, 12, 4, 26, 8, 0, 0, /* S3 */
+    16, 7, 27, 20, 13, 2, 0, 0, /* S4 */
+
+    41, 52, 31, 37, 47, 55, 0, 0, /* S5 */
+    30, 40, 51, 45, 33, 48, 0, 0, /* S6 */
+    44, 49, 39, 56, 34, 53, 0, 0, /* S7 */
+    46, 42, 50, 36, 29, 32, 0, 0  /* S8 */
+};
+
+/* This table represents the same info as PC2, except that
+ * The bits of C0 and D0 are each numbered right to left, 0-27.
+ * -1 values indicate bits that are always zero.
+ * As before all the bits in the first  4 rows come from C0,
+ *       and all the bits in the second 4 rows come from D0.
+ */
+static signed char PC2a[64] = {
+    /* bits of C0 */
+    14, 11, 17, 4, 27, 23, -1, -1, /* S1 */
+    25, 0, 13, 22, 7, 18, -1, -1,  /* S2 */
+    5, 9, 16, 24, 2, 20, -1, -1,   /* S3 */
+    12, 21, 1, 8, 15, 26, -1, -1,  /* S4 */
+                                   /* bits of D0 */
+    15, 4, 25, 19, 9, 1, -1, -1,   /* S5 */
+    26, 16, 5, 11, 23, 8, -1, -1,  /* S6 */
+    12, 7, 17, 0, 22, 3, -1, -1,   /* S7 */
+    10, 14, 6, 20, 27, 24, -1, -1  /* S8 */
+};
+
+/* This table represents the same info as PC2a, except that
+ * The order of the rows has been changed to increase the efficiency
+ * with which the key schedule is created.
+ * Fewer shifts and ANDs are required to make the KS from these.
+ */
+static const signed char PC2b[64] = {
+    /* bits of C0 */
+    14, 11, 17, 4, 27, 23, -1, -1, /* S1 */
+    5, 9, 16, 24, 2, 20, -1, -1,   /* S3 */
+    25, 0, 13, 22, 7, 18, -1, -1,  /* S2 */
+    12, 21, 1, 8, 15, 26, -1, -1,  /* S4 */
+                                   /* bits of D0 */
+    26, 16, 5, 11, 23, 8, -1, -1,  /* S6 */
+    10, 14, 6, 20, 27, 24, -1, -1, /* S8 */
+    15, 4, 25, 19, 9, 1, -1, -1,   /* S5 */
+    12, 7, 17, 0, 22, 3, -1, -1    /* S7 */
+};
+
+/* Only 24 of the 28 bits in C0 and D0 are used in PC2.
+ * The used bits of C0 and D0 are grouped into 4 groups of 6,
+ * so that the PC2 permutation can be accomplished with 4 lookups
+ * in tables of 64 entries.
+ * The following table shows how the bits of C0 and D0 are grouped
+ * into indexes for the respective table lookups.
+ * Bits are numbered right-to-left, 0-27, as in PC2b.
+ */
+static BYTE NDX[48] = {
+    /* Bits of C0 */
+    27, 26, 25, 24, 23, 22, /* C0 table 0 */
+    18, 17, 16, 15, 14, 13, /* C0 table 1 */
+    9, 8, 7, 2, 1, 0,       /* C0 table 2 */
+    5, 4, 21, 20, 12, 11,   /* C0 table 3 */
+                            /* bits of D0 */
+    27, 26, 25, 24, 23, 22, /* D0 table 0 */
+    20, 19, 17, 16, 15, 14, /* D0 table 1 */
+    12, 11, 10, 9, 8, 7,    /* D0 table 2 */
+    6, 5, 4, 3, 1, 0        /* D0 table 3 */
+};
+
+/* Here's the code that does that grouping.
+    left   = PC2LOOKUP(0, 0, ((c0 >> 22) & 0x3F) );
+    left  |= PC2LOOKUP(0, 1, ((c0 >> 13) & 0x3F) );
+    left  |= PC2LOOKUP(0, 2, ((c0 >>  4) & 0x38) | (c0 & 0x7) );
+    left  |= PC2LOOKUP(0, 3, ((c0>>18)&0xC) | ((c0>>11)&0x3) | (c0&0x30));
+
+    right  = PC2LOOKUP(1, 0, ((d0 >> 22) & 0x3F) );
+    right |= PC2LOOKUP(1, 1, ((d0 >> 15) & 0x30) | ((d0 >> 14) & 0xf) );
+    right |= PC2LOOKUP(1, 2, ((d0 >>  7) & 0x3F) );
+    right |= PC2LOOKUP(1, 3, ((d0 >>  1) & 0x3C) | (d0 & 0x3));
+*/
+
+void
+make_pc2a(void)
+{
+    /* Rebuild PC2a from PC2, then print it as eight rows of eight entries. */
+    int pos, row;
+
+    for (pos = 0; pos < 64; ++pos) {
+        int bit = PC2[pos];
+        /* PC2 counts bits 1-56 from the left (0 = unused); PC2a counts each
+         * 28-bit half from the right starting at 0, with -1 for unused. */
+        if (bit == 0)
+            PC2a[pos] = -1;
+        else
+            PC2a[pos] = ((bit < 29) ? 28 : 56) - bit;
+    }
+    for (row = 0; row < 8; ++row) {
+        const signed char *r = &PC2a[row * 8];
+        printf("%3d,%3d,%3d,%3d,%3d,%3d,%3d,%3d,\n",
+               r[0], r[1], r[2], r[3],
+               r[4], r[5], r[6], r[7]);
+    }
+}
+
+HALF PC2cd0[64];
+
+HALF PC_2H[8][64];
+
+void
+mktable()
+{ /* Fills PC2cd0 from PC2b, then builds and prints the eight 64-entry PC_2H lookup tables. */
+    int i;
+    int table;
+    const BYTE* ndx = NDX; /* walks all 48 NDX entries, six per output table */
+    HALF mask;
+
+    mask = 0x80000000; /* first 32 PC2b entries (the C0 rows): entry i gets mask bit 31 - i */
+    for (i = 0; i < 32; ++i, mask >>= 1) {
+        int bit = PC2b[i];
+        if (bit < 0)
+            continue; /* -1 marks a position that is always zero */
+        PC2cd0[bit + 32] = mask; /* C0 bit numbers are stored at offset 32 */
+    }
+
+    mask = 0x80000000; /* last 32 PC2b entries (the D0 rows), same mask walk */
+    for (i = 32; i < 64; ++i, mask >>= 1) {
+        int bit = PC2b[i];
+        if (bit < 0)
+            continue;
+        PC2cd0[bit] = mask; /* D0 bit numbers are stored at offset 0 */
+    }
+
+#if DEBUG
+    for (i = 0; i < 64; ++i) {
+        printf("0x%08x,\n", PC2cd0[i]);
+    }
+#endif
+    for (i = 0; i < 24; ++i) {
+        NDX[i] += 32; /* because c0 is the upper half; edits NDX in place, so call mktable() once */
+    }
+
+    for (table = 0; table < 8; ++table) {
+        HALF bitvals[6]; /* bitvals[k]: output mask contributed by bit k of the table index */
+        for (i = 0; i < 6; ++i) {
+            bitvals[5 - i] = PC2cd0[*ndx++]; /* NDX lists each group most significant bit first */
+        }
+        for (i = 0; i < 64; ++i) {
+            int j;
+            int k = 0;
+            HALF value = 0;
+
+            for (j = i; j; j >>= 1, ++k) { /* OR together the masks of every set bit of i */
+                if (j & 1) {
+                    value |= bitvals[k];
+                }
+            }
+            PC_2H[table][i] = value;
+        }
+        printf("/* table %d */ {\n", table);
+        for (i = 0; i < 64; i += 4) { /* four entries per printed line */
+            printf("    0x%08x, 0x%08x, 0x%08x, 0x%08x, \n",
+                   PC_2H[table][i], PC_2H[table][i + 1],
+                   PC_2H[table][i + 2], PC_2H[table][i + 3]);
+        }
+        printf("  },\n");
+    }
+}
+
+int
+main(void)
+{
+    /*   make_pc2a(); -- left disabled: it would recompute PC2a from PC2 and print it */
+    mktable(); /* prints the eight PC-2 lookup tables on stdout */
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mksp.c b/security/nss/lib/freebl/mksp.c
new file mode 100644
index 000000000..ca83ac8e7
--- /dev/null
+++ b/security/nss/lib/freebl/mksp.c
@@ -0,0 +1,119 @@
+/*
+ *  mksp.c
+ *
+ *  Generate SP tables for DES-150 library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+
+/*
+ * sboxes - the tables for the s-box functions
+ *        from FIPS 46, pages 15-16.
+ */
+unsigned char S[8][64] = {
+    /* Func S1 = */
+    { 14, 0, 4, 15, 13, 7, 1, 4, 2, 14, 15, 2, 11, 13, 8, 1,
+      3, 10, 10, 6, 6, 12, 12, 11, 5, 9, 9, 5, 0, 3, 7, 8,
+      4, 15, 1, 12, 14, 8, 8, 2, 13, 4, 6, 9, 2, 1, 11, 7,
+      15, 5, 12, 11, 9, 3, 7, 14, 3, 10, 10, 0, 5, 6, 0, 13 },
+    /* Func S2 = */
+    { 15, 3, 1, 13, 8, 4, 14, 7, 6, 15, 11, 2, 3, 8, 4, 14,
+      9, 12, 7, 0, 2, 1, 13, 10, 12, 6, 0, 9, 5, 11, 10, 5,
+      0, 13, 14, 8, 7, 10, 11, 1, 10, 3, 4, 15, 13, 4, 1, 2,
+      5, 11, 8, 6, 12, 7, 6, 12, 9, 0, 3, 5, 2, 14, 15, 9 },
+    /* Func S3 = */
+    { 10, 13, 0, 7, 9, 0, 14, 9, 6, 3, 3, 4, 15, 6, 5, 10,
+      1, 2, 13, 8, 12, 5, 7, 14, 11, 12, 4, 11, 2, 15, 8, 1,
+      13, 1, 6, 10, 4, 13, 9, 0, 8, 6, 15, 9, 3, 8, 0, 7,
+      11, 4, 1, 15, 2, 14, 12, 3, 5, 11, 10, 5, 14, 2, 7, 12 },
+    /* Func S4 = */
+    { 7, 13, 13, 8, 14, 11, 3, 5, 0, 6, 6, 15, 9, 0, 10, 3,
+      1, 4, 2, 7, 8, 2, 5, 12, 11, 1, 12, 10, 4, 14, 15, 9,
+      10, 3, 6, 15, 9, 0, 0, 6, 12, 10, 11, 1, 7, 13, 13, 8,
+      15, 9, 1, 4, 3, 5, 14, 11, 5, 12, 2, 7, 8, 2, 4, 14 },
+    /* Func S5 = */
+    { 2, 14, 12, 11, 4, 2, 1, 12, 7, 4, 10, 7, 11, 13, 6, 1,
+      8, 5, 5, 0, 3, 15, 15, 10, 13, 3, 0, 9, 14, 8, 9, 6,
+      4, 11, 2, 8, 1, 12, 11, 7, 10, 1, 13, 14, 7, 2, 8, 13,
+      15, 6, 9, 15, 12, 0, 5, 9, 6, 10, 3, 4, 0, 5, 14, 3 },
+    /* Func S6 = */
+    { 12, 10, 1, 15, 10, 4, 15, 2, 9, 7, 2, 12, 6, 9, 8, 5,
+      0, 6, 13, 1, 3, 13, 4, 14, 14, 0, 7, 11, 5, 3, 11, 8,
+      9, 4, 14, 3, 15, 2, 5, 12, 2, 9, 8, 5, 12, 15, 3, 10,
+      7, 11, 0, 14, 4, 1, 10, 7, 1, 6, 13, 0, 11, 8, 6, 13 },
+    /* Func S7 = */
+    { 4, 13, 11, 0, 2, 11, 14, 7, 15, 4, 0, 9, 8, 1, 13, 10,
+      3, 14, 12, 3, 9, 5, 7, 12, 5, 2, 10, 15, 6, 8, 1, 6,
+      1, 6, 4, 11, 11, 13, 13, 8, 12, 1, 3, 4, 7, 10, 14, 7,
+      10, 9, 15, 5, 6, 0, 8, 15, 0, 14, 5, 2, 9, 3, 2, 12 },
+    /* Func S8 = */
+    { 13, 1, 2, 15, 8, 13, 4, 8, 6, 10, 15, 3, 11, 7, 1, 4,
+      10, 12, 9, 5, 3, 6, 14, 11, 5, 0, 0, 14, 12, 9, 7, 2,
+      7, 2, 11, 1, 4, 14, 1, 7, 9, 4, 12, 10, 14, 8, 2, 13,
+      0, 15, 6, 12, 10, 9, 13, 0, 15, 3, 3, 5, 5, 6, 8, 11 }
+};
+
+/*
+ * Permutation function for results from s-boxes
+ *   from FIPS 46 pages 12 and 16.
+ * P =
+ */
+unsigned char P[32] = {
+    16, 7, 20, 21, 29, 12, 28, 17,
+    1, 15, 23, 26, 5, 18, 31, 10,
+    2, 8, 24, 14, 32, 27, 3, 9,
+    19, 13, 30, 6, 22, 11, 4, 25
+};
+
+unsigned int Pinv[32];
+unsigned int SP[8][64];
+
+void
+makePinv(void)
+{
+    /*
+     * For each position n (0 = leftmost) of the P table, store the mask with
+     * only bit n-from-the-left set into slot 32 - P[n] of Pinv.
+     */
+    int n;
+    for (n = 0; n < 32; ++n)
+        Pinv[32 - P[n]] = 0x80000000u >> n;
+}
+
+void
+makeSP(void)
+{
+    /* Build and print SP[box][item]: the S-box output value pushed through
+     * Pinv bit by bit and then rotated left by three bits. */
+    int box, item;
+
+    for (box = 0; box < 8; ++box) {
+        const unsigned int lowBit = 28 - 4 * box; /* Pinv slot for output bit 0 */
+        unsigned int *row = SP[box];
+        printf("/* box S%d */ {\n", box + 1);
+        for (item = 0; item < 64; ++item) {
+            unsigned int nibble = S[box][item];
+            unsigned int mask = 0, slot = lowBit;
+            while (nibble != 0) {
+                mask |= (nibble & 1) ? Pinv[slot] : 0;
+                nibble >>= 1, ++slot;
+            }
+            row[item] = (mask << 3) | (mask >> 29);
+        }
+        for (item = 0; item < 64; item += 4)
+            printf("\t0x%08x, 0x%08x, 0x%08x, 0x%08x,\n",
+                   row[item], row[item + 1], row[item + 2], row[item + 3]);
+        printf("    },\n");
+    }
+}
+
+int
+main()
+{
+    makePinv(); /* must run first: makeSP() reads the Pinv table this fills */
+    makeSP();   /* prints the eight SP tables on stdout */
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/Makefile b/security/nss/lib/freebl/mpi/Makefile
new file mode 100644
index 000000000..0dee5bed1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/Makefile
@@ -0,0 +1,244 @@
+#
+# Makefile for MPI library
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+## Define CC to be the C compiler you wish to use.  The GNU cc
+## compiler (gcc) should work, at the very least
+#CC=cc
+#CC=gcc
+
+## 
+## Define PERL to point to your local Perl interpreter.  It
+## should be Perl 5.x, although it's conceivable that Perl 4
+## might work ... I haven't tested it.
+##
+#PERL=/usr/bin/perl
+#PERL=perl
+
+include target.mk
+
+CFLAGS+= $(XCFLAGS)
+
+##
+## Define LIBS to include any libraries you need to link against.
+## If NO_TABLE is defined, LIBS should include '-lm' or whatever is
+## necessary to bring in the math library.  Otherwise, it can be
+## left alone, unless your system has other peculiar requirements.
+##
+LIBS=#-lmalloc#-lefence#-lm
+
+## 
+## Define RANLIB to be the library header randomizer; you might not
+## need this on some systems (just set it to 'echo' on these systems,
+## such as IRIX)
+##
+RANLIB=echo
+
+##
+## This is the version string used for the documentation and 
+## building the distribution tarball.  Don't mess with it unless
+## you are releasing a new version
+VERS=1.7p6
+
+## ----------------------------------------------------------------------
+## You probably don't need to change anything below this line...
+##
+
+##
+## This is the list of source files that need to be packed into
+## the distribution file
+SRCS=   mpi.c mpprime.c mplogic.c mp_gf2m.c mpmontg.c mpi-test.c primes.c \
+	mpcpucache.c tests/ \
+	utils/gcd.c utils/invmod.c utils/lap.c \
+	utils/ptab.pl utils/sieve.c utils/isprime.c\
+	utils/dec2hex.c utils/hex2dec.c utils/bbs_rand.c \
+	utils/bbsrand.c utils/prng.c utils/primegen.c \
+	utils/basecvt.c utils/makeprime.c\
+	utils/fact.c utils/exptmod.c utils/pi.c utils/metime.c \
+	utils/mpi.h utils/mpprime.h mulsqr.c \
+	make-test-arrays test-arrays.txt all-tests make-logtab \
+	types.pl stats timetest multest
+
+## These are the header files that go into the distribution file
+HDRS=mpi.h mpi-config.h utils/mpi.h utils/mpi-config.h mpprime.h mplogic.h mp_gf2m.h \
+     mp_gf2m-priv.h utils/bbs_rand.h tests/mpi.h tests/mpprime.h
+
+## These are the documentation files that go into the distribution file
+DOCS=README doc utils/README utils/PRIMES 
+
+## This is the list of tools built by 'make tools'
+TOOLS=gcd invmod isprime lap dec2hex hex2dec primegen prng \
+	basecvt fact exptmod pi makeprime identest
+
+LIBOBJS = mpprime.o mpmontg.o mplogic.o mp_gf2m.o mpi.o mpcpucache.o $(AS_OBJS)
+LIBHDRS = mpi-config.h mpi-priv.h mpi.h
+APPHDRS = mpi-config.h mpi.h mplogic.h mp_gf2m.h mpprime.h
+
+help:
+	@ echo ""
+	@ echo "The following targets can be built with this Makefile:"
+	@ echo ""
+	@ echo "libmpi.a     - arithmetic and prime testing library"
+	@ echo "mpi-test     - test driver (requires MP_IOFUNC)"
+	@ echo "tools        - command line tools"
+	@ echo "doc          - manual pages for tools"
+	@ echo "clean        - clean up objects and such"
+	@ echo "distclean    - get ready for distribution"
+	@ echo "dist         - distribution tarball"
+	@ echo ""
+
+.SUFFIXES: .c .o .i
+
+.c.i:
+	$(CC) $(CFLAGS) -E $< > $@
+
+#.c.o: $*.h $*.c
+#	$(CC) $(CFLAGS) -c $<
+
+#---------------------------------------
+
+$(LIBOBJS): $(LIBHDRS)
+
+logtab.h: make-logtab
+	$(PERL) make-logtab > logtab.h
+
+mpi.o: mpi.c logtab.h $(LIBHDRS)
+
+mplogic.o: mplogic.c mpi-priv.h mplogic.h $(LIBHDRS)
+
+mp_gf2m.o: mp_gf2m.c mpi-priv.h mp_gf2m.h mp_gf2m-priv.h $(LIBHDRS)
+
+mpmontg.o: mpmontg.c mpi-priv.h mplogic.h mpprime.h $(LIBHDRS)
+
+mpprime.o: mpprime.c mpi-priv.h mpprime.h mplogic.h primes.c $(LIBHDRS)
+
+mpcpucache.o: mpcpucache.c $(LIBHDRS)
+
+mpi_mips.o: mpi_mips.s
+	$(CC) -o $@ $(ASFLAGS) -c mpi_mips.s
+
+mpi_sparc.o : montmulf.h
+
+mpv_sparcv9.s: vis_64.il mpv_sparc.c
+	$(CC) -o $@ $(SOLARIS_FPU_FLAGS) -S vis_64.il mpv_sparc.c
+# The v8 variant is compiled with vis_32.il, so that is the file it depends on.
+mpv_sparcv8.s: vis_32.il mpv_sparc.c
+	$(CC) -o $@ $(SOLARIS_FPU_FLAGS) -S vis_32.il mpv_sparc.c
+
+montmulfv8.o montmulfv9.o mpv_sparcv8.o mpv_sparcv9.o : %.o : %.s 
+	$(CC) -o $@ $(SOLARIS_ASM_FLAGS) -c $<
+
+mpi_arm.o: mpi_arm.c $(LIBHDRS)
+
+# This rule is used to build the .s sources, which are then hand optimized.
+#montmulfv8.s montmulfv9.s : montmulf%.s : montmulf%.il montmulf.c montmulf.h 
+#	$(CC) -o $@ $(SOLARIS_ASM_FLAGS) -S montmulf$*.il montmulf.c
+
+
+libmpi.a: $(LIBOBJS)
+	ar -cvr libmpi.a $(LIBOBJS)
+	$(RANLIB) libmpi.a
+
+lib libs: libmpi.a
+
+mpi.i: mpi.h
+
+#---------------------------------------
+
+MPTESTOBJS = mptest1.o mptest2.o mptest3.o mptest3a.o mptest4.o mptest4a.o \
+	mptest4b.o mptest6.o mptest7.o mptest8.o mptest9.o mptestb.o
+MPTESTS = $(MPTESTOBJS:.o=)
+
+$(MPTESTOBJS): mptest%.o: tests/mptest-%.c $(LIBHDRS)
+	$(CC) $(CFLAGS) -o $@ -c $<
+
+$(MPTESTS): mptest%: mptest%.o libmpi.a
+	$(CC) $(CFLAGS) -o $@ $^  $(LIBS)
+
+tests: mptest1 mptest2 mptest3 mptest3a mptest4 mptest4a mptest4b mptest6 \
+	mptestb bbsrand
+
+utests: mptest7 mptest8 mptest9
+
+#---------------------------------------
+
+EXTRAOBJS = bbsrand.o bbs_rand.o prng.o
+UTILOBJS = primegen.o metime.o identest.o basecvt.o fact.o exptmod.o pi.o \
+	makeprime.o gcd.o invmod.o lap.o isprime.o \
+	dec2hex.o hex2dec.o
+UTILS = $(UTILOBJS:.o=) 
+
+$(UTILS): % : %.o libmpi.a
+	$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
+
+$(UTILOBJS) $(EXTRAOBJS): %.o : utils/%.c $(LIBHDRS)
+	$(CC) $(CFLAGS) -o $@ -c $<
+
+prng: prng.o bbs_rand.o libmpi.a
+	$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
+
+bbsrand: bbsrand.o bbs_rand.o libmpi.a
+	$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
+
+utils: $(UTILS) prng bbsrand
+
+#---------------------------------------
+
+test-info.c: test-arrays.txt
+	$(PERL) make-test-arrays test-arrays.txt > test-info.c
+
+mpi-test.o: mpi-test.c test-info.c $(LIBHDRS)
+	$(CC) $(CFLAGS) -o $@ -c $<
+
+mpi-test: mpi-test.o libmpi.a
+	$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
+
+mdxptest.o: mdxptest.c $(LIBHDRS) mpi-priv.h
+
+mdxptest: mdxptest.o libmpi.a
+	$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
+
+mulsqr.o: mulsqr.c logtab.h mpi.h mpi-config.h mpprime.h 
+	$(CC) $(CFLAGS) -DMP_SQUARE=1 -o $@ -c mulsqr.c 
+
+mulsqr: mulsqr.o libmpi.a
+	$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
+
+#---------------------------------------
+
+alltests: tests utests mpi-test
+
+tools: $(TOOLS)
+
+doc:
+	(cd doc; ./build)
+
+clean:
+	rm -f *.o *.a *.i
+	rm -f core
+	rm -f *~ .*~
+	rm -f utils/*.o
+	rm -f utils/core
+	rm -f utils/*~ utils/.*~
+
+clobber: clean
+	rm -f $(TOOLS) $(UTILS)
+
+distclean: clean
+	rm -f mptest? mpi-test metime mulsqr karatsuba
+	rm -f mptest?a mptest?b
+	rm -f utils/mptest?
+	rm -f test-info.c logtab.h
+	rm -f libmpi.a
+	rm -f $(TOOLS)
+
+dist: Makefile $(HDRS) $(SRCS) $(DOCS)
+	tar -cvf mpi-$(VERS).tar Makefile $(HDRS) $(SRCS) $(DOCS)
+	pgps -ab mpi-$(VERS).tar
+	chmod +r mpi-$(VERS).tar.asc
+	gzip -9 mpi-$(VERS).tar
+
+# END
diff --git a/security/nss/lib/freebl/mpi/Makefile.os2 b/security/nss/lib/freebl/mpi/Makefile.os2
new file mode 100644
index 000000000..fa705ee08
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/Makefile.os2
@@ -0,0 +1,243 @@
+#
+# Makefile.os2 - gmake Makefile for building MPI with VACPP on OS/2
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+## Define CC to be the C compiler you wish to use.  The GNU cc
+## compiler (gcc) should work, at the very least
+#CC=cc
+#CC=gcc
+CC=icc.exe
+AS=alp.exe
+
+## 
+## Define PERL to point to your local Perl interpreter.  It
+## should be Perl 5.x, although it's conceivable that Perl 4
+## might work ... I haven't tested it.
+##
+#PERL=/usr/bin/perl
+#PERL=perl
+
+##
+## Define CFLAGS to contain any local options your compiler
+## setup requires.
+##
+## Conditional compilation options are no longer here; see
+## the file 'mpi-config.h' instead.
+##
+MPICMN = -I. -DMP_API_COMPATIBLE -DMP_IOFUNC -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD
+
+#OS/2
+AS_SRCS = mpi_x86.asm
+MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE -DMP_ASSEMBLY_DIV_2DX1D
+#CFLAGS= -Od -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC \
+ -DDEBUG -D_DEBUG -UNDEBUG -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
+#CFLAGS = -O2 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
+ -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
+#CFLAGS = -Od -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
+ -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
+CFLAGS = /Ti+ -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
+ $(MPICMN)
+ASFLAGS =
+
+##
+## Define LIBS to include any libraries you need to link against.
+## If NO_TABLE is defined, LIBS should include '-lm' or whatever is
+## necessary to bring in the math library.  Otherwise, it can be
+## left alone, unless your system has other peculiar requirements.
+##
+LIBS=#-lmalloc#-lefence#-lm
+
+## 
+## Define RANLIB to be the library header randomizer; you might not
+## need this on some systems (just set it to 'echo' on these systems,
+## such as IRIX)
+##
+RANLIB=echo
+
+##
+## This is the version string used for the documentation and 
+## building the distribution tarball.  Don't mess with it unless
+## you are releasing a new version
+VERS=1.7p6
+
+## ----------------------------------------------------------------------
+## You probably don't need to change anything below this line...
+##
+
+##
+## This is the list of source files that need to be packed into
+## the distribution file
+SRCS=   mpi.c mpprime.c mplogic.c mpmontg.c mpi-test.c primes.c tests/ \
+	utils/gcd.c utils/invmod.c utils/lap.c \
+	utils/ptab.pl utils/sieve.c utils/isprime.c\
+	utils/dec2hex.c utils/hex2dec.c utils/bbs_rand.c \
+	utils/bbsrand.c utils/prng.c utils/primegen.c \
+	utils/basecvt.c utils/makeprime.c\
+	utils/fact.c utils/exptmod.c utils/pi.c utils/metime.c \
+	utils/mpi.h utils/mpprime.h mulsqr.c \
+	make-test-arrays test-arrays.txt all-tests make-logtab \
+	types.pl stats timetest multest
+
+## These are the header files that go into the distribution file
+HDRS=mpi.h mpi-config.h utils/mpi.h utils/mpi-config.h mpprime.h mplogic.h \
+     utils/bbs_rand.h tests/mpi.h tests/mpprime.h
+
+## These are the documentation files that go into the distribution file
+DOCS=README doc utils/README utils/PRIMES 
+
+## This is the list of tools built by 'make tools'
+TOOLS=gcd.exe invmod.exe isprime.exe lap.exe dec2hex.exe hex2dec.exe \
+ primegen.exe prng.exe basecvt.exe fact.exe exptmod.exe pi.exe makeprime.exe
+
+AS_OBJS = $(AS_SRCS:.asm=.obj)
+LIBOBJS = mpprime.obj mpmontg.obj mplogic.obj mpi.obj $(AS_OBJS)
+LIBHDRS = mpi-config.h mpi-priv.h mpi.h
+APPHDRS = mpi-config.h mpi.h mplogic.h mpprime.h
+
+
+help:
+	@ echo ""
+	@ echo "The following targets can be built with this Makefile:"
+	@ echo ""
+	@ echo "mpi.lib      - arithmetic and prime testing library"
+	@ echo "mpi-test.exe - test driver (requires MP_IOFUNC)"
+	@ echo "tools        - command line tools"
+	@ echo "doc          - manual pages for tools"
+	@ echo "clean        - clean up objects and such"
+	@ echo "distclean    - get ready for distribution"
+	@ echo "dist         - distribution tarball"
+	@ echo ""
+
+.SUFFIXES: .c .obj .i .lib .exe .asm
+
+.c.i:
+	$(CC) $(CFLAGS) -E $< > $@
+
+.c.obj: 
+	$(CC) $(CFLAGS) -c $<
+
+.asm.obj:
+	$(AS) $(ASFLAGS) $<
+
+.obj.exe:
+	$(CC) $(CFLAGS) -Fo$@ $<
+
+#---------------------------------------
+
+$(LIBOBJS): $(LIBHDRS)
+
+logtab.h: make-logtab
+	$(PERL) make-logtab > logtab.h
+
+mpi.obj: mpi.c logtab.h $(LIBHDRS)
+
+mplogic.obj: mplogic.c mpi-priv.h mplogic.h $(LIBHDRS)
+
+mpmontg.obj: mpmontg.c mpi-priv.h mplogic.h mpprime.h $(LIBHDRS)
+
+mpprime.obj: mpprime.c mpi-priv.h mpprime.h mplogic.h primes.c $(LIBHDRS)
+
+mpi_mips.obj: mpi_mips.s
+	$(CC) -Fo$@ $(ASFLAGS) -c mpi_mips.s
+
+mpi.lib: $(LIBOBJS)
+	ilib /out:mpi.lib $(LIBOBJS)
+	$(RANLIB) mpi.lib
+
+lib libs: mpi.lib
+
+#---------------------------------------
+
+MPTESTOBJS = mptest1.obj mptest2.obj mptest3.obj mptest3a.obj mptest4.obj \
+ mptest4a.obj mptest4b.obj mptest6.obj mptest7.obj mptest8.obj mptest9.obj
+MPTESTS = $(MPTESTOBJS:.obj=.exe)
+
+$(MPTESTOBJS): mptest%.obj: tests/mptest-%.c $(LIBHDRS)
+	$(CC) $(CFLAGS) -Fo$@ -c $<
+
+$(MPTESTS): mptest%.exe: mptest%.obj mpi.lib $(LIBS)
+	$(CC) $(CFLAGS) -Fo$@ $^ 
+
+tests: mptest1.exe mptest2.exe mptest3.exe mptest3a.exe mptest4.exe \
+ mptest4a.exe mptest4b.exe mptest6.exe bbsrand.exe
+
+utests: mptest7.exe mptest8.exe mptest9.exe
+
+#---------------------------------------
+
+EXTRAOBJS = bbsrand.obj bbs_rand.obj prng.obj
+UTILOBJS = primegen.obj metime.obj identest.obj basecvt.obj fact.obj \
+ exptmod.obj pi.obj makeprime.obj karatsuba.obj gcd.obj invmod.obj lap.obj \
+ isprime.obj dec2hex.obj hex2dec.obj
+UTILS = $(UTILOBJS:.obj=.exe) 
+
+$(UTILS): %.exe : %.obj mpi.lib $(LIBS)
+	$(CC) $(CFLAGS) -Fo$@ $^ 
+
+$(UTILOBJS) $(EXTRAOBJS): %.obj : utils/%.c $(LIBHDRS)
+	$(CC) $(CFLAGS) -Fo$@ -c $<
+
+prng.exe: prng.obj bbs_rand.obj mpi.lib $(LIBS)
+	$(CC) $(CFLAGS) -Fo$@ $^
+
+bbsrand.exe: bbsrand.obj bbs_rand.obj mpi.lib $(LIBS)
+	$(CC) $(CFLAGS) -Fo$@ $^
+
+utils: $(UTILS) prng.exe bbsrand.exe
+
+#---------------------------------------
+
+test-info.c: test-arrays.txt
+	$(PERL) make-test-arrays test-arrays.txt > test-info.c
+
+mpi-test.obj: mpi-test.c test-info.c $(LIBHDRS)
+	$(CC) $(CFLAGS) -Fo$@ -c $<
+
+mpi-test.exe: mpi-test.obj mpi.lib $(LIBS)
+	$(CC) $(CFLAGS) -Fo$@ $^
+
+mdxptest.obj: mdxptest.c $(LIBHDRS) mpi-priv.h
+
+mdxptest.exe: mdxptest.obj mpi.lib $(LIBS)
+	$(CC) $(CFLAGS) -Fo$@ $^
+
+mulsqr.obj: mulsqr.c logtab.h mpi.h mpi-config.h mpprime.h 
+	$(CC) $(CFLAGS) -DMP_SQUARE=1 -Fo$@ -c mulsqr.c 
+
+mulsqr.exe: mulsqr.obj mpi.lib $(LIBS)
+	$(CC) $(CFLAGS) -Fo$@ $^
+
+#---------------------------------------
+
+alltests: tests utests mpi-test.exe
+
+tools: $(TOOLS)
+
+doc:
+	(cd doc; ./build)
+
+clean:
+	rm -f *.obj *.lib *.pdb *.ilk
+	cd utils; rm -f *.obj *.lib *.pdb *.ilk
+
+distclean: clean
+	rm -f mptest? mpi-test metime mulsqr karatsuba
+	rm -f mptest?a mptest?b
+	rm -f utils/mptest?
+	rm -f test-info.c logtab.h
+	rm -f mpi.lib
+	rm -f $(TOOLS)
+
+dist: Makefile $(HDRS) $(SRCS) $(DOCS)
+	tar -cvf mpi-$(VERS).tar Makefile $(HDRS) $(SRCS) $(DOCS)
+	pgps -ab mpi-$(VERS).tar
+	chmod +r mpi-$(VERS).tar.asc
+	gzip -9 mpi-$(VERS).tar
+
+
+print: 
+	@echo LIBOBJS = $(LIBOBJS)
+# END
diff --git a/security/nss/lib/freebl/mpi/Makefile.win b/security/nss/lib/freebl/mpi/Makefile.win
new file mode 100644
index 000000000..cd41dfab8
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/Makefile.win
@@ -0,0 +1,254 @@
+#
+# Makefile.win - gmake Makefile for building MPI with MSVC on NT
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+## CC is the C compiler and AS the assembler.  This makefile uses cl.exe,
+## with ml64.exe as assembler when CPU_ARCH is x86_64 and ml.exe otherwise.
+#CC=cc
+#CC=gcc
+CC=cl.exe
+ifeq ($(CPU_ARCH),x86_64)
+AS=ml64.exe
+else
+AS=ml.exe
+endif
+
+##
+## PERL is the Perl interpreter that runs make-logtab and make-test-arrays
+## (the logtab.h and test-info.c rules).  It defaults to 'perl'; a value from
+## the environment or the make command line still takes precedence.
+##
+#PERL=/usr/bin/perl
+PERL ?= perl
+
+##
+## CFLAGS and ASFLAGS hold the compiler and assembler options, and AS_SRCS
+## names the assembly source; all three are chosen per CPU_ARCH below.
+## MPICMN carries the MPI-specific options appended to either CFLAGS value.
+## Most conditional compilation options are not set here; see
+## the file 'mpi-config.h' instead.
+##
+MPICMN = -I. -DMP_API_COMPATIBLE -DMP_IOFUNC
+
+ifeq ($(CPU_ARCH),x86_64)
+AS_SRCS = mpi_x86_64.asm
+CFLAGS = -O2 -Z7 -MD -W3 -nologo -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
+ -DWIN32 -D_WIN64 -D_AMD64_ -D_M_AMD64 -D_WINDOWS -DWIN95 $(MPICMN)
+ASFLAGS = -Cp -Sn -Zi -I.
+else
+#NT (CPU_ARCH other than x86_64): uses mpi_x86.asm and adds the MP_ASSEMBLY_* defines
+AS_SRCS = mpi_x86.asm
+MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE -DMP_ASSEMBLY_DIV_2DX1D
+#CFLAGS= -Od -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC \
+ -DDEBUG -D_DEBUG -UNDEBUG -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
+#CFLAGS = -O2 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
+ -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
+#CFLAGS = -Od -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
+ -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
+CFLAGS = -O2 -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
+ -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
+ASFLAGS = -Cp -Sn -Zi -coff -I. 
+endif
+
+##
+## Define LIBS to include any libraries you need to link against.  It is
+## empty here (everything after the '#' is a comment).  If NO_TABLE is
+## defined, LIBS should include '-lm' or whatever is necessary to bring in
+## the math library; otherwise leave it alone unless your system needs more.
+##
+LIBS=#-lmalloc#-lefence#-lm
+
+##
+## Define RANLIB to be the command run on mpi.lib after it is archived;
+## you might not need a real ranlib on some systems (just set it to
+## 'echo' on these systems, such as IRIX), which is what is done here.
+##
+RANLIB=echo
+
+##
+## This is the version string used for the documentation and
+## for naming the distribution tarball (mpi-$(VERS).tar).  Don't mess
+## with it unless you are releasing a new version.
+VERS=1.7p6
+
+## ----------------------------------------------------------------------
+## You probably don't need to change anything below this line...
+##
+
+##
+## This is the list of source files that need to be packed into
+## the distribution file (prerequisites and tar arguments of 'dist')
+SRCS=   mpi.c mpprime.c mplogic.c mpmontg.c mpi-test.c primes.c tests/ \
+	utils/gcd.c utils/invmod.c utils/lap.c \
+	utils/ptab.pl utils/sieve.c utils/isprime.c\
+	utils/dec2hex.c utils/hex2dec.c utils/bbs_rand.c \
+	utils/bbsrand.c utils/prng.c utils/primegen.c \
+	utils/basecvt.c utils/makeprime.c\
+	utils/fact.c utils/exptmod.c utils/pi.c utils/metime.c \
+	utils/mpi.h utils/mpprime.h mulsqr.c \
+	make-test-arrays test-arrays.txt all-tests make-logtab \
+	types.pl stats timetest multest
+
+## These are the header files that go into the distribution file
+HDRS=mpi.h mpi-config.h utils/mpi.h utils/mpi-config.h mpprime.h mplogic.h \
+     utils/bbs_rand.h tests/mpi.h tests/mpprime.h
+
+## These are the documentation files that go into the distribution file
+DOCS=README doc utils/README utils/PRIMES 
+
+## This is the list of tools built by 'make tools'
+TOOLS=gcd.exe invmod.exe isprime.exe lap.exe dec2hex.exe hex2dec.exe \
+ primegen.exe prng.exe basecvt.exe fact.exe exptmod.exe pi.exe makeprime.exe
+## LIBOBJS is archived into mpi.lib; LIBHDRS is a prerequisite of the object rules
+AS_OBJS = $(AS_SRCS:.asm=.obj)
+LIBOBJS = mpprime.obj mpmontg.obj mplogic.obj mpi.obj $(AS_OBJS)
+LIBHDRS = mpi-config.h mpi-priv.h mpi.h
+APPHDRS = mpi-config.h mpi.h mplogic.h mpprime.h
+
+# First target in the file, hence the default goal: print the main targets.
+help:
+	@ echo ""
+	@ echo "The following targets can be built with this Makefile:"
+	@ echo ""
+	@ echo "mpi.lib      - arithmetic and prime testing library"
+	@ echo "mpi-test.exe - test driver (requires MP_IOFUNC)"
+	@ echo "tools        - command line tools"
+	@ echo "doc          - manual pages for tools"
+	@ echo "clean        - clean up objects and such"
+	@ echo "distclean    - get ready for distribution"
+	@ echo "dist         - distribution tarball"
+	@ echo ""
+
+.SUFFIXES: .c .obj .i .lib .exe .asm
+# Suffix rules: .c->.i preprocess, .c->.obj compile, .asm->.obj assemble, .obj->.exe link.
+.c.i:
+	$(CC) $(CFLAGS) -E $< > $@
+
+.c.obj:
+	$(CC) $(CFLAGS) -c $<
+
+.asm.obj:
+	$(AS) $(ASFLAGS) -c $<
+# Linking: -Fe is the cl option that names the executable (-Fo names object files).
+.obj.exe:
+	$(CC) $(CFLAGS) -Fe$@ $<
+
+#--- Library: object prerequisites, the generated logtab.h, and the mpi.lib archive ---
+# Rules without a recipe only add prerequisites; the .c.obj/.asm.obj suffix rules build them.
+$(LIBOBJS): $(LIBHDRS)
+
+logtab.h: make-logtab
+	$(PERL) make-logtab > logtab.h
+
+mpi.obj: mpi.c logtab.h $(LIBHDRS)
+
+mplogic.obj: mplogic.c mpi-priv.h mplogic.h $(LIBHDRS)
+
+mpmontg.obj: mpmontg.c mpi-priv.h mplogic.h mpprime.h $(LIBHDRS)
+
+mpprime.obj: mpprime.c mpi-priv.h mpprime.h mplogic.h primes.c $(LIBHDRS)
+# NOTE(review): AS_SRCS never names mpi_mips here, so this rule looks unused - confirm.
+mpi_mips.obj: mpi_mips.s
+	$(CC) -Fo$@ $(ASFLAGS) -c mpi_mips.s
+
+mpi.lib: $(LIBOBJS)
+	ar -cvr mpi.lib $(LIBOBJS)
+	$(RANLIB) mpi.lib
+
+lib libs: mpi.lib
+
+#--- Test programs: mptestN.obj is compiled from tests/mptest-N.c, then linked with mpi.lib ---
+
+MPTESTOBJS = mptest1.obj mptest2.obj mptest3.obj mptest3a.obj mptest4.obj \
+ mptest4a.obj mptest4b.obj mptest6.obj mptest7.obj mptest8.obj mptest9.obj
+MPTESTS = $(MPTESTOBJS:.obj=.exe)
+
+$(MPTESTOBJS): mptest%.obj: tests/mptest-%.c $(LIBHDRS)
+	$(CC) $(CFLAGS) -Fo$@ -c $<
+# Link step: -Fe names the executable; -Fo (used above) only names object files.
+$(MPTESTS): mptest%.exe: mptest%.obj mpi.lib $(LIBS)
+	$(CC) $(CFLAGS) -Fe$@ $^
+
+tests: mptest1.exe mptest2.exe mptest3.exe mptest3a.exe mptest4.exe \
+ mptest4a.exe mptest4b.exe mptest6.exe bbsrand.exe
+
+utests: mptest7.exe mptest8.exe mptest9.exe
+
+#--- Utility programs: X.obj is compiled from utils/X.c, then linked with mpi.lib ---
+
+EXTRAOBJS = bbsrand.obj bbs_rand.obj prng.obj
+UTILOBJS = primegen.obj metime.obj identest.obj basecvt.obj fact.obj \
+ exptmod.obj pi.obj makeprime.obj karatsuba.obj gcd.obj invmod.obj lap.obj \
+ isprime.obj dec2hex.obj hex2dec.obj
+UTILS = $(UTILOBJS:.obj=.exe)
+# Link steps use -Fe, the cl option that names the executable (-Fo names object files).
+$(UTILS): %.exe : %.obj mpi.lib $(LIBS)
+	$(CC) $(CFLAGS) -Fe$@ $^
+
+$(UTILOBJS) $(EXTRAOBJS): %.obj : utils/%.c $(LIBHDRS)
+	$(CC) $(CFLAGS) -Fo$@ -c $<
+
+prng.exe: prng.obj bbs_rand.obj mpi.lib $(LIBS)
+	$(CC) $(CFLAGS) -Fe$@ $^
+
+bbsrand.exe: bbsrand.obj bbs_rand.obj mpi.lib $(LIBS)
+	$(CC) $(CFLAGS) -Fe$@ $^
+
+utils: $(UTILS) prng.exe bbsrand.exe
+
+#--- Test drivers: mpi-test (needs the generated test-info.c), mdxptest and mulsqr ---
+# Link steps use -Fe, the cl option that names the executable (-Fo names object files).
+test-info.c: test-arrays.txt
+	$(PERL) make-test-arrays test-arrays.txt > test-info.c
+
+mpi-test.obj: mpi-test.c test-info.c $(LIBHDRS)
+	$(CC) $(CFLAGS) -Fo$@ -c $<
+
+mpi-test.exe: mpi-test.obj mpi.lib $(LIBS)
+	$(CC) $(CFLAGS) -Fe$@ $^
+
+mdxptest.obj: mdxptest.c $(LIBHDRS) mpi-priv.h
+
+mdxptest.exe: mdxptest.obj mpi.lib $(LIBS)
+	$(CC) $(CFLAGS) -Fe$@ $^
+
+mulsqr.obj: mulsqr.c logtab.h mpi.h mpi-config.h mpprime.h
+	$(CC) $(CFLAGS) -DMP_SQUARE=1 -Fo$@ -c mulsqr.c
+
+mulsqr.exe: mulsqr.obj mpi.lib $(LIBS)
+	$(CC) $(CFLAGS) -Fe$@ $^
+
+#--- Aggregate, cleanup and distribution targets (command names, not files) ---
+.PHONY: help lib libs utests alltests tools clean distclean dist print
+
+alltests: tests utests mpi-test.exe
+
+tools: $(TOOLS)
+# NOTE(review): 'doc' is also a directory and a 'dist' prerequisite (via DOCS), so it is left non-phony - confirm.
+doc:
+	(cd doc; ./build)
+
+clean:
+	rm -f *.obj *.lib *.pdb *.ilk
+	cd utils; rm -f *.obj *.lib *.pdb *.ilk
+# Programs built by this makefile end in .exe, so they are removed by their full names.
+distclean: clean
+	rm -f mptest?.exe mpi-test.exe metime.exe mulsqr.exe karatsuba.exe
+	rm -f mptest?a.exe mptest?b.exe
+	rm -f utils/mptest?
+	rm -f test-info.c logtab.h
+	rm -f mpi.lib
+	rm -f $(TOOLS)
+
+dist: Makefile $(HDRS) $(SRCS) $(DOCS)
+	tar -cvf mpi-$(VERS).tar Makefile $(HDRS) $(SRCS) $(DOCS)
+	pgps -ab mpi-$(VERS).tar
+	chmod +r mpi-$(VERS).tar.asc
+	gzip -9 mpi-$(VERS).tar
+
+print:
+	@echo LIBOBJS = $(LIBOBJS)
+# END
diff --git a/security/nss/lib/freebl/mpi/README b/security/nss/lib/freebl/mpi/README
new file mode 100644
index 000000000..475549bad
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/README
@@ -0,0 +1,749 @@
+This Source Code Form is subject to the terms of the Mozilla Public
+License, v. 2.0. If a copy of the MPL was not distributed with this
+file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+About the MPI Library
+---------------------
+
+The files 'mpi.h' and 'mpi.c' define a simple, arbitrary precision
+signed integer arithmetic package.  The implementation is not the most
+efficient possible, but the code is small and should be fairly easily
+portable to just about any machine that supports an ANSI C compiler,
+as long as it is capable of at least 16-bit arithmetic (but also see
+below for more on this).
+
+This library was written with an eye to cryptographic applications;
+thus, some care is taken to make sure that temporary values are not
+left lying around in memory when they are no longer in use.  This adds
+some overhead for zeroing buffers before they are released back into
+the free pool; however, it gives you the assurance that there is only
+one copy of your important values residing in your process's address
+space at a time.  Obviously, it is difficult to guarantee anything in
+a pre-emptive multitasking environment, but this at least helps you
+keep a lid on the more obvious ways your data can get spread around in
+memory.
+
+
+Using the Library
+-----------------
+
+To use the MPI library in your program, you must include the header:
+
+#include "mpi.h"
+
+This header provides all the type and function declarations you'll
+need to use the library.  Almost all the names defined by the library
+begin with the prefix 'mp_', so it should be easy to keep them from
+clashing with your program's namespace (he says, glibly, knowing full
+well there are always pathological cases).
+
+There are a few things you may want to configure about the library.
+By default, the MPI library uses an unsigned short for its digit type,
+and an unsigned int for its word type.  The word type must be big
+enough to contain at least two digits, for the primitive arithmetic to
+work out.  On my machine, a short is 2 bytes and an int is 4 bytes --
+but if you have 64-bit ints, you might want to use a 4-byte digit and
+an 8-byte word.  I have tested the library using 1-byte digits and
+2-byte words, as well.  Whatever you choose to do, the things you need
+to change are:
+
+(1) The type definitions for mp_digit and mp_word.
+
+(2) The macro DIGIT_FMT which tells mp_print() how to display a
+    single digit.  This is just a printf() format string, so you
+    can adjust it appropriately.
+
+(3) The macros DIGIT_MAX and MP_WORD_MAX, which specify the 
+    largest value expressible in an mp_digit and an mp_word,
+    respectively.
+
+Both the mp_digit and mp_word should be UNSIGNED integer types.  The
+code relies on having the full positive precision of the type used for
+digits and words.
+
+The remaining type definitions should be left alone, for the most
+part.  The code in the library does not make any significant
+assumptions about the sizes of things, but there is little if any
+reason to change the other parameters, so I would recommend you leave
+them as you found them.
+
+The library comes with a Perl script, 'types.pl', which will scan your
+current Makefile settings, and attempt to find good definitions for
+these types.  It relies on a Unix sort of build environment, so it
+probably won't work under MacOS or Windows, but it can be convenient
+if you're porting to a new flavour of Unix.  Just run 'types.pl' at
+the command line, and it will spit out its results to the standard
+output.
+
+
+Conventions
+-----------
+
+Most functions in the library return a value of type mp_err.  This
+permits the library to communicate success or various kinds of failure
+to the calling program.  The return values currently defined are:
+
+        MP_OKAY         - okay, operation succeeded, all's well
+        MP_YES          - okay, the answer is yes (same as MP_OKAY)
+        MP_NO           - okay, but answer is no (not MP_OKAY)
+        MP_MEM          - operation ran out of memory
+        MP_RANGE        - input parameter was out of range
+        MP_BADARG       - an invalid input parameter was provided
+        MP_UNDEF        - no output value is defined for this input
+
+The only function which currently uses MP_UNDEF is mp_invmod().
+Division by zero is undefined, but the division functions will return
+MP_RANGE for a zero divisor.  MP_BADARG usually means you passed a
+bogus mp_int structure to the function.  MP_YES and MP_NO are not used
+by the library itself; they're defined so you can use them in your own
+extensions.
+
+If you need a readable interpretation of these error codes in your
+program, you may also use the mp_strerror() function.  This function
+takes an mp_err as input, and returns a pointer to a human-readable
+string describing the meaning of the error.  These strings are stored
+as constants within the library, so the caller should not attempt to
+modify or free the memory associated with these strings.
+
+The library represents values in signed-magnitude format.  Values
+strictly less than zero are negative, all others are considered
+positive (zero is positive by fiat).  You can access the 'sign' member
+of the mp_int structure directly, but better is to use the mp_cmp_z()
+function, to find out which side of zero the value lies on.
+
+Most arithmetic functions have a single-digit variant, as well as the
+full arbitrary-precision.  An mp_digit is an unsigned value between 0
+and DIGIT_MAX inclusive.  The radix is available as RADIX.  The number
+of bits in a given digit is given as DIGIT_BIT.
+
+Generally, input parameters are given before output parameters.
+Unless otherwise specified, any input parameter can be re-used as an
+output parameter, without confusing anything.
+
+The basic numeric type defined by the library is an mp_int.  Virtually
+all the functions in the library take a pointer to an mp_int as one of
+their parameters.  An explanation of how to create and use these
+structures follows.  And so, without further ado...
+
+
+Initialization and Cleanup
+--------------------------
+
+The basic numeric type defined by the library is an 'mp_int'.
+However, it is not sufficient to simply declare a variable of type
+mp_int in your program.  These variables also need to be initialized
+before they can be used, to allocate the internal storage they require
+for computation.
+
+This is done using one of the following functions:
+
+        mp_init(mp_int *mp);
+        mp_init_copy(mp_int *mp, mp_int *from);
+        mp_init_size(mp_int *mp, mp_size p);
+
+Each of these requires a pointer to a structure of type mp_int.  The
+basic mp_init() simply initializes the mp_int to a default size, and
+sets its value to zero.  If you would like to initialize a copy of an
+existing mp_int, use mp_init_copy(), where the 'from' parameter is the
+mp_int you'd like to make a copy of.  The third function,
+mp_init_size(), permits you to specify how many digits of precision
+should be preallocated for your mp_int.  This can help the library
+avoid unnecessary re-allocations later on.
+
+The default precision used by mp_init() can be retrieved using:
+
+        precision = mp_get_prec();
+
+This returns the number of digits that will be allocated.  You can
+change this value by using:
+
+        mp_set_prec(unsigned int prec);
+
+Any positive value is acceptable -- if you pass zero, the default
+precision will be re-set to the compiled-in library default (this is
+specified in the header file 'mpi-config.h', and typically defaults to
+8 or 16).
+
+Just as you must allocate an mp_int before you can use it, you must
+clean up the structure when you are done with it.  This is performed
+using the mp_clear() function.  Remember that any mp_int that you
+create as a local variable in a function must be mp_clear()'d before
+that function exits, or else the memory allocated to that mp_int will
+be orphaned and unrecoverable.
+
+To set an mp_int to a given value, the following functions are given:
+
+        mp_set(mp_int *mp, mp_digit d);
+        mp_set_int(mp_int *mp, long z);
+
+The mp_set() function sets the mp_int to a single digit value, while
+mp_set_int() sets the mp_int to a signed long integer value.
+
+To set an mp_int to zero, use:
+
+        mp_zero(mp_int *mp);
+
+
+Copying and Moving
+------------------
+
+If you have two initialized mp_int's, and you want to copy the value
+of one into the other, use:
+
+        mp_copy(from, to)
+
+This takes care of clearing the old value of 'to', and copies the new
+value into it.  If 'to' is not yet initialized, use mp_init_copy()
+instead (see above).
+
+Note:   The library tries, whenever possible, to avoid allocating
+----    new memory.  Thus, mp_copy() tries first to satisfy the needs
+        of the copy by re-using the memory already allocated to 'to'.
+        Only if this proves insufficient will mp_copy() actually
+        allocate new memory.
+
+        For this reason, if you know a priori that 'to' has enough
+        available space to hold 'from', you don't need to check the
+        return value of mp_copy() for memory failure.  The USED()
+        macro tells you how many digits are used by an mp_int, and
+        the ALLOC() macro tells you how many are allocated.
+
+If you have two initialized mp_int's, and you want to exchange their
+values, use:
+
+        mp_exch(a, b)
+
+This is better than using mp_copy() with a temporary, since it will
+not (ever) touch the memory allocator -- it just swaps the exact
+contents of the two structures.  The mp_exch() function cannot fail;
+if you pass it an invalid structure, it just ignores it, and does
+nothing.
+
+
+Basic Arithmetic
+----------------
+
+Once you have initialized your integers, you can operate on them.  The
+basic arithmetic functions on full mp_int values are:
+
+mp_add(a, b, c)         - computes c = a + b
+mp_sub(a, b, c)         - computes c = a - b
+mp_mul(a, b, c)         - computes c = a * b
+mp_sqr(a, b)            - computes b = a * a
+mp_div(a, b, q, r)      - computes q, r such that a = bq + r
+mp_div_2d(a, d, q, r)   - computes q = a / 2^d, r = a % 2^d
+mp_expt(a, b, c)        - computes c = a ** b
+mp_2expt(a, k)          - computes a = 2^k
+
+The mp_div_2d() function efficiently computes division by powers of
+two.  Either the q or r parameter may be NULL, in which case that
+portion of the computation will be discarded.
+
+The algorithms used for some of the computations here are described in
+the following files which are included with this distribution:
+
+mul.txt         Describes the multiplication algorithm
+div.txt         Describes the division algorithm
+expt.txt        Describes the exponentiation algorithm
+sqrt.txt        Describes the square-root algorithm
+square.txt      Describes the squaring algorithm
+
+There are single-digit versions of most of these routines, as well.
+In the following prototypes, 'd' is a single mp_digit:
+
+mp_add_d(a, d, c)       - computes c = a + d
+mp_sub_d(a, d, c)       - computes c = a - d
+mp_mul_d(a, d, c)       - computes c = a * d
+mp_mul_2(a, c)          - computes c = a * 2
+mp_div_d(a, d, q, r)    - computes q, r such that a = dq + r
+mp_div_2(a, c)          - computes c = a / 2
+mp_expt_d(a, d, c)      - computes c = a ** d
+
+The mp_mul_2() and mp_div_2() functions take advantage of the internal
+representation of an mp_int to do multiplication by two more quickly
+than mp_mul_d() would.  Other basic functions of an arithmetic variety
+include:
+
+mp_zero(a)              - assign 0 to a
+mp_neg(a, c)            - negate a: c = -a
+mp_abs(a, c)            - absolute value: c = |a|
+
+
+Comparisons
+-----------
+
+Several comparison functions are provided.  Each of these, unless
+otherwise specified, returns zero if the comparands are equal, < 0 if
+the first is less than the second, and > 0 if the first is greater
+than the second:
+
+mp_cmp_z(a)             - compare a <=> 0
+mp_cmp_d(a, d)          - compare a <=> d, d is a single digit
+mp_cmp(a, b)            - compare a <=> b
+mp_cmp_mag(a, b)        - compare |a| <=> |b|
+mp_isodd(a)             - return nonzero if odd, zero otherwise
+mp_iseven(a)            - return nonzero if even, zero otherwise
+
+
+Modular Arithmetic
+------------------
+
+Modular variations of the basic arithmetic functions are also
+supported.  These are available if the MP_MODARITH parameter in
+mpi-config.h is turned on (it is by default).  The modular arithmetic
+functions are:
+
+mp_mod(a, m, c)         - compute c = a (mod m), 0 <= c < m
+mp_mod_d(a, d, c)       - compute c = a (mod d), 0 <= c < d (see below)
+mp_addmod(a, b, m, c)   - compute c = (a + b) mod m
+mp_submod(a, b, m, c)   - compute c = (a - b) mod m
+mp_mulmod(a, b, m, c)   - compute c = (a * b) mod m
+mp_sqrmod(a, m, c)      - compute c = (a * a) mod m
+mp_exptmod(a, b, m, c)  - compute c = (a ** b) mod m
+mp_exptmod_d(a, d, m, c)- compute c = (a ** d) mod m
+
+The mp_sqr() function squares its input argument.  A call to mp_sqr(a,
+c) is identical in meaning to mp_mul(a, a, c); however, if the
+MP_SQUARE variable is set true in mpi-config.h (see below), then it
+will be implemented with a different algorithm, that is supposed to
+take advantage of the redundant computation that takes place during
+squaring.  Unfortunately, some compilers result in worse performance
+on this code, so you can change the behaviour at will.  There is a
+utility program "mulsqr.c" that lets you test which does better on
+your system.
+
+The mp_sqrmod() function is analogous to the mp_sqr() function; it
+uses the mp_sqr() function rather than mp_mul(), and then performs the
+modular reduction.  This probably won't help much unless you are doing
+a lot of them.
+
+See the file 'square.txt' for a synopsis of the algorithm used.
+
+Note:   The mp_mod_d() function computes a modular reduction around
+----    a single digit d.  The result is a single digit c.
+
+Because an inverse is defined for a (mod m) if and only if (a, m) = 1
+(that is, if a and m are relatively prime), mp_invmod() may not be
+able to compute an inverse for the arguments.  In this case, it
+returns the value MP_UNDEF, and does not modify c.  If an inverse is
+defined, however, it returns MP_OKAY, and sets c to the value of the
+inverse (mod m).
+
+See the file 'redux.txt' for a description of the modular reduction
+algorithm used by mp_exptmod().
+
+
+Greatest Common Divisor
+-----------------------
+
+The greatest common divisor of two values can be found using one of the
+following functions:
+
+mp_gcd(a, b, c)         - compute c = (a, b) using binary algorithm
+mp_lcm(a, b, c)         - compute c = [a, b] = ab / (a, b)
+mp_xgcd(a, b, g, x, y)  - compute g, x, y so that ax + by = g = (a, b)
+
+Also provided is a function to compute modular inverses, if they
+exist:
+
+mp_invmod(a, m, c)      - compute c = a^-1 (mod m), if it exists
+
+The function mp_xgcd() computes the greatest common divisor, and also
+returns values of x and y satisfying Bezout's identity.  This is used
+by mp_invmod() to find modular inverses.  However, if you do not need
+these values, you will find that mp_gcd() is MUCH more efficient,
+since it doesn't need all the intermediate values that mp_xgcd()
+requires in order to compute x and y. 
+
+The mp_gcd() (and mp_xgcd()) functions use the binary (extended) GCD
+algorithm due to Josef Stein.
+
+
+Input & Output Functions
+------------------------
+
+The following basic I/O routines are provided.  These are present at
+all times:
+
+mp_read_radix(mp, str, r)  - convert a string in radix r to an mp_int
+mp_read_raw(mp, s, len)    - convert a string of bytes to an mp_int
+mp_radix_size(mp, r)       - return length of buffer needed by mp_toradix()
+mp_raw_size(mp)            - return length of buffer needed by mp_toraw()
+mp_toradix(mp, str, r)     - convert an mp_int to a string of radix r 
+                             digits
+mp_toraw(mp, str)          - convert an mp_int to a string of bytes
+mp_tovalue(ch, r)          - convert ch to its value when taken as
+                             a radix r digit, or -1 if invalid
+mp_strerror(err)           - get a string describing mp_err value 'err'
+
+If you compile the MPI library with MP_IOFUNC defined, you will also
+have access to the following additional I/O function:
+
+mp_print(mp, ofp)       - print an mp_int as text to output stream ofp
+
+Note that mp_radix_size() returns a size in bytes guaranteed to be AT
+LEAST big enough for the digits output by mp_toradix().  Because it
+uses an approximation technique to figure out how many digits will be
+needed, it may return a figure which is larger than necessary.  Thus,
+the caller should not rely on the value to determine how many bytes
+will actually be written by mp_toradix().  The string mp_toradix()
+creates will be NUL terminated, so the standard C library function
+strlen() should be able to ascertain this for you, if you need it.
+
+The mp_read_radix() and mp_toradix() functions support bases from 2 to
+64 inclusive.  If you require more general radix conversion facilities
+than this, you will need to write them yourself (that's why mp_div_d()
+is provided, after all).
+
+Note:   mp_read_radix() will accept as digits either capital or 
+----    lower-case letters.  However, the current implementation of
+        mp_toradix() only outputs upper-case letters, when writing
+        bases between 10 and 36.  The underlying code supports using
+        lower-case letters, but the interface stub does not have a
+        selector for it.  You can add one yourself if you think it
+        is worthwhile -- I do not.  Bases from 37 to 64 use lower-
+        case letters as distinct from upper-case.  Bases 63 and
+        64 use the characters '+' and '/' as digits.
+
+        Note also that compiling with MP_IOFUNC defined will cause
+        inclusion of <stdio.h>, so if you are trying to write code
+        which does not depend on the standard C library, you will
+        probably want to avoid this option.  This is needed because
+        the mp_print() function takes a standard library FILE * as
+        one of its parameters, and uses the fprintf() function.
+
+The mp_toraw() function converts the integer to a sequence of bytes,
+in big-endian ordering (most-significant byte first).  Assuming your
+bytes are 8 bits wide, this corresponds to base 256.  The sign is
+encoded as a single leading byte, whose value is 0 for zero or
+positive values, or 1 for negative values.  The mp_read_raw() function
+reverses this process -- it takes a buffer of bytes, interprets the
+first as a sign indicator (0 = zero/positive, nonzero = negative), and
+the rest as a sequence of 1-byte digits in big-endian ordering.
+
+The mp_raw_size() function returns the exact number of bytes required
+to store the given integer in "raw" format (as described in the
+previous paragraph).  Zero is returned in case of error; a valid
+integer will require at least three bytes of storage.
+
+In previous versions of the MPI library, an "external representation
+format" was supported.  This was removed, however, because I found I
+was never using it, it was not as portable as I would have liked, and
+I decided it was a waste of space.
+
+
+Other Functions
+---------------
+
+The files 'mpprime.h' and 'mpprime.c' define some routines which are
+useful for divisibility testing and probabilistic primality testing.
+The routines defined are:
+
+mpp_divis(a, b)          - is a divisible by b?
+mpp_divis_d(a, d)        - is a divisible by digit d?
+mpp_random(a)            - set a to random value at current precision
+mpp_random_size(a, prec) - set a to random value at given precision
+
+Note:  The mpp_random() and mpp_random_size() functions use the C
+----   library's rand() function to generate random values.  It is
+       up to the caller to seed this generator before it is called.
+       These functions are not suitable for generating quantities
+       requiring cryptographic-quality randomness; they are intended
+       primarily for use in primality testing.
+
+       Note too that the MPI library does not call srand(), so your
+       application should do this, if you ever want the sequence
+       to change.
+
+mpp_divis_vector(a, v, s, w)  - is a divisible by any of the s digits
+                                in v?  If so, let w be the index of 
+                                that digit
+
+mpp_divis_primes(a, np)       - is a divisible by any of the first np
+                                primes?  If so, set np to the prime 
+                                which divided a.
+
+mpp_fermat(a, w)              - test if w^a = w (mod a).  If so,
+                                returns MP_YES, otherwise MP_NO.
+
+mpp_pprime(a, nt)             - perform nt iterations of the Rabin-
+                                Miller probabilistic primality test
+                                on a.  Returns MP_YES if all tests
+                                passed, or MP_NO if any test fails.
+
+The mpp_fermat() function works based on Fermat's little theorem, a
+consequence of which is that if p is a prime, and (w, p) = 1, then:
+
+        w^p = w (mod p)
+
+Put another way, if w^p != w (mod p), then p is not prime.  The test
+is expensive to compute, but it helps to quickly eliminate an enormous
+class of composite numbers prior to Rabin-Miller testing.
+
+Building the Library
+--------------------
+
+The MPI library is designed to be as self-contained as possible.  You
+should be able to compile it with your favourite ANSI C compiler, and
+link it into your program directly.  If you are on a Unix system using
+the GNU C compiler (gcc), the following should work:
+
+% gcc -ansi -pedantic -Wall -O2 -c mpi.c
+
+The file 'mpi-config.h' defines several configurable parameters for
+the library, which you can adjust to suit your application.  At the
+time of this writing, the available options are:
+
+MP_IOFUNC       - Define true to include the mp_print() function, 
+                  which is moderately useful for debugging.  This
+                  implicitly includes <stdio.h>.
+
+MP_MODARITH     - Define true to include the modular arithmetic
+                  functions.  If you don't need modular arithmetic
+                  in your application, you can set this to zero to
+                  leave out all the modular routines.
+
+MP_NUMTH        - Define true to include number theoretic functions
+                  such as mp_gcd(), mp_lcm(), and mp_invmod().
+
+MP_LOGTAB       - If true, the file "logtab.h" is included, which
+                  is basically a static table of base 2 logarithms.
+                  These are used to compute how big the buffers for
+                  radix conversion need to be.  If you set this false,
+                  the library includes <math.h> and uses log().  This
+                  typically forces you to link against math libraries.
+
+MP_MEMSET       - If true, use memset() to zero buffers.  If you run
+                  into weird alignment related bugs, set this to zero
+                  and an explicit loop will be used.
+
+MP_MEMCPY       - If true, use memcpy() to copy buffers.  If you run
+                  into weird alignment bugs, set this to zero and an
+                  explicit loop will be used.
+
+MP_ARGCHK       - Set to 0, 1, or 2.  This defines how the argument
+                  checking macro, ARGCHK(), gets expanded.  If this 
+                  is set to zero, ARGCHK() expands to nothing; no 
+                  argument checks are performed.  If this is 1, the
+                  ARGCHK() macro expands to code that returns MP_BADARG
+                  or similar at runtime.  If it is 2, ARGCHK() expands 
+                  to an assert() call that aborts the program on a 
+                  bad input.
+
+MP_DEBUG        - Turns on debugging output.  This is probably not at
+                  all useful unless you are debugging the library.  It
+                  tends to spit out a LOT of output.
+
+MP_DEFPREC      - The default precision of a newly-created mp_int, in
+                  digits.  The precision can be changed at runtime by
+                  the mp_set_prec() function, but this is its initial
+                  value.
+
+MP_SQUARE       - If this is set to a nonzero value, the mp_sqr() 
+                  function will use an alternate algorithm that takes
+                  advantage of the redundant inner product computation
+                  when both multiplicands are identical.  Unfortunately,
+                  with some compilers this is actually SLOWER than just
+                  calling mp_mul() with the same argument twice.  So
+                  if you set MP_SQUARE to zero, mp_sqr() will be expan-
+                  ded into a call to mp_mul().  This applies to all 
+                  the uses of mp_sqr(), including mp_sqrmod() and the
+                  internal calls to s_mp_sqr() inside mpi.c
+
+                  The program 'mulsqr' (mulsqr.c) can be used to test
+                  which works best for your configuration.  Set up the
+                  CC and CFLAGS variables in the Makefile, then type:
+
+                        make mulsqr
+
+                  Invoke it with arguments similar to the following:
+
+                        mulsqr 25000 1024
+
+                  That is, 25000 products computed on 1024-bit values.
+                  The output will compare the two timings, and recommend
+                  a setting for MP_SQUARE.  It is off by default.
+
+If you would like to use the mp_print() function (see above), be sure
+to define MP_IOFUNC in mpi-config.h.  Many of the test drivers in the
+'tests' subdirectory expect this to be defined (although the test
+driver 'mpi-test' doesn't need it)
+
+The Makefile which comes with the library should take care of building
+the library for you, if you have set the CC and CFLAGS variables at
+the top of the file appropriately.  By default, they are set up to
+use the GNU C compiler:
+
+CC=gcc
+CFLAGS=-ansi -pedantic -Wall -O2
+
+If all goes well, the library should compile without warnings using
+this combination.  You should, of course, make whatever adjustments
+you find necessary.  
+
+The MPI library distribution comes with several additional programs
+which are intended to demonstrate the use of the library, and provide
+a framework for testing it.  There are a handful of test driver
+programs, in the files named 'mptest-X.c', where X is a digit.  Also,
+there are some simple command-line utilities (in the 'utils'
+directory) for manipulating large numbers.  These include:
+
+basecvt.c       A radix-conversion program, supporting bases from
+                2 to 64 inclusive.
+
+bbsrand.c       A BBS (quadratic residue) pseudo-random number 
+                generator.  The file 'bbsrand.c' is just the driver
+                for the program; the real code lives in the files
+                'bbs_rand.h' and 'bbs_rand.c'
+
+dec2hex.c       Converts decimal to hexadecimal
+
+gcd.c           Computes the greatest common divisor of two values.
+                If invoked as 'xgcd', also computes constants x and
+                y such that (a, b) = ax + by, in accordance with
+                Bezout's identity.
+
+hex2dec.c       Converts hexadecimal to decimal
+
+invmod.c        Computes modular inverses
+
+isprime.c       Performs the Rabin-Miller probabilistic primality
+                test on a number.  Values which fail this test are
+                definitely composite, and those which pass are very
+                likely to be prime (although there are no guarantees)
+
+lap.c           Computes the order (least annihilating power) of
+                a value v modulo m.  Very dumb algorithm.
+
+primegen.c      Generates large (probable) primes.
+
+prng.c          A pseudo-random number generator based on the
+                BBS generator code in 'bbs_rand.c'
+
+sieve.c         Implements the Sieve of Eratosthenes, using a big
+                bitmap, to generate a list of prime numbers.
+
+fact.c          Computes the factorial of an arbitrary precision
+                integer (iterative).
+
+exptmod.c       Computes arbitrary precision modular exponentiation
+                from the command line (exptmod a b m -> a^b (mod m))
+
+Most of these can be built from the Makefile that comes with the
+library.  Try 'make tools', if your environment supports it.
+
+
+Testing the Library
+-------------------
+
+Automatic test vectors are included, in the form of a program called
+'mpi-test'.  To build this program and run all the tests, simply
+invoke the shell script 'all-tests'.  If all the tests pass, you
+should see a message:
+
+        All unit tests passed
+
+If something went wrong, you'll get:
+
+        One or more tests failed.
+
+If this happens, scan back through the preceding lines, to see which
+test failed.  Any failure indicates a bug in the library, which needs
+to be fixed before it will give accurate results.  If you get any such
+thing, please let me know, and I'll try to fix it.  Please let me know
+what platform and compiler you were using, as well as which test
+failed.  If a reason for failure was given, please send me that text
+as well.
+
+If you're on a system where the standard Unix build tools don't work,
+you can build the 'mpi-test' program manually, and run it by hand.
+This is tedious and obnoxious, sorry.
+
+Further manual testing can be performed by building the manual testing
+programs, whose source is found in the 'tests' subdirectory.  Each
+test is in a source file called 'mptest-X.c'.  The Makefile contains a
+target to build all of them at once:
+
+        make tests
+
+Read the comments at the top of each source file to see what the
+driver is supposed to test.  You probably don't need to do this; these
+programs were only written to help me as I was developing the library.
+
+The relevant files are:
+
+mpi-test.c              The source for the test driver
+
+make-test-arrays        A Perl script to generate some of the internal
+                        data structures used by mpi-test.c
+
+test-arrays.txt         The source file for make-test-arrays
+
+all-tests               A Bourne shell script which runs all the
+                        tests in the mpi-test suite
+
+Running 'make mpi-test' should build the mpi-test program.  If you
+cannot use make, here is what needs to be done:
+
+(1) Use 'make-test-arrays' to generate the file 'test-info.c' from
+    the 'test-arrays.txt' file.  Since Perl can be found everywhere,
+    this should be no trouble.  Under Unix, this looks like:
+
+        make-test-arrays test-arrays.txt > test-info.c
+
+(2) Build the MPI library:
+
+        gcc -ansi -pedantic -Wall -c mpi.c
+
+(3) Build the mpi-test program:
+
+        gcc -ansi -pedantic -Wall -o mpi-test mpi.o mpi-test.c
+
+When you've got mpi-test, you can use 'all-tests' to run all the tests
+made available by mpi-test.  If any of them fail, there should be a
+diagnostic indicating what went wrong.  These are fairly high-level
+diagnostics, and won't really help you debug the problem; they're
+simply intended to help you isolate which function caused the problem.
+If you encounter a problem of this sort, feel free to e-mail me, and I
+will certainly attempt to help you debug it.
+
+Note:   Several of the tests hard-wired into 'mpi-test' operate under
+----    the assumption that you are using at least a 16-bit mp_digit 
+        type.  If that is not true, several tests might fail, because 
+        of range problems with the maximum digit value.
+
+        If you are using an 8-bit digit, you will also need to 
+        modify the code for mp_read_raw(), which assumes that
+        multiplication by 256 can be done with mp_mul_d(), a
+        fact that fails when DIGIT_MAX is 255.  You can replace
+        the call with s_mp_lshd(), which will give you the same
+        effect, and without doing as much work. :)
+
+Acknowledgements:
+----------------
+
+The algorithms used in this library were drawn primarily from Volume
+2 of Donald Knuth's magnum opus, _The Art of Computer Programming_,
+"Seminumerical Algorithms".  Barrett's algorithm for modular reduction
+came from Menezes, van Oorschot, and Vanstone's _Handbook of Applied
+Cryptography_, Chapter 14.
+
+Thanks are due to Tom St. Denis, for finding an obnoxious sign-related
+bug in mp_read_raw() that made things break on platforms which use
+signed chars.
+
+About the Author
+----------------
+
+This software was written by Michael J. Fromberger.  You can contact
+the author as follows:
+
+E-mail:   <sting@linguist.dartmouth.edu>
+
+Postal:   8000 Cummings Hall, Thayer School of Engineering
+          Dartmouth College, Hanover, New Hampshire, USA
+
+PGP key:  http://linguist.dartmouth.edu/~sting/keys/mjf.html
+          9736 188B 5AFA 23D6 D6AA  BE0D 5856 4525 289D 9907
+
+Last updated:  16-Jan-2000
diff --git a/security/nss/lib/freebl/mpi/all-tests b/security/nss/lib/freebl/mpi/all-tests
new file mode 100755
index 000000000..3429a15c0
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/all-tests
@@ -0,0 +1,83 @@
+#!/bin/sh
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Builds and runs the MPI self-tests: every suite named by 'mpi-test list',
+# then a comparison of the output of 'pi 2000' against tests/pi2k.txt.
+# Exits 0 if everything passes and 1 otherwise.  Output goes through
+# printf because echo's treatment of "\n" and "\c" is not portable.
+
+MAKE=gmake
+TMPFILE=/tmp/pi.tmp.$$
+# Remove the scratch file on every exit path, including failures.
+trap 'rm -f "$TMPFILE"' 0
+
+printf '\n** Running unit tests for MPI library\n\n'
+
+# Build the mpi-test program, which comprises all the unit tests for
+# the MPI library...
+printf 'Bringing mpi-test up to date ... \n'
+if $MAKE mpi-test ; then
+  :
+else
+  printf ' \nMake failed to build mpi-test.\n \n'
+  exit 1
+fi
+
+if [ ! -x mpi-test ] ; then
+  printf " \nCannot find 'mpi-test' program, testing cannot continue.\n \n"
+  exit 1
+fi
+
+# Get the list of available test suites...
+tests=`./mpi-test list | awk '{print $1}'`
+errs=0
+
+# Run each test suite and check the result code of mpi-test
+for test in $tests ; do
+  printf '%s ... ' "$test"
+  if ./mpi-test $test ; then
+    printf 'passed\n'
+  else
+    printf 'FAILED\n'
+    errs=1
+  fi
+done
+
+# If any tests failed, we'll stop at this point
+if [ "$errs" = "0" ] ; then
+  printf 'All unit tests passed\n'
+else
+  printf 'One or more tests failed\n'
+  exit 1
+fi
+
+# Now try to build the 'pi' program, and see if 'pi 2000' reproduces
+# the reference output stored in tests/pi2k.txt
+printf '\n** Running other tests\n\n'
+
+printf "Bringing 'pi' up to date ... \n"
+if $MAKE pi ; then
+    :
+else
+    printf '\nMake failed to build pi.\n\n'
+    exit 1
+fi
+
+if [ ! -x pi ] ; then
+    printf "\nCannot find 'pi' program; testing cannot continue.\n\n"
+    exit 1
+fi
+
+./pi 2000 > "$TMPFILE"
+if cmp tests/pi2k.txt "$TMPFILE" ; then
+    printf 'Okay!  The pi test passes.\n'
+else
+    printf 'Oops!  The pi test failed. :(\n'
+    exit 1
+fi
+
+exit 0
+
+# Here there be dragons
diff --git a/security/nss/lib/freebl/mpi/doc/LICENSE b/security/nss/lib/freebl/mpi/doc/LICENSE
new file mode 100644
index 000000000..35cca68ce
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/LICENSE
@@ -0,0 +1,11 @@
+Within this directory, each of the files listed below is licensed under
+the terms given in the file LICENSE-MPL, also in this directory.
+
+basecvt.pod
+gcd.pod
+invmod.pod
+isprime.pod
+lap.pod
+mpi-test.pod
+prime.txt
+prng.pod
diff --git a/security/nss/lib/freebl/mpi/doc/LICENSE-MPL b/security/nss/lib/freebl/mpi/doc/LICENSE-MPL
new file mode 100644
index 000000000..41dc2327f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/LICENSE-MPL
@@ -0,0 +1,3 @@
+This Source Code Form is subject to the terms of the Mozilla Public
+License, v. 2.0. If a copy of the MPL was not distributed with this
+file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/basecvt.pod b/security/nss/lib/freebl/mpi/doc/basecvt.pod
new file mode 100644
index 000000000..c3d87fbc7
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/basecvt.pod
@@ -0,0 +1,65 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ basecvt - radix conversion for arbitrary precision integers
+
+=head1 SYNOPSIS
+
+ basecvt <ibase> <obase> [values]
+
+=head1 DESCRIPTION
+
+The B<basecvt> program is a command-line tool for converting integers
+of arbitrary precision from one radix to another.  The current version
+supports radix values from 2 (binary) to 64, inclusive.  The first two
+command line arguments specify the input and output radix, in base 10.
+Any further arguments are taken to be integers notated in the input
+radix, and these are converted to the output radix.  The output is
+written, one integer per line, to standard output.
+
+When reading integers, only digits that are valid for the input
+radix are accepted.  Processing of an integer terminates when an
+invalid input digit is encountered.  So, for example, if you set the
+input radix to 10 and enter '10ACF', B<basecvt> would assume that you
+had entered '10' and ignore the rest of the string.
+
+If no values are provided, no output is written, but the program
+simply terminates with a zero exit status.  Error diagnostics are
+written to standard error in the event of out-of-range radix
+specifications.  Regardless of the actual values of the input and
+output radix, the radix arguments are taken to be in base 10 (decimal)
+notation.
+
+=head1 DIGITS
+
+For radices from 2-10, standard ASCII decimal digits 0-9 are used for
+both input and output.  For radices from 11-36, the ASCII letters A-Z
+are also included, following the convention used in hexadecimal.  In
+this range, input is accepted in either upper or lower case, although
+on output only lower-case letters are used.
+
+For radices from 37-62, the output includes both upper- and lower-case
+ASCII letters, and case matters.  In this range, case is distinguished
+both for input and for output values.
+
+For radices 63 and 64, the characters '+' (plus) and '/' (forward
+solidus) are also used.  These are derived from the MIME base64
+encoding scheme.  The overall encoding is not the same as base64,
+because the ASCII digits are used for the bottom of the range, and the
+letters are shifted upward; however, the output will consist of the
+same character set.
+
+This input and output behaviour is inherited from the MPI library used
+by B<basecvt>, and so is not configurable at runtime.
+
+=head1 SEE ALSO
+
+ dec2hex(1), hex2dec(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/build b/security/nss/lib/freebl/mpi/doc/build
new file mode 100755
index 000000000..4d75b1e5a
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/build
@@ -0,0 +1,30 @@
+#!/bin/sh
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+VERS="1.7p6"
+SECT="1"
+NAME="MPI Tools"
+
+echo "Building manual pages ..."
+case $# in
+  0)
+    files=`ls *.pod`
+    ;;
+  *)
+    files=$*
+    ;;
+esac
+
+for name in $files
+do
+   echo -n "$name ... "
+#  sname=`noext $name`
+   sname=`basename $name .pod`
+   pod2man --section="$SECT" --center="$NAME" --release="$VERS" $name > $sname.$SECT
+   echo "(done)"
+done
+
+echo "Finished building."
+
diff --git a/security/nss/lib/freebl/mpi/doc/div.txt b/security/nss/lib/freebl/mpi/doc/div.txt
new file mode 100644
index 000000000..c13fb6ef1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/div.txt
@@ -0,0 +1,64 @@
+Division
+
+This describes the division algorithm used by the MPI library.
+
+Input:    a, b; a > b
+Compute:  Q, R; a = Qb + R
+
+The input numbers are normalized so that the high-order digit of b is
+at least half the radix.  This guarantees that we have a reasonable
+way to guess at the digits of the quotient (this method was taken from
+Knuth, vol. 2, with adaptations).
+
+To normalize, test the high-order digit of b.  If it is less than half
+the radix, multiply both a and b by d, where:
+
+             radix - 1
+	d = -----------
+              bmax + 1
+
+...where bmax is the high-order digit of b.  Otherwise, set d = 1.
+
+Given normalized values for a and b, let the notation a[n] denote the
+nth digit of a.  Let #a be the number of significant figures of a (not
+including any leading zeroes).
+
+	Let R = 0
+	Let p = #a - 1
+
+	while(p >= 0)
+	  do
+	    R = (R * radix) + a[p]
+	    p = p - 1
+	  while(R < b and p >= 0)
+
+	  if(R < b)
+	    break
+
+	  q = (R[#R - 1] * radix) + R[#R - 2]
+	  q = q / b[#b - 1]
+
+	  T = b * q
+
+	  while(T > R)
+	    q = q - 1
+	    T = T - b
+	  endwhile
+
+	  R = R - T
+
+	  Q = (Q * radix) + q
+
+	endwhile
+
+At this point, Q is the quotient, and R is the normalized remainder.
+To denormalize R, compute:
+
+	R = (R / d)
+
+At this point, you are finished.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/expt.txt b/security/nss/lib/freebl/mpi/doc/expt.txt
new file mode 100644
index 000000000..bd9d6f196
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/expt.txt
@@ -0,0 +1,94 @@
+Exponentiation
+
+For exponentiation, the MPI library uses a simple and fairly standard
+square-and-multiply method.  The algorithm is this:
+
+Input:	a, b
+Output: a ** b
+
+	s = 1
+
+	while(b != 0)
+	  if(b is odd)
+	    s = s * a
+	  endif
+
+	  b = b / 2
+
+	  a = a * a
+	endwhile
+
+	return s
+
+The modular exponentiation is done the same way, except replacing:
+
+	s = s * a
+
+with
+	s = (s * a) mod m
+
+and replacing
+
+	a = a * a
+
+with
+
+	a = (a * a) mod m
+
+Here is a sample exponentiation using the MPI library, as compared to
+the same problem solved by the Unix 'bc' program on my system:
+
+Computation of 2,381,283 ** 235
+
+'bc' says:
+
+4385CA4A804D199FBEAD95FAD0796FAD0D0B51FC9C16743C45568C789666985DB719\
+4D90E393522F74C9601262C0514145A49F3B53D00983F95FDFCEA3D0043ECEF6227E\
+6FB59C924C3EE74447B359B5BF12A555D46CB819809EF423F004B55C587D6F0E8A55\
+4988036A42ACEF9F71459F97CEF6E574BD7373657111648626B1FF8EE15F663B2C0E\
+6BBE5082D4CDE8E14F263635AE8F35DB2C280819517BE388B5573B84C5A19C871685\
+FD408A6471F9D6AFAF5129A7548EAE926B40874B340285F44765BF5468CE20A13267\
+CD88CE6BC786ACED36EC7EA50F67FF27622575319068A332C3C0CB23E26FB55E26F4\
+5F732753A52B8E2FB4D4F42D894242613CA912A25486C3DEC9C66E5DB6182F6C1761\
+CF8CD0D255BE64B93836B27D452AE38F950EB98B517D4CF50D48F0165EF0CCCE1F5C\
+49BF18219FDBA0EEDD1A7E8B187B70C2BAED5EC5C6821EF27FAFB1CFF70111C52235\
+5E948B93A015AA1AE152B110BB5658CB14D3E45A48BFE7F082C1182672A455A695CD\
+A1855E8781E625F25B41B516E77F589FA420C3B058861EA138CF7A2C58DB3C7504FD\
+D29554D78237834CC5AE710D403CC4F6973D5012B7E117A8976B14A0B5AFA889BD47\
+92C461F0F96116F00A97AE9E83DC5203680CAF9A18A062566C145650AB86BE4F907F\
+A9F7AB4A700B29E1E5BACCD6DCBFA513E10832815F710807EED2E279081FEC61D619\
+AB270BEB3D3A1787B35A9DD41A8766CF21F3B5C693B3BAB1C2FA14A4ED202BC35743\
+E5CBE2391624D4F8C9BFBBC78D69764E7C6C5B11BF005677BFAD17D9278FFC1F158F\
+1B3683FF7960FA0608103792C4163DC0AF3E06287BB8624F8FE3A0FFBDF82ACECA2F\
+CFFF2E1AC93F3CA264A1B
+
+MPI says:
+
+4385CA4A804D199FBEAD95FAD0796FAD0D0B51FC9C16743C45568C789666985DB719\
+4D90E393522F74C9601262C0514145A49F3B53D00983F95FDFCEA3D0043ECEF6227E\
+6FB59C924C3EE74447B359B5BF12A555D46CB819809EF423F004B55C587D6F0E8A55\
+4988036A42ACEF9F71459F97CEF6E574BD7373657111648626B1FF8EE15F663B2C0E\
+6BBE5082D4CDE8E14F263635AE8F35DB2C280819517BE388B5573B84C5A19C871685\
+FD408A6471F9D6AFAF5129A7548EAE926B40874B340285F44765BF5468CE20A13267\
+CD88CE6BC786ACED36EC7EA50F67FF27622575319068A332C3C0CB23E26FB55E26F4\
+5F732753A52B8E2FB4D4F42D894242613CA912A25486C3DEC9C66E5DB6182F6C1761\
+CF8CD0D255BE64B93836B27D452AE38F950EB98B517D4CF50D48F0165EF0CCCE1F5C\
+49BF18219FDBA0EEDD1A7E8B187B70C2BAED5EC5C6821EF27FAFB1CFF70111C52235\
+5E948B93A015AA1AE152B110BB5658CB14D3E45A48BFE7F082C1182672A455A695CD\
+A1855E8781E625F25B41B516E77F589FA420C3B058861EA138CF7A2C58DB3C7504FD\
+D29554D78237834CC5AE710D403CC4F6973D5012B7E117A8976B14A0B5AFA889BD47\
+92C461F0F96116F00A97AE9E83DC5203680CAF9A18A062566C145650AB86BE4F907F\
+A9F7AB4A700B29E1E5BACCD6DCBFA513E10832815F710807EED2E279081FEC61D619\
+AB270BEB3D3A1787B35A9DD41A8766CF21F3B5C693B3BAB1C2FA14A4ED202BC35743\
+E5CBE2391624D4F8C9BFBBC78D69764E7C6C5B11BF005677BFAD17D9278FFC1F158F\
+1B3683FF7960FA0608103792C4163DC0AF3E06287BB8624F8FE3A0FFBDF82ACECA2F\
+CFFF2E1AC93F3CA264A1B
+
+Diff says:
+% diff bc.txt mp.txt
+%
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/gcd.pod b/security/nss/lib/freebl/mpi/doc/gcd.pod
new file mode 100644
index 000000000..b5b8fa34f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/gcd.pod
@@ -0,0 +1,28 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ gcd - compute greatest common divisor of two integers
+
+=head1 SYNOPSIS
+
+ gcd <a> <b>
+
+=head1 DESCRIPTION
+
+The B<gcd> program computes the greatest common divisor of two
+arbitrary-precision integers I<a> and I<b>.  The result is written in
+standard decimal notation to the standard output.
+
+If I<b> is zero, B<gcd> will print an error message and exit.
+
+=head1 SEE ALSO
+
+invmod(1), isprime(1), lap(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/invmod.pod b/security/nss/lib/freebl/mpi/doc/invmod.pod
new file mode 100644
index 000000000..0194f4488
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/invmod.pod
@@ -0,0 +1,34 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ invmod - compute modular inverse of an integer
+
+=head1 SYNOPSIS
+
+ invmod <a> <m>
+
+=head1 DESCRIPTION
+
+The B<invmod> program computes the inverse of I<a>, modulo I<m>, if
+that inverse exists.  Both I<a> and I<m> are arbitrary-precision
+integers in decimal notation.  The result is written in standard
+decimal notation to the standard output.
+
+If there is no inverse, the message:
+
+ No inverse
+
+...will be printed to the standard output (an inverse exists if and
+only if the greatest common divisor of I<a> and I<m> is 1).
+
+=head1 SEE ALSO
+
+gcd(1), isprime(1), lap(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/isprime.pod b/security/nss/lib/freebl/mpi/doc/isprime.pod
new file mode 100644
index 000000000..a8ec1f7ee
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/isprime.pod
@@ -0,0 +1,63 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ isprime - probabilistic primality testing
+
+=head1 SYNOPSIS
+
+ isprime <a>
+
+=head1 DESCRIPTION
+
+The B<isprime> program attempts to determine whether the arbitrary
+precision integer I<a> is prime.  It first tests I<a> for divisibility
+by the first 170 or so small primes, and assuming I<a> is not
+divisible by any of these, applies 15 iterations of the Rabin-Miller
+probabilistic primality test.
+
+If the program discovers that the number is composite, it will print:
+
+ Not prime (reason)
+
+Where I<reason> is either:
+
+	divisible by small prime x
+
+Or:
+
+	failed nth pseudoprime test
+
+In the first case, I<x> indicates the first small prime factor that
+was found.  In the second case, I<n> indicates which of the
+pseudoprime tests failed (numbered from 1)
+
+If this happens, the number is definitely not prime.  However, if the
+number succeeds, this message results:
+
+ Probably prime, 1 in 4^15 chance of false positive
+
+If this happens, the number is prime with very high probability, but
+its primality has not been absolutely proven, only demonstrated to a
+very convincing degree.
+
+The value I<a> can be input in standard decimal notation, or, if it is
+prefixed with I<0x>, it will be read as hexadecimal.
+
+=head1 ENVIRONMENT
+
+You can control how many iterations of Rabin-Miller are performed on
+the candidate number by setting the I<RM_TESTS> environment variable
+to an integer value before starting up B<isprime>.  This will change
+the output slightly if the number passes all the tests.
+
+=head1 SEE ALSO
+
+gcd(1), invmod(1), lap(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/lap.pod b/security/nss/lib/freebl/mpi/doc/lap.pod
new file mode 100644
index 000000000..47539fbbf
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/lap.pod
@@ -0,0 +1,36 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ lap - compute least annihilating power of a number
+
+=head1 SYNOPSIS
+
+ lap <a> <m>
+
+=head1 DESCRIPTION
+
+The B<lap> program computes the order of I<a> modulo I<m>, for
+arbitrary precision integers I<a> and I<m>.  The B<order> of I<a>
+modulo I<m> is defined as the smallest positive value I<n> for which
+I<a> raised to the I<n>th power, modulo I<m>, is equal to 1.  The
+order may not exist, if I<m> is composite.
+
+=head1 RESTRICTIONS
+
+This program is very slow, especially for large moduli.  It is
+intended as a way to help find primitive elements in a modular field,
+but it does not do so in a particularly efficient manner.  It was
+written simply to help verify that a particular candidate does not
+have an obviously short cycle mod I<m>.
+
+=head1 SEE ALSO
+
+gcd(1), invmod(1), isprime(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/mpi-test.pod b/security/nss/lib/freebl/mpi/doc/mpi-test.pod
new file mode 100644
index 000000000..b05f866e5
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/mpi-test.pod
@@ -0,0 +1,51 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ mpi-test - automated test program for MPI library
+
+=head1 SYNOPSIS
+
+ mpi-test <suite-name> [quiet]
+ mpi-test list
+ mpi-test help
+
+=head1 DESCRIPTION
+
+The B<mpi-test> program is a general unit test driver for the MPI
+library.  It is used to verify that the library works as it is
+supposed to on your architecture.  As with most such things, passing
+all the tests in B<mpi-test> does not guarantee the code is correct,
+but if any of them fail, there are certainly problems.
+
+Each major function of the library can be tested individually.  For a
+list of the test suites understood by B<mpi-test>, run it with the
+I<list> command line option:
+
+ mpi-test list
+
+This will display a list of the available test suites and a brief
+synopsis of what each one does.  For a brief overview of this
+document, run B<mpi-test> I<help>.
+
+B<mpi-test> exits with a zero status if the selected test succeeds, or
+a nonzero status if it fails.  If a I<suite-name> which is not
+understood by B<mpi-test> is given, a diagnostic is printed to the
+standard error, and the program exits with a result code of 2.  If a
+test fails, the result code will be 1, and a diagnostic is ordinarily
+printed to the standard error.  However, if the I<quiet> option is
+provided, these diagnostics will be suppressed.
+
+=head1 RESTRICTIONS
+
+Only a few canned test cases are provided.  The solutions have been
+verified using the GNU bc(1) program, so bugs there may cause problems
+here; however, this is very unlikely, so if a test fails, it is almost
+certainly my fault, not bc(1)'s.
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/mul.txt b/security/nss/lib/freebl/mpi/doc/mul.txt
new file mode 100644
index 000000000..975f56ddb
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/mul.txt
@@ -0,0 +1,77 @@
+Multiplication
+
+This describes the multiplication algorithm used by the MPI library.
+
+This is basically a standard "schoolbook" algorithm.  It is slow --
+O(mn) for m = #a, n = #b -- but easy to implement and verify.
+Basically, we run two nested loops, as illustrated here (R is the
+radix):
+
+k = 0
+for j <- 0 to (#b - 1)
+  for i <- 0 to (#a - 1)
+    w = (a[i] * b[j]) + k + c[i+j]
+    c[i+j] = w mod R
+    k = w div R
+  endfor
+  c[i+j] = k;
+  k = 0;
+endfor
+
+It is necessary that 'w' have room for at least two radix R digits.
+The product of any two digits in radix R is at most:
+
+	(R - 1)(R - 1) = R^2 - 2R + 1
+
+Since the largest two-digit radix-R number is R^2 - 1, this
+ensures that the product will fit into the two-digit register.
+
+To ensure that two digits is enough for w, we must also show that
+there is room for the carry-in from the previous multiplication, and
+the current value of the product digit that is being recomputed.
+Assuming each of these may be as big as R - 1 (and no larger,
+certainly), two digits will be enough if and only if:
+
+	(R^2 - 2R + 1) + 2(R - 1) <= R^2 - 1
+
+Solving this equation shows that, indeed, this is the case:
+
+	R^2 - 2R + 1 + 2R - 2 <= R^2 - 1
+
+	R^2 - 1 <= R^2 - 1
+
+This suggests that a good radix would be one more than the largest
+value that can be held in half a machine word -- so, for example, as
+in this implementation, where we used a radix of 65536 on a machine
+with 4-byte words.  Another advantage of a radix of this sort is that
+binary-level operations are easy on numbers in this representation.
+
+Here's an example multiplication worked out longhand in radix-10,
+using the above algorithm:
+
+   a =     999
+   b =   x 999
+  -------------
+   p =  998001
+
+w = (a[jx] * b[ix]) + kin + c[ix + jx]
+c[ix+jx] = w % RADIX
+k = w / RADIX
+                                                               product
+ix	jx	a[jx]	b[ix]	kin	w	c[i+j]	kout	000000
+0	0	9	9	0	81+0+0	1	8	000001
+0	1	9	9	8	81+8+0	9	8	000091
+0	2	9	9	8	81+8+0	9	8	000991
+				8			0	008991
+1	0	9	9	0	81+0+9	0	9	008901
+1	1	9	9	9	81+9+9	9	9	008901
+1	2	9	9	9	81+9+8	8	9	008901
+				9			0	098901
+2	0	9	9	0	81+0+9	0	9	098001
+2	1	9	9	9	81+9+8	8	9	098001
+2	2	9	9	9	81+9+9	9	9	098001
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/pi.txt b/security/nss/lib/freebl/mpi/doc/pi.txt
new file mode 100644
index 000000000..a6ef91137
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/pi.txt
@@ -0,0 +1,53 @@
+This file describes how pi is computed by the program in 'pi.c' (see
+the utils subdirectory).
+
+Basically, we use Machin's formula, which is what everyone in the
+world uses as a simple method for computing approximations to pi.
+This works for up to a few thousand digits without too much effort.
+Beyond that, though, it gets too slow.
+
+Machin's formula states:
+
+	 pi := 16 * arctan(1/5) - 4 * arctan(1/239)
+
+We compute this in integer arithmetic by first multiplying everything
+through by 10^d, where 'd' is the number of digits of pi we want to
+compute.  It turns out that the last few digits will be wrong, but the
+number of wrong digits is usually very small (ordinarily only 2-3).
+Having done this, we compute the arctan() function using the formula:
+
+                       1      1       1       1       1     
+       arctan(1/x) := --- - ----- + ----- - ----- + ----- - ...
+                       x    3 x^3   5 x^5   7 x^7   9 x^9
+
+This is done iteratively by computing the first term manually, and
+then repeatedly dividing the current power of 1/x by x^2 and that
+quotient by k, where k = 3, 5, 7, ..., to get each later term.  Each
+term is added to (or subtracted from) a running sum, as appropriate.
+The iteration continues until we exhaust our available precision and
+the current figure goes to zero under integer division; then we're finished.
+
+Actually, we get a couple of extra bits of precision out of the fact that
+we know we're computing y * arctan(1/x), by setting up the multiplier
+as:
+
+      y * 10^d
+
+... instead of just 10^d.  There is also a bit of cleverness in how
+the loop is constructed, to avoid special-casing the first term.
+Check out the code for arctan() in 'pi.c', if you are interested in
+seeing how it is set up.
+
+Thanks to Jason P. for this algorithm, which I assembled from notes
+and programs found on his cool "Pile of Pi Programs" page, at:
+
+      http://www.isr.umd.edu/~jasonp/pipage.html
+
+Thanks also to Henrik Johansson <Henrik.Johansson@Nexus.Comm.SE>, from
+whose pi program I borrowed the clever idea of pre-multiplying by x in
+order to avoid a special case on the loop iteration.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/prime.txt b/security/nss/lib/freebl/mpi/doc/prime.txt
new file mode 100644
index 000000000..694797d5f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/prime.txt
@@ -0,0 +1,6542 @@
+2
+3
+5
+7
+11
+13
+17
+19
+23
+29
+31
+37
+41
+43
+47
+53
+59
+61
+67
+71
+73
+79
+83
+89
+97
+101
+103
+107
+109
+113
+127
+131
+137
+139
+149
+151
+157
+163
+167
+173
+179
+181
+191
+193
+197
+199
+211
+223
+227
+229
+233
+239
+241
+251
+257
+263
+269
+271
+277
+281
+283
+293
+307
+311
+313
+317
+331
+337
+347
+349
+353
+359
+367
+373
+379
+383
+389
+397
+401
+409
+419
+421
+431
+433
+439
+443
+449
+457
+461
+463
+467
+479
+487
+491
+499
+503
+509
+521
+523
+541
+547
+557
+563
+569
+571
+577
+587
+593
+599
+601
+607
+613
+617
+619
+631
+641
+643
+647
+653
+659
+661
+673
+677
+683
+691
+701
+709
+719
+727
+733
+739
+743
+751
+757
+761
+769
+773
+787
+797
+809
+811
+821
+823
+827
+829
+839
+853
+857
+859
+863
+877
+881
+883
+887
+907
+911
+919
+929
+937
+941
+947
+953
+967
+971
+977
+983
+991
+997
+1009
+1013
+1019
+1021
+1031
+1033
+1039
+1049
+1051
+1061
+1063
+1069
+1087
+1091
+1093
+1097
+1103
+1109
+1117
+1123
+1129
+1151
+1153
+1163
+1171
+1181
+1187
+1193
+1201
+1213
+1217
+1223
+1229
+1231
+1237
+1249
+1259
+1277
+1279
+1283
+1289
+1291
+1297
+1301
+1303
+1307
+1319
+1321
+1327
+1361
+1367
+1373
+1381
+1399
+1409
+1423
+1427
+1429
+1433
+1439
+1447
+1451
+1453
+1459
+1471
+1481
+1483
+1487
+1489
+1493
+1499
+1511
+1523
+1531
+1543
+1549
+1553
+1559
+1567
+1571
+1579
+1583
+1597
+1601
+1607
+1609
+1613
+1619
+1621
+1627
+1637
+1657
+1663
+1667
+1669
+1693
+1697
+1699
+1709
+1721
+1723
+1733
+1741
+1747
+1753
+1759
+1777
+1783
+1787
+1789
+1801
+1811
+1823
+1831
+1847
+1861
+1867
+1871
+1873
+1877
+1879
+1889
+1901
+1907
+1913
+1931
+1933
+1949
+1951
+1973
+1979
+1987
+1993
+1997
+1999
+2003
+2011
+2017
+2027
+2029
+2039
+2053
+2063
+2069
+2081
+2083
+2087
+2089
+2099
+2111
+2113
+2129
+2131
+2137
+2141
+2143
+2153
+2161
+2179
+2203
+2207
+2213
+2221
+2237
+2239
+2243
+2251
+2267
+2269
+2273
+2281
+2287
+2293
+2297
+2309
+2311
+2333
+2339
+2341
+2347
+2351
+2357
+2371
+2377
+2381
+2383
+2389
+2393
+2399
+2411
+2417
+2423
+2437
+2441
+2447
+2459
+2467
+2473
+2477
+2503
+2521
+2531
+2539
+2543
+2549
+2551
+2557
+2579
+2591
+2593
+2609
+2617
+2621
+2633
+2647
+2657
+2659
+2663
+2671
+2677
+2683
+2687
+2689
+2693
+2699
+2707
+2711
+2713
+2719
+2729
+2731
+2741
+2749
+2753
+2767
+2777
+2789
+2791
+2797
+2801
+2803
+2819
+2833
+2837
+2843
+2851
+2857
+2861
+2879
+2887
+2897
+2903
+2909
+2917
+2927
+2939
+2953
+2957
+2963
+2969
+2971
+2999
+3001
+3011
+3019
+3023
+3037
+3041
+3049
+3061
+3067
+3079
+3083
+3089
+3109
+3119
+3121
+3137
+3163
+3167
+3169
+3181
+3187
+3191
+3203
+3209
+3217
+3221
+3229
+3251
+3253
+3257
+3259
+3271
+3299
+3301
+3307
+3313
+3319
+3323
+3329
+3331
+3343
+3347
+3359
+3361
+3371
+3373
+3389
+3391
+3407
+3413
+3433
+3449
+3457
+3461
+3463
+3467
+3469
+3491
+3499
+3511
+3517
+3527
+3529
+3533
+3539
+3541
+3547
+3557
+3559
+3571
+3581
+3583
+3593
+3607
+3613
+3617
+3623
+3631
+3637
+3643
+3659
+3671
+3673
+3677
+3691
+3697
+3701
+3709
+3719
+3727
+3733
+3739
+3761
+3767
+3769
+3779
+3793
+3797
+3803
+3821
+3823
+3833
+3847
+3851
+3853
+3863
+3877
+3881
+3889
+3907
+3911
+3917
+3919
+3923
+3929
+3931
+3943
+3947
+3967
+3989
+4001
+4003
+4007
+4013
+4019
+4021
+4027
+4049
+4051
+4057
+4073
+4079
+4091
+4093
+4099
+4111
+4127
+4129
+4133
+4139
+4153
+4157
+4159
+4177
+4201
+4211
+4217
+4219
+4229
+4231
+4241
+4243
+4253
+4259
+4261
+4271
+4273
+4283
+4289
+4297
+4327
+4337
+4339
+4349
+4357
+4363
+4373
+4391
+4397
+4409
+4421
+4423
+4441
+4447
+4451
+4457
+4463
+4481
+4483
+4493
+4507
+4513
+4517
+4519
+4523
+4547
+4549
+4561
+4567
+4583
+4591
+4597
+4603
+4621
+4637
+4639
+4643
+4649
+4651
+4657
+4663
+4673
+4679
+4691
+4703
+4721
+4723
+4729
+4733
+4751
+4759
+4783
+4787
+4789
+4793
+4799
+4801
+4813
+4817
+4831
+4861
+4871
+4877
+4889
+4903
+4909
+4919
+4931
+4933
+4937
+4943
+4951
+4957
+4967
+4969
+4973
+4987
+4993
+4999
+5003
+5009
+5011
+5021
+5023
+5039
+5051
+5059
+5077
+5081
+5087
+5099
+5101
+5107
+5113
+5119
+5147
+5153
+5167
+5171
+5179
+5189
+5197
+5209
+5227
+5231
+5233
+5237
+5261
+5273
+5279
+5281
+5297
+5303
+5309
+5323
+5333
+5347
+5351
+5381
+5387
+5393
+5399
+5407
+5413
+5417
+5419
+5431
+5437
+5441
+5443
+5449
+5471
+5477
+5479
+5483
+5501
+5503
+5507
+5519
+5521
+5527
+5531
+5557
+5563
+5569
+5573
+5581
+5591
+5623
+5639
+5641
+5647
+5651
+5653
+5657
+5659
+5669
+5683
+5689
+5693
+5701
+5711
+5717
+5737
+5741
+5743
+5749
+5779
+5783
+5791
+5801
+5807
+5813
+5821
+5827
+5839
+5843
+5849
+5851
+5857
+5861
+5867
+5869
+5879
+5881
+5897
+5903
+5923
+5927
+5939
+5953
+5981
+5987
+6007
+6011
+6029
+6037
+6043
+6047
+6053
+6067
+6073
+6079
+6089
+6091
+6101
+6113
+6121
+6131
+6133
+6143
+6151
+6163
+6173
+6197
+6199
+6203
+6211
+6217
+6221
+6229
+6247
+6257
+6263
+6269
+6271
+6277
+6287
+6299
+6301
+6311
+6317
+6323
+6329
+6337
+6343
+6353
+6359
+6361
+6367
+6373
+6379
+6389
+6397
+6421
+6427
+6449
+6451
+6469
+6473
+6481
+6491
+6521
+6529
+6547
+6551
+6553
+6563
+6569
+6571
+6577
+6581
+6599
+6607
+6619
+6637
+6653
+6659
+6661
+6673
+6679
+6689
+6691
+6701
+6703
+6709
+6719
+6733
+6737
+6761
+6763
+6779
+6781
+6791
+6793
+6803
+6823
+6827
+6829
+6833
+6841
+6857
+6863
+6869
+6871
+6883
+6899
+6907
+6911
+6917
+6947
+6949
+6959
+6961
+6967
+6971
+6977
+6983
+6991
+6997
+7001
+7013
+7019
+7027
+7039
+7043
+7057
+7069
+7079
+7103
+7109
+7121
+7127
+7129
+7151
+7159
+7177
+7187
+7193
+7207
+7211
+7213
+7219
+7229
+7237
+7243
+7247
+7253
+7283
+7297
+7307
+7309
+7321
+7331
+7333
+7349
+7351
+7369
+7393
+7411
+7417
+7433
+7451
+7457
+7459
+7477
+7481
+7487
+7489
+7499
+7507
+7517
+7523
+7529
+7537
+7541
+7547
+7549
+7559
+7561
+7573
+7577
+7583
+7589
+7591
+7603
+7607
+7621
+7639
+7643
+7649
+7669
+7673
+7681
+7687
+7691
+7699
+7703
+7717
+7723
+7727
+7741
+7753
+7757
+7759
+7789
+7793
+7817
+7823
+7829
+7841
+7853
+7867
+7873
+7877
+7879
+7883
+7901
+7907
+7919
+7927
+7933
+7937
+7949
+7951
+7963
+7993
+8009
+8011
+8017
+8039
+8053
+8059
+8069
+8081
+8087
+8089
+8093
+8101
+8111
+8117
+8123
+8147
+8161
+8167
+8171
+8179
+8191
+8209
+8219
+8221
+8231
+8233
+8237
+8243
+8263
+8269
+8273
+8287
+8291
+8293
+8297
+8311
+8317
+8329
+8353
+8363
+8369
+8377
+8387
+8389
+8419
+8423
+8429
+8431
+8443
+8447
+8461
+8467
+8501
+8513
+8521
+8527
+8537
+8539
+8543
+8563
+8573
+8581
+8597
+8599
+8609
+8623
+8627
+8629
+8641
+8647
+8663
+8669
+8677
+8681
+8689
+8693
+8699
+8707
+8713
+8719
+8731
+8737
+8741
+8747
+8753
+8761
+8779
+8783
+8803
+8807
+8819
+8821
+8831
+8837
+8839
+8849
+8861
+8863
+8867
+8887
+8893
+8923
+8929
+8933
+8941
+8951
+8963
+8969
+8971
+8999
+9001
+9007
+9011
+9013
+9029
+9041
+9043
+9049
+9059
+9067
+9091
+9103
+9109
+9127
+9133
+9137
+9151
+9157
+9161
+9173
+9181
+9187
+9199
+9203
+9209
+9221
+9227
+9239
+9241
+9257
+9277
+9281
+9283
+9293
+9311
+9319
+9323
+9337
+9341
+9343
+9349
+9371
+9377
+9391
+9397
+9403
+9413
+9419
+9421
+9431
+9433
+9437
+9439
+9461
+9463
+9467
+9473
+9479
+9491
+9497
+9511
+9521
+9533
+9539
+9547
+9551
+9587
+9601
+9613
+9619
+9623
+9629
+9631
+9643
+9649
+9661
+9677
+9679
+9689
+9697
+9719
+9721
+9733
+9739
+9743
+9749
+9767
+9769
+9781
+9787
+9791
+9803
+9811
+9817
+9829
+9833
+9839
+9851
+9857
+9859
+9871
+9883
+9887
+9901
+9907
+9923
+9929
+9931
+9941
+9949
+9967
+9973
+10007
+10009
+10037
+10039
+10061
+10067
+10069
+10079
+10091
+10093
+10099
+10103
+10111
+10133
+10139
+10141
+10151
+10159
+10163
+10169
+10177
+10181
+10193
+10211
+10223
+10243
+10247
+10253
+10259
+10267
+10271
+10273
+10289
+10301
+10303
+10313
+10321
+10331
+10333
+10337
+10343
+10357
+10369
+10391
+10399
+10427
+10429
+10433
+10453
+10457
+10459
+10463
+10477
+10487
+10499
+10501
+10513
+10529
+10531
+10559
+10567
+10589
+10597
+10601
+10607
+10613
+10627
+10631
+10639
+10651
+10657
+10663
+10667
+10687
+10691
+10709
+10711
+10723
+10729
+10733
+10739
+10753
+10771
+10781
+10789
+10799
+10831
+10837
+10847
+10853
+10859
+10861
+10867
+10883
+10889
+10891
+10903
+10909
+10937
+10939
+10949
+10957
+10973
+10979
+10987
+10993
+11003
+11027
+11047
+11057
+11059
+11069
+11071
+11083
+11087
+11093
+11113
+11117
+11119
+11131
+11149
+11159
+11161
+11171
+11173
+11177
+11197
+11213
+11239
+11243
+11251
+11257
+11261
+11273
+11279
+11287
+11299
+11311
+11317
+11321
+11329
+11351
+11353
+11369
+11383
+11393
+11399
+11411
+11423
+11437
+11443
+11447
+11467
+11471
+11483
+11489
+11491
+11497
+11503
+11519
+11527
+11549
+11551
+11579
+11587
+11593
+11597
+11617
+11621
+11633
+11657
+11677
+11681
+11689
+11699
+11701
+11717
+11719
+11731
+11743
+11777
+11779
+11783
+11789
+11801
+11807
+11813
+11821
+11827
+11831
+11833
+11839
+11863
+11867
+11887
+11897
+11903
+11909
+11923
+11927
+11933
+11939
+11941
+11953
+11959
+11969
+11971
+11981
+11987
+12007
+12011
+12037
+12041
+12043
+12049
+12071
+12073
+12097
+12101
+12107
+12109
+12113
+12119
+12143
+12149
+12157
+12161
+12163
+12197
+12203
+12211
+12227
+12239
+12241
+12251
+12253
+12263
+12269
+12277
+12281
+12289
+12301
+12323
+12329
+12343
+12347
+12373
+12377
+12379
+12391
+12401
+12409
+12413
+12421
+12433
+12437
+12451
+12457
+12473
+12479
+12487
+12491
+12497
+12503
+12511
+12517
+12527
+12539
+12541
+12547
+12553
+12569
+12577
+12583
+12589
+12601
+12611
+12613
+12619
+12637
+12641
+12647
+12653
+12659
+12671
+12689
+12697
+12703
+12713
+12721
+12739
+12743
+12757
+12763
+12781
+12791
+12799
+12809
+12821
+12823
+12829
+12841
+12853
+12889
+12893
+12899
+12907
+12911
+12917
+12919
+12923
+12941
+12953
+12959
+12967
+12973
+12979
+12983
+13001
+13003
+13007
+13009
+13033
+13037
+13043
+13049
+13063
+13093
+13099
+13103
+13109
+13121
+13127
+13147
+13151
+13159
+13163
+13171
+13177
+13183
+13187
+13217
+13219
+13229
+13241
+13249
+13259
+13267
+13291
+13297
+13309
+13313
+13327
+13331
+13337
+13339
+13367
+13381
+13397
+13399
+13411
+13417
+13421
+13441
+13451
+13457
+13463
+13469
+13477
+13487
+13499
+13513
+13523
+13537
+13553
+13567
+13577
+13591
+13597
+13613
+13619
+13627
+13633
+13649
+13669
+13679
+13681
+13687
+13691
+13693
+13697
+13709
+13711
+13721
+13723
+13729
+13751
+13757
+13759
+13763
+13781
+13789
+13799
+13807
+13829
+13831
+13841
+13859
+13873
+13877
+13879
+13883
+13901
+13903
+13907
+13913
+13921
+13931
+13933
+13963
+13967
+13997
+13999
+14009
+14011
+14029
+14033
+14051
+14057
+14071
+14081
+14083
+14087
+14107
+14143
+14149
+14153
+14159
+14173
+14177
+14197
+14207
+14221
+14243
+14249
+14251
+14281
+14293
+14303
+14321
+14323
+14327
+14341
+14347
+14369
+14387
+14389
+14401
+14407
+14411
+14419
+14423
+14431
+14437
+14447
+14449
+14461
+14479
+14489
+14503
+14519
+14533
+14537
+14543
+14549
+14551
+14557
+14561
+14563
+14591
+14593
+14621
+14627
+14629
+14633
+14639
+14653
+14657
+14669
+14683
+14699
+14713
+14717
+14723
+14731
+14737
+14741
+14747
+14753
+14759
+14767
+14771
+14779
+14783
+14797
+14813
+14821
+14827
+14831
+14843
+14851
+14867
+14869
+14879
+14887
+14891
+14897
+14923
+14929
+14939
+14947
+14951
+14957
+14969
+14983
+15013
+15017
+15031
+15053
+15061
+15073
+15077
+15083
+15091
+15101
+15107
+15121
+15131
+15137
+15139
+15149
+15161
+15173
+15187
+15193
+15199
+15217
+15227
+15233
+15241
+15259
+15263
+15269
+15271
+15277
+15287
+15289
+15299
+15307
+15313
+15319
+15329
+15331
+15349
+15359
+15361
+15373
+15377
+15383
+15391
+15401
+15413
+15427
+15439
+15443
+15451
+15461
+15467
+15473
+15493
+15497
+15511
+15527
+15541
+15551
+15559
+15569
+15581
+15583
+15601
+15607
+15619
+15629
+15641
+15643
+15647
+15649
+15661
+15667
+15671
+15679
+15683
+15727
+15731
+15733
+15737
+15739
+15749
+15761
+15767
+15773
+15787
+15791
+15797
+15803
+15809
+15817
+15823
+15859
+15877
+15881
+15887
+15889
+15901
+15907
+15913
+15919
+15923
+15937
+15959
+15971
+15973
+15991
+16001
+16007
+16033
+16057
+16061
+16063
+16067
+16069
+16073
+16087
+16091
+16097
+16103
+16111
+16127
+16139
+16141
+16183
+16187
+16189
+16193
+16217
+16223
+16229
+16231
+16249
+16253
+16267
+16273
+16301
+16319
+16333
+16339
+16349
+16361
+16363
+16369
+16381
+16411
+16417
+16421
+16427
+16433
+16447
+16451
+16453
+16477
+16481
+16487
+16493
+16519
+16529
+16547
+16553
+16561
+16567
+16573
+16603
+16607
+16619
+16631
+16633
+16649
+16651
+16657
+16661
+16673
+16691
+16693
+16699
+16703
+16729
+16741
+16747
+16759
+16763
+16787
+16811
+16823
+16829
+16831
+16843
+16871
+16879
+16883
+16889
+16901
+16903
+16921
+16927
+16931
+16937
+16943
+16963
+16979
+16981
+16987
+16993
+17011
+17021
+17027
+17029
+17033
+17041
+17047
+17053
+17077
+17093
+17099
+17107
+17117
+17123
+17137
+17159
+17167
+17183
+17189
+17191
+17203
+17207
+17209
+17231
+17239
+17257
+17291
+17293
+17299
+17317
+17321
+17327
+17333
+17341
+17351
+17359
+17377
+17383
+17387
+17389
+17393
+17401
+17417
+17419
+17431
+17443
+17449
+17467
+17471
+17477
+17483
+17489
+17491
+17497
+17509
+17519
+17539
+17551
+17569
+17573
+17579
+17581
+17597
+17599
+17609
+17623
+17627
+17657
+17659
+17669
+17681
+17683
+17707
+17713
+17729
+17737
+17747
+17749
+17761
+17783
+17789
+17791
+17807
+17827
+17837
+17839
+17851
+17863
+17881
+17891
+17903
+17909
+17911
+17921
+17923
+17929
+17939
+17957
+17959
+17971
+17977
+17981
+17987
+17989
+18013
+18041
+18043
+18047
+18049
+18059
+18061
+18077
+18089
+18097
+18119
+18121
+18127
+18131
+18133
+18143
+18149
+18169
+18181
+18191
+18199
+18211
+18217
+18223
+18229
+18233
+18251
+18253
+18257
+18269
+18287
+18289
+18301
+18307
+18311
+18313
+18329
+18341
+18353
+18367
+18371
+18379
+18397
+18401
+18413
+18427
+18433
+18439
+18443
+18451
+18457
+18461
+18481
+18493
+18503
+18517
+18521
+18523
+18539
+18541
+18553
+18583
+18587
+18593
+18617
+18637
+18661
+18671
+18679
+18691
+18701
+18713
+18719
+18731
+18743
+18749
+18757
+18773
+18787
+18793
+18797
+18803
+18839
+18859
+18869
+18899
+18911
+18913
+18917
+18919
+18947
+18959
+18973
+18979
+19001
+19009
+19013
+19031
+19037
+19051
+19069
+19073
+19079
+19081
+19087
+19121
+19139
+19141
+19157
+19163
+19181
+19183
+19207
+19211
+19213
+19219
+19231
+19237
+19249
+19259
+19267
+19273
+19289
+19301
+19309
+19319
+19333
+19373
+19379
+19381
+19387
+19391
+19403
+19417
+19421
+19423
+19427
+19429
+19433
+19441
+19447
+19457
+19463
+19469
+19471
+19477
+19483
+19489
+19501
+19507
+19531
+19541
+19543
+19553
+19559
+19571
+19577
+19583
+19597
+19603
+19609
+19661
+19681
+19687
+19697
+19699
+19709
+19717
+19727
+19739
+19751
+19753
+19759
+19763
+19777
+19793
+19801
+19813
+19819
+19841
+19843
+19853
+19861
+19867
+19889
+19891
+19913
+19919
+19927
+19937
+19949
+19961
+19963
+19973
+19979
+19991
+19993
+19997
+20011
+20021
+20023
+20029
+20047
+20051
+20063
+20071
+20089
+20101
+20107
+20113
+20117
+20123
+20129
+20143
+20147
+20149
+20161
+20173
+20177
+20183
+20201
+20219
+20231
+20233
+20249
+20261
+20269
+20287
+20297
+20323
+20327
+20333
+20341
+20347
+20353
+20357
+20359
+20369
+20389
+20393
+20399
+20407
+20411
+20431
+20441
+20443
+20477
+20479
+20483
+20507
+20509
+20521
+20533
+20543
+20549
+20551
+20563
+20593
+20599
+20611
+20627
+20639
+20641
+20663
+20681
+20693
+20707
+20717
+20719
+20731
+20743
+20747
+20749
+20753
+20759
+20771
+20773
+20789
+20807
+20809
+20849
+20857
+20873
+20879
+20887
+20897
+20899
+20903
+20921
+20929
+20939
+20947
+20959
+20963
+20981
+20983
+21001
+21011
+21013
+21017
+21019
+21023
+21031
+21059
+21061
+21067
+21089
+21101
+21107
+21121
+21139
+21143
+21149
+21157
+21163
+21169
+21179
+21187
+21191
+21193
+21211
+21221
+21227
+21247
+21269
+21277
+21283
+21313
+21317
+21319
+21323
+21341
+21347
+21377
+21379
+21383
+21391
+21397
+21401
+21407
+21419
+21433
+21467
+21481
+21487
+21491
+21493
+21499
+21503
+21517
+21521
+21523
+21529
+21557
+21559
+21563
+21569
+21577
+21587
+21589
+21599
+21601
+21611
+21613
+21617
+21647
+21649
+21661
+21673
+21683
+21701
+21713
+21727
+21737
+21739
+21751
+21757
+21767
+21773
+21787
+21799
+21803
+21817
+21821
+21839
+21841
+21851
+21859
+21863
+21871
+21881
+21893
+21911
+21929
+21937
+21943
+21961
+21977
+21991
+21997
+22003
+22013
+22027
+22031
+22037
+22039
+22051
+22063
+22067
+22073
+22079
+22091
+22093
+22109
+22111
+22123
+22129
+22133
+22147
+22153
+22157
+22159
+22171
+22189
+22193
+22229
+22247
+22259
+22271
+22273
+22277
+22279
+22283
+22291
+22303
+22307
+22343
+22349
+22367
+22369
+22381
+22391
+22397
+22409
+22433
+22441
+22447
+22453
+22469
+22481
+22483
+22501
+22511
+22531
+22541
+22543
+22549
+22567
+22571
+22573
+22613
+22619
+22621
+22637
+22639
+22643
+22651
+22669
+22679
+22691
+22697
+22699
+22709
+22717
+22721
+22727
+22739
+22741
+22751
+22769
+22777
+22783
+22787
+22807
+22811
+22817
+22853
+22859
+22861
+22871
+22877
+22901
+22907
+22921
+22937
+22943
+22961
+22963
+22973
+22993
+23003
+23011
+23017
+23021
+23027
+23029
+23039
+23041
+23053
+23057
+23059
+23063
+23071
+23081
+23087
+23099
+23117
+23131
+23143
+23159
+23167
+23173
+23189
+23197
+23201
+23203
+23209
+23227
+23251
+23269
+23279
+23291
+23293
+23297
+23311
+23321
+23327
+23333
+23339
+23357
+23369
+23371
+23399
+23417
+23431
+23447
+23459
+23473
+23497
+23509
+23531
+23537
+23539
+23549
+23557
+23561
+23563
+23567
+23581
+23593
+23599
+23603
+23609
+23623
+23627
+23629
+23633
+23663
+23669
+23671
+23677
+23687
+23689
+23719
+23741
+23743
+23747
+23753
+23761
+23767
+23773
+23789
+23801
+23813
+23819
+23827
+23831
+23833
+23857
+23869
+23873
+23879
+23887
+23893
+23899
+23909
+23911
+23917
+23929
+23957
+23971
+23977
+23981
+23993
+24001
+24007
+24019
+24023
+24029
+24043
+24049
+24061
+24071
+24077
+24083
+24091
+24097
+24103
+24107
+24109
+24113
+24121
+24133
+24137
+24151
+24169
+24179
+24181
+24197
+24203
+24223
+24229
+24239
+24247
+24251
+24281
+24317
+24329
+24337
+24359
+24371
+24373
+24379
+24391
+24407
+24413
+24419
+24421
+24439
+24443
+24469
+24473
+24481
+24499
+24509
+24517
+24527
+24533
+24547
+24551
+24571
+24593
+24611
+24623
+24631
+24659
+24671
+24677
+24683
+24691
+24697
+24709
+24733
+24749
+24763
+24767
+24781
+24793
+24799
+24809
+24821
+24841
+24847
+24851
+24859
+24877
+24889
+24907
+24917
+24919
+24923
+24943
+24953
+24967
+24971
+24977
+24979
+24989
+25013
+25031
+25033
+25037
+25057
+25073
+25087
+25097
+25111
+25117
+25121
+25127
+25147
+25153
+25163
+25169
+25171
+25183
+25189
+25219
+25229
+25237
+25243
+25247
+25253
+25261
+25301
+25303
+25307
+25309
+25321
+25339
+25343
+25349
+25357
+25367
+25373
+25391
+25409
+25411
+25423
+25439
+25447
+25453
+25457
+25463
+25469
+25471
+25523
+25537
+25541
+25561
+25577
+25579
+25583
+25589
+25601
+25603
+25609
+25621
+25633
+25639
+25643
+25657
+25667
+25673
+25679
+25693
+25703
+25717
+25733
+25741
+25747
+25759
+25763
+25771
+25793
+25799
+25801
+25819
+25841
+25847
+25849
+25867
+25873
+25889
+25903
+25913
+25919
+25931
+25933
+25939
+25943
+25951
+25969
+25981
+25997
+25999
+26003
+26017
+26021
+26029
+26041
+26053
+26083
+26099
+26107
+26111
+26113
+26119
+26141
+26153
+26161
+26171
+26177
+26183
+26189
+26203
+26209
+26227
+26237
+26249
+26251
+26261
+26263
+26267
+26293
+26297
+26309
+26317
+26321
+26339
+26347
+26357
+26371
+26387
+26393
+26399
+26407
+26417
+26423
+26431
+26437
+26449
+26459
+26479
+26489
+26497
+26501
+26513
+26539
+26557
+26561
+26573
+26591
+26597
+26627
+26633
+26641
+26647
+26669
+26681
+26683
+26687
+26693
+26699
+26701
+26711
+26713
+26717
+26723
+26729
+26731
+26737
+26759
+26777
+26783
+26801
+26813
+26821
+26833
+26839
+26849
+26861
+26863
+26879
+26881
+26891
+26893
+26903
+26921
+26927
+26947
+26951
+26953
+26959
+26981
+26987
+26993
+27011
+27017
+27031
+27043
+27059
+27061
+27067
+27073
+27077
+27091
+27103
+27107
+27109
+27127
+27143
+27179
+27191
+27197
+27211
+27239
+27241
+27253
+27259
+27271
+27277
+27281
+27283
+27299
+27329
+27337
+27361
+27367
+27397
+27407
+27409
+27427
+27431
+27437
+27449
+27457
+27479
+27481
+27487
+27509
+27527
+27529
+27539
+27541
+27551
+27581
+27583
+27611
+27617
+27631
+27647
+27653
+27673
+27689
+27691
+27697
+27701
+27733
+27737
+27739
+27743
+27749
+27751
+27763
+27767
+27773
+27779
+27791
+27793
+27799
+27803
+27809
+27817
+27823
+27827
+27847
+27851
+27883
+27893
+27901
+27917
+27919
+27941
+27943
+27947
+27953
+27961
+27967
+27983
+27997
+28001
+28019
+28027
+28031
+28051
+28057
+28069
+28081
+28087
+28097
+28099
+28109
+28111
+28123
+28151
+28163
+28181
+28183
+28201
+28211
+28219
+28229
+28277
+28279
+28283
+28289
+28297
+28307
+28309
+28319
+28349
+28351
+28387
+28393
+28403
+28409
+28411
+28429
+28433
+28439
+28447
+28463
+28477
+28493
+28499
+28513
+28517
+28537
+28541
+28547
+28549
+28559
+28571
+28573
+28579
+28591
+28597
+28603
+28607
+28619
+28621
+28627
+28631
+28643
+28649
+28657
+28661
+28663
+28669
+28687
+28697
+28703
+28711
+28723
+28729
+28751
+28753
+28759
+28771
+28789
+28793
+28807
+28813
+28817
+28837
+28843
+28859
+28867
+28871
+28879
+28901
+28909
+28921
+28927
+28933
+28949
+28961
+28979
+29009
+29017
+29021
+29023
+29027
+29033
+29059
+29063
+29077
+29101
+29123
+29129
+29131
+29137
+29147
+29153
+29167
+29173
+29179
+29191
+29201
+29207
+29209
+29221
+29231
+29243
+29251
+29269
+29287
+29297
+29303
+29311
+29327
+29333
+29339
+29347
+29363
+29383
+29387
+29389
+29399
+29401
+29411
+29423
+29429
+29437
+29443
+29453
+29473
+29483
+29501
+29527
+29531
+29537
+29567
+29569
+29573
+29581
+29587
+29599
+29611
+29629
+29633
+29641
+29663
+29669
+29671
+29683
+29717
+29723
+29741
+29753
+29759
+29761
+29789
+29803
+29819
+29833
+29837
+29851
+29863
+29867
+29873
+29879
+29881
+29917
+29921
+29927
+29947
+29959
+29983
+29989
+30011
+30013
+30029
+30047
+30059
+30071
+30089
+30091
+30097
+30103
+30109
+30113
+30119
+30133
+30137
+30139
+30161
+30169
+30181
+30187
+30197
+30203
+30211
+30223
+30241
+30253
+30259
+30269
+30271
+30293
+30307
+30313
+30319
+30323
+30341
+30347
+30367
+30389
+30391
+30403
+30427
+30431
+30449
+30467
+30469
+30491
+30493
+30497
+30509
+30517
+30529
+30539
+30553
+30557
+30559
+30577
+30593
+30631
+30637
+30643
+30649
+30661
+30671
+30677
+30689
+30697
+30703
+30707
+30713
+30727
+30757
+30763
+30773
+30781
+30803
+30809
+30817
+30829
+30839
+30841
+30851
+30853
+30859
+30869
+30871
+30881
+30893
+30911
+30931
+30937
+30941
+30949
+30971
+30977
+30983
+31013
+31019
+31033
+31039
+31051
+31063
+31069
+31079
+31081
+31091
+31121
+31123
+31139
+31147
+31151
+31153
+31159
+31177
+31181
+31183
+31189
+31193
+31219
+31223
+31231
+31237
+31247
+31249
+31253
+31259
+31267
+31271
+31277
+31307
+31319
+31321
+31327
+31333
+31337
+31357
+31379
+31387
+31391
+31393
+31397
+31469
+31477
+31481
+31489
+31511
+31513
+31517
+31531
+31541
+31543
+31547
+31567
+31573
+31583
+31601
+31607
+31627
+31643
+31649
+31657
+31663
+31667
+31687
+31699
+31721
+31723
+31727
+31729
+31741
+31751
+31769
+31771
+31793
+31799
+31817
+31847
+31849
+31859
+31873
+31883
+31891
+31907
+31957
+31963
+31973
+31981
+31991
+32003
+32009
+32027
+32029
+32051
+32057
+32059
+32063
+32069
+32077
+32083
+32089
+32099
+32117
+32119
+32141
+32143
+32159
+32173
+32183
+32189
+32191
+32203
+32213
+32233
+32237
+32251
+32257
+32261
+32297
+32299
+32303
+32309
+32321
+32323
+32327
+32341
+32353
+32359
+32363
+32369
+32371
+32377
+32381
+32401
+32411
+32413
+32423
+32429
+32441
+32443
+32467
+32479
+32491
+32497
+32503
+32507
+32531
+32533
+32537
+32561
+32563
+32569
+32573
+32579
+32587
+32603
+32609
+32611
+32621
+32633
+32647
+32653
+32687
+32693
+32707
+32713
+32717
+32719
+32749
+32771
+32779
+32783
+32789
+32797
+32801
+32803
+32831
+32833
+32839
+32843
+32869
+32887
+32909
+32911
+32917
+32933
+32939
+32941
+32957
+32969
+32971
+32983
+32987
+32993
+32999
+33013
+33023
+33029
+33037
+33049
+33053
+33071
+33073
+33083
+33091
+33107
+33113
+33119
+33149
+33151
+33161
+33179
+33181
+33191
+33199
+33203
+33211
+33223
+33247
+33287
+33289
+33301
+33311
+33317
+33329
+33331
+33343
+33347
+33349
+33353
+33359
+33377
+33391
+33403
+33409
+33413
+33427
+33457
+33461
+33469
+33479
+33487
+33493
+33503
+33521
+33529
+33533
+33547
+33563
+33569
+33577
+33581
+33587
+33589
+33599
+33601
+33613
+33617
+33619
+33623
+33629
+33637
+33641
+33647
+33679
+33703
+33713
+33721
+33739
+33749
+33751
+33757
+33767
+33769
+33773
+33791
+33797
+33809
+33811
+33827
+33829
+33851
+33857
+33863
+33871
+33889
+33893
+33911
+33923
+33931
+33937
+33941
+33961
+33967
+33997
+34019
+34031
+34033
+34039
+34057
+34061
+34123
+34127
+34129
+34141
+34147
+34157
+34159
+34171
+34183
+34211
+34213
+34217
+34231
+34253
+34259
+34261
+34267
+34273
+34283
+34297
+34301
+34303
+34313
+34319
+34327
+34337
+34351
+34361
+34367
+34369
+34381
+34403
+34421
+34429
+34439
+34457
+34469
+34471
+34483
+34487
+34499
+34501
+34511
+34513
+34519
+34537
+34543
+34549
+34583
+34589
+34591
+34603
+34607
+34613
+34631
+34649
+34651
+34667
+34673
+34679
+34687
+34693
+34703
+34721
+34729
+34739
+34747
+34757
+34759
+34763
+34781
+34807
+34819
+34841
+34843
+34847
+34849
+34871
+34877
+34883
+34897
+34913
+34919
+34939
+34949
+34961
+34963
+34981
+35023
+35027
+35051
+35053
+35059
+35069
+35081
+35083
+35089
+35099
+35107
+35111
+35117
+35129
+35141
+35149
+35153
+35159
+35171
+35201
+35221
+35227
+35251
+35257
+35267
+35279
+35281
+35291
+35311
+35317
+35323
+35327
+35339
+35353
+35363
+35381
+35393
+35401
+35407
+35419
+35423
+35437
+35447
+35449
+35461
+35491
+35507
+35509
+35521
+35527
+35531
+35533
+35537
+35543
+35569
+35573
+35591
+35593
+35597
+35603
+35617
+35671
+35677
+35729
+35731
+35747
+35753
+35759
+35771
+35797
+35801
+35803
+35809
+35831
+35837
+35839
+35851
+35863
+35869
+35879
+35897
+35899
+35911
+35923
+35933
+35951
+35963
+35969
+35977
+35983
+35993
+35999
+36007
+36011
+36013
+36017
+36037
+36061
+36067
+36073
+36083
+36097
+36107
+36109
+36131
+36137
+36151
+36161
+36187
+36191
+36209
+36217
+36229
+36241
+36251
+36263
+36269
+36277
+36293
+36299
+36307
+36313
+36319
+36341
+36343
+36353
+36373
+36383
+36389
+36433
+36451
+36457
+36467
+36469
+36473
+36479
+36493
+36497
+36523
+36527
+36529
+36541
+36551
+36559
+36563
+36571
+36583
+36587
+36599
+36607
+36629
+36637
+36643
+36653
+36671
+36677
+36683
+36691
+36697
+36709
+36713
+36721
+36739
+36749
+36761
+36767
+36779
+36781
+36787
+36791
+36793
+36809
+36821
+36833
+36847
+36857
+36871
+36877
+36887
+36899
+36901
+36913
+36919
+36923
+36929
+36931
+36943
+36947
+36973
+36979
+36997
+37003
+37013
+37019
+37021
+37039
+37049
+37057
+37061
+37087
+37097
+37117
+37123
+37139
+37159
+37171
+37181
+37189
+37199
+37201
+37217
+37223
+37243
+37253
+37273
+37277
+37307
+37309
+37313
+37321
+37337
+37339
+37357
+37361
+37363
+37369
+37379
+37397
+37409
+37423
+37441
+37447
+37463
+37483
+37489
+37493
+37501
+37507
+37511
+37517
+37529
+37537
+37547
+37549
+37561
+37567
+37571
+37573
+37579
+37589
+37591
+37607
+37619
+37633
+37643
+37649
+37657
+37663
+37691
+37693
+37699
+37717
+37747
+37781
+37783
+37799
+37811
+37813
+37831
+37847
+37853
+37861
+37871
+37879
+37889
+37897
+37907
+37951
+37957
+37963
+37967
+37987
+37991
+37993
+37997
+38011
+38039
+38047
+38053
+38069
+38083
+38113
+38119
+38149
+38153
+38167
+38177
+38183
+38189
+38197
+38201
+38219
+38231
+38237
+38239
+38261
+38273
+38281
+38287
+38299
+38303
+38317
+38321
+38327
+38329
+38333
+38351
+38371
+38377
+38393
+38431
+38447
+38449
+38453
+38459
+38461
+38501
+38543
+38557
+38561
+38567
+38569
+38593
+38603
+38609
+38611
+38629
+38639
+38651
+38653
+38669
+38671
+38677
+38693
+38699
+38707
+38711
+38713
+38723
+38729
+38737
+38747
+38749
+38767
+38783
+38791
+38803
+38821
+38833
+38839
+38851
+38861
+38867
+38873
+38891
+38903
+38917
+38921
+38923
+38933
+38953
+38959
+38971
+38977
+38993
+39019
+39023
+39041
+39043
+39047
+39079
+39089
+39097
+39103
+39107
+39113
+39119
+39133
+39139
+39157
+39161
+39163
+39181
+39191
+39199
+39209
+39217
+39227
+39229
+39233
+39239
+39241
+39251
+39293
+39301
+39313
+39317
+39323
+39341
+39343
+39359
+39367
+39371
+39373
+39383
+39397
+39409
+39419
+39439
+39443
+39451
+39461
+39499
+39503
+39509
+39511
+39521
+39541
+39551
+39563
+39569
+39581
+39607
+39619
+39623
+39631
+39659
+39667
+39671
+39679
+39703
+39709
+39719
+39727
+39733
+39749
+39761
+39769
+39779
+39791
+39799
+39821
+39827
+39829
+39839
+39841
+39847
+39857
+39863
+39869
+39877
+39883
+39887
+39901
+39929
+39937
+39953
+39971
+39979
+39983
+39989
+40009
+40013
+40031
+40037
+40039
+40063
+40087
+40093
+40099
+40111
+40123
+40127
+40129
+40151
+40153
+40163
+40169
+40177
+40189
+40193
+40213
+40231
+40237
+40241
+40253
+40277
+40283
+40289
+40343
+40351
+40357
+40361
+40387
+40423
+40427
+40429
+40433
+40459
+40471
+40483
+40487
+40493
+40499
+40507
+40519
+40529
+40531
+40543
+40559
+40577
+40583
+40591
+40597
+40609
+40627
+40637
+40639
+40693
+40697
+40699
+40709
+40739
+40751
+40759
+40763
+40771
+40787
+40801
+40813
+40819
+40823
+40829
+40841
+40847
+40849
+40853
+40867
+40879
+40883
+40897
+40903
+40927
+40933
+40939
+40949
+40961
+40973
+40993
+41011
+41017
+41023
+41039
+41047
+41051
+41057
+41077
+41081
+41113
+41117
+41131
+41141
+41143
+41149
+41161
+41177
+41179
+41183
+41189
+41201
+41203
+41213
+41221
+41227
+41231
+41233
+41243
+41257
+41263
+41269
+41281
+41299
+41333
+41341
+41351
+41357
+41381
+41387
+41389
+41399
+41411
+41413
+41443
+41453
+41467
+41479
+41491
+41507
+41513
+41519
+41521
+41539
+41543
+41549
+41579
+41593
+41597
+41603
+41609
+41611
+41617
+41621
+41627
+41641
+41647
+41651
+41659
+41669
+41681
+41687
+41719
+41729
+41737
+41759
+41761
+41771
+41777
+41801
+41809
+41813
+41843
+41849
+41851
+41863
+41879
+41887
+41893
+41897
+41903
+41911
+41927
+41941
+41947
+41953
+41957
+41959
+41969
+41981
+41983
+41999
+42013
+42017
+42019
+42023
+42043
+42061
+42071
+42073
+42083
+42089
+42101
+42131
+42139
+42157
+42169
+42179
+42181
+42187
+42193
+42197
+42209
+42221
+42223
+42227
+42239
+42257
+42281
+42283
+42293
+42299
+42307
+42323
+42331
+42337
+42349
+42359
+42373
+42379
+42391
+42397
+42403
+42407
+42409
+42433
+42437
+42443
+42451
+42457
+42461
+42463
+42467
+42473
+42487
+42491
+42499
+42509
+42533
+42557
+42569
+42571
+42577
+42589
+42611
+42641
+42643
+42649
+42667
+42677
+42683
+42689
+42697
+42701
+42703
+42709
+42719
+42727
+42737
+42743
+42751
+42767
+42773
+42787
+42793
+42797
+42821
+42829
+42839
+42841
+42853
+42859
+42863
+42899
+42901
+42923
+42929
+42937
+42943
+42953
+42961
+42967
+42979
+42989
+43003
+43013
+43019
+43037
+43049
+43051
+43063
+43067
+43093
+43103
+43117
+43133
+43151
+43159
+43177
+43189
+43201
+43207
+43223
+43237
+43261
+43271
+43283
+43291
+43313
+43319
+43321
+43331
+43391
+43397
+43399
+43403
+43411
+43427
+43441
+43451
+43457
+43481
+43487
+43499
+43517
+43541
+43543
+43573
+43577
+43579
+43591
+43597
+43607
+43609
+43613
+43627
+43633
+43649
+43651
+43661
+43669
+43691
+43711
+43717
+43721
+43753
+43759
+43777
+43781
+43783
+43787
+43789
+43793
+43801
+43853
+43867
+43889
+43891
+43913
+43933
+43943
+43951
+43961
+43963
+43969
+43973
+43987
+43991
+43997
+44017
+44021
+44027
+44029
+44041
+44053
+44059
+44071
+44087
+44089
+44101
+44111
+44119
+44123
+44129
+44131
+44159
+44171
+44179
+44189
+44201
+44203
+44207
+44221
+44249
+44257
+44263
+44267
+44269
+44273
+44279
+44281
+44293
+44351
+44357
+44371
+44381
+44383
+44389
+44417
+44449
+44453
+44483
+44491
+44497
+44501
+44507
+44519
+44531
+44533
+44537
+44543
+44549
+44563
+44579
+44587
+44617
+44621
+44623
+44633
+44641
+44647
+44651
+44657
+44683
+44687
+44699
+44701
+44711
+44729
+44741
+44753
+44771
+44773
+44777
+44789
+44797
+44809
+44819
+44839
+44843
+44851
+44867
+44879
+44887
+44893
+44909
+44917
+44927
+44939
+44953
+44959
+44963
+44971
+44983
+44987
+45007
+45013
+45053
+45061
+45077
+45083
+45119
+45121
+45127
+45131
+45137
+45139
+45161
+45179
+45181
+45191
+45197
+45233
+45247
+45259
+45263
+45281
+45289
+45293
+45307
+45317
+45319
+45329
+45337
+45341
+45343
+45361
+45377
+45389
+45403
+45413
+45427
+45433
+45439
+45481
+45491
+45497
+45503
+45523
+45533
+45541
+45553
+45557
+45569
+45587
+45589
+45599
+45613
+45631
+45641
+45659
+45667
+45673
+45677
+45691
+45697
+45707
+45737
+45751
+45757
+45763
+45767
+45779
+45817
+45821
+45823
+45827
+45833
+45841
+45853
+45863
+45869
+45887
+45893
+45943
+45949
+45953
+45959
+45971
+45979
+45989
+46021
+46027
+46049
+46051
+46061
+46073
+46091
+46093
+46099
+46103
+46133
+46141
+46147
+46153
+46171
+46181
+46183
+46187
+46199
+46219
+46229
+46237
+46261
+46271
+46273
+46279
+46301
+46307
+46309
+46327
+46337
+46349
+46351
+46381
+46399
+46411
+46439
+46441
+46447
+46451
+46457
+46471
+46477
+46489
+46499
+46507
+46511
+46523
+46549
+46559
+46567
+46573
+46589
+46591
+46601
+46619
+46633
+46639
+46643
+46649
+46663
+46679
+46681
+46687
+46691
+46703
+46723
+46727
+46747
+46751
+46757
+46769
+46771
+46807
+46811
+46817
+46819
+46829
+46831
+46853
+46861
+46867
+46877
+46889
+46901
+46919
+46933
+46957
+46993
+46997
+47017
+47041
+47051
+47057
+47059
+47087
+47093
+47111
+47119
+47123
+47129
+47137
+47143
+47147
+47149
+47161
+47189
+47207
+47221
+47237
+47251
+47269
+47279
+47287
+47293
+47297
+47303
+47309
+47317
+47339
+47351
+47353
+47363
+47381
+47387
+47389
+47407
+47417
+47419
+47431
+47441
+47459
+47491
+47497
+47501
+47507
+47513
+47521
+47527
+47533
+47543
+47563
+47569
+47581
+47591
+47599
+47609
+47623
+47629
+47639
+47653
+47657
+47659
+47681
+47699
+47701
+47711
+47713
+47717
+47737
+47741
+47743
+47777
+47779
+47791
+47797
+47807
+47809
+47819
+47837
+47843
+47857
+47869
+47881
+47903
+47911
+47917
+47933
+47939
+47947
+47951
+47963
+47969
+47977
+47981
+48017
+48023
+48029
+48049
+48073
+48079
+48091
+48109
+48119
+48121
+48131
+48157
+48163
+48179
+48187
+48193
+48197
+48221
+48239
+48247
+48259
+48271
+48281
+48299
+48311
+48313
+48337
+48341
+48353
+48371
+48383
+48397
+48407
+48409
+48413
+48437
+48449
+48463
+48473
+48479
+48481
+48487
+48491
+48497
+48523
+48527
+48533
+48539
+48541
+48563
+48571
+48589
+48593
+48611
+48619
+48623
+48647
+48649
+48661
+48673
+48677
+48679
+48731
+48733
+48751
+48757
+48761
+48767
+48779
+48781
+48787
+48799
+48809
+48817
+48821
+48823
+48847
+48857
+48859
+48869
+48871
+48883
+48889
+48907
+48947
+48953
+48973
+48989
+48991
+49003
+49009
+49019
+49031
+49033
+49037
+49043
+49057
+49069
+49081
+49103
+49109
+49117
+49121
+49123
+49139
+49157
+49169
+49171
+49177
+49193
+49199
+49201
+49207
+49211
+49223
+49253
+49261
+49277
+49279
+49297
+49307
+49331
+49333
+49339
+49363
+49367
+49369
+49391
+49393
+49409
+49411
+49417
+49429
+49433
+49451
+49459
+49463
+49477
+49481
+49499
+49523
+49529
+49531
+49537
+49547
+49549
+49559
+49597
+49603
+49613
+49627
+49633
+49639
+49663
+49667
+49669
+49681
+49697
+49711
+49727
+49739
+49741
+49747
+49757
+49783
+49787
+49789
+49801
+49807
+49811
+49823
+49831
+49843
+49853
+49871
+49877
+49891
+49919
+49921
+49927
+49937
+49939
+49943
+49957
+49991
+49993
+49999
+50021
+50023
+50033
+50047
+50051
+50053
+50069
+50077
+50087
+50093
+50101
+50111
+50119
+50123
+50129
+50131
+50147
+50153
+50159
+50177
+50207
+50221
+50227
+50231
+50261
+50263
+50273
+50287
+50291
+50311
+50321
+50329
+50333
+50341
+50359
+50363
+50377
+50383
+50387
+50411
+50417
+50423
+50441
+50459
+50461
+50497
+50503
+50513
+50527
+50539
+50543
+50549
+50551
+50581
+50587
+50591
+50593
+50599
+50627
+50647
+50651
+50671
+50683
+50707
+50723
+50741
+50753
+50767
+50773
+50777
+50789
+50821
+50833
+50839
+50849
+50857
+50867
+50873
+50891
+50893
+50909
+50923
+50929
+50951
+50957
+50969
+50971
+50989
+50993
+51001
+51031
+51043
+51047
+51059
+51061
+51071
+51109
+51131
+51133
+51137
+51151
+51157
+51169
+51193
+51197
+51199
+51203
+51217
+51229
+51239
+51241
+51257
+51263
+51283
+51287
+51307
+51329
+51341
+51343
+51347
+51349
+51361
+51383
+51407
+51413
+51419
+51421
+51427
+51431
+51437
+51439
+51449
+51461
+51473
+51479
+51481
+51487
+51503
+51511
+51517
+51521
+51539
+51551
+51563
+51577
+51581
+51593
+51599
+51607
+51613
+51631
+51637
+51647
+51659
+51673
+51679
+51683
+51691
+51713
+51719
+51721
+51749
+51767
+51769
+51787
+51797
+51803
+51817
+51827
+51829
+51839
+51853
+51859
+51869
+51871
+51893
+51899
+51907
+51913
+51929
+51941
+51949
+51971
+51973
+51977
+51991
+52009
+52021
+52027
+52051
+52057
+52067
+52069
+52081
+52103
+52121
+52127
+52147
+52153
+52163
+52177
+52181
+52183
+52189
+52201
+52223
+52237
+52249
+52253
+52259
+52267
+52289
+52291
+52301
+52313
+52321
+52361
+52363
+52369
+52379
+52387
+52391
+52433
+52453
+52457
+52489
+52501
+52511
+52517
+52529
+52541
+52543
+52553
+52561
+52567
+52571
+52579
+52583
+52609
+52627
+52631
+52639
+52667
+52673
+52691
+52697
+52709
+52711
+52721
+52727
+52733
+52747
+52757
+52769
+52783
+52807
+52813
+52817
+52837
+52859
+52861
+52879
+52883
+52889
+52901
+52903
+52919
+52937
+52951
+52957
+52963
+52967
+52973
+52981
+52999
+53003
+53017
+53047
+53051
+53069
+53077
+53087
+53089
+53093
+53101
+53113
+53117
+53129
+53147
+53149
+53161
+53171
+53173
+53189
+53197
+53201
+53231
+53233
+53239
+53267
+53269
+53279
+53281
+53299
+53309
+53323
+53327
+53353
+53359
+53377
+53381
+53401
+53407
+53411
+53419
+53437
+53441
+53453
+53479
+53503
+53507
+53527
+53549
+53551
+53569
+53591
+53593
+53597
+53609
+53611
+53617
+53623
+53629
+53633
+53639
+53653
+53657
+53681
+53693
+53699
+53717
+53719
+53731
+53759
+53773
+53777
+53783
+53791
+53813
+53819
+53831
+53849
+53857
+53861
+53881
+53887
+53891
+53897
+53899
+53917
+53923
+53927
+53939
+53951
+53959
+53987
+53993
+54001
+54011
+54013
+54037
+54049
+54059
+54083
+54091
+54101
+54121
+54133
+54139
+54151
+54163
+54167
+54181
+54193
+54217
+54251
+54269
+54277
+54287
+54293
+54311
+54319
+54323
+54331
+54347
+54361
+54367
+54371
+54377
+54401
+54403
+54409
+54413
+54419
+54421
+54437
+54443
+54449
+54469
+54493
+54497
+54499
+54503
+54517
+54521
+54539
+54541
+54547
+54559
+54563
+54577
+54581
+54583
+54601
+54617
+54623
+54629
+54631
+54647
+54667
+54673
+54679
+54709
+54713
+54721
+54727
+54751
+54767
+54773
+54779
+54787
+54799
+54829
+54833
+54851
+54869
+54877
+54881
+54907
+54917
+54919
+54941
+54949
+54959
+54973
+54979
+54983
+55001
+55009
+55021
+55049
+55051
+55057
+55061
+55073
+55079
+55103
+55109
+55117
+55127
+55147
+55163
+55171
+55201
+55207
+55213
+55217
+55219
+55229
+55243
+55249
+55259
+55291
+55313
+55331
+55333
+55337
+55339
+55343
+55351
+55373
+55381
+55399
+55411
+55439
+55441
+55457
+55469
+55487
+55501
+55511
+55529
+55541
+55547
+55579
+55589
+55603
+55609
+55619
+55621
+55631
+55633
+55639
+55661
+55663
+55667
+55673
+55681
+55691
+55697
+55711
+55717
+55721
+55733
+55763
+55787
+55793
+55799
+55807
+55813
+55817
+55819
+55823
+55829
+55837
+55843
+55849
+55871
+55889
+55897
+55901
+55903
+55921
+55927
+55931
+55933
+55949
+55967
+55987
+55997
+56003
+56009
+56039
+56041
+56053
+56081
+56087
+56093
+56099
+56101
+56113
+56123
+56131
+56149
+56167
+56171
+56179
+56197
+56207
+56209
+56237
+56239
+56249
+56263
+56267
+56269
+56299
+56311
+56333
+56359
+56369
+56377
+56383
+56393
+56401
+56417
+56431
+56437
+56443
+56453
+56467
+56473
+56477
+56479
+56489
+56501
+56503
+56509
+56519
+56527
+56531
+56533
+56543
+56569
+56591
+56597
+56599
+56611
+56629
+56633
+56659
+56663
+56671
+56681
+56687
+56701
+56711
+56713
+56731
+56737
+56747
+56767
+56773
+56779
+56783
+56807
+56809
+56813
+56821
+56827
+56843
+56857
+56873
+56891
+56893
+56897
+56909
+56911
+56921
+56923
+56929
+56941
+56951
+56957
+56963
+56983
+56989
+56993
+56999
+57037
+57041
+57047
+57059
+57073
+57077
+57089
+57097
+57107
+57119
+57131
+57139
+57143
+57149
+57163
+57173
+57179
+57191
+57193
+57203
+57221
+57223
+57241
+57251
+57259
+57269
+57271
+57283
+57287
+57301
+57329
+57331
+57347
+57349
+57367
+57373
+57383
+57389
+57397
+57413
+57427
+57457
+57467
+57487
+57493
+57503
+57527
+57529
+57557
+57559
+57571
+57587
+57593
+57601
+57637
+57641
+57649
+57653
+57667
+57679
+57689
+57697
+57709
+57713
+57719
+57727
+57731
+57737
+57751
+57773
+57781
+57787
+57791
+57793
+57803
+57809
+57829
+57839
+57847
+57853
+57859
+57881
+57899
+57901
+57917
+57923
+57943
+57947
+57973
+57977
+57991
+58013
+58027
+58031
+58043
+58049
+58057
+58061
+58067
+58073
+58099
+58109
+58111
+58129
+58147
+58151
+58153
+58169
+58171
+58189
+58193
+58199
+58207
+58211
+58217
+58229
+58231
+58237
+58243
+58271
+58309
+58313
+58321
+58337
+58363
+58367
+58369
+58379
+58391
+58393
+58403
+58411
+58417
+58427
+58439
+58441
+58451
+58453
+58477
+58481
+58511
+58537
+58543
+58549
+58567
+58573
+58579
+58601
+58603
+58613
+58631
+58657
+58661
+58679
+58687
+58693
+58699
+58711
+58727
+58733
+58741
+58757
+58763
+58771
+58787
+58789
+58831
+58889
+58897
+58901
+58907
+58909
+58913
+58921
+58937
+58943
+58963
+58967
+58979
+58991
+58997
+59009
+59011
+59021
+59023
+59029
+59051
+59053
+59063
+59069
+59077
+59083
+59093
+59107
+59113
+59119
+59123
+59141
+59149
+59159
+59167
+59183
+59197
+59207
+59209
+59219
+59221
+59233
+59239
+59243
+59263
+59273
+59281
+59333
+59341
+59351
+59357
+59359
+59369
+59377
+59387
+59393
+59399
+59407
+59417
+59419
+59441
+59443
+59447
+59453
+59467
+59471
+59473
+59497
+59509
+59513
+59539
+59557
+59561
+59567
+59581
+59611
+59617
+59621
+59627
+59629
+59651
+59659
+59663
+59669
+59671
+59693
+59699
+59707
+59723
+59729
+59743
+59747
+59753
+59771
+59779
+59791
+59797
+59809
+59833
+59863
+59879
+59887
+59921
+59929
+59951
+59957
+59971
+59981
+59999
+60013
+60017
+60029
+60037
+60041
+60077
+60083
+60089
+60091
+60101
+60103
+60107
+60127
+60133
+60139
+60149
+60161
+60167
+60169
+60209
+60217
+60223
+60251
+60257
+60259
+60271
+60289
+60293
+60317
+60331
+60337
+60343
+60353
+60373
+60383
+60397
+60413
+60427
+60443
+60449
+60457
+60493
+60497
+60509
+60521
+60527
+60539
+60589
+60601
+60607
+60611
+60617
+60623
+60631
+60637
+60647
+60649
+60659
+60661
+60679
+60689
+60703
+60719
+60727
+60733
+60737
+60757
+60761
+60763
+60773
+60779
+60793
+60811
+60821
+60859
+60869
+60887
+60889
+60899
+60901
+60913
+60917
+60919
+60923
+60937
+60943
+60953
+60961
+61001
+61007
+61027
+61031
+61043
+61051
+61057
+61091
+61099
+61121
+61129
+61141
+61151
+61153
+61169
+61211
+61223
+61231
+61253
+61261
+61283
+61291
+61297
+61331
+61333
+61339
+61343
+61357
+61363
+61379
+61381
+61403
+61409
+61417
+61441
+61463
+61469
+61471
+61483
+61487
+61493
+61507
+61511
+61519
+61543
+61547
+61553
+61559
+61561
+61583
+61603
+61609
+61613
+61627
+61631
+61637
+61643
+61651
+61657
+61667
+61673
+61681
+61687
+61703
+61717
+61723
+61729
+61751
+61757
+61781
+61813
+61819
+61837
+61843
+61861
+61871
+61879
+61909
+61927
+61933
+61949
+61961
+61967
+61979
+61981
+61987
+61991
+62003
+62011
+62017
+62039
+62047
+62053
+62057
+62071
+62081
+62099
+62119
+62129
+62131
+62137
+62141
+62143
+62171
+62189
+62191
+62201
+62207
+62213
+62219
+62233
+62273
+62297
+62299
+62303
+62311
+62323
+62327
+62347
+62351
+62383
+62401
+62417
+62423
+62459
+62467
+62473
+62477
+62483
+62497
+62501
+62507
+62533
+62539
+62549
+62563
+62581
+62591
+62597
+62603
+62617
+62627
+62633
+62639
+62653
+62659
+62683
+62687
+62701
+62723
+62731
+62743
+62753
+62761
+62773
+62791
+62801
+62819
+62827
+62851
+62861
+62869
+62873
+62897
+62903
+62921
+62927
+62929
+62939
+62969
+62971
+62981
+62983
+62987
+62989
+63029
+63031
+63059
+63067
+63073
+63079
+63097
+63103
+63113
+63127
+63131
+63149
+63179
+63197
+63199
+63211
+63241
+63247
+63277
+63281
+63299
+63311
+63313
+63317
+63331
+63337
+63347
+63353
+63361
+63367
+63377
+63389
+63391
+63397
+63409
+63419
+63421
+63439
+63443
+63463
+63467
+63473
+63487
+63493
+63499
+63521
+63527
+63533
+63541
+63559
+63577
+63587
+63589
+63599
+63601
+63607
+63611
+63617
+63629
+63647
+63649
+63659
+63667
+63671
+63689
+63691
+63697
+63703
+63709
+63719
+63727
+63737
+63743
+63761
+63773
+63781
+63793
+63799
+63803
+63809
+63823
+63839
+63841
+63853
+63857
+63863
+63901
+63907
+63913
+63929
+63949
+63977
+63997
+64007
+64013
+64019
+64033
+64037
+64063
+64067
+64081
+64091
+64109
+64123
+64151
+64153
+64157
+64171
+64187
+64189
+64217
+64223
+64231
+64237
+64271
+64279
+64283
+64301
+64303
+64319
+64327
+64333
+64373
+64381
+64399
+64403
+64433
+64439
+64451
+64453
+64483
+64489
+64499
+64513
+64553
+64567
+64577
+64579
+64591
+64601
+64609
+64613
+64621
+64627
+64633
+64661
+64663
+64667
+64679
+64693
+64709
+64717
+64747
+64763
+64781
+64783
+64793
+64811
+64817
+64849
+64853
+64871
+64877
+64879
+64891
+64901
+64919
+64921
+64927
+64937
+64951
+64969
+64997
+65003
+65011
+65027
+65029
+65033
+65053
+65063
+65071
+65089
+65099
+65101
+65111
+65119
+65123
+65129
+65141
+65147
+65167
+65171
+65173
+65179
+65183
+65203
+65213
+65239
+65257
+65267
+65269
+65287
+65293
+65309
+65323
+65327
+65353
+65357
+65371
+65381
+65393
+65407
+65413
+65419
+65423
+65437
+65447
+65449
+65479
+65497
+65519
+65521
diff --git a/security/nss/lib/freebl/mpi/doc/prng.pod b/security/nss/lib/freebl/mpi/doc/prng.pod
new file mode 100644
index 000000000..6da4d4a9c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/prng.pod
@@ -0,0 +1,38 @@
+=head1 NAME
+
+ prng - pseudo-random number generator
+
+=head1 SYNOPSIS
+
+ prng [count]
+
+=head1 DESCRIPTION
+
+B<Prng> generates 32-bit pseudo-random integers using the
+Blum-Blum-Shub (BBS) quadratic residue generator.  It is seeded using
+the standard C library's rand() function, which is itself seeded from the
+system clock and the process ID number.  Thus, the values generated
+are not particularly useful for cryptographic applications, but they
+are in general much better than the typical output of the usual
+multiplicative congruential generator used by most runtime libraries.
+
+You may optionally specify how many random values should be generated
+by giving a I<count> argument on the command line.  If you do not
+specify a count, only one random value will be generated.  The results
+are output to the standard output in decimal notation, one value per
+line.
+
+=head1 RESTRICTIONS
+
+As stated above, B<prng> uses the C library's rand() function to seed
+the generator, so it is not terribly suitable for cryptographic
+applications.  Also note that each time you run the program, a new
+seed is generated, so it is better to run it once with a I<count>
+parameter than it is to run it multiple times to generate several
+values.
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Copyright (C) 1998 Michael J. Fromberger, All Rights Reserved
+ Thayer School of Engineering, Dartmouth College, Hanover, NH  USA
diff --git a/security/nss/lib/freebl/mpi/doc/redux.txt b/security/nss/lib/freebl/mpi/doc/redux.txt
new file mode 100644
index 000000000..0df0f0390
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/redux.txt
@@ -0,0 +1,86 @@
+Modular Reduction
+
+Usually, modular reduction is accomplished by long division, using the
+mp_div() or mp_mod() functions.  However, when performing modular
+exponentiation, you spend a lot of time reducing by the same modulus
+again and again.  For this purpose, doing a full division for each
+multiplication is quite inefficient.
+
+For this reason, the mp_exptmod() function does not perform modular
+reductions in the usual way, but instead takes advantage of an
+algorithm due to Barrett, as described by Menezes, van Oorschot and
+Vanstone in their book _Handbook of Applied Cryptography_, published
+by the CRC Press (see Chapter 14 for details).  This method reduces
+most of the computation of reduction to efficient shifting and masking
+operations, and avoids the multiple-precision division entirely.
+
+Here is a brief synopsis of Barrett reduction, as it is implemented in
+this library.
+
+Let b denote the radix of the computation (one more than the maximum
+value that can be denoted by an mp_digit).  Let m be the modulus, and
+let k be the number of significant digits of m.  Let x be the value to
+be reduced modulo m.  By the Division Theorem, there exist unique
+integers Q and R such that:
+
+	 x = Qm + R, 0 <= R < m
+
+Barrett reduction takes advantage of the fact that you can easily
+approximate Q to within two, given a value M such that:
+
+	                  2k
+	                 b
+	    M = floor( ----- )
+	                 m
+
+Computation of M requires a full-precision division step, so if you
+are only doing a single reduction by m, you gain no advantage.
+However, when multiple reductions by the same m are required, this
+division need only be done once, beforehand.  Using this, we can use
+the following equation to compute Q', an approximation of Q:
+
+                     x
+            floor( ------ ) M
+                      k-1
+                     b
+Q' = floor( ----------------- )
+                    k+1
+                   b
+
+The divisions by b^(k-1) and b^(k+1) and the floor() functions can be
+efficiently implemented with shifts and masks, leaving only a single
+multiplication to be performed to get this approximation.  It can be
+shown that Q - 2 <= Q' <= Q, so in the worst case, we can get out with
+two additional subtractions to bring the value into line with the
+actual value of Q.
+
+Once we've got Q', we basically multiply that by m and subtract from
+x, yielding:
+
+   x - Q'm = Qm + R - Q'm
+
+Since we know the constraint on Q', this is one of:
+
+      R
+      m + R
+      2m + R
+
+Since R < m by the Division Theorem, we can simply subtract off m
+until we get a value in the correct range, which will happen with no
+more than 2 subtractions:
+
+     v = x - Q'm
+
+     while(v >= m)
+       v = v - m
+     endwhile
+
+
+In random performance trials, modular exponentiation using this method
+of reduction gave around a 40% speedup over using the division for
+reduction.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/sqrt.txt b/security/nss/lib/freebl/mpi/doc/sqrt.txt
new file mode 100644
index 000000000..4529cbfc4
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/sqrt.txt
@@ -0,0 +1,50 @@
+Square Root
+
+A simple iterative algorithm is used to compute the greatest integer
+less than or equal to the square root.  Essentially, this is Newton's
+linear approximation, computed by finding successive values of the
+equation:
+
+		    x[k]^2 - V
+x[k+1]	 =  x[k] - ------------
+	             2 x[k]
+
+...where V is the value for which the square root is being sought.  In
+essence, what is happening here is that we guess a value for the
+square root, then figure out how far off we were by squaring our guess
+and subtracting the target.  Using this value, we compute a linear
+approximation for the error, and adjust the "guess".  We keep doing
+this until the error gets small enough that the above equation
+yields a quotient of zero.  At this point, our last guess is one
+greater than the square root we're seeking.
+
+The initial guess is computed by dividing V by 4, which is a heuristic
+I have found to be fairly good on average.  This also has the
+advantage of being very easy to compute efficiently, even for large
+values.
+
+So, the resulting algorithm works as follows:
+
+    x = V / 4   /* compute initial guess */
+    
+    loop
+	t = (x * x) - V   /* Compute absolute error  */
+	u = 2 * x         /* Adjust by tangent slope */
+	t = t / u
+
+	/* Loop is done if error is zero */
+	if(t == 0)
+	    break
+
+	/* Adjust guess by error term    */
+	x = x - t
+    end
+
+    x = x - 1
+
+The result of the computation is the value of x.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/square.txt b/security/nss/lib/freebl/mpi/doc/square.txt
new file mode 100644
index 000000000..edbb97882
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/square.txt
@@ -0,0 +1,72 @@
+Squaring Algorithm
+
+When you are squaring a value, you can take advantage of the fact that
+half the multiplications performed by the more general multiplication
+algorithm (see 'mul.txt' for a description) are redundant when the
+multiplicand equals the multiplier.
+
+In particular, the modified algorithm is:
+
+k = 0
+for j <- 0 to (#a - 1)
+  w = c[2*j] + (a[j] ^ 2);
+  c[2*j] = w mod R;  k = w div R
+
+  for i <- j+1 to (#a - 1)
+    w = (2 * a[j] * a[i]) + k + c[i+j]
+    c[i+j] = w mod R
+    k = w div R
+  endfor
+  c[i+j] = k;
+  k = 0;
+endfor
+
+On the surface, this looks identical to the multiplication algorithm;
+however, note the following differences:
+
+  - precomputation of the leading term in the outer loop
+
+  - i runs from j+1 instead of from zero
+
+  - doubling of a[i] * a[j] in the inner product
+
+Unfortunately, the construction of the inner product is such that we
+need more than two digits to represent the inner product, in some
+cases.  In a C implementation, this means that some gymnastics must be
+performed in order to handle overflow, for which C has no direct
+abstraction.  We do this by observing the following:
+
+If we have multiplied a[i] and a[j], and the product is more than half
+the maximum value expressible in two digits, then doubling this result
+will overflow into a third digit.  If this occurs, we take note of the
+overflow, and double it anyway -- C integer arithmetic ignores
+overflow, so the two digits we get back should still be valid, modulo
+the overflow.
+
+Having doubled this value, we now have to add in the remainders and
+the digits already computed by earlier steps.  If we did not overflow
+in the previous step, we might still cause an overflow here.  That
+will happen whenever the maximum value expressible in two digits, less
+the amount we have to add, is less than the result of the previous
+step.  Thus, the overflow computation is:
+
+
+  u = 0
+  w = a[i] * a[j]
+
+  if(w > (R - 1)/ 2)
+    u = 1;
+
+  w = w * 2
+  v = c[i + j] + k
+
+  if(u == 0 && (R - 1 - v) < w)
+    u = 1
+
+If there is an overflow, u will be 1, otherwise u will be 0.  The rest
+of the parameters are the same as they are in the above description.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/timing.txt b/security/nss/lib/freebl/mpi/doc/timing.txt
new file mode 100644
index 000000000..58f37c9df
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/timing.txt
@@ -0,0 +1,213 @@
+MPI Library Timing Tests
+
+Hardware/OS
+(A) SGI O2 1 x MIPS R10000 250MHz IRIX 6.5.3
+(B) IBM RS/6000 43P-240 1 x PowerPC 603e 223MHz AIX 4.3
+(C) Dell GX1/L+ 1 x Pentium III 550MHz Linux 2.2.12-20
+(D) PowerBook G3 1 x PowerPC 750 266MHz LinuxPPC 2.2.6-15apmac
+(E) PowerBook G3 1 x PowerPC 750 266MHz MacOS 8.5.1
+(F) PowerBook G3 1 x PowerPC 750 400MHz MacOS 9.0.2
+
+Compiler
+(1) MIPSpro C 7.2.1 -O3 optimizations
+(2) GCC 2.95.1 -O3 optimizations
+(3) IBM AIX xlc -O3 optimizations (version unknown)
+(4) EGCS 2.91.66 -O3 optimizations
+(5) Metrowerks CodeWarrior 5.0 C, all optimizations
+(6) MIPSpro C 7.30 -O3 optimizations
+(7) same as (6), with optimized libmalloc.so
+
+Timings are given in seconds, computed using the C library's clock()
+function.  The first column gives the hardware and compiler
+configuration used for the test. The second column indicates the
+number of tests that were aggregated to get the statistics for that
+size.  These were compiled using 16 bit digits.
+
+Source data were generated randomly using a fixed seed, so they should
+be internally consistent, but may vary on different systems depending
+on the C library.  Also, since the resolution of the timer accessed by
+clock() varies, there may be some variance in the precision of these
+measurements.
+
+Prime Generation (primegen)
+
+128 bits:
+A1      200     min=0.03, avg=0.19, max=0.72, sum=38.46
+A2      200     min=0.02, avg=0.16, max=0.62, sum=32.55
+B3      200     min=0.01, avg=0.07, max=0.22, sum=13.29
+C4      200     min=0.00, avg=0.03, max=0.20, sum=6.14
+D4      200     min=0.00, avg=0.05, max=0.33, sum=9.70
+A6      200     min=0.01, avg=0.09, max=0.36, sum=17.48
+A7      200     min=0.00, avg=0.05, max=0.24, sum=10.07
+
+192 bits:
+A1      200     min=0.05, avg=0.45, max=3.13, sum=89.96
+A2      200     min=0.04, avg=0.39, max=2.61, sum=77.55
+B3      200     min=0.02, avg=0.18, max=1.25, sum=36.97
+C4      200     min=0.01, avg=0.09, max=0.33, sum=18.24
+D4      200     min=0.02, avg=0.15, max=0.54, sum=29.63
+A6      200     min=0.02, avg=0.24, max=1.70, sum=47.84
+A7      200     min=0.01, avg=0.15, max=1.05, sum=30.88
+
+256 bits:
+A1      200     min=0.08, avg=0.92, max=6.13, sum=184.79
+A2      200     min=0.06, avg=0.76, max=5.03, sum=151.11
+B3      200     min=0.04, avg=0.41, max=2.68, sum=82.35
+C4      200     min=0.02, avg=0.19, max=0.69, sum=37.91
+D4      200     min=0.03, avg=0.31, max=1.15, sum=63.00
+A6      200     min=0.04, avg=0.48, max=3.13, sum=95.46
+A7      200     min=0.03, avg=0.37, max=2.36, sum=73.60
+
+320 bits:
+A1      200     min=0.11, avg=1.59, max=6.14, sum=318.81
+A2      200     min=0.09, avg=1.27, max=4.93, sum=254.03
+B3      200     min=0.07, avg=0.82, max=3.13, sum=163.80
+C4      200     min=0.04, avg=0.44, max=1.91, sum=87.59
+D4      200     min=0.06, avg=0.73, max=3.22, sum=146.73
+A6      200     min=0.07, avg=0.93, max=3.50, sum=185.01
+A7      200     min=0.05, avg=0.76, max=2.94, sum=151.78
+
+384 bits:
+A1      200     min=0.16, avg=2.69, max=11.41, sum=537.89
+A2      200     min=0.13, avg=2.15, max=9.03, sum=429.14
+B3      200     min=0.11, avg=1.54, max=6.49, sum=307.78
+C4      200     min=0.06, avg=0.81, max=4.84, sum=161.13
+D4      200     min=0.10, avg=1.38, max=8.31, sum=276.81
+A6      200     min=0.11, avg=1.73, max=7.36, sum=345.55
+A7      200     min=0.09, avg=1.46, max=6.12, sum=292.02
+
+448 bits:
+A1      200     min=0.23, avg=3.36, max=15.92, sum=672.63
+A2      200     min=0.17, avg=2.61, max=12.25, sum=522.86
+B3      200     min=0.16, avg=2.10, max=9.83, sum=420.86
+C4      200     min=0.09, avg=1.44, max=7.64, sum=288.36
+D4      200     min=0.16, avg=2.50, max=13.29, sum=500.17
+A6      200     min=0.15, avg=2.31, max=10.81, sum=461.58
+A7      200     min=0.14, avg=2.03, max=9.53, sum=405.16
+
+512 bits:
+A1      200     min=0.30, avg=6.12, max=22.18, sum=1223.35
+A2      200     min=0.25, avg=4.67, max=16.90, sum=933.18
+B3      200     min=0.23, avg=4.13, max=14.94, sum=825.45
+C4      200     min=0.13, avg=2.08, max=9.75, sum=415.22
+D4      200     min=0.24, avg=4.04, max=20.18, sum=808.11
+A6      200     min=0.22, avg=4.47, max=16.19, sum=893.83
+A7      200     min=0.20, avg=4.03, max=14.65, sum=806.02
+
+Modular Exponentiation (metime)
+
+The following results are aggregated from 200 pseudo-randomly
+generated tests, based on a fixed seed. 
+
+                      base, exponent, and modulus size (bits)
+P/C       128   192   256   320   384   448   512   640   768   896  1024
+------- -----------------------------------------------------------------
+A1      0.015 0.027 0.047 0.069 0.098 0.133 0.176 0.294 0.458 0.680 1.040
+A2      0.013 0.024 0.037 0.053 0.077 0.102 0.133 0.214 0.326 0.476 0.668
+B3      0.005 0.011 0.021 0.036 0.056 0.084 0.121 0.222 0.370 0.573 0.840
+C4      0.002 0.006 0.011 0.020 0.032 0.048 0.069 0.129 0.223 0.344 0.507
+D4      0.004 0.010 0.019 0.034 0.056 0.085 0.123 0.232 0.390 0.609 0.899
+E5      0.007 0.015 0.031 0.055 0.088 0.133 0.183 0.342 0.574 0.893 1.317
+A6      0.008 0.016 0.038 0.042 0.064 0.093 0.133 0.239 0.393 0.604 0.880
+A7      0.005 0.011 0.020 0.036 0.056 0.083 0.121 0.223 0.374 0.583 0.855
+
+Multiplication and Squaring tests, (mulsqr)
+
+The following results are aggregated from 500000 pseudo-randomly
+generated tests, based on a per-run wall-clock seed.  Times are given
+in seconds, except where indicated in microseconds (us).
+
+(A1)
+
+bits    multiply    square  ad  percent time/mult   time/square
+64      9.33        9.15    >   1.9     18.7us      18.3us
+128     10.88       10.44   >   4.0     21.8us      20.9us
+192     13.30       11.89   >   10.6    26.7us      23.8us
+256     14.88       12.64   >   15.1    29.8us      25.3us
+320     18.64       15.01   >   19.5    37.3us      30.0us
+384     23.11       17.70   >   23.4    46.2us      35.4us
+448     28.28       20.88   >   26.2    56.6us      41.8us
+512     34.09       24.51   >   28.1    68.2us      49.0us
+640     47.86       33.25   >   30.5    95.7us      66.5us
+768     64.91       43.54   >   32.9    129.8us     87.1us
+896     84.49       55.48   >   34.3    169.0us     111.0us
+1024    107.25      69.21   >   35.5    214.5us     138.4us
+1536    227.97      141.91  >   37.8    456.0us     283.8us
+2048    394.05      242.15  >   38.5    788.1us     484.3us
+
+(A2)
+
+bits    multiply    square  ad  percent time/mult   time/square
+64      7.87        7.95    <   1.0     15.7us      15.9us
+128     9.40        9.19    >   2.2     18.8us      18.4us
+192     11.15       10.59   >   5.0     22.3us      21.2us
+256     12.02       11.16   >   7.2     24.0us      22.3us
+320     14.62       13.43   >   8.1     29.2us      26.9us
+384     17.72       15.80   >   10.8    35.4us      31.6us
+448     21.24       18.51   >   12.9    42.5us      37.0us
+512     25.36       21.78   >   14.1    50.7us      43.6us
+640     34.57       29.00   >   16.1    69.1us      58.0us
+768     46.10       37.60   >   18.4    92.2us      75.2us
+896     58.94       47.72   >   19.0    117.9us     95.4us
+1024    73.76       59.12   >   19.8    147.5us     118.2us
+1536    152.00      118.80  >   21.8    304.0us     237.6us
+2048    259.41      199.57  >   23.1    518.8us     399.1us
+
+(B3)
+
+bits    multiply    square  ad  percent time/mult   time/square
+64      2.60        2.47    >   5.0     5.20us      4.94us
+128     4.43        4.06    >   8.4     8.86us      8.12us
+192     7.03        6.10    >   13.2    14.1us      12.2us
+256     10.44       8.59    >   17.7    20.9us      17.2us
+320     14.44       11.64   >   19.4    28.9us      23.3us
+384     19.12       15.08   >   21.1    38.2us      30.2us
+448     24.55       19.09   >   22.2    49.1us      38.2us
+512     31.03       23.53   >   24.2    62.1us      47.1us
+640     45.05       33.80   >   25.0    90.1us      67.6us
+768     63.02       46.05   >   26.9    126.0us     92.1us
+896     83.74       60.29   >   28.0    167.5us     120.6us
+1024    106.73      76.65   >   28.2    213.5us     153.3us
+1536    228.94      160.98  >   29.7    457.9us     322.0us
+2048    398.08      275.93  >   30.7    796.2us     551.9us
+
+(C4)
+
+bits    multiply    square  ad  percent time/mult   time/square
+64      1.34        1.28    >   4.5     2.68us      2.56us
+128     2.76        2.59    >   6.2     5.52us      5.18us
+192     4.52        4.16    >   8.0     9.04us      8.32us
+256     6.64        5.99    >   9.8     13.3us      12.0us
+320     9.20        8.13    >   11.6    18.4us      16.3us
+384     12.01       10.58   >   11.9    24.0us      21.2us
+448     15.24       13.33   >   12.5    30.5us      26.7us
+512     19.02       16.46   >   13.5    38.0us      32.9us
+640     27.56       23.54   >   14.6    55.1us      47.1us
+768     37.89       31.78   >   16.1    75.8us      63.6us
+896     49.24       41.42   >   15.9    98.5us      82.8us
+1024    62.59       52.18   >   16.6    125.2us     104.3us
+1536    131.66      107.72  >   18.2    263.3us     215.4us
+2048    226.45      182.95  >   19.2    453.0us     365.9us
+
+(A7)
+
+bits    multiply    square  ad  percent time/mult   time/square
+64      1.74        1.71    >   1.7     3.48us      3.42us
+128     3.48        2.96    >   14.9    6.96us      5.92us
+192     5.74        4.60    >   19.9    11.5us      9.20us
+256     8.75        6.61    >   24.5    17.5us      13.2us
+320     12.5        8.99    >   28.1    25.0us      18.0us
+384     16.9        11.9    >   29.6    33.8us      23.8us
+448     22.2        15.2    >   31.7    44.4us      30.4us
+512     28.3        19.0    >   32.7    56.6us      38.0us
+640     42.4        28.0    >   34.0    84.8us      56.0us
+768     59.4        38.5    >   35.2    118.8us     77.0us
+896     79.5        51.2    >   35.6    159.0us     102.4us
+1024    102.6	    65.5    >	36.2	205.2us	    131.0us
+1536    224.3	    140.6   >	37.3	448.6us	    281.2us
+2048    393.4	    244.3   >	37.9	786.8us	    488.6us
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/hpma512.s b/security/nss/lib/freebl/mpi/hpma512.s
new file mode 100644
index 000000000..ae9da630d
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/hpma512.s
@@ -0,0 +1,615 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/*
+ *
+ *  This PA-RISC 2.0 function computes the product of two unsigned integers,
+ *  and adds the result to a previously computed integer.  The multiplicand
+ *  is a 512-bit (64-byte, eight doubleword) unsigned integer, stored in
+ *  memory in little-double-wordian order.  The multiplier is an unsigned
+ *  64-bit integer.  The previously computed integer to which the product is
+ *  added is located in the result ("res") area, and is assumed to be a
+ *  576-bit (72-byte, nine doubleword) unsigned integer, stored in memory
+ *  in little-double-wordian order.  This value normally will be the result
+ *  of a previously computed nine doubleword result.  It is not necessary
+ *  to pad the multiplicand with an additional 64-bit zero doubleword.
+ *
+ *  Multiplicand, multiplier, and addend ideally should be aligned at
+ *  16-byte boundaries for best performance.  The code will function
+ *  correctly for alignment at eight-byte boundaries which are not 16-byte
+ *  boundaries, but the execution may be slightly slower due to even/odd
+ *  bank conflicts on PA-RISC 8000 processors.
+ *
+ *  This function is designed to accept the same calling sequence as Bill
+ *  Ackerman's "maxpy_little" function.  The carry from the ninth doubleword
+ *  of the result is added to the tenth doubleword of the result, as is done by
+ *  Bill Ackerman's function.  The final carry also is returned as an
+ *  integer, which may be ignored.  The function prototype may be either
+ *  of the following:
+ *
+ *      void multacc512( int l, chunk* m, const chunk* a, chunk* res );
+ *          or
+ *      int multacc512( int l, chunk* m, const chunk* a, chunk* res );
+ *
+ *  where:  "l" originally denoted vector lengths.  This parameter is
+ *      ignored.  This function always assumes a multiplicand length of
+ *      512 bits (eight doublewords), and addend and result lengths of
+ *      576 bits (nine doublewords).
+ *
+ *      "m" is a pointer to the doubleword multiplier, ideally aligned
+ *      on a 16-byte boundary.
+ *
+ *      "a" is a pointer to the eight-doubleword multiplicand, stored
+ *      in little-double-wordian order, and ideally aligned on a 16-byte
+ *      boundary.
+ *
+ *      "res" is a pointer to the nine doubleword addend, and to the
+ *      nine-doubleword product computed by this function.  The result
+ *      also is stored in little-double-wordian order, and ideally is
+ *      aligned on a 16-byte boundary. It is expected that the alignment
+ *      of the "res" area may alternate between even/odd doubleword
+ *      boundaries for successive calls for 512-bit x 512-bit
+ *      multiplications.
+ *
+ *  The code for this function has been scheduled to use the parallelism
+ *  of the PA-RISC 8000 series microprocessors as well as the author was
+ *  able.  Comments and/or suggestions for improvement are welcomed.
+ *
+ *  The code is "64-bit safe".  This means it may be called in either
+ *  the 32ILP context or the 64LP context.  All 64-bits of registers are
+ *  saved and restored.
+ *
+ *  This code is self-contained.  It requires no other header files in order
+ *  to compile and to be linkable on a PA-RISC 2.0 machine.  Symbolic
+ *  definitions for registers and stack offsets are included within this
+ *  one source file.
+ *
+ *  This is a leaf routine.  As such, minimal use is made of the stack area.
+ *  Of the 192 bytes allocated, 64 bytes are used for saving/restoring eight
+ *  general registers, and 128 bytes are used to move intermediate products
+ *  from the floating-point registers to the general registers.  Stack
+ *  protocols assure proper alignment of these areas.
+ *
+ */
+
+
+/*  ====================================================================*/
+/*      symbolic definitions for PA-RISC registers      */
+/*      in the MIPS style, avoids lots of case shifts       */
+/*      assignments (except t4) preserve register number parity */
+/*  ====================================================================*/
+
+#define zero    %r0         /* permanent zero */
+#define t5      %r1         /* temp register, altered by addil */
+
+#define rp      %r2         /* return pointer */
+
+#define s1      %r3         /* callee saves register*/
+#define s0      %r4         /* callee saves register*/
+#define s3      %r5         /* callee saves register*/
+#define s2      %r6         /* callee saves register*/
+#define s5      %r7         /* callee saves register*/
+#define s4      %r8         /* callee saves register*/
+#define s7      %r9         /* callee saves register*/
+#define s6      %r10        /* callee saves register*/
+
+#define t1      %r19        /* caller saves register*/
+#define t0      %r20        /* caller saves register*/
+#define t3      %r21        /* caller saves register*/
+#define t2      %r22        /* caller saves register*/
+
+#define a3      %r23        /* fourth argument register, high word */
+#define a2      %r24        /* third argument register, low word*/
+#define a1      %r25        /* second argument register, high word*/
+#define a0      %r26        /* first argument register, low word*/
+
+#define v0      %r28        /* high order return value*/
+#define v1      %r29        /* low order return value*/
+
+#define sp      %r30        /* stack pointer*/
+#define t4      %r31        /* temporary register   */
+
+#define fa0     %fr4        /* first argument register*/
+#define fa1     %fr5        /* second argument register*/
+#define fa2     %fr6        /* third argument register*/
+#define fa3     %fr7        /* fourth argument register*/
+
+#define fa0r    %fr4R       /* first argument register*/
+#define fa1r    %fr5R       /* second argument register*/
+#define fa2r    %fr6R       /* third argument register*/
+#define fa3r    %fr7R       /* fourth argument register*/
+
+#define ft0     %fr8        /* caller saves register*/
+#define ft1     %fr9        /* caller saves register*/
+#define ft2     %fr10       /* caller saves register*/
+#define ft3     %fr11       /* caller saves register*/
+
+#define ft0r    %fr8R       /* caller saves register*/
+#define ft1r    %fr9R       /* caller saves register*/
+#define ft2r    %fr10R      /* caller saves register*/
+#define ft3r    %fr11R      /* caller saves register*/
+
+#define ft4     %fr22       /* caller saves register*/
+#define ft5     %fr23       /* caller saves register*/
+#define ft6     %fr24       /* caller saves register*/
+#define ft7     %fr25       /* caller saves register*/
+#define ft8     %fr26       /* caller saves register*/
+#define ft9     %fr27       /* caller saves register*/
+#define ft10    %fr28       /* caller saves register*/
+#define ft11    %fr29       /* caller saves register*/
+#define ft12    %fr30       /* caller saves register*/
+#define ft13    %fr31       /* caller saves register*/
+
+#define ft4r    %fr22R      /* caller saves register*/
+#define ft5r    %fr23R      /* caller saves register*/
+#define ft6r    %fr24R      /* caller saves register*/
+#define ft7r    %fr25R      /* caller saves register*/
+#define ft8r    %fr26R      /* caller saves register*/
+#define ft9r    %fr27R      /* caller saves register*/
+#define ft10r   %fr28R      /* caller saves register*/
+#define ft11r   %fr29R      /* caller saves register*/
+#define ft12r   %fr30R      /* caller saves register*/
+#define ft13r   %fr31R      /* caller saves register*/
+
+
+
+/*  ================================================================== */
+/*      functional definitions for PA-RISC registers           */
+/*  ================================================================== */
+
+/*              general registers           */
+
+#define T1      a0          /* temp, (length parameter ignored)             */
+
+#define pM      a1          /* -> 64-bit multiplier                         */
+#define T2      a1          /* temp, (after fetching multiplier)            */
+
+#define pA      a2          /* -> multiplicand vector (8 64-bit words)      */
+#define T3      a2          /* temp, (after fetching multiplicand)          */
+
+#define pR      a3          /* -> addend vector (9 64-bit doublewords),
+                                  result vector (9 64-bit doublewords)      */
+
+#define S0      s0          /* callee saves summand registers               */
+#define S1      s1
+#define S2      s2
+#define S3      s3
+#define S4      s4
+#define S5      s5
+#define S6      s6
+#define S7      s7
+
+#define S8      v0          /* caller saves summand registers               */
+#define S9      v1
+#define S10     t0
+#define S11     t1
+#define S12     t2
+#define S13     t3
+#define S14     t4
+#define S15     t5
+
+
+
+/*              floating-point registers                                    */
+
+#define M       fa0         /* multiplier double word                       */
+#define MR      fa0r        /* low order half of multiplier double word     */
+#define ML      fa0         /* high order half of multiplier double word    */
+
+#define A0      fa2         /* multiplicand double word 0                   */
+#define A0R     fa2r        /* low order half of multiplicand double word   */
+#define A0L     fa2         /* high order half of multiplicand double word  */
+
+#define A1      fa3         /* multiplicand double word 1                   */
+#define A1R     fa3r        /* low order half of multiplicand double word   */
+#define A1L     fa3         /* high order half of multiplicand double word  */
+
+#define A2      ft0         /* multiplicand double word 2                   */
+#define A2R     ft0r        /* low order half of multiplicand double word   */
+#define A2L     ft0         /* high order half of multiplicand double word  */
+
+#define A3      ft1         /* multiplicand double word 3                   */
+#define A3R     ft1r        /* low order half of multiplicand double word   */
+#define A3L     ft1         /* high order half of multiplicand double word  */
+
+#define A4      ft2         /* multiplicand double word 4                   */
+#define A4R     ft2r        /* low order half of multiplicand double word   */
+#define A4L     ft2         /* high order half of multiplicand double word  */
+
+#define A5      ft3         /* multiplicand double word 5                   */
+#define A5R     ft3r        /* low order half of multiplicand double word   */
+#define A5L     ft3         /* high order half of multiplicand double word  */
+
+#define A6      ft4         /* multiplicand double word 6                   */
+#define A6R     ft4r        /* low order half of multiplicand double word   */
+#define A6L     ft4         /* high order half of multiplicand double word  */
+
+#define A7      ft5         /* multiplicand double word 7                   */
+#define A7R     ft5r        /* low order half of multiplicand double word   */
+#define A7L     ft5         /* high order half of multiplicand double word  */
+
+#define P0      ft6         /* product word 0                               */
+#define P1      ft7         /* product word 1                               */
+#define P2      ft8         /* product word 2                               */
+#define P3      ft9         /* product word 3                               */
+#define P4      ft10        /* product word 4                               */
+#define P5      ft11        /* product word 5                               */
+#define P6      ft12        /* product word 6                               */
+#define P7      ft13        /* product word 7                               */
+
+
+
+
+/*  ======================================================================  */
+/*      symbolic definitions for HP-UX stack offsets                        */
+/*      symbolic definitions for memory NOPs                                */
+/*  ======================================================================  */
+
+#define ST_SZ       192         /* stack area total size                    */
+
+#define SV0         -192(sp)    /* general register save area               */
+#define SV1         -184(sp)
+#define SV2         -176(sp)
+#define SV3         -168(sp)
+#define SV4         -160(sp)
+#define SV5         -152(sp)
+#define SV6         -144(sp)
+#define SV7         -136(sp)
+
+#define XF0         -128(sp)    /* data transfer area                       */
+#define XF1         -120(sp)    /* for floating-pt to integer regs          */
+#define XF2         -112(sp)
+#define XF3         -104(sp)
+#define XF4         -96(sp)
+#define XF5         -88(sp)
+#define XF6         -80(sp)
+#define XF7         -72(sp)
+#define XF8         -64(sp)
+#define XF9         -56(sp)
+#define XF10        -48(sp)
+#define XF11        -40(sp)
+#define XF12        -32(sp)
+#define XF13        -24(sp)
+#define XF14        -16(sp)
+#define XF15        -8(sp)
+
+#define mnop    proberi (sp),3,zero     /* memory NOP                       */
+
+
+
+
+/*  ======================================================================  */
+/*      assembler formalities                                               */
+/*  ======================================================================  */
+
+#ifdef __LP64__
+                .level  2.0W
+#else
+                .level  2.0
+#endif
+                .space    $TEXT$
+                .subspa   $CODE$
+                .align    16
+
+/*  ======================================================================  */
+/*      here to compute 64-bit x 512-bit product + 576-bit addend           */
+/*  ======================================================================  */
+
+multacc512
+        .PROC
+        .CALLINFO
+        .ENTRY
+    fldd    0(pM),M                 ; multiplier double word
+    ldo     ST_SZ(sp),sp            ; push stack
+
+    fldd    0(pA),A0                ; multiplicand double word 0
+    std     S1,SV1                  ; save s1
+
+    fldd    16(pA),A2               ; multiplicand double word 2
+    std     S3,SV3                  ; save s3
+
+    fldd    32(pA),A4               ; multiplicand double word 4
+    std     S5,SV5                  ; save s5
+
+    fldd    48(pA),A6               ; multiplicand double word 6
+    std     S7,SV7                  ; save s7
+
+
+    std     S0,SV0                  ; save s0
+    fldd    8(pA),A1                ; multiplicand double word 1
+    xmpyu   MR,A0L,P0               ; A0 cross 32-bit word products
+    xmpyu   ML,A0R,P2
+
+    std     S2,SV2                  ; save s2
+    fldd    24(pA),A3               ; multiplicand double word 3
+    xmpyu   MR,A2L,P4               ; A2 cross 32-bit word products
+    xmpyu   ML,A2R,P6
+
+    std     S4,SV4                  ; save s4
+    fldd    40(pA),A5               ; multiplicand double word 5
+
+    std     S6,SV6                  ; save s6
+    fldd    56(pA),A7               ; multiplicand double word 7
+
+
+    fstd    P0,XF0                  ; MR * A0L
+    xmpyu   MR,A0R,P0               ; A0 right 32-bit word product
+    xmpyu   MR,A1L,P1               ; A1 cross 32-bit word product
+
+    fstd    P2,XF2                  ; ML * A0R
+    xmpyu   ML,A0L,P2               ; A0 left 32-bit word product
+    xmpyu   ML,A1R,P3               ; A1 cross 32-bit word product
+
+    fstd    P4,XF4                  ; MR * A2L
+    xmpyu   MR,A2R,P4               ; A2 right 32-bit word product
+    xmpyu   MR,A3L,P5               ; A3 cross 32-bit word product
+
+    fstd    P6,XF6                  ; ML * A2R
+    xmpyu   ML,A2L,P6               ; A2 left 32-bit word product
+    xmpyu   ML,A3R,P7               ; A3 cross 32-bit word product
+
+
+    ldd     XF0,S0                  ; MR * A0L
+    fstd    P1,XF1                  ; MR * A1L
+
+    ldd     XF2,S2                  ; ML * A0R
+    fstd    P3,XF3                  ; ML * A1R
+
+    ldd     XF4,S4                  ; MR * A2L
+    fstd    P5,XF5                  ; MR * A3L
+    xmpyu   MR,A1R,P1               ; A1 parallel 32-bit word products
+    xmpyu   ML,A1L,P3
+
+    ldd     XF6,S6                  ; ML * A2R
+    fstd    P7,XF7                  ; ML * A3R
+    xmpyu   MR,A3R,P5               ; A3 parallel 32-bit word products
+    xmpyu   ML,A3L,P7
+
+
+    fstd    P0,XF0                  ; MR * A0R
+    ldd     XF1,S1                  ; MR * A1L
+    nop
+    add     S0,S2,T1                ; A0 cross product sum
+
+    fstd    P2,XF2                  ; ML * A0L
+    ldd     XF3,S3                  ; ML * A1R
+    add,dc  zero,zero,S0            ; A0 cross product sum carry
+    depd,z  T1,31,32,S2             ; A0 cross product sum << 32
+
+    fstd    P4,XF4                  ; MR * A2R
+    ldd     XF5,S5                  ; MR * A3L
+    shrpd   S0,T1,32,S0             ; A0 carry | cross product sum >> 32
+    add     S4,S6,T3                ; A2 cross product sum
+
+    fstd    P6,XF6                  ; ML * A2L
+    ldd     XF7,S7                  ; ML * A3R
+    add,dc  zero,zero,S4            ; A2 cross product sum carry
+    depd,z  T3,31,32,S6             ; A2 cross product sum << 32
+
+
+    ldd     XF0,S8                  ; MR * A0R
+    fstd    P1,XF1                  ; MR * A1R
+    xmpyu   MR,A4L,P0               ; A4 cross 32-bit word product
+    xmpyu   MR,A5L,P1               ; A5 cross 32-bit word product
+
+    ldd     XF2,S10                 ; ML * A0L
+    fstd    P3,XF3                  ; ML * A1L
+    xmpyu   ML,A4R,P2               ; A4 cross 32-bit word product
+    xmpyu   ML,A5R,P3               ; A5 cross 32-bit word product
+
+    ldd     XF4,S12                 ; MR * A2R
+    fstd    P5,XF5                  ; MR * A3R
+    xmpyu   MR,A6L,P4               ; A6 cross 32-bit word product
+    xmpyu   MR,A7L,P5               ; A7 cross 32-bit word product
+
+    ldd     XF6,S14                 ; ML * A2L
+    fstd    P7,XF7                  ; ML * A3L
+    xmpyu   ML,A6R,P6               ; A6 cross 32-bit word product
+    xmpyu   ML,A7R,P7               ; A7 cross 32-bit word product
+
+
+    fstd    P0,XF0                  ; MR * A4L
+    ldd     XF1,S9                  ; MR * A1R
+    shrpd   S4,T3,32,S4             ; A2 carry | cross product sum >> 32
+    add     S1,S3,T1                ; A1 cross product sum
+
+    fstd    P2,XF2                  ; ML * A4R
+    ldd     XF3,S11                 ; ML * A1L
+    add,dc  zero,zero,S1            ; A1 cross product sum carry
+    depd,z  T1,31,32,S3             ; A1 cross product sum << 32
+
+    fstd    P4,XF4                  ; MR * A6L
+    ldd     XF5,S13                 ; MR * A3R
+    shrpd   S1,T1,32,S1             ; A1 carry | cross product sum >> 32
+    add     S5,S7,T3                ; A3 cross product sum
+
+    fstd    P6,XF6                  ; ML * A6R
+    ldd     XF7,S15                 ; ML * A3L
+    add,dc  zero,zero,S5            ; A3 cross product sum carry
+    depd,z  T3,31,32,S7             ; A3 cross product sum << 32
+
+
+    shrpd   S5,T3,32,S5             ; A3 carry | cross product sum >> 32
+    add     S2,S8,S8                ; M * A0 right doubleword, P0 doubleword
+
+    add,dc  S0,S10,S10              ; M * A0 left doubleword
+    add     S3,S9,S9                ; M * A1 right doubleword
+
+    add,dc  S1,S11,S11              ; M * A1 left doubleword
+    add     S6,S12,S12              ; M * A2 right doubleword
+
+
+    ldd     24(pR),S3               ; Addend word 3
+    fstd    P1,XF1                  ; MR * A5L
+    add,dc  S4,S14,S14              ; M * A2 left doubleword
+    xmpyu   MR,A5R,P1               ; A5 right 32-bit word product
+
+    ldd     8(pR),S1                ; Addend word 1
+    fstd    P3,XF3                  ; ML * A5R
+    add     S7,S13,S13              ; M * A3 right doubleword
+    xmpyu   ML,A5L,P3               ; A5 left 32-bit word product
+
+    ldd     0(pR),S7                ; Addend word 0
+    fstd    P5,XF5                  ; MR * A7L
+    add,dc  S5,S15,S15              ; M * A3 left doubleword
+    xmpyu   MR,A7R,P5               ; A7 right 32-bit word product
+
+    ldd     16(pR),S5               ; Addend word 2
+    fstd    P7,XF7                  ; ML * A7R
+    add     S10,S9,S9               ; P1 doubleword
+    xmpyu   ML,A7L,P7               ; A7 left 32-bit word products
+
+
+    ldd     XF0,S0                  ; MR * A4L
+    fstd    P1,XF9                  ; MR * A5R
+    add,dc  S11,S12,S12             ; P2 doubleword
+    xmpyu   MR,A4R,P0               ; A4 right 32-bit word product
+
+    ldd     XF2,S2                  ; ML * A4R
+    fstd    P3,XF11                 ; ML * A5L
+    add,dc  S14,S13,S13             ; P3 doubleword
+    xmpyu   ML,A4L,P2               ; A4 left 32-bit word product
+
+    ldd     XF6,S6                  ; ML * A6R
+    fstd    P5,XF13                 ; MR * A7R
+    add,dc  zero,S15,T2             ; P4 partial doubleword
+    xmpyu   MR,A6R,P4               ; A6 right 32-bit word product
+
+    ldd     XF4,S4                  ; MR * A6L
+    fstd    P7,XF15                 ; ML * A7L
+    add     S7,S8,S8                ; R0 + P0, new R0 doubleword
+    xmpyu   ML,A6L,P6               ; A6 left 32-bit word product
+
+
+    fstd    P0,XF0                  ; MR * A4R
+    ldd     XF7,S7                  ; ML * A7R
+    add,dc  S1,S9,S9                ; c + R1 + P1, new R1 doubleword
+
+    fstd    P2,XF2                  ; ML * A4L
+    ldd     XF1,S1                  ; MR * A5L
+    add,dc  S5,S12,S12              ; c + R2 + P2, new R2 doubleword
+
+    fstd    P4,XF4                  ; MR * A6R
+    ldd     XF5,S5                  ; MR * A7L
+    add,dc  S3,S13,S13              ; c + R3 + P3, new R3 doubleword
+
+    fstd    P6,XF6                  ; ML * A6L
+    ldd     XF3,S3                  ; ML * A5R
+    add,dc  zero,T2,T2              ; c + partial P4
+    add     S0,S2,T1                ; A4 cross product sum
+
+
+    std     S8,0(pR)                ; save R0
+    add,dc  zero,zero,S0            ; A4 cross product sum carry
+    depd,z  T1,31,32,S2             ; A4 cross product sum << 32
+
+    std     S9,8(pR)                ; save R1
+    shrpd   S0,T1,32,S0             ; A4 carry | cross product sum >> 32
+    add     S4,S6,T3                ; A6 cross product sum
+
+    std     S12,16(pR)              ; save R2
+    add,dc  zero,zero,S4            ; A6 cross product sum carry
+    depd,z  T3,31,32,S6             ; A6 cross product sum << 32
+
+
+    std     S13,24(pR)              ; save R3
+    shrpd   S4,T3,32,S4             ; A6 carry | cross product sum >> 32
+    add     S1,S3,T1                ; A5 cross product sum
+
+    ldd     XF0,S8                  ; MR * A4R
+    add,dc  zero,zero,S1            ; A5 cross product sum carry
+    depd,z  T1,31,32,S3             ; A5 cross product sum << 32
+
+    ldd     XF2,S10                 ; ML * A4L
+    ldd     XF9,S9                  ; MR * A5R
+    shrpd   S1,T1,32,S1             ; A5 carry | cross product sum >> 32
+    add     S5,S7,T3                ; A7 cross product sum
+
+    ldd     XF4,S12                 ; MR * A6R
+    ldd     XF11,S11                ; ML * A5L
+    add,dc  zero,zero,S5            ; A7 cross product sum carry
+    depd,z  T3,31,32,S7             ; A7 cross product sum << 32
+
+    ldd     XF6,S14                 ; ML * A6L
+    ldd     XF13,S13                ; MR * A7R
+    shrpd   S5,T3,32,S5             ; A7 carry | cross product sum >> 32
+    add     S2,S8,S8                ; M * A4 right doubleword
+
+
+    ldd     XF15,S15                ; ML * A7L
+    add,dc  S0,S10,S10              ; M * A4 left doubleword
+    add     S3,S9,S9                ; M * A5 right doubleword
+
+    add,dc  S1,S11,S11              ; M * A5 left doubleword
+    add     S6,S12,S12              ; M * A6 right doubleword
+
+    ldd     32(pR),S0               ; Addend word 4
+    ldd     40(pR),S1               ; Addend word 5
+    add,dc  S4,S14,S14              ; M * A6 left doubleword
+    add     S7,S13,S13              ; M * A7 right doubleword
+
+    ldd     48(pR),S2               ; Addend word 6
+    ldd     56(pR),S3               ; Addend word 7
+    add,dc  S5,S15,S15              ; M * A7 left doubleword
+    add     S8,T2,S8                ; P4 doubleword
+
+    ldd     64(pR),S4               ; Addend word 8
+    ldd     SV5,s5                  ; restore s5
+    add,dc  S10,S9,S9               ; P5 doubleword
+    add,dc  S11,S12,S12             ; P6 doubleword
+
+
+    ldd     SV6,s6                  ; restore s6
+    ldd     SV7,s7                  ; restore s7
+    add,dc  S14,S13,S13             ; P7 doubleword
+    add,dc  zero,S15,S15            ; P8 doubleword
+
+    add     S0,S8,S8                ; new R4 doubleword
+
+    ldd     SV0,s0                  ; restore s0
+    std     S8,32(pR)               ; save R4
+    add,dc  S1,S9,S9                ; new R5 doubleword
+
+    ldd     SV1,s1                  ; restore s1
+    std     S9,40(pR)               ; save R5
+    add,dc  S2,S12,S12              ; new R6 doubleword
+
+    ldd     SV2,s2                  ; restore s2
+    std     S12,48(pR)              ; save R6
+    add,dc  S3,S13,S13              ; new R7 doubleword
+
+    ldd     SV3,s3                  ; restore s3
+    std     S13,56(pR)              ; save R7
+    add,dc  S4,S15,S15              ; new R8 doubleword
+
+    ldd     SV4,s4                  ; restore s4
+    std     S15,64(pR)              ; save result[8]
+    add,dc  zero,zero,v0            ; return carry from R8
+
+    CMPIB,*= 0,v0,$L0               ; if no overflow, exit
+    LDO     8(pR),pR                ; delay slot: advance pR one doubleword
+
+$FINAL1                             ; Final carry propagation
+    LDD     64(pR),v0               ; next higher result doubleword
+    LDO     8(pR),pR                ; advance pR one doubleword
+    ADDI    1,v0,v0                 ; add the carry
+    CMPIB,*= 0,v0,$FINAL1           ; Keep looping if there is a carry.
+    STD     v0,56(pR)               ; delay slot: store back (pR already advanced)
+$L0
+    bv      zero(rp)                ; -> caller
+    ldo     -ST_SZ(sp),sp           ; pop stack
+
+/*  ======================================================================  */
+/*      end of module                                                       */
+/*  ======================================================================  */
+
+
+        bve (rp)                    ; NOTE(review): looks unreachable after the bv at $L0
+        .EXIT
+        nop
+                .PROCEND
+                .SPACE         $TEXT$
+                .SUBSPA        $CODE$
+                .EXPORT        multacc512,ENTRY
+
+        .end
diff --git a/security/nss/lib/freebl/mpi/hppa20.s b/security/nss/lib/freebl/mpi/hppa20.s
new file mode 100644
index 000000000..c72de8a12
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/hppa20.s
@@ -0,0 +1,904 @@
+; This Source Code Form is subject to the terms of the Mozilla Public
+; License, v. 2.0. If a copy of the MPL was not distributed with this
+; file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+; Assembler level selection: the wide (2.0W) level is requested when the
+; file is preprocessed with __LP64__ defined, otherwise plain 2.0.
+; The two commented-out directives are an older 1.1-plus-2.0N setup.
+#ifdef __LP64__
+        .LEVEL   2.0W
+#else
+;       .LEVEL   1.1
+;       .ALLOW   2.0N
+        .LEVEL   2.0
+#endif
+; All code in this file is placed in the $CODE$ subspace of $TEXT$.
+        .SPACE   $TEXT$,SORT=8
+        .SUBSPA  $CODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,CODE_ONLY,SORT=24
+
+; ***************************************************************
+;
+;                 maxpy_[little/big]
+;
+; ***************************************************************
+
+; Exactly one addressing direction must be selected.  This copy of the
+; file hard-wires the little-wordian choice with the define just below;
+; nothing in this file diagnoses the case where neither or both of the
+; two selection macros are defined.
+#define LITTLE_WORDIAN 1
+
+; Byte offsets used for every vector access in the routines below.
+; Little-wordian walks upward in memory, so the forward steps are
+; positive and the "UN_" (backward) steps are negative.
+#ifdef LITTLE_WORDIAN
+#define EIGHT 8
+#define SIXTEEN 16
+#define THIRTY_TWO 32
+#define UN_EIGHT -8
+#define UN_SIXTEEN -16
+#define UN_TWENTY_FOUR -24
+#endif
+
+; Big-wordian walks downward in memory, so every sign is flipped.
+#ifdef BIG_WORDIAN
+#define EIGHT -8
+#define SIXTEEN -16
+#define THIRTY_TWO -32
+#define UN_EIGHT 8
+#define UN_SIXTEEN 16
+#define UN_TWENTY_FOUR 24
+#endif
+
+; This performs a multiple-precision integer version of "daxpy",
+; Using the selected addressing direction.  "Little-wordian" means that
+; the least significant word of a number is stored at the lowest address.
+; "Big-wordian" means that the most significant word is at the lowest
+; address.  Either way, the incoming address of the vector is that
+; of the least significant word.  That means that, for little-wordian
+; addressing, we move the address upward as we propagate carries
+; from the least significant word to the most significant.  For
+; big-wordian we move the address downward.
+
+; We use the following registers:
+;
+;     r2   return PC, of course
+;     r26 = arg1 =  length
+;     r25 = arg2 =  address of scalar
+;     r24 = arg3 =  multiplicand vector
+;     r23 = arg4 =  result vector
+;
+;     fr9 = scalar loaded once only from r25
+
+; The cycle counts shown in the bodies below are simply the result of a
+; scheduling by hand.  The actual PCX-U hardware does it differently.
+; The intention is that the overall speed is the same.
+
+; The pipeline startup and shutdown code is constructed in the usual way,
+; by taking the loop bodies and removing unnecessary instructions.
+; We have left the comments describing cycle numbers in the code.
+; These are intended for reference when comparing with the main loop,
+; and have no particular relationship to actual cycle numbers.
+
+; ---------------------------------------------------------------
+; maxpy_little / maxpy_big
+;
+; Multiply-accumulate on vectors of 64-bit doublewords:
+;     result[] += scalar * multiplicand[]
+; Inputs are the registers listed above: r26 = length, r25 = address
+; of the scalar, r24 = multiplicand vector, r23 = result vector.
+; Nothing is returned in a register; the result vector is updated in
+; place, and any final carry is rippled into the following result
+; words by the $FINAL1 loop.
+;
+; Each 64x64 product is built from four XMPYU products of the 32-bit
+; left (L) and right (R) halves of fr9 (the scalar) and of the current
+; multiplicand doubleword.  The products are moved from floating-point
+; to general registers by storing them with FSTD into scratch slots at
+; -48(%sp) through -104(%sp) and reloading them with LDD.
+;
+; The main loop consumes two multiplicand doublewords per iteration
+; (r24 and r23 each advance by two doublewords, r26 drops by 2).
+; Lengths 1, 2 and 3 leave the startup code early through $N_IS_SMALL,
+; and $ONEMORE handles the single doubleword left over when the loop
+; exits with r26 equal to 0.
+; ---------------------------------------------------------------
+#ifdef LITTLE_WORDIAN
+maxpy_little
+#else
+maxpy_big
+#endif
+        .PROC
+        .CALLINFO FRAME=120,ENTRY_GR=4
+        .ENTRY
+; Prologue: r3 and r4 are saved here and reloaded at $L0.  STW,MA also
+; advances sp by 128, which is why the slots below use negative offsets.
+        STW,MA  %r3,128(%sp)
+        STW     %r4,-124(%sp)
+
+; r26 becomes N-1 here.  The FLDD after the branch is in its delay slot.
+        ADDIB,< -1,%r26,$L0         ; If N = 0, exit immediately.
+        FLDD    0(%r25),%fr9        ; fr9 = scalar
+
+; First startup
+
+        FLDD    0(%r24),%fr24       ; Cycle 1
+        XMPYU   %fr9R,%fr24R,%fr27  ; Cycle 3
+        XMPYU   %fr9R,%fr24L,%fr25  ; Cycle 4
+        XMPYU   %fr9L,%fr24L,%fr26  ; Cycle 5
+        CMPIB,> 3,%r26,$N_IS_SMALL  ; Pick out cases N = 1, 2, or 3
+        XMPYU   %fr9L,%fr24R,%fr24  ; Cycle 6
+        FLDD    EIGHT(%r24),%fr28   ; Cycle 8
+        XMPYU   %fr9L,%fr28R,%fr31  ; Cycle 10
+        FSTD    %fr24,-96(%sp)
+        XMPYU   %fr9R,%fr28L,%fr30  ; Cycle 11
+        FSTD    %fr25,-80(%sp)
+        LDO     SIXTEEN(%r24),%r24  ; Cycle 12
+        FSTD    %fr31,-64(%sp)
+        XMPYU   %fr9R,%fr28R,%fr29  ; Cycle 13
+        FSTD    %fr27,-48(%sp)
+
+; Second startup
+
+        XMPYU   %fr9L,%fr28L,%fr28  ; Cycle 1
+        FSTD    %fr30,-56(%sp)
+        FLDD    0(%r24),%fr24
+
+        FSTD    %fr26,-88(%sp)      ; Cycle 2
+
+        XMPYU   %fr9R,%fr24R,%fr27  ; Cycle 3
+        FSTD    %fr28,-104(%sp)
+
+        XMPYU   %fr9R,%fr24L,%fr25  ; Cycle 4
+        LDD     -96(%sp),%r3
+        FSTD    %fr29,-72(%sp)
+
+        XMPYU   %fr9L,%fr24L,%fr26  ; Cycle 5
+        LDD     -64(%sp),%r19
+        LDD     -80(%sp),%r21
+
+        XMPYU   %fr9L,%fr24R,%fr24  ; Cycle 6
+        LDD     -56(%sp),%r20
+        ADD     %r21,%r3,%r3
+
+        ADD,DC  %r20,%r19,%r19      ; Cycle 7
+        LDD     -88(%sp),%r4
+        SHRPD   %r3,%r0,32,%r21
+        LDD     -48(%sp),%r1
+
+        FLDD    EIGHT(%r24),%fr28   ; Cycle 8
+        LDD     -104(%sp),%r31
+        ADD,DC  %r0,%r0,%r20
+        SHRPD   %r19,%r3,32,%r3
+
+        LDD     -72(%sp),%r29       ; Cycle 9
+        SHRPD   %r20,%r19,32,%r20
+        ADD     %r21,%r1,%r1
+
+        XMPYU   %fr9L,%fr28R,%fr31  ; Cycle 10
+        ADD,DC  %r3,%r4,%r4
+        FSTD    %fr24,-96(%sp)
+
+        XMPYU   %fr9R,%fr28L,%fr30  ; Cycle 11
+        ADD,DC  %r0,%r20,%r20
+        LDD     0(%r23),%r3
+        FSTD    %fr25,-80(%sp)
+
+        LDO     SIXTEEN(%r24),%r24  ; Cycle 12
+        FSTD    %fr31,-64(%sp)
+
+        XMPYU   %fr9R,%fr28R,%fr29  ; Cycle 13
+        ADD     %r0,%r0,%r0         ; clear the carry bit
+        ADDIB,<= -4,%r26,$ENDLOOP   ; actually happens in cycle 12
+        FSTD    %fr27,-48(%sp)
+;        MFCTL   %cr16,%r21         ; for timing
+;        STD     %r21,-112(%sp)
+
+; Here is the loop.
+
+$LOOP   XMPYU   %fr9L,%fr28L,%fr28  ; Cycle 1
+        ADD,DC  %r29,%r4,%r4
+        FSTD    %fr30,-56(%sp)
+        FLDD    0(%r24),%fr24
+
+        LDO     SIXTEEN(%r23),%r23  ; Cycle 2
+        ADD,DC  %r0,%r20,%r20
+        FSTD    %fr26,-88(%sp)
+
+        XMPYU   %fr9R,%fr24R,%fr27  ; Cycle 3
+        ADD     %r3,%r1,%r1
+        FSTD    %fr28,-104(%sp)
+        LDD     UN_EIGHT(%r23),%r21
+
+        XMPYU   %fr9R,%fr24L,%fr25  ; Cycle 4
+        ADD,DC  %r21,%r4,%r28
+        FSTD    %fr29,-72(%sp)    
+        LDD     -96(%sp),%r3
+
+        XMPYU   %fr9L,%fr24L,%fr26  ; Cycle 5
+        ADD,DC  %r20,%r31,%r22
+        LDD     -64(%sp),%r19
+        LDD     -80(%sp),%r21
+
+        XMPYU   %fr9L,%fr24R,%fr24  ; Cycle 6
+        ADD     %r21,%r3,%r3
+        LDD     -56(%sp),%r20
+        STD     %r1,UN_SIXTEEN(%r23)
+
+        ADD,DC  %r20,%r19,%r19      ; Cycle 7
+        SHRPD   %r3,%r0,32,%r21
+        LDD     -88(%sp),%r4
+        LDD     -48(%sp),%r1
+
+        ADD,DC  %r0,%r0,%r20        ; Cycle 8
+        SHRPD   %r19,%r3,32,%r3
+        FLDD    EIGHT(%r24),%fr28
+        LDD     -104(%sp),%r31
+
+        SHRPD   %r20,%r19,32,%r20   ; Cycle 9
+        ADD     %r21,%r1,%r1
+        STD     %r28,UN_EIGHT(%r23)
+        LDD     -72(%sp),%r29
+
+        XMPYU   %fr9L,%fr28R,%fr31  ; Cycle 10
+        ADD,DC  %r3,%r4,%r4
+        FSTD    %fr24,-96(%sp)
+
+        XMPYU   %fr9R,%fr28L,%fr30  ; Cycle 11
+        ADD,DC  %r0,%r20,%r20
+        FSTD    %fr25,-80(%sp)
+        LDD     0(%r23),%r3
+
+        LDO     SIXTEEN(%r24),%r24  ; Cycle 12
+        FSTD    %fr31,-64(%sp)
+
+        XMPYU   %fr9R,%fr28R,%fr29  ; Cycle 13
+        ADD     %r22,%r1,%r1
+        ADDIB,> -2,%r26,$LOOP       ; actually happens in cycle 12
+        FSTD    %fr27,-48(%sp)
+
+$ENDLOOP
+
+; Shutdown code, first stage.
+
+;        MFCTL   %cr16,%r21         ; for timing
+;        STD     %r21,UN_SIXTEEN(%r23)
+;        LDD     -112(%sp),%r21
+;        STD     %r21,UN_EIGHT(%r23)
+
+        XMPYU   %fr9L,%fr28L,%fr28  ; Cycle 1
+        ADD,DC  %r29,%r4,%r4
+        CMPIB,= 0,%r26,$ONEMORE
+        FSTD    %fr30,-56(%sp)
+
+        LDO     SIXTEEN(%r23),%r23  ; Cycle 2
+        ADD,DC  %r0,%r20,%r20
+        FSTD    %fr26,-88(%sp)
+
+        ADD     %r3,%r1,%r1         ; Cycle 3
+        FSTD    %fr28,-104(%sp)
+        LDD     UN_EIGHT(%r23),%r21
+
+        ADD,DC  %r21,%r4,%r28       ; Cycle 4
+        FSTD    %fr29,-72(%sp)    
+        STD     %r28,UN_EIGHT(%r23) ; moved up from cycle 9
+        LDD     -96(%sp),%r3
+
+        ADD,DC  %r20,%r31,%r22      ; Cycle 5
+        STD     %r1,UN_SIXTEEN(%r23)
+; $JOIN4 is also entered from the N = 2 path, with r22 cleared and
+; r3 already loaded from -96(%sp).
+$JOIN4
+        LDD     -64(%sp),%r19
+        LDD     -80(%sp),%r21
+
+        ADD     %r21,%r3,%r3        ; Cycle 6
+        LDD     -56(%sp),%r20
+
+        ADD,DC  %r20,%r19,%r19      ; Cycle 7
+        SHRPD   %r3,%r0,32,%r21
+        LDD     -88(%sp),%r4
+        LDD     -48(%sp),%r1
+
+        ADD,DC  %r0,%r0,%r20        ; Cycle 8
+        SHRPD   %r19,%r3,32,%r3
+        LDD     -104(%sp),%r31
+
+        SHRPD   %r20,%r19,32,%r20   ; Cycle 9
+        ADD     %r21,%r1,%r1
+        LDD     -72(%sp),%r29
+
+        ADD,DC  %r3,%r4,%r4         ; Cycle 10
+
+        ADD,DC  %r0,%r20,%r20       ; Cycle 11
+        LDD     0(%r23),%r3
+
+        ADD     %r22,%r1,%r1        ; Cycle 13
+
+; Shutdown code, second stage.
+
+        ADD,DC  %r29,%r4,%r4        ; Cycle 1
+
+        LDO     SIXTEEN(%r23),%r23  ; Cycle 2
+        ADD,DC  %r0,%r20,%r20
+
+        LDD     UN_EIGHT(%r23),%r21 ; Cycle 3
+        ADD     %r3,%r1,%r1
+
+        ADD,DC  %r21,%r4,%r28       ; Cycle 4
+
+        ADD,DC  %r20,%r31,%r22      ; Cycle 5
+
+        STD     %r1,UN_SIXTEEN(%r23); Cycle 6
+
+        STD     %r28,UN_EIGHT(%r23) ; Cycle 9
+
+        LDD     0(%r23),%r3         ; Cycle 11
+
+; Shutdown code, third stage.
+
+        LDO     SIXTEEN(%r23),%r23
+        ADD     %r3,%r22,%r1
+; $JOIN1 is shared with the stage 2-1/2 path: r21 receives the carry out
+; of the last add, and the last result word is stored in the delay slot.
+$JOIN1  ADD,DC  %r0,%r0,%r21
+        CMPIB,*= 0,%r21,$L0         ; if no overflow, exit
+        STD     %r1,UN_SIXTEEN(%r23)
+
+; Final carry propagation
+
+; Adds 1 to successive result words until one of them does not wrap to 0.
+$FINAL1 LDO     EIGHT(%r23),%r23
+        LDD     UN_SIXTEEN(%r23),%r21
+        ADDI    1,%r21,%r21
+        CMPIB,*= 0,%r21,$FINAL1     ; Keep looping if there is a carry.
+        STD     %r21,UN_SIXTEEN(%r23)
+        B       $L0
+        NOP
+
+; Here is the code that handles the difficult cases N=1, N=2, and N=3.
+; We do the usual trick -- branch out of the startup code at appropriate
+; points, and branch into the shutdown code.
+
+; On entry r26 holds N-1, so 0 means N = 1 and 2 means N = 3.
+$N_IS_SMALL
+        CMPIB,= 0,%r26,$N_IS_ONE
+        FSTD    %fr24,-96(%sp)      ; Cycle 10
+        FLDD    EIGHT(%r24),%fr28   ; Cycle 8
+        XMPYU   %fr9L,%fr28R,%fr31  ; Cycle 10
+        XMPYU   %fr9R,%fr28L,%fr30  ; Cycle 11
+        FSTD    %fr25,-80(%sp)
+        FSTD    %fr31,-64(%sp)      ; Cycle 12
+        XMPYU   %fr9R,%fr28R,%fr29  ; Cycle 13
+        FSTD    %fr27,-48(%sp)
+        XMPYU   %fr9L,%fr28L,%fr28  ; Cycle 1
+        CMPIB,= 2,%r26,$N_IS_THREE
+        FSTD    %fr30,-56(%sp)
+
+; N = 2
+        FSTD    %fr26,-88(%sp)      ; Cycle 2
+        FSTD    %fr28,-104(%sp)     ; Cycle 3
+        LDD     -96(%sp),%r3        ; Cycle 4
+        FSTD    %fr29,-72(%sp)
+        B       $JOIN4
+        ADD     %r0,%r0,%r22
+
+$N_IS_THREE
+        FLDD    SIXTEEN(%r24),%fr24
+        FSTD    %fr26,-88(%sp)      ; Cycle 2
+        XMPYU   %fr9R,%fr24R,%fr27  ; Cycle 3
+        FSTD    %fr28,-104(%sp)
+        XMPYU   %fr9R,%fr24L,%fr25  ; Cycle 4
+        LDD     -96(%sp),%r3
+        FSTD    %fr29,-72(%sp)
+        XMPYU   %fr9L,%fr24L,%fr26  ; Cycle 5
+        LDD     -64(%sp),%r19
+        LDD     -80(%sp),%r21
+        B       $JOIN3
+        ADD     %r0,%r0,%r22
+
+$N_IS_ONE
+        FSTD    %fr25,-80(%sp)
+        FSTD    %fr27,-48(%sp)
+        FSTD    %fr26,-88(%sp)      ; Cycle 2
+        B       $JOIN5
+        ADD     %r0,%r0,%r22
+
+; We came out of the unrolled loop with wrong parity.  Do one more
+; single cycle.  This is quite tricky, because of the way the
+; carry chains and SHRPD chains have been chopped up.
+
+$ONEMORE
+
+        FLDD    0(%r24),%fr24
+
+        LDO     SIXTEEN(%r23),%r23  ; Cycle 2
+        ADD,DC  %r0,%r20,%r20
+        FSTD    %fr26,-88(%sp)
+
+        XMPYU   %fr9R,%fr24R,%fr27  ; Cycle 3
+        FSTD    %fr28,-104(%sp)
+        LDD     UN_EIGHT(%r23),%r21
+        ADD     %r3,%r1,%r1
+
+        XMPYU   %fr9R,%fr24L,%fr25  ; Cycle 4
+        ADD,DC  %r21,%r4,%r28
+        STD     %r28,UN_EIGHT(%r23) ; moved from cycle 9
+        LDD     -96(%sp),%r3
+        FSTD    %fr29,-72(%sp)    
+
+        XMPYU   %fr9L,%fr24L,%fr26  ; Cycle 5
+        ADD,DC  %r20,%r31,%r22
+        LDD     -64(%sp),%r19
+        LDD     -80(%sp),%r21
+
+        STD     %r1,UN_SIXTEEN(%r23); Cycle 6
+; $JOIN3 is also entered from the N = 3 path with r22 cleared.
+$JOIN3
+        XMPYU   %fr9L,%fr24R,%fr24
+        LDD     -56(%sp),%r20
+        ADD     %r21,%r3,%r3
+
+        ADD,DC  %r20,%r19,%r19      ; Cycle 7
+        LDD     -88(%sp),%r4
+        SHRPD   %r3,%r0,32,%r21
+        LDD     -48(%sp),%r1
+
+        LDD     -104(%sp),%r31      ; Cycle 8
+        ADD,DC  %r0,%r0,%r20
+        SHRPD   %r19,%r3,32,%r3
+
+        LDD     -72(%sp),%r29       ; Cycle 9
+        SHRPD   %r20,%r19,32,%r20
+        ADD     %r21,%r1,%r1
+
+        ADD,DC  %r3,%r4,%r4         ; Cycle 10
+        FSTD    %fr24,-96(%sp)
+
+        ADD,DC  %r0,%r20,%r20       ; Cycle 11
+        LDD     0(%r23),%r3
+        FSTD    %fr25,-80(%sp)
+
+        ADD     %r22,%r1,%r1        ; Cycle 13
+        FSTD    %fr27,-48(%sp)
+
+; Shutdown code, stage 1-1/2.
+
+        ADD,DC  %r29,%r4,%r4        ; Cycle 1
+
+        LDO     SIXTEEN(%r23),%r23  ; Cycle 2
+        ADD,DC  %r0,%r20,%r20     
+        FSTD    %fr26,-88(%sp)
+
+        LDD     UN_EIGHT(%r23),%r21 ; Cycle 3
+        ADD     %r3,%r1,%r1
+
+        ADD,DC  %r21,%r4,%r28       ; Cycle 4
+        STD     %r28,UN_EIGHT(%r23) ; moved from cycle 9
+
+        ADD,DC  %r20,%r31,%r22      ; Cycle 5
+        STD     %r1,UN_SIXTEEN(%r23)
+; $JOIN5 is also entered from the N = 1 path with r22 cleared; only a
+; single doubleword product (slots -96, -80, -88, -48) remains to be added.
+$JOIN5
+        LDD     -96(%sp),%r3        ; moved from cycle 4
+        LDD     -80(%sp),%r21
+        ADD     %r21,%r3,%r3        ; Cycle 6
+        ADD,DC  %r0,%r0,%r19        ; Cycle 7
+        LDD     -88(%sp),%r4
+        SHRPD   %r3,%r0,32,%r21
+        LDD     -48(%sp),%r1
+        SHRPD   %r19,%r3,32,%r3     ; Cycle 8
+        ADD     %r21,%r1,%r1        ; Cycle 9
+        ADD,DC  %r3,%r4,%r4         ; Cycle 10
+        LDD     0(%r23),%r3         ; Cycle 11
+        ADD     %r22,%r1,%r1        ; Cycle 13
+
+; Shutdown code, stage 2-1/2.
+
+        ADD,DC  %r0,%r4,%r4         ; Cycle 1
+        LDO     SIXTEEN(%r23),%r23  ; Cycle 2
+        LDD     UN_EIGHT(%r23),%r21 ; Cycle 3
+        ADD     %r3,%r1,%r1
+        STD     %r1,UN_SIXTEEN(%r23)
+        ADD,DC  %r21,%r4,%r1
+        B       $JOIN1
+        LDO     EIGHT(%r23),%r23
+
+; exit
+
+; Epilogue: reload r4, then reload r3 and pop the 128-byte frame with
+; LDW,MB in the delay slot of the return branch.
+$L0
+        LDW     -124(%sp),%r4
+        BVE     (%r2)
+        .EXIT
+        LDW,MB  -128(%sp),%r3
+
+        .PROCEND
+
+; ***************************************************************
+;
+;                 add_diag_[little/big]
+;
+; ***************************************************************
+
+; The arguments are as follows:
+;     r2   return PC, of course
+;     r26 = arg1 =  length
+;     r25 = arg2 =  vector to square
+;     r24 = arg3 =  result vector
+
+; ---------------------------------------------------------------
+; add_diag_little / add_diag_big
+;
+; Adds the square of every input doubleword into the result vector.
+; Inputs are the registers listed above: r26 = length, r25 = vector to
+; square, r24 = result vector.  Nothing is returned in a register.
+;
+; For each input doubleword (loaded into fr7) three XMPYU products of
+; its 32-bit halves are formed: R*R, L*R and L*L.  The L*R cross term
+; is split by the two 31-bit SHRPD shifts into a low and a high part,
+; which amounts to doubling it and aligning it 32 bits up.  The
+; resulting 128-bit square is added into two consecutive doublewords of
+; the result, so r25 advances by one doubleword and r24 by two per
+; input element.  Products travel from floating-point to general
+; registers through scratch slots at -64(%sp) through -104(%sp).
+;
+; The main loop handles two input doublewords per iteration.  Length 1
+; leaves the startup code through $DIAG_N_IS_ONE, and $ONEMOREDIAG
+; handles the element left over when the loop exits with r26 equal
+; to 0.  A carry out of the last addition is rippled onward by the
+; $FDIAG1 / $FDIAG2 loops.
+; ---------------------------------------------------------------
+#ifdef LITTLE_WORDIAN
+add_diag_little
+#else
+add_diag_big
+#endif
+        .PROC
+        .CALLINFO FRAME=120,ENTRY_GR=4
+        .ENTRY
+; Prologue: r3 and r4 are saved here and reloaded at $Z0.  STW,MA also
+; advances sp by 128.
+        STW,MA  %r3,128(%sp)
+        STW     %r4,-124(%sp)
+
+; r26 becomes N-1 here.
+        ADDIB,< -1,%r26,$Z0         ; If N=0, exit immediately.
+        NOP
+
+; Startup code
+
+        FLDD    0(%r25),%fr7        ; Cycle 2 (alternate body)
+        XMPYU   %fr7R,%fr7R,%fr29   ; Cycle 4
+        XMPYU   %fr7L,%fr7R,%fr27   ; Cycle 5
+        XMPYU   %fr7L,%fr7L,%fr30
+        LDO     SIXTEEN(%r25),%r25  ; Cycle 6
+        FSTD    %fr29,-88(%sp)
+        FSTD    %fr27,-72(%sp)      ; Cycle 7
+        CMPIB,= 0,%r26,$DIAG_N_IS_ONE ; Cycle 1 (main body)
+        FSTD    %fr30,-96(%sp)
+        FLDD    UN_EIGHT(%r25),%fr7 ; Cycle 2
+        LDD     -88(%sp),%r22       ; Cycle 3
+        LDD     -72(%sp),%r31       ; Cycle 4
+        XMPYU   %fr7R,%fr7R,%fr28
+        XMPYU   %fr7L,%fr7R,%fr24   ; Cycle 5
+        XMPYU   %fr7L,%fr7L,%fr31
+        LDD     -96(%sp),%r20       ; Cycle 6
+        FSTD    %fr28,-80(%sp)
+        ADD     %r0,%r0,%r0         ; clear the carry bit
+        ADDIB,<= -2,%r26,$ENDDIAGLOOP ; Cycle 7
+        FSTD    %fr24,-64(%sp)
+
+; Here is the loop.  It is unrolled twice, modelled after the "alternate body" and then the "main body".
+
+$DIAGLOOP
+        SHRPD   %r31,%r0,31,%r3     ; Cycle 1 (alternate body)
+        LDO     SIXTEEN(%r25),%r25
+        LDD     0(%r24),%r1
+        FSTD    %fr31,-104(%sp)
+        SHRPD   %r0,%r31,31,%r4     ; Cycle 2
+        ADD,DC  %r22,%r3,%r3
+        FLDD    UN_SIXTEEN(%r25),%fr7   
+        ADD,DC  %r0,%r20,%r20       ; Cycle 3
+        ADD     %r1,%r3,%r3
+        XMPYU   %fr7R,%fr7R,%fr29   ; Cycle 4
+        LDD     -80(%sp),%r21
+        STD     %r3,0(%r24)
+        XMPYU   %fr7L,%fr7R,%fr27   ; Cycle 5
+        XMPYU   %fr7L,%fr7L,%fr30
+        LDD     -64(%sp),%r29       
+        LDD     EIGHT(%r24),%r1  
+        ADD,DC  %r4,%r20,%r20       ; Cycle 6
+        LDD     -104(%sp),%r19
+        FSTD    %fr29,-88(%sp)
+        ADD     %r20,%r1,%r1        ; Cycle 7
+        FSTD    %fr27,-72(%sp)
+        SHRPD   %r29,%r0,31,%r4     ; Cycle 1 (main body)
+        LDO     THIRTY_TWO(%r24),%r24
+        LDD     UN_SIXTEEN(%r24),%r28
+        FSTD    %fr30,-96(%sp)
+        SHRPD   %r0,%r29,31,%r3     ; Cycle 2
+        ADD,DC  %r21,%r4,%r4
+        FLDD    UN_EIGHT(%r25),%fr7
+        STD     %r1,UN_TWENTY_FOUR(%r24)
+        ADD,DC  %r0,%r19,%r19       ; Cycle 3
+        ADD     %r28,%r4,%r4
+        XMPYU   %fr7R,%fr7R,%fr28   ; Cycle 4
+        LDD     -88(%sp),%r22
+        STD     %r4,UN_SIXTEEN(%r24)
+        XMPYU   %fr7L,%fr7R,%fr24   ; Cycle 5
+        XMPYU   %fr7L,%fr7L,%fr31
+        LDD     -72(%sp),%r31
+        LDD     UN_EIGHT(%r24),%r28
+        ADD,DC  %r3,%r19,%r19       ; Cycle 6
+        LDD     -96(%sp),%r20
+        FSTD    %fr28,-80(%sp)
+        ADD     %r19,%r28,%r28      ; Cycle 7
+        FSTD    %fr24,-64(%sp)
+        ADDIB,> -2,%r26,$DIAGLOOP   ; Cycle 8
+        STD     %r28,UN_EIGHT(%r24)
+
+$ENDDIAGLOOP
+
+        ADD,DC  %r0,%r22,%r22    
+        CMPIB,= 0,%r26,$ONEMOREDIAG
+        SHRPD   %r31,%r0,31,%r3
+
+; Shutdown code, first stage.
+
+        FSTD    %fr31,-104(%sp)     ; Cycle 1 (alternate body)
+        LDD     0(%r24),%r28
+        SHRPD   %r0,%r31,31,%r4     ; Cycle 2
+        ADD     %r3,%r22,%r3
+        ADD,DC  %r0,%r20,%r20       ; Cycle 3
+        LDD     -80(%sp),%r21
+        ADD     %r3,%r28,%r3
+        LDD     -64(%sp),%r29       ; Cycle 4
+        STD     %r3,0(%r24)
+        LDD     EIGHT(%r24),%r1     ; Cycle 5
+        LDO     SIXTEEN(%r25),%r25  ; Cycle 6
+        LDD     -104(%sp),%r19
+        ADD,DC  %r4,%r20,%r20
+        ADD     %r20,%r1,%r1        ; Cycle 7
+        ADD,DC  %r0,%r21,%r21       ; Cycle 8
+        STD     %r1,EIGHT(%r24)
+
+; Shutdown code, second stage.
+
+        SHRPD   %r29,%r0,31,%r4     ; Cycle 1 (main body)
+        LDO     THIRTY_TWO(%r24),%r24
+        LDD     UN_SIXTEEN(%r24),%r1
+        SHRPD   %r0,%r29,31,%r3      ; Cycle 2
+        ADD     %r4,%r21,%r4
+        ADD,DC  %r0,%r19,%r19       ; Cycle 3
+        ADD     %r4,%r1,%r4
+        STD     %r4,UN_SIXTEEN(%r24); Cycle 4
+        LDD     UN_EIGHT(%r24),%r28 ; Cycle 5
+        ADD,DC  %r3,%r19,%r19       ; Cycle 6       
+        ADD     %r19,%r28,%r28      ; Cycle 7
+        ADD,DC  %r0,%r0,%r22        ; Cycle 8
+        CMPIB,*= 0,%r22,$Z0         ; if no overflow, exit
+        STD     %r28,UN_EIGHT(%r24)
+
+; Final carry propagation
+
+; Adds 1 to successive result words until one of them does not wrap to 0.
+; r26 is reused as scratch here; the length is no longer needed.
+$FDIAG2
+        LDO     EIGHT(%r24),%r24
+        LDD     UN_EIGHT(%r24),%r26
+        ADDI    1,%r26,%r26
+        CMPIB,*= 0,%r26,$FDIAG2     ; Keep looping if there is a carry.
+        STD     %r26,UN_EIGHT(%r24)
+
+        B   $Z0
+        NOP
+
+; Here is the code that handles the difficult case N=1.
+; We do the usual trick -- branch out of the startup code at appropriate
+; points, and branch into the shutdown code.
+
+$DIAG_N_IS_ONE
+
+        LDD     -88(%sp),%r22
+        LDD     -72(%sp),%r31
+        B       $JOINDIAG
+        LDD     -96(%sp),%r20
+
+; We came out of the unrolled loop with wrong parity.  Do one more
+; single cycle.  This is the "alternate body".  It will, of course,
+; give us opposite registers from the other case, so we need
+; completely different shutdown code.
+
+$ONEMOREDIAG
+        FSTD    %fr31,-104(%sp)     ; Cycle 1 (alternate body)
+        LDD     0(%r24),%r28
+        FLDD    0(%r25),%fr7        ; Cycle 2
+        SHRPD   %r0,%r31,31,%r4
+        ADD     %r3,%r22,%r3
+        ADD,DC  %r0,%r20,%r20       ; Cycle 3
+        LDD     -80(%sp),%r21
+        ADD     %r3,%r28,%r3
+        LDD     -64(%sp),%r29       ; Cycle 4
+        STD     %r3,0(%r24)
+        XMPYU   %fr7R,%fr7R,%fr29
+        LDD     EIGHT(%r24),%r1     ; Cycle 5
+        XMPYU   %fr7L,%fr7R,%fr27
+        XMPYU   %fr7L,%fr7L,%fr30
+        LDD     -104(%sp),%r19      ; Cycle 6
+        FSTD    %fr29,-88(%sp)
+        ADD,DC  %r4,%r20,%r20
+        FSTD    %fr27,-72(%sp)      ; Cycle 7
+        ADD     %r20,%r1,%r1
+        ADD,DC  %r0,%r21,%r21       ; Cycle 8
+        STD     %r1,EIGHT(%r24)
+
+; Shutdown code, first stage.
+
+        SHRPD   %r29,%r0,31,%r4     ; Cycle 1 (main body)
+        LDO     THIRTY_TWO(%r24),%r24
+        FSTD    %fr30,-96(%sp)
+        LDD     UN_SIXTEEN(%r24),%r1
+        SHRPD   %r0,%r29,31,%r3     ; Cycle 2
+        ADD     %r4,%r21,%r4
+        ADD,DC  %r0,%r19,%r19       ; Cycle 3
+        LDD     -88(%sp),%r22
+        ADD     %r4,%r1,%r4
+        LDD     -72(%sp),%r31       ; Cycle 4
+        STD     %r4,UN_SIXTEEN(%r24)
+        LDD     UN_EIGHT(%r24),%r28 ; Cycle 5
+        LDD     -96(%sp),%r20       ; Cycle 6
+        ADD,DC  %r3,%r19,%r19
+        ADD     %r19,%r28,%r28      ; Cycle 7
+        ADD,DC  %r0,%r22,%r22       ; Cycle 8
+        STD     %r28,UN_EIGHT(%r24)
+
+; Shutdown code, second stage.
+
+; $JOINDIAG is also entered from the N = 1 path, with r22, r31 and r20
+; freshly loaded from the scratch slots.
+$JOINDIAG
+        SHRPD   %r31,%r0,31,%r3     ; Cycle 1 (alternate body)
+        LDD     0(%r24),%r28        
+        SHRPD   %r0,%r31,31,%r4     ; Cycle 2
+        ADD     %r3,%r22,%r3
+        ADD,DC  %r0,%r20,%r20       ; Cycle 3
+        ADD     %r3,%r28,%r3
+        STD     %r3,0(%r24)         ; Cycle 4
+        LDD     EIGHT(%r24),%r1     ; Cycle 5
+        ADD,DC  %r4,%r20,%r20
+        ADD     %r20,%r1,%r1        ; Cycle 7
+        ADD,DC  %r0,%r0,%r21        ; Cycle 8
+        CMPIB,*= 0,%r21,$Z0         ; if no overflow, exit
+        STD     %r1,EIGHT(%r24)
+
+; Final carry propagation
+
+; Same ripple as $FDIAG2, but r24 has not been advanced past the last
+; pair here, so the word touched is one doubleword beyond r24.
+$FDIAG1
+        LDO     EIGHT(%r24),%r24
+        LDD     EIGHT(%r24),%r26
+        ADDI    1,%r26,%r26
+        CMPIB,*= 0,%r26,$FDIAG1    ; Keep looping if there is a carry.
+        STD     %r26,EIGHT(%r24)
+
+; Epilogue: reload r4, then reload r3 and pop the 128-byte frame with
+; LDW,MB in the delay slot of the return branch.
+$Z0
+        LDW     -124(%sp),%r4
+        BVE     (%r2)
+        .EXIT
+        LDW,MB  -128(%sp),%r3
+        .PROCEND
+;	.ALLOW
+
+; Export the two entry points defined above.  The exported names follow
+; the wordian selection made at the top of the file; maxpy takes four
+; general-register arguments and add_diag takes three.
+        .SPACE         $TEXT$
+        .SUBSPA        $CODE$
+#ifdef LITTLE_WORDIAN
+#ifdef __GNUC__
+; GNU-as (as of 2.19) does not support LONG_RETURN
+        .EXPORT        maxpy_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
+        .EXPORT        add_diag_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR
+#else
+        .EXPORT        maxpy_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,LONG_RETURN
+        .EXPORT        add_diag_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,LONG_RETURN
+#endif
+#else
+; NOTE(review): the big-wordian exports always use LONG_RETURN, with no
+; __GNUC__ variant like the one above -- confirm if GNU-as must be supported.
+        .EXPORT        maxpy_big,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,LONG_RETURN
+        .EXPORT        add_diag_big,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,LONG_RETURN
+#endif
+        .END
+
+
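+; How to use "maxpy_PA20_little" and "maxpy_PA20_big"
+; (in this file the entry points are exported under the shorter names
+; "maxpy_little" and "maxpy_big"; the text below uses the longer names)
+; 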
+; The routine "maxpy_PA20_little" or "maxpy_PA20_big"
+; performs a 64-bit x any-size multiply, and adds the
+; result to an area of memory.  That is, it performs
+; something like
+; 
+;      A B C D
+;    *       Z
+;   __________
+;    P Q R S T
+; 
+; and then adds the "PQRST" vector into an area of memory,
+; handling all carries.
+; 
+; Digression on nomenclature and endian-ness:
+; 
+; Each of the capital letters in the above represents a 64-bit
+; quantity.  That is, you could think of the discussion as
+; being in terms of radix-16-quintillion arithmetic.  The data
+; type being manipulated is "unsigned long long int".  This
+; requires the 64-bit extension of the HP-UX C compiler,
+; available at release 10.  You need these compiler flags to
+; enable these extensions:
+; 
+;       -Aa +e +DA2.0 +DS2.0
+; 
+; (The first specifies ANSI C, the second enables the
+; extensions, which are beyond ANSI C, and the third and
+; fourth tell the compiler to use whatever features of the
+; PA2.0 architecture it wishes, in order to make the code more
+; efficient.  Since the presence of the assembly code will
+; make the program unable to run on anything less than PA2.0,
+; you might as well gain the performance enhancements in the C
+; code as well.)
+; 
+; Questions of "endian-ness" often come up, usually in the
+; context of byte ordering in a word.  These routines have a
+; similar issue, that could be called "wordian-ness".
+; Independent of byte ordering (PA is always big-endian), one
+; can make two choices when representing extremely large
+; numbers as arrays of 64-bit doublewords in memory.
+; 
+; "Little-wordian" layout means that the least significant
+; word of a number is stored at the lowest address.
+; 
+;   MSW     LSW
+;    |       |
+;    V       V
+; 
+;    A B C D E
+; 
+;    ^     ^ ^
+;    |     | |____ address 0
+;    |     |
+;    |     |_______address 8
+;    |
+;    address 32
+; 
+; "Big-wordian" means that the most significant word is at the
+; lowest address.
+; 
+;   MSW     LSW
+;    |       |
+;    V       V
+; 
+;    A B C D E
+; 
+;    ^     ^ ^
+;    |     | |____ address 32
+;    |     |
+;    |     |_______address 24
+;    |
+;    address 0
+; 
+; When you compile the file, you must specify one or the other, with
+; a switch "-DLITTLE_WORDIAN" or "-DBIG_WORDIAN".
+; 
+;     Incidentally, you assemble this file as part of your
+;     project with the same C compiler as the rest of the program.
+;     My "makefile" for a superprecision arithmetic package has
+;     the following stuff:
+; 
+;     # definitions:
+;     CC = cc -Aa +e -z +DA2.0 +DS2.0 +w1
+;     CFLAGS = +O3
+;     LDFLAGS = -L /usr/lib -Wl,-aarchive
+; 
+;     # general build rule for ".s" files:
+;     .s.o:
+;             $(CC) $(CFLAGS) -c $< -DBIG_WORDIAN
+; 
+;     # Now any bind step that calls for pa20.o will assemble pa20.s
+; 
+; End of digression, back to arithmetic:
+; 
+; The way we multiply two huge numbers is, of course, to multiply
+; the "ABCD" vector by each of the "WXYZ" doublewords, adding
+; the result vectors with increasing offsets, the way we learned
+; in school, back before we all used calculators:
+; 
+;            A B C D
+;          * W X Y Z
+;         __________
+;          P Q R S T
+;        E F G H I
+;      M N O P Q
+;  + R S T U V
+;    _______________
+;    F I N A L S U M
+; 
+; So we call maxpy_PA20_big (in my case; my package is
+; big-wordian) repeatedly, giving the W, X, Y, and Z arguments
+; in turn as the "scalar", and giving the "ABCD" vector each
+; time.  We direct it to add its result into an area of memory
+; that we have cleared at the start.  We skew the exact
+; location into that area with each call.
+; 
+; The prototype for the function is
+; 
+; extern void maxpy_PA20_big(
+;    int length,        /* Number of doublewords in the multiplicand vector. */
+;    const long long int *scalaraddr,    /* Address to fetch the scalar. */
+;    const long long int *multiplicand,  /* The multiplicand vector. */
+;    long long int *result);             /* Where to accumulate the result. */
+; 
+; (You should place a copy of this prototype in an include file
+; or in your C file.)
+; 
+; Now, IN ALL CASES, the given address for the multiplicand or
+; the result is that of the LEAST SIGNIFICANT DOUBLEWORD.
+; That word is, of course, the word at which the routine
+; starts processing.  "maxpy_PA20_little" then increases the
+; addresses as it computes.  "maxpy_PA20_big" decreases them.
+; 
+; In our example above, "length" would be 4 in each case.
+; "multiplicand" would be the "ABCD" vector.  Specifically,
+; the address of the element "D".  "scalaraddr" would be the
+; address of "W", "X", "Y", or "Z" on the four calls that we
+; would make.  (The order doesn't matter, of course.)
+; "result" would be the appropriate address in the result
+; area.  When multiplying by "Z", that would be the least
+; significant word.  When multiplying by "Y", it would be the
+; next higher word (8 bytes higher if little-wordian; 8 bytes
+; lower if big-wordian), and so on.  The size of the result
+; area must be the sum of the sizes of the multiplicand
+; and multiplier vectors, and must be initialized to zero
+; before we start.
+; 
+; Whenever the routine adds its partial product into the result
+; vector, it follows carry chains as far as they need to go.
+; 
+; Here is the super-precision multiply routine that I use for
+; my package.  The package is big-wordian.  I have taken out
+; handling of exponents (it's a floating point package):
+; 
+; static void mul_PA20(
+;   int size,
+;   const long long int *arg1,
+;   const long long int *arg2,
+;   long long int *result)
+; {
+;    int i;
+; 
+;    for (i=0 ; i<2*size ; i++) result[i] = 0ULL;
+; 
+;    for (i=0 ; i<size ; i++) {
+;       maxpy_PA20_big(size, &arg2[i], &arg1[size-1], &result[size+i]);
+;    }
+; }
diff --git a/security/nss/lib/freebl/mpi/hppatch.adb b/security/nss/lib/freebl/mpi/hppatch.adb
new file mode 100644
index 000000000..6875032ef
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/hppatch.adb
@@ -0,0 +1,21 @@
+#/bin/sh
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# script to change the system id in an object file from PA-RISC 2.0 to 1.1
+
+adb -w $1 << EOF
+?m 0 -1 0
+0x0?X
+0x0?W (@0x0&~0x40000)|(~@0x0&0x40000)
+
+0?"change checksum"
+0x7c?X
+0x7c?W (@0x7c&~0x40000)|(~@0x7c&0x40000)
+$q
+EOF
+
+exit 0
+
diff --git a/security/nss/lib/freebl/mpi/logtab.h b/security/nss/lib/freebl/mpi/logtab.h
new file mode 100644
index 000000000..24cb13c5b
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/logtab.h
@@ -0,0 +1,28 @@
+/*
+ *  logtab.h
+ *
+ *  Arbitrary precision integer arithmetic library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * s_logv_2[b] holds log(2) / log(b), i.e. the logarithm of 2 in base b,
+ * for b = 2 .. 64.  Entries 0 and 1 have no meaningful value and are
+ * stored as 0.  The trailing comment on each row lists the four bases
+ * that the row covers; the final lone entry is base 64.
+ *
+ * The values match what the make-logtab script in this directory prints,
+ * and that script describes the table as an aid for estimating output
+ * sizes in various bases.
+ */
+const float s_logv_2[] = {
+    0.000000000f, 0.000000000f, 1.000000000f, 0.630929754f, /*  0  1  2  3 */
+    0.500000000f, 0.430676558f, 0.386852807f, 0.356207187f, /*  4  5  6  7 */
+    0.333333333f, 0.315464877f, 0.301029996f, 0.289064826f, /*  8  9 10 11 */
+    0.278942946f, 0.270238154f, 0.262649535f, 0.255958025f, /* 12 13 14 15 */
+    0.250000000f, 0.244650542f, 0.239812467f, 0.235408913f, /* 16 17 18 19 */
+    0.231378213f, 0.227670249f, 0.224243824f, 0.221064729f, /* 20 21 22 23 */
+    0.218104292f, 0.215338279f, 0.212746054f, 0.210309918f, /* 24 25 26 27 */
+    0.208014598f, 0.205846832f, 0.203795047f, 0.201849087f, /* 28 29 30 31 */
+    0.200000000f, 0.198239863f, 0.196561632f, 0.194959022f, /* 32 33 34 35 */
+    0.193426404f, 0.191958720f, 0.190551412f, 0.189200360f, /* 36 37 38 39 */
+    0.187901825f, 0.186652411f, 0.185449023f, 0.184288833f, /* 40 41 42 43 */
+    0.183169251f, 0.182087900f, 0.181042597f, 0.180031327f, /* 44 45 46 47 */
+    0.179052232f, 0.178103594f, 0.177183820f, 0.176291434f, /* 48 49 50 51 */
+    0.175425064f, 0.174583430f, 0.173765343f, 0.172969690f, /* 52 53 54 55 */
+    0.172195434f, 0.171441601f, 0.170707280f, 0.169991616f, /* 56 57 58 59 */
+    0.169293808f, 0.168613099f, 0.167948779f, 0.167300179f, /* 60 61 62 63 */
+    0.166666667f
+};
diff --git a/security/nss/lib/freebl/mpi/make-logtab b/security/nss/lib/freebl/mpi/make-logtab
new file mode 100755
index 000000000..fadba1c86
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/make-logtab
@@ -0,0 +1,29 @@
+#!/usr/bin/perl
+
+#
+# make-logtab
+#
+# Generate a table of logarithms of 2 in various bases, for use in
+# estimating the output sizes of various bases.
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+$ARRAYNAME = $ENV{'ARRAYNAME'} || "s_logv_2";   # C identifier of the emitted array
+$ARRAYTYPE = $ENV{'ARRAYTYPE'} || "float";      # C element type of the emitted array
+# Open the initializer and emit zero placeholders for indices 0 and 1.
+printf("const %s %s[] = {\n   %0.9ff, %0.9ff, ", 
+       $ARRAYTYPE, $ARRAYNAME, 0, 0);
+$brk = 2;                           # two entries already sit on the first row
+for($ix = 2; $ix < 64; $ix++) {
+    printf("%0.9ff, ", (log(2)/log($ix)));   # log base $ix of 2, followed by a literal 'f'
+    $brk = ($brk + 1) & 3;          # entries on the current row, modulo 4
+    if(!$brk) {                     # row complete: label it with its four indices
+	printf(" /* %2d %2d %2d %2d */\n   ",
+	       $ix - 3, $ix - 2, $ix - 1, $ix);
+    }
+}
+printf("%0.9ff\n};\n\n", (log(2)/log($ix)));   # last entry ($ix is 64 here), no trailing comma
+# NOTE(review): the literal 'f' suffix is printed even when ARRAYTYPE is not float.
+exit 0;
diff --git a/security/nss/lib/freebl/mpi/make-test-arrays b/security/nss/lib/freebl/mpi/make-test-arrays
new file mode 100755
index 000000000..ecdd55202
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/make-test-arrays
@@ -0,0 +1,98 @@
+#!/usr/bin/perl
+
+#
+# make-test-arrays
+#
+# Given a test-arrays file, which specifies the test suite names, the
+# names of the functions which perform those test suites, and
+# descriptive comments, this script generates C structures for the
+# mpi-test program.  The input consists of lines of the form:
+#
+# suite-name:function-name:comment
+#
+# The output is written to the standard output.  Blank lines are
+# ignored, and comments beginning with '#' are stripped.
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Read parameters from the environment, if available
+$NAMEVAR = $ENV{'NAMEVAR'} || "g_names";
+$COUNTVAR = $ENV{'COUNTVAR'} || "g_count";
+$FUNCVAR = $ENV{'FUNCVAR'} || "g_tests";
+$DESCVAR = $ENV{'DESCVAR'} || "g_descs";
+$FUNCLEN = 13;    # pad width for function names in the function table
+$NAMELEN = 18;    # pad width for suite names in the name table
+$DESCLEN = 45;    # pad width for descriptions in the name table comments
+
+#------------------------------------------------------------------------
+# Suck in input from the files on the command line, or standard input
+while(<>) {
+    chomp;
+    s/\#.*$//;             # strip comments
+    next if /^\s*$/;       # skip lines that are now blank
+    # Split into at most three fields, so that a ':' inside the free-form
+    # comment stays in the description instead of cutting it short.
+    ($suite, $func, $desc) = split(/:/, $_, 3);
+    $tmp = { "suite" => $suite,
+	     "func"  => $func,
+	     "desc"  => $desc };
+
+    push(@item, $tmp);
+}
+$count = scalar(@item);    # number of entries, counted before the pop below
+$last = pop(@item);        # printed separately, without a trailing comma
+
+#------------------------------------------------------------------------
+# Output the table of names
+print "/* Table mapping test suite names to index numbers */\n";
+printf("const int   %s = %d;\n", $COUNTVAR, $count);
+printf("const char *%s[] = {\n", $NAMEVAR);
+# Every entry except the last is followed by a comma; padding aligns the comments.
+foreach $elt (@item) {
+    printf("   \"%s\",%s/* %s%s */\n", $elt->{"suite"},
+	   " " x ($NAMELEN - length($elt->{"suite"})),
+	   $elt->{"desc"},
+	   " " x ($DESCLEN - length($elt->{"desc"})));
+}
+printf("   \"%s\" %s/* %s%s */\n", $last->{"suite"},   # last entry: a space replaces the comma
+       " " x ($NAMELEN - length($last->{"suite"})),
+       $last->{"desc"},
+       " " x ($DESCLEN - length($last->{"desc"})));
+print "};\n\n";
+
+#------------------------------------------------------------------------
+# Output the driver function prototypes
+print "/* Test function prototypes */\n";
+foreach $elt (@item, $last) {       # all entries, including the one popped earlier
+    printf("int  %s(void);\n", $elt->{"func"});
+}
+print "\n";
+
+#------------------------------------------------------------------------
+# Output the table of functions
+print "/* Table mapping index numbers to functions */\n";
+printf("int (*%s[])(void)  = {\n   ", $FUNCVAR);
+$brk = 0;                           # entries on the current output row, modulo 4
+# Four function names per output row, each padded to $FUNCLEN columns.
+foreach $elt (@item) {
+    print($elt->{"func"}, ", ", # name, comma, then the padding
+	  " " x ($FUNCLEN - length($elt->{"func"})));
+    $brk = ($brk + 1) & 3;
+    print "\n   " unless($brk);     # start a new row after every fourth entry
+}
+print $last->{"func"}, "\n};\n\n";  # last entry closes the initializer
+
+#------------------------------------------------------------------------
+# Output the table of descriptions
+print "/* Table mapping index numbers to descriptions */\n";
+printf("const char *%s[] = {\n", $DESCVAR);
+# NOTE(review): descriptions are emitted verbatim inside C string literals; no escaping is done.
+foreach $elt (@item) {
+    printf("   \"%s\",\n", $elt->{"desc"});
+}
+printf("   \"%s\"\n};\n\n", $last->{"desc"});
+
+exit 0;
+
diff --git a/security/nss/lib/freebl/mpi/mdxptest.c b/security/nss/lib/freebl/mpi/mdxptest.c
new file mode 100644
index 000000000..adbcfc3d1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mdxptest.c
@@ -0,0 +1,306 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "mpi.h"
+#include "mpi-priv.h"
+
+/* #define OLD_WAY 1  */
+
+/* This key is the 1024-bit test key used for speed testing of RSA private
+** key ops.
+*/
+
+#define CONST const
+
+static CONST unsigned char default_n[128] = {
+    0xc2, 0xae, 0x96, 0x89, 0xaf, 0xce, 0xd0, 0x7b, 0x3b, 0x35, 0xfd, 0x0f, 0xb1, 0xf4, 0x7a, 0xd1,
+    0x3c, 0x7d, 0xb5, 0x86, 0xf2, 0x68, 0x36, 0xc9, 0x97, 0xe6, 0x82, 0x94, 0x86, 0xaa, 0x05, 0x39,
+    0xec, 0x11, 0x51, 0xcc, 0x5c, 0xa1, 0x59, 0xba, 0x29, 0x18, 0xf3, 0x28, 0xf1, 0x9d, 0xe3, 0xae,
+    0x96, 0x5d, 0x6d, 0x87, 0x73, 0xf6, 0xf6, 0x1f, 0xd0, 0x2d, 0xfb, 0x2f, 0x7a, 0x13, 0x7f, 0xc8,
+    0x0c, 0x7a, 0xe9, 0x85, 0xfb, 0xce, 0x74, 0x86, 0xf8, 0xef, 0x2f, 0x85, 0x37, 0x73, 0x0f, 0x62,
+    0x4e, 0x93, 0x17, 0xb7, 0x7e, 0x84, 0x9a, 0x94, 0x11, 0x05, 0xca, 0x0d, 0x31, 0x4b, 0x2a, 0xc8,
+    0xdf, 0xfe, 0xe9, 0x0c, 0x13, 0xc7, 0xf2, 0xad, 0x19, 0x64, 0x28, 0x3c, 0xb5, 0x6a, 0xc8, 0x4b,
+    0x79, 0xea, 0x7c, 0xce, 0x75, 0x92, 0x45, 0x3e, 0xa3, 0x9d, 0x64, 0x6f, 0x04, 0x69, 0x19, 0x17
+};
+
+static CONST unsigned char default_d[128] = {
+    0x13, 0xcb, 0xbc, 0xf2, 0xf3, 0x35, 0x8c, 0x6d, 0x7b, 0x6f, 0xd9, 0xf3, 0xa6, 0x9c, 0xbd, 0x80,
+    0x59, 0x2e, 0x4f, 0x2f, 0x11, 0xa7, 0x17, 0x2b, 0x18, 0x8f, 0x0f, 0xe8, 0x1a, 0x69, 0x5f, 0x6e,
+    0xac, 0x5a, 0x76, 0x7e, 0xd9, 0x4c, 0x6e, 0xdb, 0x47, 0x22, 0x8a, 0x57, 0x37, 0x7a, 0x5e, 0x94,
+    0x7a, 0x25, 0xb5, 0xe5, 0x78, 0x1d, 0x3c, 0x99, 0xaf, 0x89, 0x7d, 0x69, 0x2e, 0x78, 0x9d, 0x1d,
+    0x84, 0xc8, 0xc1, 0xd7, 0x1a, 0xb2, 0x6d, 0x2d, 0x8a, 0xd9, 0xab, 0x6b, 0xce, 0xae, 0xb0, 0xa0,
+    0x58, 0x55, 0xad, 0x5c, 0x40, 0x8a, 0xd6, 0x96, 0x08, 0x8a, 0xe8, 0x63, 0xe6, 0x3d, 0x6c, 0x20,
+    0x49, 0xc7, 0xaf, 0x0f, 0x25, 0x73, 0xd3, 0x69, 0x43, 0x3b, 0xf2, 0x32, 0xf8, 0x3d, 0x5e, 0xee,
+    0x7a, 0xca, 0xd6, 0x94, 0x55, 0xe5, 0xbd, 0x25, 0x34, 0x8d, 0x63, 0x40, 0xb5, 0x8a, 0xc3, 0x01
+};
+
+#define DEFAULT_ITERS 50
+
+typedef clock_t timetype; /* processor-time ticks, as returned by clock() */
+#define gettime(x) (*(x) = clock())  /* store the current tick count through pointer x */
+#define subtime(a, b) ((a) -= (b))   /* a becomes the tick difference a - b */
+#define msec(x) ((clock_t)((double)(x)*1000.0 / CLOCKS_PER_SEC)) /* ticks -> milliseconds */
+#define sec(x) ((x) / CLOCKS_PER_SEC) /* ticks -> whole seconds */
+
+struct TimingContextStr {
+    timetype start;    /* clock() reading stored by TimingBegin */
+    timetype end;      /* clock() reading stored by TimingEnd */
+    timetype interval; /* end - start, in clock ticks (set by TimingEnd) */
+    /* interval broken into display fields by timingUpdate: */
+    int minutes;
+    int seconds;   /* whole seconds left over after the minutes */
+    int millisecs; /* milliseconds left over after the whole seconds */
+};
+
+typedef struct TimingContextStr TimingContext;
+
+TimingContext *
+CreateTimingContext(void)
+{
+    return malloc(sizeof(TimingContext)); /* fields are left unset; NULL when malloc fails */
+}
+
+void
+DestroyTimingContext(TimingContext *ctx)
+{
+    free(ctx); /* releases a context obtained from CreateTimingContext; NULL is a no-op */
+}
+
+void
+TimingBegin(TimingContext *ctx)
+{
+    ctx->start = clock(); /* stamp the start of the measured region */
+}
+
+static void
+timingUpdate(TimingContext *ctx)
+{
+    /* Break ctx->interval (clock ticks) into minutes, seconds and millisecs. */
+    const int wholeSecs = sec(ctx->interval);
+    ctx->millisecs = msec(ctx->interval) % 1000;
+    ctx->minutes = wholeSecs / 60;
+    ctx->seconds = wholeSecs % 60;
+}
+
+void
+TimingEnd(TimingContext *ctx)
+{
+    /* Stamp the end, derive the elapsed ticks, then refresh the display fields. */
+    ctx->end = clock();
+    ctx->interval = ctx->end - ctx->start;
+    timingUpdate(ctx);
+}
+
+char *
+TimingGenerateString(TimingContext *ctx)
+{
+    /* Static storage: the returned text is overwritten by the next call. */
+    static char text[4096];
+    sprintf(text, "%d minutes, %d.%03d seconds",
+            ctx->minutes, ctx->seconds, ctx->millisecs);
+    return text;
+}
+
+static void
+dumpBytes(unsigned char *b, int l)
+{
+    int pos;
+    /* Hex dump, 16 bytes per tab-led row, then a blank line; silent if l <= 0. */
+    for (pos = 0; pos < l;) {
+        if ((pos & 15) == 0)
+            printf("\t");
+        printf(" %02x", b[pos]);
+        if ((++pos & 15) == 0)
+            printf("\n");
+    }
+    if (l > 0 && (pos & 15) != 0)
+        printf("\n");
+    if (l > 0)
+        printf("\n");
+}
+
+static mp_err
+testNewFuncs(const unsigned char *modulusBytes, int modulus_len)
+{
+    mp_err mperr = MP_OKAY;
+    mp_int modulus;
+    unsigned char buf[512]; /* the largest request below is modulus_len + 4 bytes */
+    /* NOTE(review): each status overwrites the previous one; only the last call's value is returned. */
+    mperr = mp_init(&modulus);
+    mperr = mp_read_unsigned_octets(&modulus, modulusBytes, modulus_len);
+    mperr = mp_to_fixlen_octets(&modulus, buf, modulus_len);
+    mperr = mp_to_fixlen_octets(&modulus, buf, modulus_len + 1);
+    mperr = mp_to_fixlen_octets(&modulus, buf, modulus_len + 4);
+    mperr = mp_to_unsigned_octets(&modulus, buf, modulus_len);
+    mperr = mp_to_signed_octets(&modulus, buf, modulus_len + 1);
+    mp_clear(&modulus);
+    return mperr;
+}
+
+int
+testModExp(const unsigned char *modulusBytes,
+           const unsigned int expo,
+           const unsigned char *input,
+           unsigned char *output,
+           int modulus_len)
+{
+    mp_err mperr = MP_OKAY;
+    mp_int modulus;
+    mp_int base;
+    mp_int exponent;
+    mp_int result;
+    /* Raises input to the small exponent expo modulo the modulus; returns 0 (MP_OKAY) on success. */
+    mperr = mp_init(&modulus);
+    mperr += mp_init(&base);
+    mperr += mp_init(&exponent);
+    mperr += mp_init(&result);
+    /* we initialize all mp_ints unconditionally, even if some fail.
+    ** This guarantees that the DIGITS pointer is valid (even if null).
+    ** So, mp_clear will do the right thing below.
+    */
+    if (mperr == MP_OKAY) {
+        mperr = mp_read_unsigned_octets(&modulus, /* the last modulus_len bytes of a sizeof default_n byte array */
+                                        modulusBytes + (sizeof default_n - modulus_len), modulus_len);
+        mperr += mp_read_unsigned_octets(&base, input, modulus_len);
+        mp_set(&exponent, expo);
+        if (mperr == MP_OKAY) {
+#if OLD_WAY
+            mperr = s_mp_exptmod(&base, &exponent, &modulus, &result);
+#else
+            mperr = mp_exptmod(&base, &exponent, &modulus, &result);
+#endif
+            if (mperr == MP_OKAY) {
+                mperr = mp_to_fixlen_octets(&result, output, modulus_len); /* output is written only on this path */
+            }
+        }
+    }
+    mp_clear(&base);
+    mp_clear(&result);
+
+    mp_clear(&modulus);
+    mp_clear(&exponent);
+
+    return (int)mperr;
+}
+
+int
+doModExp(const unsigned char *modulusBytes,
+         const unsigned char *exponentBytes,
+         const unsigned char *input,
+         unsigned char *output,
+         int modulus_len)
+{
+    mp_err mperr = MP_OKAY;
+    mp_int modulus;
+    mp_int base;
+    mp_int exponent;
+    mp_int result;
+    /* Raises input to the multi-byte exponent modulo the modulus; returns 0 (MP_OKAY) on success. */
+    mperr = mp_init(&modulus);
+    mperr += mp_init(&base);
+    mperr += mp_init(&exponent);
+    mperr += mp_init(&result);
+    /* we initialize all mp_ints unconditionally, even if some fail.
+    ** This guarantees that the DIGITS pointer is valid (even if null).
+    ** So, mp_clear will do the right thing below.
+    */
+    if (mperr == MP_OKAY) {
+        mperr = mp_read_unsigned_octets(&modulus, /* the last modulus_len bytes of a sizeof default_n byte array */
+                                        modulusBytes + (sizeof default_n - modulus_len), modulus_len);
+        mperr += mp_read_unsigned_octets(&exponent, exponentBytes, modulus_len); /* the first modulus_len bytes */
+        mperr += mp_read_unsigned_octets(&base, input, modulus_len);
+        if (mperr == MP_OKAY) {
+#if OLD_WAY
+            mperr = s_mp_exptmod(&base, &exponent, &modulus, &result);
+#else
+            mperr = mp_exptmod(&base, &exponent, &modulus, &result);
+#endif
+            if (mperr == MP_OKAY) {
+                mperr = mp_to_fixlen_octets(&result, output, modulus_len); /* output is written only on this path */
+            }
+        }
+    }
+    mp_clear(&base);
+    mp_clear(&result);
+
+    mp_clear(&modulus);
+    mp_clear(&exponent);
+
+    return (int)mperr;
+}
+
+/*
+** Usage: mdxptest [iterations [modulus_len]].  Runs one checked modexp with
+** the built-in key, then times `iterations` more; iterations < 2 also runs
+** the conversion helpers first.  Exits 1 on bad arguments or a failed modexp.
+*/
+int
+main(int argc, char **argv)
+{
+    TimingContext *timeCtx;
+    char *progName;
+    long iters = DEFAULT_ITERS;
+    unsigned int modulus_len = sizeof default_n;
+    unsigned int k;
+    long i;
+    int rv;
+    unsigned char buf[1024], buf2[1024];
+
+    progName = strrchr(argv[0], '/');
+    if (!progName)
+        progName = strrchr(argv[0], '\\');
+    progName = progName ? progName + 1 : argv[0];
+
+    if (argc >= 2)
+        iters = atol(argv[1]);
+    if (argc >= 3)
+        modulus_len = atol(argv[2]);
+    /* Lengths index the fixed-size key arrays; a negative count never ends. */
+    if (iters < 0 || modulus_len < 1 || modulus_len > sizeof default_n) {
+        fprintf(stderr, "Usage: %s [iterations [modulus_len]]\n", progName);
+        exit(1);
+    }
+    /* no library init function !? */
+    memset(buf, 0x41, sizeof buf);
+
+    if (iters < 2) {
+        for (k = 0; k < 4 && k < modulus_len; k++) /* lengths stay >= 1 */
+            testNewFuncs(default_n + k, modulus_len - k);
+        for (k = 0; k < 4; k++) { /* exponents 0..3 */
+            rv = testModExp(default_n, k, buf, buf2, modulus_len);
+            if (rv != 0)
+                fprintf(stderr, "Error in modexp operation:\n");
+            else
+                dumpBytes((unsigned char *)buf2, modulus_len);
+        }
+    }
+    rv = doModExp(default_n, default_d, buf, buf2, modulus_len);
+    if (rv != 0) {
+        fprintf(stderr, "Error in modexp operation:\n");
+        exit(1);
+    }
+    dumpBytes((unsigned char *)buf2, modulus_len);
+
+    timeCtx = CreateTimingContext();
+    if (!timeCtx) {
+        fprintf(stderr, "Out of memory\n");
+        exit(1);
+    }
+    TimingBegin(timeCtx);
+    for (i = iters; i > 0; i--) {
+        rv = doModExp(default_n, default_d, buf, buf2, modulus_len);
+        if (rv != 0) {
+            fprintf(stderr, "Error in modexp operation\n");
+            exit(1);
+        }
+    }
+    TimingEnd(timeCtx);
+    printf("%ld iterations in %s\n", iters, TimingGenerateString(timeCtx));
+    DestroyTimingContext(timeCtx);
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/montmulf.c b/security/nss/lib/freebl/mpi/montmulf.c
new file mode 100644
index 000000000..ce8fbc31d
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulf.c
@@ -0,0 +1,286 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef SOLARIS
+#define RF_INLINE_MACROS 1
+#endif
+
+static const double TwoTo16 = 65536.0;
+static const double TwoToMinus16 = 1.0 / 65536.0;
+static const double Zero = 0.0;
+static const double TwoTo32 = 65536.0 * 65536.0;
+static const double TwoToMinus32 = 1.0 / (65536.0 * 65536.0);
+
+#ifdef RF_INLINE_MACROS
+
+double upper32(double);
+double lower32(double, double);
+double mod(double, double, double);
+
+void i16_to_d16_and_d32x4(const double * /*1/(2^16)*/,
+                          const double * /* 2^16*/,
+                          const double * /* 0 */,
+                          double * /*result16*/,
+                          double * /* result32 */,
+                          float * /*source - should be unsigned int* converted to float* */);
+
+#else
+#ifdef MP_USE_FLOOR
+#include <math.h>
+#else
+#define floor(d) ((double)((unsigned long long)(d)))
+#endif
+
+static double
+upper32(double x)
+{
+    return floor(TwoToMinus32 * x); /* x scaled down by 2^32, fraction dropped by floor */
+}
+
+static double
+lower32(double x, double y)
+{
+    return x - floor(TwoToMinus32 * x) * TwoTo32; /* x minus its whole multiples of 2^32; y is not used */
+}
+
+static double
+mod(double x, double oneoverm, double m)
+{
+    return x - floor(oneoverm * x) * m; /* x minus its whole multiples of m; oneoverm is passed as 1/m */
+}
+
+#endif
+
+static void
+cleanup(double *dt, int from, int tlen)
+{
+    /*
+     * Carry normalisation over the element pairs dt[2k], dt[2k+1] for
+     * k = from .. tlen-1.  Every element is cut down to its lower32() part,
+     * and the upper32() part that was cut off is added into the element two
+     * slots further up, so the even- and odd-indexed elements form two
+     * independent carry chains.
+     *
+     * The carries that leave the last pair are discarded; nothing beyond
+     * index 2*tlen-1 is touched.
+     */
+    double carryEven = Zero;
+    double carryOdd = Zero;
+    int pair;
+
+    for (pair = from; pair < tlen; pair++) {
+        double *slot = dt + 2 * pair;
+        const double even = slot[0];
+        const double odd = slot[1];
+
+        slot[0] = lower32(even, Zero) + carryEven;
+        slot[1] = lower32(odd, Zero) + carryOdd;
+        carryEven = upper32(even);
+        carryOdd = upper32(odd);
+    }
+    /* carryEven and carryOdd are intentionally not stored anywhere */
+}
+
+void
+conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
+{
+    /* Packs the pairs d16[2k] (weight 1) and d16[2k+1] (weight 2^16) into
+     * 32-bit words i32[k], carrying any excess into the next word.
+     * The tmp argument is not used. */
+    long long word = 0;
+    long long lo = (long long)d16[0];
+    long long hi = (long long)d16[1];
+    int k;
+    for (k = 0; k < ilen - 1; k++) {
+        const long long nextLo = (long long)d16[2 * k + 2];
+        const long long nextHi = (long long)d16[2 * k + 3];
+        long long carry;
+        word += (unsigned int)lo;
+        word += (hi & 0xffff) << 16;
+        carry = (lo >> 32) + ((hi >> 16) + (word >> 32));
+        i32[k] = (unsigned int)word;
+        word = carry;
+        lo = nextLo;
+        hi = nextHi;
+    }
+    word += (unsigned int)lo;
+    word += (hi & 0xffff) << 16;
+    i32[k] = (unsigned int)word;
+}
+
+void
+conv_i32_to_d32(double *d32, unsigned int *i32, int len)
+{
+    int k; /* one double is written per 32-bit input word */
+
+#pragma pipeloop(0)
+    for (k = 0; k < len; k++)
+        d32[k] = (double)i32[k];
+}
+
+void
+conv_i32_to_d16(double *d16, unsigned int *i32, int len)
+{
+    int k;
+
+#pragma pipeloop(0)
+    for (k = 0; k < len; k++) {
+        const unsigned int word = i32[k];
+        double *pair = &d16[2 * k]; /* low 16 bits first, then high 16 bits */
+        pair[0] = (double)(word & 0xffff);
+        pair[1] = (double)(word >> 16);
+    }
+}
+
+void
+conv_i32_to_d32_and_d16(double *d32, double *d16,
+                        unsigned int *i32, int len)
+{
+    int k = 0; /* next input word to convert */
+
+#pragma pipeloop(0)
+#ifdef RF_INLINE_MACROS
+    for (; k < len - 3; k += 4) { /* four words per inline-template call */
+        i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
+                             d16 + 2 * k, d32 + k, (float *)(i32 + k));
+    }
+#endif
+    for (; k < len; k++) { /* every word, or the tail left by the loop above */
+        const unsigned int word = i32[k];
+
+        d32[k] = (double)word;
+        d16[2 * k] = (double)(word & 0xffff);
+        d16[2 * k + 1] = (double)(word >> 16);
+    }
+}
+
+void
+adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
+{
+    /*
+     * Final conditional subtraction: when the (len+1)-word value in i32 is
+     * >= the len-word value in nint, nint is subtracted from the low len words.
+     */
+    long long borrow = 0;
+    int k = -1;
+
+    if (i32[len] == 0) {
+        /* Find the most significant word in which the two values differ. */
+        for (k = len - 1; k >= 0 && i32[k] == nint[k]; k--)
+            ;
+    }
+    if (k >= 0 && i32[k] <= nint[k])
+        return; /* i32 is below nint: nothing to subtract */
+    for (k = 0; k < len; k++) {
+        borrow = borrow + (unsigned long long)(i32[k]) - (unsigned long long)(nint[k]);
+        i32[k] = (unsigned int)borrow;
+        borrow = borrow >> 32;
+    }
+}
+
+/*
+** the lengths of the input arrays should be at least the following:
+** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
+** all of them should be different from one another
+** (dt is scratch space; result receives nlen+1 words, reduced by nint when needed)
+*/
+void
+mont_mulf_noconv(unsigned int *result,
+                 double *dm1, double *dm2, double *dt,
+                 double *dn, unsigned int *nint,
+                 int nlen, double dn0)
+{
+    int i, j, jj;
+    int tmp;
+    double digit, m2j, nextm2j, a, b;
+    double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
+    /* NOTE(review): tmp, nextm2j and dptmp are declared but never used. */
+    pdm1 = &(dm1[0]);
+    pdm2 = &(dm2[0]);
+    pdn = &(dn[0]);
+    pdm2[2 * nlen] = Zero; /* extra element: the j loops read pdm2[j + 1] up to index 2 * nlen */
+
+    if (nlen != 16) {
+        for (i = 0; i < 4 * nlen + 2; i++)
+            dt[i] = Zero;
+        /* General path: dt is cleared above, then the products are accumulated into it. */
+        a = dt[0] = pdm1[0] * pdm2[0];
+        digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
+
+        pdtj = &(dt[0]);
+        for (j = jj = 0; j < 2 * nlen; j++, jj++, pdtj++) { /* pdtj slides one element per step */
+            m2j = pdm2[j];
+            a = pdtj[0] + pdn[0] * digit;
+            b = pdtj[1] + pdm1[0] * pdm2[j + 1] + a * TwoToMinus16;
+            pdtj[1] = b;
+
+#pragma pipeloop(0)
+            for (i = 1; i < nlen; i++) {
+                pdtj[2 * i] += pdm1[i] * m2j + pdn[i] * digit;
+            }
+            if ((jj == 30)) { /* periodic carry pass over the not yet consumed part of dt */
+                cleanup(dt, j / 2 + 1, 2 * nlen + 1);
+                jj = 0;
+            }
+            /* next digit: the low 32 bits of b times dn0, reduced modulo 2^16 */
+            digit = mod(lower32(b, Zero) * dn0, TwoToMinus16, TwoTo16);
+        }
+    } else {
+        a = dt[0] = pdm1[0] * pdm2[0];
+        /* Special case nlen == 16: dt[1..65] cleared by hand, inner loop unrolled. */
+        dt[65] = dt[64] = dt[63] = dt[62] = dt[61] = dt[60] =
+            dt[59] = dt[58] = dt[57] = dt[56] = dt[55] = dt[54] =
+                dt[53] = dt[52] = dt[51] = dt[50] = dt[49] = dt[48] =
+                    dt[47] = dt[46] = dt[45] = dt[44] = dt[43] = dt[42] =
+                        dt[41] = dt[40] = dt[39] = dt[38] = dt[37] = dt[36] =
+                            dt[35] = dt[34] = dt[33] = dt[32] = dt[31] = dt[30] =
+                                dt[29] = dt[28] = dt[27] = dt[26] = dt[25] = dt[24] =
+                                    dt[23] = dt[22] = dt[21] = dt[20] = dt[19] = dt[18] =
+                                        dt[17] = dt[16] = dt[15] = dt[14] = dt[13] = dt[12] =
+                                            dt[11] = dt[10] = dt[9] = dt[8] = dt[7] = dt[6] =
+                                                dt[5] = dt[4] = dt[3] = dt[2] = dt[1] = Zero;
+
+        pdn_0 = pdn[0];
+        pdm1_0 = pdm1[0];
+
+        digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
+        pdtj = &(dt[0]);
+
+        for (j = 0; j < 32; j++, pdtj++) {
+            /* same step as in the general path, with i = 1..15 written out */
+            m2j = pdm2[j];
+            a = pdtj[0] + pdn_0 * digit;
+            b = pdtj[1] + pdm1_0 * pdm2[j + 1] + a * TwoToMinus16;
+            pdtj[1] = b;
+
+            /**** this loop will be fully unrolled:
+             for(i=1;i<16;i++)
+               {
+                 pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+               }
+             *************************************/
+            pdtj[2] += pdm1[1] * m2j + pdn[1] * digit;
+            pdtj[4] += pdm1[2] * m2j + pdn[2] * digit;
+            pdtj[6] += pdm1[3] * m2j + pdn[3] * digit;
+            pdtj[8] += pdm1[4] * m2j + pdn[4] * digit;
+            pdtj[10] += pdm1[5] * m2j + pdn[5] * digit;
+            pdtj[12] += pdm1[6] * m2j + pdn[6] * digit;
+            pdtj[14] += pdm1[7] * m2j + pdn[7] * digit;
+            pdtj[16] += pdm1[8] * m2j + pdn[8] * digit;
+            pdtj[18] += pdm1[9] * m2j + pdn[9] * digit;
+            pdtj[20] += pdm1[10] * m2j + pdn[10] * digit;
+            pdtj[22] += pdm1[11] * m2j + pdn[11] * digit;
+            pdtj[24] += pdm1[12] * m2j + pdn[12] * digit;
+            pdtj[26] += pdm1[13] * m2j + pdn[13] * digit;
+            pdtj[28] += pdm1[14] * m2j + pdn[14] * digit;
+            pdtj[30] += pdm1[15] * m2j + pdn[15] * digit;
+            /* no need for cleanup, cannot overflow */
+            digit = mod(lower32(b, Zero) * dn0, TwoToMinus16, TwoTo16);
+        }
+    }
+    /* Pack the elements from dt[2 * nlen] onwards into nlen + 1 result words. */
+    conv_d16_to_i32(result, dt + 2 * nlen, (long long *)dt, nlen + 1);
+    /* Subtract nint once if the packed value is still >= nint. */
+    adjust_montf_result(result, nint, nlen);
+}
diff --git a/security/nss/lib/freebl/mpi/montmulf.h b/security/nss/lib/freebl/mpi/montmulf.h
new file mode 100644
index 000000000..69bed4acb
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulf.h
@@ -0,0 +1,65 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*  The functions that are to be called from outside of the .s file have the
+ *  following interfaces and array size requirements:
+ */
+
+void conv_i32_to_d32(double *d32, unsigned int *i32, int len);
+
+/*  Converts an array of int's to an array of doubles, so that each double
+ *  corresponds to an int.  len is the number of items converted.
+ *  Does not allocate the output array.
+ *  The pointers d32 and i32 should point to arrays of size at least  len
+ *  (doubles and unsigned ints, respectively)
+ */
+
+void conv_i32_to_d16(double *d16, unsigned int *i32, int len);
+
+/*  Converts an array of int's to an array of doubles so that each element
+ *  of the int array is converted to a pair of doubles, the first one
+ *  corresponding to the lower (least significant) 16 bits of the int and
+ *  the second one corresponding to the upper (most significant) 16 bits of
+ *  the 32-bit int. len is the number of ints converted.
+ *  Does not allocate the output array.
+ *  The pointer d16 should point to an array of doubles of size at least
+ *  2*len and i32 should point to an array of ints of size at least  len
+ */
+
+void conv_i32_to_d32_and_d16(double *d32, double *d16,
+                             unsigned int *i32, int len);
+
+/*  Does the above two conversions together, it is much faster than doing
+ *  both of those in succession
+ */
+
+void mont_mulf_noconv(unsigned int *result,
+                      double *dm1, double *dm2, double *dt,
+                      double *dn, unsigned int *nint,
+                      int nlen, double dn0);
+
+/*  Does the Montgomery multiplication of the numbers stored in the arrays
+ *  pointed to by dm1 and dm2, writing the result to the array pointed to by
+ *  result. It uses the array pointed to by dt as a temporary work area.
+ *  nint should point to the modulus in the array-of-integers representation,
+ *  dn should point to its array-of-doubles as obtained as a result of the
+ *  function call   conv_i32_to_d32(dn, nint, nlen);
+ *  nlen is the length of the array containing the modulus.
+ *  The representation used for dm1 is the one that is a result of the function
+ *  call   conv_i32_to_d32(dm1, m1, nlen), the representation for dm2 is the
+ *  result of the function call   conv_i32_to_d16(dm2, m2, nlen).
+ *  Note that m1 and m2 should both be of length nlen, so they should be
+ *  padded with 0's if necessary before the conversion. The result comes in
+ *  this form (int representation, padded with 0's).
+ *  dn0 is the value of the 16 least significant bits of n0'.
+ *  The function does not allocate memory for any of the arrays, so the
+ *  pointers should point to arrays with the following minimal sizes:
+ *  result - nlen+1
+ *  dm1    - nlen
+ *  dm2    - 2*nlen+1  ( the +1 is necessary for technical reasons )
+ *  dt     - 4*nlen+2
+ *  dn     - nlen
+ *  nint   - nlen
+ *  No two arrays should point to overlapping areas of memory.
+ */
diff --git a/security/nss/lib/freebl/mpi/montmulf.il b/security/nss/lib/freebl/mpi/montmulf.il
new file mode 100644
index 000000000..4952d0fb8
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulf.il
@@ -0,0 +1,108 @@
+!  
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+!
+! double upper32(double /*frs1*/);
+!
+        .inline upper32,8
+        std     %o0,[%sp+0x48]		! spill the double argument from %o0/%o1 ...
+        ldd     [%sp+0x48],%f10		! ... and reload it into %f10/%f11
+
+	fdtox	%f10,%f10		! double -> 64-bit integer in %f10/%f11
+	fitod	%f10,%f0		! convert only the word in %f10 (the upper 32 bits) back to double
+        .end
+
+!
+! double lower32(double /*frs1*/, double /* Zero */);
+!
+        .inline lower32,8
+        std     %o0,[%sp+0x48]		! first argument (x) -> %f10/%f11 via the stack
+        ldd     [%sp+0x48],%f10
+        std     %o2,[%sp+0x48]		! second argument (documented above as Zero) -> %f12/%f13
+        ldd     [%sp+0x48],%f12
+
+	fdtox	%f10,%f10		! x -> 64-bit integer in %f10/%f11
+	fmovs	%f12,%f10		! replace its upper word with the upper word of the second argument
+	fxtod	%f10,%f0		! convert what is left (the lower 32 bits when that word is 0) to double
+        .end
+
+!
+! double mod(double /*x*/, double /*1/m*/, double /*m*/);
+!
+        .inline mod,12
+        std     %o0,[%sp+0x48]		! x   -> %f2
+        ldd     [%sp+0x48],%f2
+        std     %o2,[%sp+0x48]		! 1/m -> %f4
+        ldd     [%sp+0x48],%f4
+        std     %o4,[%sp+0x48]		! m   -> %f6
+        ldd     [%sp+0x48],%f6
+
+	fmuld	%f2,%f4,%f4		! x * (1/m)
+	fdtox	%f4,%f4			! convert to a 64-bit integer ...
+	fxtod	%f4,%f4			! ... and back, leaving a whole number
+	fmuld	%f4,%f6,%f4		! times m
+	fsubd	%f2,%f4,%f0		! result = x - m * (whole part of x/m)
+        .end
+
+
+!
+! void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
+!			    double * /* 0 */,
+!			    double * /*result16*/, double * /* result32 */,
+!			    float *  /*source - should be unsigned int*
+!		            	       converted to float* */);
+!
+        .inline i16_to_d16_and_d32x4,24
+        ldd     [%o0],%f2  ! 1/(2^16)
+        ldd     [%o1],%f4  ! 2^16
+	ldd	[%o2],%f22		! 0.0, used to clear the upper word of each pair below
+
+	fmovd	%f22,%f6		! clear %f6/%f7 ...
+	ld	[%o5],%f7		! ... then load source[0] into its lower word
+	fmovd	%f22,%f10
+	ld	[%o5+4],%f11		! source[1]
+	fmovd	%f22,%f14
+	ld	[%o5+8],%f15		! source[2]
+	fmovd	%f22,%f18
+	ld	[%o5+12],%f19		! source[3]
+	fxtod	%f6,%f6			! 64-bit integer -> double
+	std	%f6,[%o4]		! result32[0]
+	fxtod	%f10,%f10
+	std	%f10,[%o4+8]		! result32[1]
+	fxtod	%f14,%f14
+	std	%f14,[%o4+16]		! result32[2]
+	fxtod	%f18,%f18
+	std	%f18,[%o4+24]		! result32[3]
+	fmuld	%f2,%f6,%f8		! scale each value by 1/(2^16)
+	fmuld	%f2,%f10,%f12
+	fmuld	%f2,%f14,%f16
+	fmuld	%f2,%f18,%f20
+	fdtox	%f8,%f8			! to integer: keeps only the part above bit 15
+	fdtox	%f12,%f12
+	fdtox	%f16,%f16
+	fdtox	%f20,%f20
+	fxtod	%f8,%f8
+	std	%f8,[%o3+8]		! result16[1]: upper part of source[0]
+	fxtod	%f12,%f12
+	std	%f12,[%o3+24]		! result16[3]
+	fxtod	%f16,%f16
+	std	%f16,[%o3+40]		! result16[5]
+	fxtod	%f20,%f20
+	std	%f20,[%o3+56]		! result16[7]
+	fmuld	%f8,%f4,%f8		! upper part * 2^16
+	fmuld	%f12,%f4,%f12
+	fmuld	%f16,%f4,%f16
+	fmuld	%f20,%f4,%f20
+	fsubd	%f6,%f8,%f8		! value - upper part * 2^16 = lower part
+	std	%f8,[%o3]		! result16[0]: lower part of source[0]
+	fsubd	%f10,%f12,%f12
+	std	%f12,[%o3+16]		! result16[2]
+	fsubd	%f14,%f16,%f16
+	std	%f16,[%o3+32]		! result16[4]
+	fsubd	%f18,%f20,%f20
+	std	%f20,[%o3+48]		! result16[6]
+        .end
+
+
diff --git a/security/nss/lib/freebl/mpi/montmulf.s b/security/nss/lib/freebl/mpi/montmulf.s
new file mode 100644
index 000000000..69d2a3c51
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulf.s
@@ -0,0 +1,1938 @@
+!  
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+	.section	".text",#alloc,#execinstr
+	.file	"montmulf.c"
+
+	.section	".data",#alloc,#write
+	.align	8	! each constant below is one 8-byte double: two .word values, high word first
+TwoTo16:		/* frequency 1.0 confidence 0.0 */
+	.word	1089470464	! 0x40F00000: high word of the double 65536.0 (2^16)
+	.word	0
+	.type	TwoTo16,#object
+	.size	TwoTo16,8
+TwoToMinus16:		/* frequency 1.0 confidence 0.0 */
+	.word	1055916032	! 0x3EF00000: high word of the double 1.0/65536.0 (2^-16)
+	.word	0
+	.type	TwoToMinus16,#object
+	.size	TwoToMinus16,8
+Zero:		/* frequency 1.0 confidence 0.0 */	! conv_i32_to_d32_and_d16 reads TwoToMinus16 / TwoTo16 as Zero-8 / Zero-16: keep this order
+	.word	0
+	.word	0
+	.type	Zero,#object
+	.size	Zero,8
+TwoTo32:		/* frequency 1.0 confidence 0.0 */
+	.word	1106247680	! 0x41F00000: high word of the double 2^32
+	.word	0
+	.type	TwoTo32,#object
+	.size	TwoTo32,8
+TwoToMinus32:		/* frequency 1.0 confidence 0.0 */
+	.word	1039138816	! 0x3DF00000: high word of the double 2^-32
+	.word	0
+	.type	TwoToMinus32,#object
+	.size	TwoToMinus32,8
+
+	.section	".text",#alloc,#execinstr
+/* 000000	   0 ( 0  0) */		.align	4
+!
+! SUBROUTINE cleanup
+! Carry pass over dt[2*from .. 2*tlen-1]: each entry becomes its low 32 bits plus the upper 32 bits of the entry two slots earlier (0 for the first pair). Leaf routine: %o0 = dt, %o1 = from, %o2 = tlen.
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION	(ISSUE TIME)	(COMPLETION TIME)
+
+                                   	.global cleanup
+                                   cleanup:		/* frequency 1.0 confidence 0.0 */
+! FILE montmulf.c
+
+!    1		                    !#define RF_INLINE_MACROS
+!    3		                    !static double TwoTo16=65536.0;
+!    4		                    !static double TwoToMinus16=1.0/65536.0;
+!    5		                    !static double Zero=0.0;
+!    6		                    !static double TwoTo32=65536.0*65536.0;
+!    7		                    !static double TwoToMinus32=1.0/(65536.0*65536.0);
+!    9		                    !#ifdef RF_INLINE_MACROS
+!   11		                    !double upper32(double);
+!   12		                    !double lower32(double, double);
+!   13		                    !double mod(double, double, double);
+!   15		                    !#else
+!   17		                    !static double upper32(double x)
+!   18		                    !{
+!   19		                    !  return floor(x*TwoToMinus32);
+!   20		                    !}
+!   22		                    !static double lower32(double x, double y)
+!   23		                    !{
+!   24		                    !  return x-TwoTo32*floor(x*TwoToMinus32);
+!   25		                    !}
+!   27		                    !static double mod(double x, double oneoverm, double m)
+!   28		                    !{
+!   29		                    !  return x-m*floor(x*oneoverm);
+!   30		                    !}
+!   32		                    !#endif
+!   35		                    !void cleanup(double *dt, int from, int tlen)
+!   36		                    !{
+!   37		                    ! int i;
+!   38		                    ! double tmp,tmp1,x,x1;
+!   40		                    ! tmp=tmp1=Zero;
+
+/* 000000	  40 ( 0  1) */		sethi	%hi(Zero),%g2	! start forming &Zero
+
+!   41		                    ! /* original code **
+!   42		                    ! for(i=2*from;i<2*tlen-2;i++)
+!   43		                    !   {
+!   44		                    !     x=dt[i];
+!   45		                    !     dt[i]=lower32(x,Zero)+tmp1;
+!   46		                    !     tmp1=tmp;
+!   47		                    !     tmp=upper32(x);
+!   48		                    !   }
+!   49		                    ! dt[tlen-2]+=tmp1;
+!   50		                    ! dt[tlen-1]+=tmp;
+!   51		                    ! **end original code ***/
+!   52		                    ! /* new code ***/
+!   53		                    ! for(i=2*from;i<2*tlen;i+=2)
+
+/* 0x0004	  53 ( 1  2) */		sll	%o2,1,%g3	! %g3 = 2*tlen
+/* 0x0008	  40 ( 1  4) */		ldd	[%g2+%lo(Zero)],%f0	! %f0 = 0.0
+/* 0x000c	     ( 1  2) */		add	%g2,%lo(Zero),%g2	! %g2 = &Zero, reloaded inside the loop
+/* 0x0010	  53 ( 2  3) */		sll	%o1,1,%g4	! %g4 = i = 2*from
+/* 0x0014	  36 ( 3  4) */		sll	%o1,4,%g1	! byte offset of dt[2*from] (16 bytes per pair of doubles)
+/* 0x0018	  40 ( 3  4) */		fmovd	%f0,%f4	! tmp1 = 0 (lives in %f4)
+/* 0x001c	  53 ( 3  4) */		cmp	%g4,%g3
+/* 0x0020	     ( 3  4) */		bge,pt	%icc,.L77000116	! tprob=0.56
+/* 0x0024	     ( 4  5) */		fmovd	%f0,%f2	! (delay slot) tmp = 0 (lives in %f2)
+/* 0x0028	  36 ( 4  5) */		add	%o0,%g1,%g1	! %g1 = &dt[2*from]
+/* 0x002c	     ( 4  5) */		sub	%g3,1,%g3	! loop repeats while i <= 2*tlen-1
+
+!   54		                    !   {
+!   55		                    !     x=dt[i];
+
+/* 0x0030	  55 ( 5  8) */		ldd	[%g1],%f8	! x = dt[i] for the first pass
+                                   .L900000114:		/* frequency 6.4 confidence 0.0 */
+/* 0x0034	     ( 0  3) */		fdtox	%f8,%f6	! (long long)x; its low word becomes lower32(x)
+
+!   56		                    !     x1=dt[i+1];
+
+/* 0x0038	  56 ( 0  3) */		ldd	[%g1+8],%f10	! x1 = dt[i+1]
+
+!   57		                    !     dt[i]=lower32(x,Zero)+tmp;
+!   58		                    !     dt[i+1]=lower32(x1,Zero)+tmp1;
+!   59		                    !     tmp=upper32(x);
+!   60		                    !     tmp1=upper32(x1);
+
+/* 0x003c	  60 ( 0  1) */		add	%g4,2,%g4	! i += 2
+/* 0x0040	     ( 1  4) */		fdtox	%f8,%f8	! second copy of (long long)x; its high word feeds upper32(x)
+/* 0x0044	     ( 1  2) */		cmp	%g4,%g3
+/* 0x0048	     ( 5  6) */		fmovs	%f0,%f6	! zero the high word, leaving the low 32 bits of x
+/* 0x004c	     ( 7 10) */		fxtod	%f6,%f6	! lower32(x) as a double
+/* 0x0050	     ( 8 11) */		fdtox	%f10,%f0	! (long long)x1
+/* 0x0054	  57 (10 13) */		faddd	%f6,%f2,%f2	! lower32(x) + tmp
+/* 0x0058	     (10 11) */		std	%f2,[%g1]	! dt[i]
+/* 0x005c	     (12 15) */		ldd	[%g2],%f2	! reload 0.0
+/* 0x0060	     (14 15) */		fmovs	%f2,%f0	! zero the high word of (long long)x1
+/* 0x0064	     (16 19) */		fxtod	%f0,%f6	! lower32(x1) as a double
+/* 0x0068	     (17 20) */		fdtox	%f10,%f0	! (long long)x1 again, for its high word
+/* 0x006c	     (18 21) */		fitod	%f8,%f2	! tmp = upper32(x): high word converted as a 32-bit integer
+/* 0x0070	  58 (19 22) */		faddd	%f6,%f4,%f4	! lower32(x1) + tmp1
+/* 0x0074	     (19 20) */		std	%f4,[%g1+8]	! dt[i+1]
+/* 0x0078	  60 (19 20) */		add	%g1,16,%g1	! advance to the next pair
+/* 0x007c	     (20 23) */		fitod	%f0,%f4	! tmp1 = upper32(x1)
+/* 0x0080	     (20 23) */		ldd	[%g2],%f0	! restore 0.0 in %f0 for the next pass
+/* 0x0084	     (20 21) */		ble,a,pt	%icc,.L900000114	! tprob=0.86
+/* 0x0088	     (21 24) */		ldd	[%g1],%f8	! (annulled delay slot) next x, fetched only when the loop repeats
+                                   .L77000116:		/* frequency 1.0 confidence 0.0 */
+/* 0x008c	     ( 0  2) */		retl	! Result = 
+/* 0x0090	     ( 1  2) */		nop
+/* 0x0094	   0 ( 0  0) */		.type	cleanup,2
+/* 0x0094	     ( 0  0) */		.size	cleanup,(.-cleanup)
+
+	.section	".text",#alloc,#execinstr
+/* 000000	   0 ( 0  0) */		.align	4
+!
+! SUBROUTINE conv_d16_to_i32
+! Packs doubles into 32-bit words: with a, b = d16[2i], d16[2i+1] as 64-bit integers, i32[i] = low 32 bits of t1 + (a & 0xffffffff) + ((b & 0xffff) << 16), and (a>>32) + (b>>16) + (t1>>32) becomes the next t1. %i0 = i32, %i1 = d16, %i3 = ilen; %i2 (tmp) is not referenced here.
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION	(ISSUE TIME)	(COMPLETION TIME)
+
+                                   	.global conv_d16_to_i32
+                                   conv_d16_to_i32:		/* frequency 1.0 confidence 0.0 */
+/* 000000	     ( 0  1) */		save	%sp,-136,%sp	! frame with scratch slots at [%sp+96 .. %sp+128] for FP <-> integer moves
+
+!   61		                    !   }
+!   62		                    !  /** end new code **/
+!   63		                    !}
+!   66		                    !void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
+!   67		                    !{
+!   68		                    !int i;
+!   69		                    !long long t, t1, a, b, c, d;
+!   71		                    ! t1=0;
+!   72		                    ! a=(long long)d16[0];
+
+/* 0x0004	  72 ( 1  4) */		ldd	[%i1],%f0	! d16[0]
+
+!   73		                    ! b=(long long)d16[1];
+!   74		                    ! for(i=0; i<ilen-1; i++)
+
+/* 0x0008	  74 ( 1  2) */		sub	%i3,1,%g2	! ilen-1
+/* 0x000c	  67 ( 1  2) */		or	%g0,%i0,%g5	! %g5 = i32 base
+/* 0x0010	  74 ( 2  3) */		cmp	%g2,0
+/* 0x0014	  71 ( 2  3) */		or	%g0,0,%o4	! t1 = 0
+/* 0x0018	  72 ( 3  6) */		fdtox	%f0,%f0
+/* 0x001c	     ( 3  4) */		std	%f0,[%sp+120]	! a = (long long)d16[0], passed through the stack
+/* 0x0020	  74 ( 3  4) */		or	%g0,0,%o7	! i = 0
+/* 0x0024	  67 ( 4  5) */		or	%g0,%i3,%o0
+/* 0x0028	     ( 4  5) */		sub	%i3,2,%o2	! %o2 = ilen-2, the last loop index
+/* 0x002c	  73 ( 5  8) */		ldd	[%i1+8],%f0	! d16[1]
+/* 0x0030	  67 ( 5  6) */		sethi	%hi(0xfc00),%o0
+/* 0x0034	     ( 5  6) */		add	%o2,1,%g3	! %g3 = ilen-1 = iteration count
+/* 0x0038	     ( 6  7) */		add	%o0,1023,%o1	! %o1 = 0xffff
+/* 0x003c	     ( 6  7) */		or	%g0,%g5,%o5	! %o5 = output cursor
+/* 0x0040	  73 ( 7 10) */		fdtox	%f0,%f0
+/* 0x0044	     ( 7  8) */		std	%f0,[%sp+112]	! b = (long long)d16[1], passed through the stack
+/* 0x0048	  72 (11 13) */		ldx	[%sp+120],%g4	! %g4 = a
+/* 0x004c	  73 (12 14) */		ldx	[%sp+112],%g1	! %g1 = b
+/* 0x0050	  74 (12 13) */		ble,pt	%icc,.L900000214	! tprob=0.56
+/* 0x0054	     (12 13) */		sethi	%hi(0xfc00),%g2
+/* 0x0058	  67 (13 14) */		or	%g0,-1,%g2
+/* 0x005c	  74 (13 14) */		cmp	%g3,3	! fewer than 3 iterations: take the plain loop
+/* 0x0060	  67 (14 15) */		srl	%g2,0,%o3	! %o3 = 0xffffffff
+/* 0x0064	     (14 15) */		or	%g0,%i1,%g2	! %g2 = d16 cursor
+/* 0x0068	  74 (14 15) */		bl,pn	%icc,.L77000134	! tprob=0.44
+/* 0x006c	     (15 18) */		ldd	[%g2+16],%f0	! (delay slot) d16[2], the next c
+
+!   75		                    !   {
+!   76		                    !     c=(long long)d16[2*i+2];
+!   77		                    !     t1+=a&0xffffffff;
+!   78		                    !     t=(a>>32);
+!   79		                    !     d=(long long)d16[2*i+3];
+!   80		                    !     t1+=(b&0xffff)<<16;
+
+/* 0x0070	  80 (15 16) */		and	%g1,%o1,%o0	! b & 0xffff (iteration 0 is peeled here)
+
+!   81		                    !     t+=(b>>16)+(t1>>32);
+!   82		                    !     i32[i]=t1&0xffffffff;
+!   83		                    !     t1=t;
+!   84		                    !     a=c;
+!   85		                    !     b=d;
+
+/* 0x0074	  85 (15 16) */		add	%g2,16,%g2
+/* 0x0078	  80 (16 17) */		sllx	%o0,16,%g3
+/* 0x007c	  77 (16 17) */		and	%g4,%o3,%o0
+/* 0x0080	  76 (17 20) */		fdtox	%f0,%f0
+/* 0x0084	     (17 18) */		std	%f0,[%sp+104]
+/* 0x0088	  74 (17 18) */		add	%o0,%g3,%o4
+/* 0x008c	  79 (18 21) */		ldd	[%g2+8],%f2
+/* 0x0090	  81 (18 19) */		srax	%g1,16,%o0
+/* 0x0094	  82 (18 19) */		and	%o4,%o3,%o7
+/* 0x0098	  81 (19 20) */		stx	%o0,[%sp+112]
+/* 0x009c	     (19 20) */		srax	%o4,32,%o0
+/* 0x00a0	  85 (19 20) */		add	%g5,4,%o5
+/* 0x00a4	  81 (20 21) */		stx	%o0,[%sp+120]
+/* 0x00a8	  78 (20 21) */		srax	%g4,32,%o4
+/* 0x00ac	  79 (20 23) */		fdtox	%f2,%f0
+/* 0x00b0	     (21 22) */		std	%f0,[%sp+96]
+/* 0x00b4	  81 (22 24) */		ldx	[%sp+112],%o0
+/* 0x00b8	     (23 25) */		ldx	[%sp+120],%g4
+/* 0x00bc	  76 (25 27) */		ldx	[%sp+104],%g3
+/* 0x00c0	  81 (25 26) */		add	%o0,%g4,%g4
+/* 0x00c4	  79 (26 28) */		ldx	[%sp+96],%g1
+/* 0x00c8	  81 (26 27) */		add	%o4,%g4,%o4
+/* 0x00cc	  82 (27 28) */		st	%o7,[%g5]	! i32[0]
+/* 0x00d0	     (27 28) */		or	%g0,1,%o7	! i = 1
+/* 0x00d4	  84 (27 28) */		or	%g0,%g3,%g4
+                                   .L900000209:		/* frequency 64.0 confidence 0.0 */
+/* 0x00d8	  76 (17 19) */		ldd	[%g2+16],%f0
+/* 0x00dc	  85 (17 18) */		add	%o7,1,%o7
+/* 0x00e0	     (17 18) */		add	%o5,4,%o5
+/* 0x00e4	     (18 18) */		cmp	%o7,%o2
+/* 0x00e8	     (18 19) */		add	%g2,16,%g2
+/* 0x00ec	  76 (19 22) */		fdtox	%f0,%f0
+/* 0x00f0	     (20 21) */		std	%f0,[%sp+104]
+/* 0x00f4	  79 (21 23) */		ldd	[%g2+8],%f0
+/* 0x00f8	     (23 26) */		fdtox	%f0,%f0
+/* 0x00fc	     (24 25) */		std	%f0,[%sp+96]
+/* 0x0100	  80 (25 26) */		and	%g1,%o1,%g3
+/* 0x0104	     (26 27) */		sllx	%g3,16,%g3
+/* 0x0108	     ( 0  0) */		stx	%g3,[%sp+120]
+/* 0x010c	  77 (26 27) */		and	%g4,%o3,%g3
+/* 0x0110	  74 ( 0  0) */		stx	%o7,[%sp+128]
+/* 0x0114	     ( 0  0) */		ldx	[%sp+120],%o7
+/* 0x0118	     (27 27) */		add	%g3,%o7,%g3
+/* 0x011c	     ( 0  0) */		ldx	[%sp+128],%o7
+/* 0x0120	  81 (28 29) */		srax	%g1,16,%g1
+/* 0x0124	  74 (28 28) */		add	%g3,%o4,%g3
+/* 0x0128	  81 (29 30) */		srax	%g3,32,%o4
+/* 0x012c	     ( 0  0) */		stx	%o4,[%sp+112]
+/* 0x0130	  78 (30 31) */		srax	%g4,32,%o4
+/* 0x0134	  81 ( 0  0) */		ldx	[%sp+112],%g4
+/* 0x0138	     (30 31) */		add	%g1,%g4,%g4
+/* 0x013c	  79 (31 33) */		ldx	[%sp+96],%g1
+/* 0x0140	  81 (31 32) */		add	%o4,%g4,%o4
+/* 0x0144	  82 (32 33) */		and	%g3,%o3,%g3
+/* 0x0148	  84 ( 0  0) */		ldx	[%sp+104],%g4
+/* 0x014c	  85 (33 34) */		ble,pt	%icc,.L900000209	! tprob=0.50
+/* 0x0150	     (33 34) */		st	%g3,[%o5-4]	! (delay slot) store this pass's output word
+                                   .L900000212:		/* frequency 8.0 confidence 0.0 */
+/* 0x0154	  85 ( 0  1) */		ba	.L900000214	! tprob=1.00
+/* 0x0158	     ( 0  1) */		sethi	%hi(0xfc00),%g2
+                                   .L77000134:		/* frequency 0.7 confidence 0.0 */
+                                   .L900000213:		/* frequency 6.4 confidence 0.0 */
+/* 0x015c	  77 ( 0  1) */		and	%g4,%o3,%o0	! a & 0xffffffff
+/* 0x0160	  80 ( 0  1) */		and	%g1,%o1,%g3	! b & 0xffff
+/* 0x0164	  76 ( 0  3) */		fdtox	%f0,%f0
+/* 0x0168	  77 ( 1  2) */		add	%o4,%o0,%o0	! t1 += a & 0xffffffff
+/* 0x016c	  76 ( 1  2) */		std	%f0,[%sp+104]	! c, passed through the stack
+/* 0x0170	  85 ( 1  2) */		add	%o7,1,%o7	! i++
+/* 0x0174	  80 ( 2  3) */		sllx	%g3,16,%o4
+/* 0x0178	  79 ( 2  5) */		ldd	[%g2+24],%f2
+/* 0x017c	  85 ( 2  3) */		add	%g2,16,%g2
+/* 0x0180	  80 ( 3  4) */		add	%o0,%o4,%o4	! t1 += (b & 0xffff) << 16
+/* 0x0184	  81 ( 3  4) */		stx	%o7,[%sp+128]	! park i while %o7 is borrowed as a temporary
+/* 0x0188	     ( 4  5) */		srax	%g1,16,%o0
+/* 0x018c	     ( 4  5) */		stx	%o0,[%sp+112]
+/* 0x0190	  82 ( 4  5) */		and	%o4,%o3,%g3	! t1 & 0xffffffff, the output word
+/* 0x0194	  81 ( 5  6) */		srax	%o4,32,%o0
+/* 0x0198	     ( 5  6) */		stx	%o0,[%sp+120]
+/* 0x019c	  79 ( 5  8) */		fdtox	%f2,%f0
+/* 0x01a0	     ( 6  7) */		std	%f0,[%sp+96]	! d, passed through the stack
+/* 0x01a4	  78 ( 6  7) */		srax	%g4,32,%o4	! t = a >> 32
+/* 0x01a8	  81 ( 7  9) */		ldx	[%sp+120],%o7
+/* 0x01ac	     ( 8 10) */		ldx	[%sp+112],%g4
+/* 0x01b0	  76 (10 12) */		ldx	[%sp+104],%g1
+/* 0x01b4	  81 (10 11) */		add	%g4,%o7,%g4	! (b >> 16) + (t1 >> 32)
+/* 0x01b8	     (11 13) */		ldx	[%sp+128],%o7	! restore i
+/* 0x01bc	     (11 12) */		add	%o4,%g4,%o4	! new t1 = t + (b >> 16) + (t1 >> 32)
+/* 0x01c0	  79 (12 14) */		ldx	[%sp+96],%o0
+/* 0x01c4	  84 (12 13) */		or	%g0,%g1,%g4	! a = c
+/* 0x01c8	  82 (13 14) */		st	%g3,[%o5]
+/* 0x01cc	  85 (13 14) */		add	%o5,4,%o5
+/* 0x01d0	     (13 14) */		cmp	%o7,%o2
+/* 0x01d4	     (14 15) */		or	%g0,%o0,%g1	! b = d
+/* 0x01d8	     (14 15) */		ble,a,pt	%icc,.L900000213	! tprob=0.86
+/* 0x01dc	     (14 17) */		ldd	[%g2+16],%f0	! (annulled delay slot) next c, fetched only when the loop repeats
+                                   .L77000127:		/* frequency 1.0 confidence 0.0 */
+
+!   86		                    !   }
+!   87		                    !     t1+=a&0xffffffff;
+!   88		                    !     t=(a>>32);
+!   89		                    !     t1+=(b&0xffff)<<16;
+!   90		                    !     i32[i]=t1&0xffffffff;
+
+/* 0x01e0	  90 ( 0  1) */		sethi	%hi(0xfc00),%g2
+                                   .L900000214:		/* frequency 1.0 confidence 0.0 */
+/* 0x01e4	  90 ( 0  1) */		or	%g0,-1,%g3
+/* 0x01e8	     ( 0  1) */		add	%g2,1023,%g2	! 0xffff
+/* 0x01ec	     ( 1  2) */		srl	%g3,0,%g3	! 0xffffffff
+/* 0x01f0	     ( 1  2) */		and	%g1,%g2,%g2	! b & 0xffff
+/* 0x01f4	     ( 2  3) */		and	%g4,%g3,%g4	! a & 0xffffffff
+/* 0x01f8	     ( 3  4) */		sllx	%g2,16,%g2	! (b & 0xffff) << 16
+/* 0x01fc	     ( 3  4) */		add	%o4,%g4,%g4	! t1 + (a & 0xffffffff)
+/* 0x0200	     ( 4  5) */		add	%g4,%g2,%g2
+/* 0x0204	     ( 5  6) */		sll	%o7,2,%g4	! byte offset of i32[i]
+/* 0x0208	     ( 5  6) */		and	%g2,%g3,%g2	! keep the low 32 bits
+/* 0x020c	     ( 6  7) */		st	%g2,[%g5+%g4]	! final i32[i]
+/* 0x0210	     ( 7  9) */		ret	! Result = 
+/* 0x0214	     ( 9 10) */		restore	%g0,%g0,%g0
+/* 0x0218	   0 ( 0  0) */		.type	conv_d16_to_i32,2
+/* 0x0218	     ( 0  0) */		.size	conv_d16_to_i32,(.-conv_d16_to_i32)
+
+	.section	".text",#alloc,#execinstr
+/* 000000	   0 ( 0  0) */		.align	8
+!
+! CONSTANT POOL
+!
+                                   .L_const_seg_900000301:		/* frequency 1.0 confidence 0.0 */
+/* 000000	   0 ( 0  0) */		.word	1127219200,0	! 0x43300000,0 = the double 2^52
+/* 0x0008	   0 ( 0  0) */		.align	4
+!
+! SUBROUTINE conv_i32_to_d32
+! d32[i] = (double)i32[i] for i < len. Each 32-bit word is placed in the low half of a double whose high half is that of 2^52; subtracting 2^52 leaves the word's value. Leaf routine: %o0 = d32, %o1 = i32, %o2 = len.
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION	(ISSUE TIME)	(COMPLETION TIME)
+
+                                   	.global conv_i32_to_d32
+                                   conv_i32_to_d32:		/* frequency 1.0 confidence 0.0 */
+/* 000000	     ( 0  1) */		orcc	%g0,%o2,%g1	! %g1 = len, and set the condition codes on it
+
+!   92		                    !}
+!   94		                    !void conv_i32_to_d32(double *d32, unsigned int *i32, int len)
+!   95		                    !{
+!   96		                    !int i;
+!   98		                    !#pragma pipeloop(0)
+!   99		                    ! for(i=0;i<len;i++) d32[i]=(double)(i32[i]);
+
+/* 0x0004	  99 ( 0  1) */		ble,pt	%icc,.L77000140	! tprob=0.56
+/* 0x0008	     ( 0  1) */		nop
+/* 0x000c	     ( 1  2) */		sethi	%hi(.L_const_seg_900000301),%g2
+/* 0x0010	  95 ( 1  2) */		or	%g0,%o1,%g4	! %g4 = input cursor
+/* 0x0014	  99 ( 2  3) */		add	%g2,%lo(.L_const_seg_900000301),%g2	! %g2 = address of the 2^52 constant
+/* 0x0018	     ( 2  3) */		or	%g0,0,%o5	! i = 0
+/* 0x001c	  95 ( 3  4) */		or	%g0,%o0,%g5	! %g5 = output cursor
+/* 0x0020	  99 ( 3  4) */		sub	%o2,1,%g3	! len-1
+/* 0x0024	     ( 4  5) */		cmp	%o2,9	! len < 9: skip the unrolled loop
+/* 0x0028	     ( 4  5) */		bl,pn	%icc,.L77000144	! tprob=0.44
+/* 0x002c	     ( 4  7) */		ldd	[%g2],%f8	! (delay slot) %f8 = 2^52
+/* 0x0030	     ( 5  8) */		ld	[%o1],%f7	! preload i32[0..3] into the low words of %f6, %f4, %f2, %f0
+/* 0x0034	     ( 5  6) */		add	%o1,16,%g4	! input cursor past the four preloaded words
+/* 0x0038	     ( 5  6) */		sub	%o2,5,%g1	! unrolled-loop bound: len-5
+/* 0x003c	     ( 6  9) */		ld	[%o1+4],%f5
+/* 0x0040	     ( 6  7) */		or	%g0,4,%o5	! i = 4
+/* 0x0044	     ( 7 10) */		ld	[%o1+8],%f3
+/* 0x0048	     ( 7  8) */		fmovs	%f8,%f6	! high word of 2^52 on top of the first word
+/* 0x004c	     ( 8 11) */		ld	[%o1+12],%f1
+                                   .L900000305:		/* frequency 64.0 confidence 0.0 */
+/* 0x0050	     ( 8 16) */		ld	[%g4],%f11
+/* 0x0054	     ( 8  9) */		add	%o5,5,%o5	! five elements per pass
+/* 0x0058	     ( 8  9) */		add	%g4,20,%g4
+/* 0x005c	     ( 8 11) */		fsubd	%f6,%f8,%f6	! (2^52 + word) - 2^52 = (double)word
+/* 0x0060	     ( 9 10) */		std	%f6,[%g5]
+/* 0x0064	     ( 9  9) */		cmp	%o5,%g1
+/* 0x0068	     ( 9 10) */		add	%g5,40,%g5
+/* 0x006c	     ( 0  0) */		fmovs	%f8,%f4
+/* 0x0070	     (10 18) */		ld	[%g4-16],%f7
+/* 0x0074	     (10 13) */		fsubd	%f4,%f8,%f12
+/* 0x0078	     ( 0  0) */		fmovs	%f8,%f2
+/* 0x007c	     (11 12) */		std	%f12,[%g5-32]
+/* 0x0080	     (12 20) */		ld	[%g4-12],%f5
+/* 0x0084	     (12 15) */		fsubd	%f2,%f8,%f12
+/* 0x0088	     ( 0  0) */		fmovs	%f8,%f0
+/* 0x008c	     (13 14) */		std	%f12,[%g5-24]
+/* 0x0090	     (14 22) */		ld	[%g4-8],%f3
+/* 0x0094	     (14 17) */		fsubd	%f0,%f8,%f12
+/* 0x0098	     ( 0  0) */		fmovs	%f8,%f10
+/* 0x009c	     (15 16) */		std	%f12,[%g5-16]
+/* 0x00a0	     (16 24) */		ld	[%g4-4],%f1
+/* 0x00a4	     (16 19) */		fsubd	%f10,%f8,%f10
+/* 0x00a8	     ( 0  0) */		fmovs	%f8,%f6
+/* 0x00ac	     (17 18) */		ble,pt	%icc,.L900000305	! tprob=0.50
+/* 0x00b0	     (17 18) */		std	%f10,[%g5-8]
+                                   .L900000308:		/* frequency 8.0 confidence 0.0 */
+/* 0x00b4	     ( 0  1) */		fmovs	%f8,%f4	! drain: convert the four words still held in registers
+/* 0x00b8	     ( 0  1) */		add	%g5,32,%g5
+/* 0x00bc	     ( 0  1) */		cmp	%o5,%g3
+/* 0x00c0	     ( 1  2) */		fmovs	%f8,%f2
+/* 0x00c4	     ( 2  3) */		fmovs	%f8,%f0
+/* 0x00c8	     ( 4  7) */		fsubd	%f6,%f8,%f6
+/* 0x00cc	     ( 4  5) */		std	%f6,[%g5-32]
+/* 0x00d0	     ( 5  8) */		fsubd	%f4,%f8,%f4
+/* 0x00d4	     ( 5  6) */		std	%f4,[%g5-24]
+/* 0x00d8	     ( 6  9) */		fsubd	%f2,%f8,%f2
+/* 0x00dc	     ( 6  7) */		std	%f2,[%g5-16]
+/* 0x00e0	     ( 7 10) */		fsubd	%f0,%f8,%f0
+/* 0x00e4	     ( 7  8) */		bg,pn	%icc,.L77000140	! tprob=0.14
+/* 0x00e8	     ( 7  8) */		std	%f0,[%g5-8]
+                                   .L77000144:		/* frequency 0.7 confidence 0.0 */
+/* 0x00ec	     ( 0  3) */		ld	[%g4],%f1	! remainder loop, one element per pass: word into the low half of %f0:%f1
+                                   .L900000309:		/* frequency 6.4 confidence 0.0 */
+/* 0x00f0	     ( 0  3) */		ldd	[%g2],%f8	! %f8 = 2^52
+/* 0x00f4	     ( 0  1) */		add	%o5,1,%o5	! i++
+/* 0x00f8	     ( 0  1) */		add	%g4,4,%g4
+/* 0x00fc	     ( 1  2) */		cmp	%o5,%g3	! repeat while i <= len-1
+/* 0x0100	     ( 2  3) */		fmovs	%f8,%f0	! high word of 2^52
+/* 0x0104	     ( 4  7) */		fsubd	%f0,%f8,%f0	! (2^52 + word) - 2^52 = (double)word
+/* 0x0108	     ( 4  5) */		std	%f0,[%g5]
+/* 0x010c	     ( 4  5) */		add	%g5,8,%g5
+/* 0x0110	     ( 4  5) */		ble,a,pt	%icc,.L900000309	! tprob=0.86
+/* 0x0114	     ( 6  9) */		ld	[%g4],%f1	! (annulled delay slot) next word, fetched only when the loop repeats
+                                   .L77000140:		/* frequency 1.0 confidence 0.0 */
+/* 0x0118	     ( 0  2) */		retl	! Result = 
+/* 0x011c	     ( 1  2) */		nop
+/* 0x0120	   0 ( 0  0) */		.type	conv_i32_to_d32,2
+/* 0x0120	     ( 0  0) */		.size	conv_i32_to_d32,(.-conv_i32_to_d32)
+
+	.section	".text",#alloc,#execinstr
+/* 000000	   0 ( 0  0) */		.align	8
+!
+! CONSTANT POOL
+!
+                                   .L_const_seg_900000401:		/* frequency 1.0 confidence 0.0 */
+/* 000000	   0 ( 0  0) */		.word	1127219200,0	! 0x43300000,0 = the double 2^52
+/* 0x0008	   0 ( 0  0) */		.align	4
+!
+! SUBROUTINE conv_i32_to_d16
+! For i < len: d16[2*i] = (double)(i32[i] & 0xffff), d16[2*i+1] = (double)(i32[i] >> 16). Each half goes through a stack slot into the low word of a double whose high word is that of 2^52, then 2^52 is subtracted. %i0 = d16, %i1 = i32, %i2 = len.
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION	(ISSUE TIME)	(COMPLETION TIME)
+
+                                   	.global conv_i32_to_d16
+                                   conv_i32_to_d16:		/* frequency 1.0 confidence 0.0 */
+/* 000000	     ( 0  1) */		save	%sp,-104,%sp	! frame with scratch words at [%sp+92] and [%sp+96]
+/* 0x0004	     ( 1  2) */		orcc	%g0,%i2,%o0	! %o0 = len, and set the condition codes on it
+
+!  100		                    !}
+!  103		                    !void conv_i32_to_d16(double *d16, unsigned int *i32, int len)
+!  104		                    !{
+!  105		                    !int i;
+!  106		                    !unsigned int a;
+!  108		                    !#pragma pipeloop(0)
+!  109		                    ! for(i=0;i<len;i++)
+
+/* 0x0008	 109 ( 1  2) */		ble,pt	%icc,.L77000150	! tprob=0.56
+/* 0x000c	     ( 1  2) */		nop
+/* 0x0010	     ( 2  3) */		sub	%o0,1,%o5	! %o5 = len-1
+/* 0x0014	     ( 2  3) */		sethi	%hi(0xfc00),%g2
+
+!  110		                    !   {
+!  111		                    !     a=i32[i];
+!  112		                    !     d16[2*i]=(double)(a&0xffff);
+!  113		                    !     d16[2*i+1]=(double)(a>>16);
+
+/* 0x0018	 113 ( 3  4) */		sethi	%hi(.L_const_seg_900000401),%o0
+/* 0x001c	     ( 3  4) */		add	%o5,1,%g3	! %g3 = len
+/* 0x0020	     ( 4  5) */		add	%g2,1023,%o4	! %o4 = 0xffff
+/* 0x0024	 109 ( 4  5) */		or	%g0,0,%g1	! i = 0
+/* 0x0028	     ( 5  6) */		cmp	%g3,3	! len < 3: take the plain loop
+/* 0x002c	     ( 5  6) */		or	%g0,%i1,%o7	! %o7 = input cursor
+/* 0x0030	     ( 6  7) */		add	%o0,%lo(.L_const_seg_900000401),%o3	! %o3 = address of the 2^52 constant
+/* 0x0034	     ( 6  7) */		or	%g0,%i0,%g2	! %g2 = output cursor
+/* 0x0038	     ( 6  7) */		bl,pn	%icc,.L77000154	! tprob=0.44
+/* 0x003c	     ( 7  8) */		add	%o7,4,%o0	! (delay slot) %o0 = &i32[1]
+/* 0x0040	 112 ( 7 10) */		ldd	[%o3],%f0	! %f0 = 2^52
+/* 0x0044	 113 ( 7  8) */		or	%g0,1,%g1	! i = 1: the first word is loaded ahead of the loop
+/* 0x0048	 111 ( 8 11) */		ld	[%o0-4],%o1	! a = i32[0]
+/* 0x004c	   0 ( 8  9) */		or	%g0,%o0,%o7
+/* 0x0050	 112 (10 11) */		and	%o1,%o4,%o0	! a & 0xffff
+                                   .L900000406:		/* frequency 64.0 confidence 0.0 */
+/* 0x0054	 112 (22 23) */		st	%o0,[%sp+96]	! low half to the stack ...
+/* 0x0058	 113 (22 23) */		add	%g1,1,%g1
+/* 0x005c	     (22 23) */		add	%g2,16,%g2	! two doubles of output per input word
+/* 0x0060	     (23 23) */		cmp	%g1,%o5
+/* 0x0064	     (23 24) */		add	%o7,4,%o7
+/* 0x0068	 112 (29 31) */		ld	[%sp+96],%f3	! ... and into the low word of %f2:%f3
+/* 0x006c	     ( 0  0) */		fmovs	%f0,%f2	! high word of 2^52
+/* 0x0070	     (31 34) */		fsubd	%f2,%f0,%f2	! (double)(a & 0xffff)
+/* 0x0074	 113 (32 33) */		srl	%o1,16,%o0	! a >> 16
+/* 0x0078	 112 (32 33) */		std	%f2,[%g2-16]	! d16[2*i]
+/* 0x007c	 113 (33 34) */		st	%o0,[%sp+92]
+/* 0x0080	     (40 42) */		ld	[%sp+92],%f3
+/* 0x0084	 111 (41 43) */		ld	[%o7-4],%o1	! a for the next pass
+/* 0x0088	 113 ( 0  0) */		fmovs	%f0,%f2
+/* 0x008c	     (42 45) */		fsubd	%f2,%f0,%f2	! (double)(a >> 16)
+/* 0x0090	 112 (43 44) */		and	%o1,%o4,%o0
+/* 0x0094	 113 (43 44) */		ble,pt	%icc,.L900000406	! tprob=0.50
+/* 0x0098	     (43 44) */		std	%f2,[%g2-8]	! (delay slot) d16[2*i+1]
+                                   .L900000409:		/* frequency 8.0 confidence 0.0 */
+/* 0x009c	 112 ( 0  1) */		st	%o0,[%sp+96]	! last element: its word was already loaded by the final loop pass
+/* 0x00a0	     ( 0  1) */		fmovs	%f0,%f2
+/* 0x00a4	 113 ( 0  1) */		add	%g2,16,%g2
+/* 0x00a8	     ( 1  2) */		srl	%o1,16,%o0
+/* 0x00ac	 112 ( 4  7) */		ld	[%sp+96],%f3
+/* 0x00b0	     ( 6  9) */		fsubd	%f2,%f0,%f2
+/* 0x00b4	     ( 6  7) */		std	%f2,[%g2-16]
+/* 0x00b8	 113 ( 7  8) */		st	%o0,[%sp+92]
+/* 0x00bc	     (10 11) */		fmovs	%f0,%f2
+/* 0x00c0	     (11 14) */		ld	[%sp+92],%f3
+/* 0x00c4	     (13 16) */		fsubd	%f2,%f0,%f0
+/* 0x00c8	     (13 14) */		std	%f0,[%g2-8]
+/* 0x00cc	     (14 16) */		ret	! Result = 
+/* 0x00d0	     (16 17) */		restore	%g0,%g0,%g0
+                                   .L77000154:		/* frequency 0.7 confidence 0.0 */
+/* 0x00d4	 111 ( 0  3) */		ld	[%o7],%o0	! plain loop, one input word per pass: a = i32[i]
+                                   .L900000410:		/* frequency 6.4 confidence 0.0 */
+/* 0x00d8	 112 ( 0  1) */		and	%o0,%o4,%o1	! a & 0xffff
+/* 0x00dc	     ( 0  1) */		st	%o1,[%sp+96]
+/* 0x00e0	 113 ( 0  1) */		add	%g1,1,%g1	! i++
+/* 0x00e4	 112 ( 1  4) */		ldd	[%o3],%f0	! %f0 = 2^52 (reloaded: %f0 is overwritten below)
+/* 0x00e8	 113 ( 1  2) */		srl	%o0,16,%o0	! a >> 16
+/* 0x00ec	     ( 1  2) */		add	%o7,4,%o7
+/* 0x00f0	     ( 2  3) */		cmp	%g1,%o5	! repeat while i <= len-1
+/* 0x00f4	 112 ( 3  4) */		fmovs	%f0,%f2
+/* 0x00f8	     ( 4  7) */		ld	[%sp+96],%f3
+/* 0x00fc	     ( 6  9) */		fsubd	%f2,%f0,%f2	! (double)(a & 0xffff)
+/* 0x0100	     ( 6  7) */		std	%f2,[%g2]	! d16[2*i]
+/* 0x0104	 113 ( 7  8) */		st	%o0,[%sp+92]
+/* 0x0108	     (10 11) */		fmovs	%f0,%f2
+/* 0x010c	     (11 14) */		ld	[%sp+92],%f3
+/* 0x0110	     (13 16) */		fsubd	%f2,%f0,%f0	! (double)(a >> 16)
+/* 0x0114	     (13 14) */		std	%f0,[%g2+8]	! d16[2*i+1]
+/* 0x0118	     (13 14) */		add	%g2,16,%g2
+/* 0x011c	     (13 14) */		ble,a,pt	%icc,.L900000410	! tprob=0.86
+/* 0x0120	     (14 17) */		ld	[%o7],%o0	! (annulled delay slot) next word, fetched only when the loop repeats
+                                   .L77000150:		/* frequency 1.0 confidence 0.0 */
+/* 0x0124	     ( 0  2) */		ret	! Result = 
+/* 0x0128	     ( 2  3) */		restore	%g0,%g0,%g0
+/* 0x012c	   0 ( 0  0) */		.type	conv_i32_to_d16,2
+/* 0x012c	     ( 0  0) */		.size	conv_i32_to_d16,(.-conv_i32_to_d16)
+
+	.section	".text",#alloc,#execinstr
+/* 000000	   0 ( 0  0) */		.align	8
+!
+! CONSTANT POOL
+!
+                                   .L_const_seg_900000501:		/* frequency 1.0 confidence 0.0 */
+/* 000000	   0 ( 0  0) */		.word	1127219200,0
+/* 0x0008	   0 ( 0  0) */		.align	4
+!
+! SUBROUTINE conv_i32_to_d32_and_d16
+!
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION	(ISSUE TIME)	(COMPLETION TIME)
+
+                                   	.global conv_i32_to_d32_and_d16
+                                   conv_i32_to_d32_and_d16:		/* frequency 1.0 confidence 0.0 */
+/* 000000	     ( 0  1) */		save	%sp,-104,%sp
+/* 0x0004	     ( 1  2) */		or	%g0,%i3,%i4
+/* 0x0008	     ( 1  2) */		or	%g0,%i2,%g1
+
+!  114		                    !   }
+!  115		                    !}
+!  118		                    !void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
+!  119		                    !			  double * /* 0 */,
+!  120		                    !			  double * /*result16*/, double * /* result32 */,
+!  121		                    !			  float *  /*source - should be unsigned int*
+!  122		                    !		          	       converted to float* */);
+!  126		                    !void conv_i32_to_d32_and_d16(double *d32, double *d16, 
+!  127		                    !			     unsigned int *i32, int len)
+!  128		                    !{
+!  129		                    !int i;
+!  130		                    !unsigned int a;
+!  132		                    !#pragma pipeloop(0)
+!  133		                    ! for(i=0;i<len-3;i+=4)
+
+/* 0x000c	 133 ( 2  3) */		sub	%i4,3,%g2
+/* 0x0010	     ( 2  3) */		or	%g0,0,%o7
+/* 0x0014	     ( 3  4) */		cmp	%g2,0
+/* 0x0018	 128 ( 3  4) */		or	%g0,%i0,%i3
+/* 0x001c	 133 ( 3  4) */		ble,pt	%icc,.L900000515	! tprob=0.56
+/* 0x0020	     ( 4  5) */		cmp	%o7,%i4
+
+!  134		                    !   {
+!  135		                    !     i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
+!  136		                    !			  &(d16[2*i]), &(d32[i]), (float *)(&(i32[i])));
+
+/* 0x0024	 136 ( 4  5) */		sethi	%hi(Zero),%g2
+/* 0x0028	 133 ( 5  6) */		or	%g0,%g1,%o3
+/* 0x002c	     ( 5  6) */		sub	%i4,4,%o2
+/* 0x0030	 136 ( 6  7) */		add	%g2,%lo(Zero),%o1
+/* 0x0034	 133 ( 6  7) */		or	%g0,0,%o5
+/* 0x0038	     ( 7  8) */		or	%g0,0,%o4
+/* 0x003c	 136 ( 7  8) */		or	%g0,%o3,%g4
+                                   .L900000514:		/* frequency 6.4 confidence 0.0 */
+/* 0x0040	     ( 0  3) */		ldd	[%o1],%f2
+/* 0x0044	 136 ( 0  1) */		add	%i3,%o5,%g2
+/* 0x0048	     ( 0  1) */		add	%i1,%o4,%g3
+/* 0x004c	     ( 1  4) */		ldd	[%o1-8],%f0
+/* 0x0050	     ( 1  2) */		add	%o7,4,%o7
+/* 0x0054	     ( 1  2) */		add	%o3,16,%o3
+/* 0x0058	     ( 2  3) */		fmovd	%f2,%f14
+/* 0x005c	     ( 2  5) */		ld	[%g4],%f15
+/* 0x0060	     ( 2  3) */		cmp	%o7,%o2
+/* 0x0064	     ( 3  4) */		fmovd	%f2,%f10
+/* 0x0068	     ( 3  6) */		ld	[%g4+4],%f11
+/* 0x006c	     ( 4  5) */		fmovd	%f2,%f6
+/* 0x0070	     ( 4  7) */		ld	[%g4+8],%f7
+/* 0x0074	     ( 5  8) */		ld	[%g4+12],%f3
+/* 0x0078	     ( 5  8) */		fxtod	%f14,%f14
+/* 0x007c	     ( 6  9) */		fxtod	%f10,%f10
+/* 0x0080	     ( 6  9) */		ldd	[%o1-16],%f16
+/* 0x0084	     ( 7 10) */		fxtod	%f6,%f6
+/* 0x0088	     ( 7  8) */		std	%f14,[%i3+%o5]
+/* 0x008c	     ( 7  8) */		add	%o5,32,%o5
+/* 0x0090	     ( 8 11) */		fxtod	%f2,%f2
+/* 0x0094	     ( 8 11) */		fmuld	%f0,%f14,%f12
+/* 0x0098	     ( 8  9) */		std	%f10,[%g2+8]
+/* 0x009c	     ( 9 12) */		fmuld	%f0,%f10,%f8
+/* 0x00a0	     ( 9 10) */		std	%f6,[%g2+16]
+/* 0x00a4	     (10 13) */		fmuld	%f0,%f6,%f4
+/* 0x00a8	     (10 11) */		std	%f2,[%g2+24]
+/* 0x00ac	     (11 14) */		fmuld	%f0,%f2,%f0
+/* 0x00b0	     (11 14) */		fdtox	%f12,%f12
+/* 0x00b4	     (12 15) */		fdtox	%f8,%f8
+/* 0x00b8	     (13 16) */		fdtox	%f4,%f4
+/* 0x00bc	     (14 17) */		fdtox	%f0,%f0
+/* 0x00c0	     (15 18) */		fxtod	%f12,%f12
+/* 0x00c4	     (15 16) */		std	%f12,[%g3+8]
+/* 0x00c8	     (16 19) */		fxtod	%f8,%f8
+/* 0x00cc	     (16 17) */		std	%f8,[%g3+24]
+/* 0x00d0	     (17 20) */		fxtod	%f4,%f4
+/* 0x00d4	     (17 18) */		std	%f4,[%g3+40]
+/* 0x00d8	     (18 21) */		fxtod	%f0,%f0
+/* 0x00dc	     (18 21) */		fmuld	%f12,%f16,%f12
+/* 0x00e0	     (18 19) */		std	%f0,[%g3+56]
+/* 0x00e4	     (19 22) */		fmuld	%f8,%f16,%f8
+/* 0x00e8	     (20 23) */		fmuld	%f4,%f16,%f4
+/* 0x00ec	     (21 24) */		fmuld	%f0,%f16,%f0
+/* 0x00f0	     (21 24) */		fsubd	%f14,%f12,%f12
+/* 0x00f4	     (21 22) */		std	%f12,[%i1+%o4]
+/* 0x00f8	     (22 25) */		fsubd	%f10,%f8,%f8
+/* 0x00fc	     (22 23) */		std	%f8,[%g3+16]
+/* 0x0100	     (22 23) */		add	%o4,64,%o4
+/* 0x0104	     (23 26) */		fsubd	%f6,%f4,%f4
+/* 0x0108	     (23 24) */		std	%f4,[%g3+32]
+/* 0x010c	     (24 27) */		fsubd	%f2,%f0,%f0
+/* 0x0110	     (24 25) */		std	%f0,[%g3+48]
+/* 0x0114	     (24 25) */		ble,pt	%icc,.L900000514	! tprob=0.86
+/* 0x0118	     (25 26) */		or	%g0,%o3,%g4
+                                   .L77000159:		/* frequency 1.0 confidence 0.0 */
+
+!  137		                    !   }
+!  138		                    ! for(;i<len;i++)
+
+/* 0x011c	 138 ( 0  1) */		cmp	%o7,%i4
+                                   .L900000515:		/* frequency 1.0 confidence 0.0 */
+/* 0x0120	 138 ( 0  1) */		bge,pt	%icc,.L77000164	! tprob=0.56
+/* 0x0124	     ( 0  1) */		nop
+
+!  139		                    !   {
+!  140		                    !     a=i32[i];
+!  141		                    !     d32[i]=(double)(i32[i]);
+!  142		                    !     d16[2*i]=(double)(a&0xffff);
+!  143		                    !     d16[2*i+1]=(double)(a>>16);
+
+/* 0x0128	 143 ( 0  1) */		sethi	%hi(.L_const_seg_900000501),%o1
+/* 0x012c	 138 ( 1  2) */		sethi	%hi(0xfc00),%o0
+/* 0x0130	 141 ( 1  4) */		ldd	[%o1+%lo(.L_const_seg_900000501)],%f0
+/* 0x0134	 138 ( 1  2) */		sub	%i4,%o7,%g3
+/* 0x0138	     ( 2  3) */		sll	%o7,2,%g2
+/* 0x013c	     ( 2  3) */		add	%o0,1023,%o3
+/* 0x0140	     ( 3  4) */		sll	%o7,3,%g4
+/* 0x0144	     ( 3  4) */		cmp	%g3,3
+/* 0x0148	     ( 4  5) */		add	%g1,%g2,%o0
+/* 0x014c	     ( 4  5) */		add	%o1,%lo(.L_const_seg_900000501),%o2
+/* 0x0150	     ( 5  6) */		add	%i3,%g4,%o4
+/* 0x0154	     ( 5  6) */		sub	%i4,1,%o1
+/* 0x0158	     ( 6  7) */		sll	%o7,4,%g5
+/* 0x015c	     ( 6  7) */		bl,pn	%icc,.L77000161	! tprob=0.44
+/* 0x0160	     ( 7  8) */		add	%i1,%g5,%o5
+/* 0x0164	 141 ( 7 10) */		ld	[%g1+%g2],%f3
+/* 0x0168	 143 ( 7  8) */		add	%o4,8,%o4
+/* 0x016c	 140 ( 8 11) */		ld	[%g1+%g2],%g1
+/* 0x0170	 143 ( 8  9) */		add	%o5,16,%o5
+/* 0x0174	     ( 8  9) */		add	%o7,1,%o7
+/* 0x0178	 141 ( 9 10) */		fmovs	%f0,%f2
+/* 0x017c	 143 ( 9 10) */		add	%o0,4,%o0
+/* 0x0180	 142 (10 11) */		and	%g1,%o3,%g2
+/* 0x0184	 141 (11 14) */		fsubd	%f2,%f0,%f2
+/* 0x0188	     (11 12) */		std	%f2,[%o4-8]
+/* 0x018c	 143 (11 12) */		srl	%g1,16,%g1
+/* 0x0190	 142 (12 13) */		st	%g2,[%sp+96]
+/* 0x0194	     (15 16) */		fmovs	%f0,%f2
+/* 0x0198	     (16 19) */		ld	[%sp+96],%f3
+/* 0x019c	     (18 21) */		fsubd	%f2,%f0,%f2
+/* 0x01a0	     (18 19) */		std	%f2,[%o5-16]
+/* 0x01a4	 143 (19 20) */		st	%g1,[%sp+92]
+/* 0x01a8	     (22 23) */		fmovs	%f0,%f2
+/* 0x01ac	     (23 26) */		ld	[%sp+92],%f3
+/* 0x01b0	     (25 28) */		fsubd	%f2,%f0,%f2
+/* 0x01b4	     (25 26) */		std	%f2,[%o5-8]
+                                   .L900000509:		/* frequency 64.0 confidence 0.0 */
+/* 0x01b8	 141 (26 28) */		ld	[%o0],%f3
+/* 0x01bc	 143 (26 27) */		add	%o7,2,%o7
+/* 0x01c0	     (26 27) */		add	%o5,32,%o5
+/* 0x01c4	 140 (27 29) */		ld	[%o0],%g1
+/* 0x01c8	 143 (27 27) */		cmp	%o7,%o1
+/* 0x01cc	     (27 28) */		add	%o4,16,%o4
+/* 0x01d0	 141 ( 0  0) */		fmovs	%f0,%f2
+/* 0x01d4	     (28 31) */		fsubd	%f2,%f0,%f2
+/* 0x01d8	     (29 30) */		std	%f2,[%o4-16]
+/* 0x01dc	 142 (29 30) */		and	%g1,%o3,%g2
+/* 0x01e0	     (30 31) */		st	%g2,[%sp+96]
+/* 0x01e4	     (37 39) */		ld	[%sp+96],%f3
+/* 0x01e8	     ( 0  0) */		fmovs	%f0,%f2
+/* 0x01ec	     (39 42) */		fsubd	%f2,%f0,%f2
+/* 0x01f0	 143 (40 41) */		srl	%g1,16,%g1
+/* 0x01f4	 142 (40 41) */		std	%f2,[%o5-32]
+/* 0x01f8	 143 (41 42) */		st	%g1,[%sp+92]
+/* 0x01fc	     (48 50) */		ld	[%sp+92],%f3
+/* 0x0200	     ( 0  0) */		fmovs	%f0,%f2
+/* 0x0204	     (50 53) */		fsubd	%f2,%f0,%f2
+/* 0x0208	     (51 52) */		std	%f2,[%o5-24]
+/* 0x020c	     (51 52) */		add	%o0,4,%o0
+/* 0x0210	 141 (52 54) */		ld	[%o0],%f3
+/* 0x0214	 140 (53 55) */		ld	[%o0],%g1
+/* 0x0218	 141 ( 0  0) */		fmovs	%f0,%f2
+/* 0x021c	     (54 57) */		fsubd	%f2,%f0,%f2
+/* 0x0220	     (55 56) */		std	%f2,[%o4-8]
+/* 0x0224	 142 (55 56) */		and	%g1,%o3,%g2
+/* 0x0228	     (56 57) */		st	%g2,[%sp+96]
+/* 0x022c	     (63 65) */		ld	[%sp+96],%f3
+/* 0x0230	     ( 0  0) */		fmovs	%f0,%f2
+/* 0x0234	     (65 68) */		fsubd	%f2,%f0,%f2
+/* 0x0238	 143 (66 67) */		srl	%g1,16,%g1
+/* 0x023c	 142 (66 67) */		std	%f2,[%o5-16]
+/* 0x0240	 143 (67 68) */		st	%g1,[%sp+92]
+/* 0x0244	     (74 76) */		ld	[%sp+92],%f3
+/* 0x0248	     ( 0  0) */		fmovs	%f0,%f2
+/* 0x024c	     (76 79) */		fsubd	%f2,%f0,%f2
+/* 0x0250	     (77 78) */		std	%f2,[%o5-8]
+/* 0x0254	     (77 78) */		bl,pt	%icc,.L900000509	! tprob=0.50
+/* 0x0258	     (77 78) */		add	%o0,4,%o0
+                                   .L900000512:		/* frequency 8.0 confidence 0.0 */
+/* 0x025c	 143 ( 0  1) */		cmp	%o7,%i4
+/* 0x0260	     ( 0  1) */		bge,pn	%icc,.L77000164	! tprob=0.14
+/* 0x0264	     ( 0  1) */		nop
+                                   .L77000161:		/* frequency 0.7 confidence 0.0 */
+/* 0x0268	 141 ( 0  3) */		ld	[%o0],%f3
+                                   .L900000513:		/* frequency 6.4 confidence 0.0 */
+/* 0x026c	 141 ( 0  3) */		ldd	[%o2],%f0
+/* 0x0270	 143 ( 0  1) */		add	%o7,1,%o7
+/* 0x0274	 140 ( 1  4) */		ld	[%o0],%o1
+/* 0x0278	 143 ( 1  2) */		add	%o0,4,%o0
+/* 0x027c	     ( 1  2) */		cmp	%o7,%i4
+/* 0x0280	 141 ( 2  3) */		fmovs	%f0,%f2
+/* 0x0284	 142 ( 3  4) */		and	%o1,%o3,%g1
+/* 0x0288	 141 ( 4  7) */		fsubd	%f2,%f0,%f2
+/* 0x028c	     ( 4  5) */		std	%f2,[%o4]
+/* 0x0290	 143 ( 4  5) */		srl	%o1,16,%o1
+/* 0x0294	 142 ( 5  6) */		st	%g1,[%sp+96]
+/* 0x0298	 143 ( 5  6) */		add	%o4,8,%o4
+/* 0x029c	 142 ( 8  9) */		fmovs	%f0,%f2
+/* 0x02a0	     ( 9 12) */		ld	[%sp+96],%f3
+/* 0x02a4	     (11 14) */		fsubd	%f2,%f0,%f2
+/* 0x02a8	     (11 12) */		std	%f2,[%o5]
+/* 0x02ac	 143 (12 13) */		st	%o1,[%sp+92]
+/* 0x02b0	     (15 16) */		fmovs	%f0,%f2
+/* 0x02b4	     (16 19) */		ld	[%sp+92],%f3
+/* 0x02b8	     (18 21) */		fsubd	%f2,%f0,%f0
+/* 0x02bc	     (18 19) */		std	%f0,[%o5+8]
+/* 0x02c0	     (18 19) */		add	%o5,16,%o5
+/* 0x02c4	     (18 19) */		bl,a,pt	%icc,.L900000513	! tprob=0.86
+/* 0x02c8	     (19 22) */		ld	[%o0],%f3
+                                   .L77000164:		/* frequency 1.0 confidence 0.0 */
+/* 0x02cc	     ( 0  2) */		ret	! Result = 
+/* 0x02d0	     ( 2  3) */		restore	%g0,%g0,%g0
+/* 0x02d4	   0 ( 0  0) */		.type	conv_i32_to_d32_and_d16,2
+/* 0x02d4	     ( 0  0) */		.size	conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
+
+	.section	".text",#alloc,#execinstr
+/* 000000	   0 ( 0  0) */		.align	4
+!
+! SUBROUTINE adjust_montf_result
+! In: %o0 = i32, %o1 = nint, %o2 = len.  Leaf routine (returns with retl).  Subtracts nint[0..len-1] from i32[0..len-1] in place when i32[len] != 0, when all len words are equal, or when i32 is larger at the highest differing word.
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION	(ISSUE TIME)	(COMPLETION TIME)
+
+                                   	.global adjust_montf_result
+                                   adjust_montf_result:		/* frequency 1.0 confidence 0.0 */
+
+!  144		                    !   }
+!  145		                    !}
+!  148		                    !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
+!  149		                    !{
+!  150		                    !long long acc;
+!  151		                    !int i;
+!  153		                    ! if(i32[len]>0) i=-1;
+
+/* 000000	 153 ( 0  1) */		sll	%o2,2,%g1	! %g1 = len*4 (byte offset of word len)
+/* 0x0004	     ( 0  1) */		or	%g0,-1,%g3	! %g3 = i = -1
+/* 0x0008	     ( 1  4) */		ld	[%o0+%g1],%g1	! %g1 = i32[len]
+/* 0x000c	     ( 3  4) */		cmp	%g1,0
+/* 0x0010	     ( 3  4) */		bleu,pn	%icc,.L77000175	! tprob=0.50
+/* 0x0014	     ( 3  4) */		or	%g0,%o1,%o3	! delay slot (always executed): %o3 = nint
+/* 0x0018	     ( 4  5) */		ba	.L900000611	! tprob=1.00
+/* 0x001c	     ( 4  5) */		cmp	%g3,0	! delay slot: flags for i (-1) consumed at .L900000611
+                                   .L77000175:		/* frequency 0.8 confidence 0.0 */
+
+!  154		                    ! else
+!  155		                    !   {
+!  156		                    !     for(i=len-1; i>=0; i--)
+! The scan starts at the most significant word (len-1) and moves down to word 0, so the index and both pointers are decremented.
+/* 0x0020	 156 ( 0  1) */		subcc	%o2,1,%g3	! i = len-1
+/* 0x0024	     ( 0  1) */		bneg,pt	%icc,.L900000611	! tprob=0.60
+/* 0x0028	     ( 1  2) */		cmp	%g3,0
+/* 0x002c	     ( 1  2) */		sll	%g3,2,%g1	! %g1 = i*4
+/* 0x0030	     ( 2  3) */		add	%o0,%g1,%g2	! %g2 = &i32[i]
+/* 0x0034	     ( 2  3) */		add	%o1,%g1,%g1	! %g1 = &nint[i]
+
+!  157		                    !       {
+!  158		                    !	 if(i32[i]!=nint[i]) break;
+
+/* 0x0038	 158 ( 3  6) */		ld	[%g1],%g5	! %g5 = nint[i]
+                                   .L900000610:		/* frequency 5.3 confidence 0.0 */
+/* 0x003c	 158 ( 0  3) */		ld	[%g2],%o5	! %o5 = i32[i]
+/* 0x0040	     ( 0  1) */		sub	%g1,4,%g1	! step nint pointer down one word
+/* 0x0044	     ( 0  1) */		sub	%g2,4,%g2	! step i32 pointer down one word
+/* 0x0048	     ( 2  3) */		cmp	%o5,%g5
+/* 0x004c	     ( 2  3) */		bne,pn	%icc,.L77000182	! tprob=0.16
+/* 0x0050	     ( 2  3) */		nop
+/* 0x0054	     ( 3  4) */		subcc	%g3,1,%g3	! i--; N flag set once i reaches -1
+/* 0x0058	     ( 3  4) */		bpos,a,pt	%icc,.L900000610	! tprob=0.84
+/* 0x005c	     ( 3  6) */		ld	[%g1],%g5	! annulled delay slot: next nint word is loaded only when looping
+                                   .L77000182:		/* frequency 1.0 confidence 0.0 */
+
+!  159		                    !       }
+!  160		                    !   }
+!  161		                    ! if((i<0)||(i32[i]>nint[i]))
+
+/* 0x0060	 161 ( 0  1) */		cmp	%g3,0
+                                   .L900000611:		/* frequency 1.0 confidence 0.0 */
+/* 0x0064	 161 ( 0  1) */		bl,pn	%icc,.L77000198	! tprob=0.50
+/* 0x0068	     ( 0  1) */		sll	%g3,2,%g2	! %g2 = i*4
+/* 0x006c	     ( 1  4) */		ld	[%o1+%g2],%g1	! %g1 = nint[i]
+/* 0x0070	     ( 2  5) */		ld	[%o0+%g2],%g2	! %g2 = i32[i]
+/* 0x0074	     ( 4  5) */		cmp	%g2,%g1
+/* 0x0078	     ( 4  5) */		bleu,pt	%icc,.L77000191	! tprob=0.56
+/* 0x007c	     ( 4  5) */		nop
+                                   .L77000198:		/* frequency 0.8 confidence 0.0 */
+
+!  162		                    !   {
+!  163		                    !     acc=0;
+!  164		                    !     for(i=0;i<len;i++)
+
+/* 0x0080	 164 ( 0  1) */		cmp	%o2,0
+/* 0x0084	     ( 0  1) */		ble,pt	%icc,.L77000191	! tprob=0.60
+/* 0x0088	     ( 0  1) */		nop
+/* 0x008c	 161 ( 1  2) */		or	%g0,-1,%g2
+/* 0x0090	     ( 1  2) */		sub	%o2,1,%g4	! %g4 = len-1 (loop bound)
+/* 0x0094	     ( 2  3) */		srl	%g2,0,%g3	! %g3 = 0xffffffff mask
+/* 0x0098	 163 ( 2  3) */		or	%g0,0,%g5	! %g5 = acc = 0
+/* 0x009c	 164 ( 3  4) */		or	%g0,0,%o5	! %o5 = i = 0
+/* 0x00a0	 161 ( 3  4) */		or	%g0,%o0,%o4	! %o4 = i32
+/* 0x00a4	     ( 4  5) */		cmp	%o2,3
+/* 0x00a8	     ( 4  5) */		add	%o1,4,%g2	! %g2 = &nint[1]
+/* 0x00ac	 164 ( 4  5) */		bl,pn	%icc,.L77000199	! tprob=0.40
+/* 0x00b0	     ( 5  6) */		add	%o0,8,%g1	! delay slot: %g1 = &i32[2]
+
+!  165		                    !       {
+!  166		                    !	 acc=acc+(unsigned long long)(i32[i])-(unsigned long long)(nint[i]);
+
+/* 0x00b4	 166 ( 5  8) */		ld	[%o0],%o2	! len >= 3 path: %o2 = i32[0]
+/* 0x00b8	   0 ( 5  6) */		or	%g0,%g2,%o3	! %o3 = &nint[1]
+/* 0x00bc	 166 ( 6  9) */		ld	[%o1],%o1	! %o1 = nint[0]
+/* 0x00c0	   0 ( 6  7) */		or	%g0,%g1,%o4	! %o4 = &i32[2]
+
+!  167		                    !	 i32[i]=acc&0xffffffff;
+!  168		                    !	 acc=acc>>32;
+
+/* 0x00c4	 168 ( 6  7) */		or	%g0,2,%o5	! loop counter starts at 2
+/* 0x00c8	 166 ( 7 10) */		ld	[%o0+4],%g1	! %g1 = i32[1], preloaded for the loop
+/* 0x00cc	 164 ( 8  9) */		sub	%o2,%o1,%o2	! i32[0] - nint[0]
+/* 0x00d0	     ( 9 10) */		or	%g0,%o2,%g5	! acc = difference
+/* 0x00d4	 167 ( 9 10) */		and	%o2,%g3,%o2	! keep low 32 bits
+/* 0x00d8	     ( 9 10) */		st	%o2,[%o0]	! i32[0] = low word
+/* 0x00dc	 168 (10 11) */		srax	%g5,32,%g5	! acc >>= 32 (arithmetic, carries the borrow)
+                                   .L900000605:		/* frequency 64.0 confidence 0.0 */
+/* 0x00e0	 166 (12 20) */		ld	[%o3],%o2	! %o2 = next nint word
+/* 0x00e4	 168 (12 13) */		add	%o5,1,%o5
+/* 0x00e8	     (12 13) */		add	%o3,4,%o3
+/* 0x00ec	     (13 13) */		cmp	%o5,%g4
+/* 0x00f0	     (13 14) */		add	%o4,4,%o4
+/* 0x00f4	 164 (14 14) */		sub	%g1,%o2,%g1	! preloaded i32 word - nint word
+/* 0x00f8	     (15 15) */		add	%g1,%g5,%g5	! acc += difference
+/* 0x00fc	 167 (16 17) */		and	%g5,%g3,%o2	! low 32 bits of acc
+/* 0x0100	 166 (16 24) */		ld	[%o4-4],%g1	! preload the following i32 word
+/* 0x0104	 167 (17 18) */		st	%o2,[%o4-8]	! store result word
+/* 0x0108	 168 (17 18) */		ble,pt	%icc,.L900000605	! tprob=0.50
+/* 0x010c	     (17 18) */		srax	%g5,32,%g5	! delay slot: acc >>= 32
+                                   .L900000608:		/* frequency 8.0 confidence 0.0 */
+/* 0x0110	 166 ( 0  3) */		ld	[%o3],%g2	! final word: %g2 = last nint word
+/* 0x0114	 164 ( 2  3) */		sub	%g1,%g2,%g1
+/* 0x0118	     ( 3  4) */		add	%g1,%g5,%g1
+/* 0x011c	 167 ( 4  5) */		and	%g1,%g3,%g2
+/* 0x0120	     ( 5  7) */		retl	! Result = 
+/* 0x0124	     ( 6  7) */		st	%g2,[%o4-4]	! delay slot: store last result word
+                                   .L77000199:		/* frequency 0.6 confidence 0.0 */
+/* 0x0128	 166 ( 0  3) */		ld	[%o4],%g1	! len < 3 path: %g1 = i32[i]
+                                   .L900000609:		/* frequency 5.3 confidence 0.0 */
+/* 0x012c	 166 ( 0  3) */		ld	[%o3],%g2	! %g2 = nint[i]
+/* 0x0130	     ( 0  1) */		add	%g5,%g1,%g1	! acc + i32[i]
+/* 0x0134	 168 ( 0  1) */		add	%o5,1,%o5	! i++
+/* 0x0138	     ( 1  2) */		add	%o3,4,%o3
+/* 0x013c	     ( 1  2) */		cmp	%o5,%g4	! compare i with len-1
+/* 0x0140	 166 ( 2  3) */		sub	%g1,%g2,%g1	! ... - nint[i]
+/* 0x0144	 167 ( 3  4) */		and	%g1,%g3,%g2	! low 32 bits
+/* 0x0148	     ( 3  4) */		st	%g2,[%o4]	! i32[i] = low word
+/* 0x014c	 168 ( 3  4) */		add	%o4,4,%o4
+/* 0x0150	     ( 4  5) */		srax	%g1,32,%g5	! acc >>= 32
+/* 0x0154	     ( 4  5) */		ble,a,pt	%icc,.L900000609	! tprob=0.84
+/* 0x0158	     ( 4  7) */		ld	[%o4],%g1	! annulled delay slot: load next i32 word only when looping
+                                   .L77000191:		/* frequency 1.0 confidence 0.0 */
+/* 0x015c	     ( 0  2) */		retl	! Result = 
+/* 0x0160	     ( 1  2) */		nop
+/* 0x0164	   0 ( 0  0) */		.type	adjust_montf_result,2
+/* 0x0164	     ( 0  0) */		.size	adjust_montf_result,(.-adjust_montf_result)
+
+	.section	".text",#alloc,#execinstr
+/* 000000	   0 ( 0  0) */		.align	32
+!
+! SUBROUTINE mont_mulf_noconv
+!
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION	(ISSUE TIME)	(COMPLETION TIME)
+
+                                   	.global mont_mulf_noconv
+                                   mont_mulf_noconv:		/* frequency 1.0 confidence 0.0 */
+/* 000000	     ( 0  1) */		save	%sp,-144,%sp
+/* 0x0004	     ( 1  2) */		st	%i0,[%fp+68]
+
+!  169		                    !       }
+!  170		                    !   }
+!  171		                    !}
+!  175		                    !void cleanup(double *dt, int from, int tlen);
+!  177		                    !/*
+!  178		                    !** the lengths of the input arrays should be at least the following:
+!  179		                    !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
+!  180		                    !** all of them should be different from one another
+!  181		                    !**
+!  182		                    !*/
+!  183		                    !void mont_mulf_noconv(unsigned int *result,
+!  184		                    !		     double *dm1, double *dm2, double *dt,
+!  185		                    !		     double *dn, unsigned int *nint,
+!  186		                    !		     int nlen, double dn0)
+!  187		                    !{
+!  188		                    ! int i, j, jj;
+!  189		                    ! int tmp;
+!  190		                    ! double digit, m2j, nextm2j, a, b;
+!  191		                    ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
+!  193		                    ! pdm1=&(dm1[0]);
+!  194		                    ! pdm2=&(dm2[0]);
+!  195		                    ! pdn=&(dn[0]);
+!  196		                    ! pdm2[2*nlen]=Zero;
+
+/* 0x0008	 196 ( 1  2) */		sethi	%hi(Zero),%g2
+/* 0x000c	 187 ( 1  2) */		or	%g0,%i2,%o1
+/* 0x0010	     ( 2  3) */		st	%i5,[%fp+88]
+/* 0x0014	     ( 2  3) */		or	%g0,%i3,%o2
+/* 0x0018	 196 ( 2  3) */		add	%g2,%lo(Zero),%g4
+/* 0x001c	     ( 3  6) */		ldd	[%g2+%lo(Zero)],%f2
+/* 0x0020	 187 ( 3  4) */		or	%g0,%o2,%g5
+/* 0x0024	 196 ( 3  4) */		or	%g0,%o1,%i0
+/* 0x0028	 187 ( 4  5) */		or	%g0,%i4,%i2
+
+!  198		                    ! if (nlen!=16)
+!  199		                    !   {
+!  200		                    !     for(i=0;i<4*nlen+2;i++) dt[i]=Zero;
+!  202		                    !     a=dt[0]=pdm1[0]*pdm2[0];
+!  203		                    !     digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+!  205		                    !     pdtj=&(dt[0]);
+!  206		                    !     for(j=jj=0;j<2*nlen;j++,jj++,pdtj++)
+!  207		                    !       {
+!  208		                    !	 m2j=pdm2[j];
+!  209		                    !	 a=pdtj[0]+pdn[0]*digit;
+!  210		                    !	 b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16;
+!  211		                    !	 pdtj[1]=b;
+!  213		                    !#pragma pipeloop(0)
+!  214		                    !	 for(i=1;i<nlen;i++)
+!  215		                    !	   {
+!  216		                    !	     pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+!  217		                    !	   }
+!  218		                    ! 	 if((jj==30)) {cleanup(dt,j/2+1,2*nlen+1); jj=0;}
+!  219		                    !	 
+!  220		                    !	 digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+!  221		                    !       }
+!  222		                    !   }
+!  223		                    ! else
+!  224		                    !   {
+!  225		                    !     a=dt[0]=pdm1[0]*pdm2[0];
+!  227		                    !     dt[65]=     dt[64]=     dt[63]=     dt[62]=     dt[61]=     dt[60]=
+!  228		                    !     dt[59]=     dt[58]=     dt[57]=     dt[56]=     dt[55]=     dt[54]=
+!  229		                    !     dt[53]=     dt[52]=     dt[51]=     dt[50]=     dt[49]=     dt[48]=
+!  230		                    !     dt[47]=     dt[46]=     dt[45]=     dt[44]=     dt[43]=     dt[42]=
+!  231		                    !     dt[41]=     dt[40]=     dt[39]=     dt[38]=     dt[37]=     dt[36]=
+!  232		                    !     dt[35]=     dt[34]=     dt[33]=     dt[32]=     dt[31]=     dt[30]=
+!  233		                    !     dt[29]=     dt[28]=     dt[27]=     dt[26]=     dt[25]=     dt[24]=
+!  234		                    !     dt[23]=     dt[22]=     dt[21]=     dt[20]=     dt[19]=     dt[18]=
+!  235		                    !     dt[17]=     dt[16]=     dt[15]=     dt[14]=     dt[13]=     dt[12]=
+!  236		                    !     dt[11]=     dt[10]=     dt[ 9]=     dt[ 8]=     dt[ 7]=     dt[ 6]=
+!  237		                    !     dt[ 5]=     dt[ 4]=     dt[ 3]=     dt[ 2]=     dt[ 1]=Zero;
+!  239		                    !     pdn_0=pdn[0];
+!  240		                    !     pdm1_0=pdm1[0];
+!  242		                    !     digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+!  243		                    !     pdtj=&(dt[0]);
+!  245		                    !     for(j=0;j<32;j++,pdtj++)
+!  246		                    !       {
+!  248		                    !	 m2j=pdm2[j];
+!  249		                    !	 a=pdtj[0]+pdn_0*digit;
+!  250		                    !	 b=pdtj[1]+pdm1_0*pdm2[j+1]+a*TwoToMinus16;
+!  251		                    !	 pdtj[1]=b;
+!  253		                    !	 /**** this loop will be fully unrolled:
+!  254		                    !	 for(i=1;i<16;i++)
+!  255		                    !	   {
+!  256		                    !	     pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+!  257		                    !	   }
+!  258		                    !	 *************************************/
+!  259		                    !	     pdtj[2]+=pdm1[1]*m2j+pdn[1]*digit;
+!  260		                    !	     pdtj[4]+=pdm1[2]*m2j+pdn[2]*digit;
+!  261		                    !	     pdtj[6]+=pdm1[3]*m2j+pdn[3]*digit;
+!  262		                    !	     pdtj[8]+=pdm1[4]*m2j+pdn[4]*digit;
+!  263		                    !	     pdtj[10]+=pdm1[5]*m2j+pdn[5]*digit;
+!  264		                    !	     pdtj[12]+=pdm1[6]*m2j+pdn[6]*digit;
+!  265		                    !	     pdtj[14]+=pdm1[7]*m2j+pdn[7]*digit;
+!  266		                    !	     pdtj[16]+=pdm1[8]*m2j+pdn[8]*digit;
+!  267		                    !	     pdtj[18]+=pdm1[9]*m2j+pdn[9]*digit;
+!  268		                    !	     pdtj[20]+=pdm1[10]*m2j+pdn[10]*digit;
+!  269		                    !	     pdtj[22]+=pdm1[11]*m2j+pdn[11]*digit;
+!  270		                    !	     pdtj[24]+=pdm1[12]*m2j+pdn[12]*digit;
+!  271		                    !	     pdtj[26]+=pdm1[13]*m2j+pdn[13]*digit;
+!  272		                    !	     pdtj[28]+=pdm1[14]*m2j+pdn[14]*digit;
+!  273		                    !	     pdtj[30]+=pdm1[15]*m2j+pdn[15]*digit;
+!  274		                    !	 /* no need for cleenup, cannot overflow */
+!  275		                    !	 digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+!  276		                    !       }
+!  277		                    !   }
+!  279		                    ! conv_d16_to_i32(result,dt+2*nlen,(long long *)dt,nlen+1);
+!  281		                    ! adjust_montf_result(result,nint,nlen); 
+
+/* 0x002c	 281 ( 4  5) */		or	%g0,1,%o4
+/* 0x0030	 187 ( 6  9) */		ldd	[%fp+96],%f0
+/* 0x0034	 196 ( 7 10) */		ld	[%fp+92],%o0
+/* 0x0038	 187 ( 8  9) */		fmovd	%f0,%f16
+/* 0x003c	 196 ( 9 10) */		sll	%o0,4,%g2
+/* 0x0040	     ( 9 10) */		or	%g0,%o0,%g1
+/* 0x0044	 198 (10 11) */		cmp	%o0,16
+/* 0x0048	     (10 11) */		be,pn	%icc,.L77000289	! tprob=0.50
+/* 0x004c	     (10 11) */		std	%f2,[%o1+%g2]
+/* 0x0050	 200 (11 12) */		sll	%o0,2,%g2
+/* 0x0054	     (11 14) */		ldd	[%g4],%f2
+/* 0x0058	     (12 13) */		add	%g2,2,%o1
+/* 0x005c	     (12 13) */		add	%g2,1,%o3
+/* 0x0060	 196 (13 14) */		sll	%o0,1,%o7
+/* 0x0064	 200 (13 14) */		cmp	%o1,0
+/* 0x0068	     (13 14) */		ble,a,pt	%icc,.L900000755	! tprob=0.55
+/* 0x006c	     (14 17) */		ldd	[%i1],%f0
+/* 0x0070	     (14 15) */		cmp	%o1,3
+/* 0x0074	 281 (14 15) */		or	%g0,1,%o1
+/* 0x0078	     (14 15) */		bl,pn	%icc,.L77000279	! tprob=0.40
+/* 0x007c	     (15 16) */		add	%o2,8,%o0
+/* 0x0080	     (15 16) */		std	%f2,[%g5]
+/* 0x0084	   0 (16 17) */		or	%g0,%o0,%o2
+                                   .L900000726:		/* frequency 64.0 confidence 0.0 */
+/* 0x0088	     ( 3  5) */		ldd	[%g4],%f0
+/* 0x008c	     ( 3  4) */		add	%o4,1,%o4
+/* 0x0090	     ( 3  4) */		add	%o2,8,%o2
+/* 0x0094	     ( 4  4) */		cmp	%o4,%o3
+/* 0x0098	     ( 5  6) */		ble,pt	%icc,.L900000726	! tprob=0.50
+/* 0x009c	     ( 5  6) */		std	%f0,[%o2-8]
+                                   .L900000729:		/* frequency 8.0 confidence 0.0 */
+/* 0x00a0	     ( 0  1) */		ba	.L900000755	! tprob=1.00
+/* 0x00a4	     ( 0  3) */		ldd	[%i1],%f0
+                                   .L77000279:		/* frequency 0.6 confidence 0.0 */
+/* 0x00a8	     ( 0  1) */		std	%f2,[%o2]
+                                   .L900000754:		/* frequency 5.3 confidence 0.0 */
+/* 0x00ac	     ( 0  3) */		ldd	[%g4],%f2
+/* 0x00b0	     ( 0  1) */		cmp	%o1,%o3
+/* 0x00b4	     ( 0  1) */		add	%o2,8,%o2
+/* 0x00b8	     ( 1  2) */		add	%o1,1,%o1
+/* 0x00bc	     ( 1  2) */		ble,a,pt	%icc,.L900000754	! tprob=0.87
+/* 0x00c0	     ( 3  4) */		std	%f2,[%o2]
+                                   .L77000284:		/* frequency 0.8 confidence 0.0 */
+/* 0x00c4	 202 ( 0  3) */		ldd	[%i1],%f0
+                                   .L900000755:		/* frequency 0.8 confidence 0.0 */
+/* 0x00c8	 202 ( 0  3) */		ldd	[%i0],%f2
+/* 0x00cc	     ( 0  1) */		add	%o7,1,%o2
+/* 0x00d0	 206 ( 0  1) */		cmp	%o7,0
+/* 0x00d4	     ( 1  2) */		sll	%o2,1,%o0
+/* 0x00d8	     ( 1  2) */		sub	%o7,1,%o1
+/* 0x00dc	 202 ( 2  5) */		fmuld	%f0,%f2,%f0
+/* 0x00e0	     ( 2  3) */		std	%f0,[%g5]
+/* 0x00e4	     ( 2  3) */		sub	%g1,1,%o7
+/* 0x00e8	     ( 3  6) */		ldd	[%g4],%f6
+/* 0x00ec	   0 ( 3  4) */		or	%g0,%o7,%g3
+/* 0x00f0	     ( 3  4) */		or	%g0,0,%l0
+/* 0x00f4	     ( 4  7) */		ldd	[%g4-8],%f2
+/* 0x00f8	     ( 4  5) */		or	%g0,0,%i5
+/* 0x00fc	     ( 4  5) */		or	%g0,%o1,%o5
+/* 0x0100	     ( 5  8) */		fdtox	%f0,%f0
+/* 0x0104	     ( 5  8) */		ldd	[%g4-16],%f4
+/* 0x0108	     ( 5  6) */		or	%g0,%o0,%o3
+/* 0x010c	 210 ( 6  7) */		add	%i0,8,%o4
+/* 0x0110	     ( 6  7) */		or	%g0,0,%i4
+/* 0x0114	     ( 9 10) */		fmovs	%f6,%f0
+/* 0x0118	     (11 14) */		fxtod	%f0,%f0
+/* 0x011c	 203 (14 17) */		fmuld	%f0,%f16,%f0
+/* 0x0120	     (17 20) */		fmuld	%f0,%f2,%f2
+/* 0x0124	     (20 23) */		fdtox	%f2,%f2
+/* 0x0128	     (23 26) */		fxtod	%f2,%f2
+/* 0x012c	     (26 29) */		fmuld	%f2,%f4,%f2
+/* 0x0130	     (29 32) */		fsubd	%f0,%f2,%f22
+/* 0x0134	 206 (29 30) */		ble,pt	%icc,.L900000748	! tprob=0.60
+/* 0x0138	     (29 30) */		sll	%g1,4,%g2
+/* 0x013c	 210 (30 33) */		ldd	[%i2],%f0
+                                   .L900000749:		/* frequency 5.3 confidence 0.0 */
+/* 0x0140	 210 ( 0  3) */		fmuld	%f0,%f22,%f8
+/* 0x0144	     ( 0  3) */		ldd	[%i1],%f0
+/* 0x0148	 214 ( 0  1) */		cmp	%g1,1
+/* 0x014c	 210 ( 1  4) */		ldd	[%o4+%i4],%f6
+/* 0x0150	     ( 1  2) */		add	%i1,8,%o0
+/* 0x0154	 214 ( 1  2) */		or	%g0,1,%o1
+/* 0x0158	 210 ( 2  5) */		ldd	[%i3],%f2
+/* 0x015c	     ( 2  3) */		add	%i3,16,%l1
+/* 0x0160	     ( 3  6) */		fmuld	%f0,%f6,%f6
+/* 0x0164	     ( 3  6) */		ldd	[%g4-8],%f4
+/* 0x0168	     ( 4  7) */		faddd	%f2,%f8,%f2
+/* 0x016c	     ( 4  7) */		ldd	[%i3+8],%f0
+/* 0x0170	 208 ( 5  8) */		ldd	[%i0+%i4],%f20
+/* 0x0174	 210 ( 6  9) */		faddd	%f0,%f6,%f0
+/* 0x0178	     ( 7 10) */		fmuld	%f2,%f4,%f2
+/* 0x017c	     (10 13) */		faddd	%f0,%f2,%f18
+/* 0x0180	 211 (10 11) */		std	%f18,[%i3+8]
+/* 0x0184	 214 (10 11) */		ble,pt	%icc,.L900000753	! tprob=0.54
+/* 0x0188	     (11 12) */		srl	%i5,31,%g2
+/* 0x018c	     (11 12) */		cmp	%g3,7
+/* 0x0190	 210 (12 13) */		add	%i2,8,%g2
+/* 0x0194	 214 (12 13) */		bl,pn	%icc,.L77000281	! tprob=0.36
+/* 0x0198	     (13 14) */		add	%g2,24,%o2
+/* 0x019c	 216 (13 16) */		ldd	[%o0+16],%f14
+/* 0x01a0	     (13 14) */		add	%i3,48,%l1
+/* 0x01a4	     (14 17) */		ldd	[%o0+24],%f12
+/* 0x01a8	   0 (14 15) */		or	%g0,%o2,%g2
+/* 0x01ac	 214 (14 15) */		sub	%g1,3,%o2
+/* 0x01b0	 216 (15 18) */		ldd	[%o0],%f2
+/* 0x01b4	     (15 16) */		or	%g0,5,%o1
+/* 0x01b8	     (16 19) */		ldd	[%g2-24],%f0
+/* 0x01bc	     (17 20) */		ldd	[%o0+8],%f6
+/* 0x01c0	     (17 20) */		fmuld	%f2,%f20,%f2
+/* 0x01c4	     (17 18) */		add	%o0,32,%o0
+/* 0x01c8	     (18 21) */		ldd	[%g2-16],%f8
+/* 0x01cc	     (18 21) */		fmuld	%f0,%f22,%f4
+/* 0x01d0	     (19 22) */		ldd	[%i3+16],%f0
+/* 0x01d4	     (19 22) */		fmuld	%f6,%f20,%f10
+/* 0x01d8	     (20 23) */		ldd	[%g2-8],%f6
+/* 0x01dc	     (21 24) */		faddd	%f2,%f4,%f4
+/* 0x01e0	     (21 24) */		ldd	[%i3+32],%f2
+                                   .L900000738:		/* frequency 512.0 confidence 0.0 */
+/* 0x01e4	 216 (16 24) */		ldd	[%g2],%f24
+/* 0x01e8	     (16 17) */		add	%o1,3,%o1
+/* 0x01ec	     (16 17) */		add	%g2,24,%g2
+/* 0x01f0	     (16 19) */		fmuld	%f8,%f22,%f8
+/* 0x01f4	     (17 25) */		ldd	[%l1],%f28
+/* 0x01f8	     (17 17) */		cmp	%o1,%o2
+/* 0x01fc	     (17 18) */		add	%o0,24,%o0
+/* 0x0200	     (18 26) */		ldd	[%o0-24],%f26
+/* 0x0204	     (18 21) */		faddd	%f0,%f4,%f0
+/* 0x0208	     (18 19) */		add	%l1,48,%l1
+/* 0x020c	     (19 22) */		faddd	%f10,%f8,%f10
+/* 0x0210	     (19 22) */		fmuld	%f14,%f20,%f4
+/* 0x0214	     (19 20) */		std	%f0,[%l1-80]
+/* 0x0218	     (20 28) */		ldd	[%g2-16],%f8
+/* 0x021c	     (20 23) */		fmuld	%f6,%f22,%f6
+/* 0x0220	     (21 29) */		ldd	[%l1-32],%f0
+/* 0x0224	     (22 30) */		ldd	[%o0-16],%f14
+/* 0x0228	     (22 25) */		faddd	%f2,%f10,%f2
+/* 0x022c	     (23 26) */		faddd	%f4,%f6,%f10
+/* 0x0230	     (23 26) */		fmuld	%f12,%f20,%f4
+/* 0x0234	     (23 24) */		std	%f2,[%l1-64]
+/* 0x0238	     (24 32) */		ldd	[%g2-8],%f6
+/* 0x023c	     (24 27) */		fmuld	%f24,%f22,%f24
+/* 0x0240	     (25 33) */		ldd	[%l1-16],%f2
+/* 0x0244	     (26 34) */		ldd	[%o0-8],%f12
+/* 0x0248	     (26 29) */		faddd	%f28,%f10,%f10
+/* 0x024c	     (27 28) */		std	%f10,[%l1-48]
+/* 0x0250	     (27 30) */		fmuld	%f26,%f20,%f10
+/* 0x0254	     (27 28) */		ble,pt	%icc,.L900000738	! tprob=0.50
+/* 0x0258	     (27 30) */		faddd	%f4,%f24,%f4
+                                   .L900000741:		/* frequency 64.0 confidence 0.0 */
+/* 0x025c	 216 ( 0  3) */		fmuld	%f8,%f22,%f28
+/* 0x0260	     ( 0  3) */		ldd	[%g2],%f24
+/* 0x0264	     ( 0  3) */		faddd	%f0,%f4,%f26
+/* 0x0268	     ( 1  4) */		fmuld	%f12,%f20,%f8
+/* 0x026c	     ( 1  2) */		add	%l1,32,%l1
+/* 0x0270	     ( 1  2) */		cmp	%o1,%g3
+/* 0x0274	     ( 2  5) */		fmuld	%f14,%f20,%f14
+/* 0x0278	     ( 2  5) */		ldd	[%l1-32],%f4
+/* 0x027c	     ( 2  3) */		add	%g2,8,%g2
+/* 0x0280	     ( 3  6) */		faddd	%f10,%f28,%f12
+/* 0x0284	     ( 3  6) */		fmuld	%f6,%f22,%f6
+/* 0x0288	     ( 3  6) */		ldd	[%l1-16],%f0
+/* 0x028c	     ( 4  7) */		fmuld	%f24,%f22,%f10
+/* 0x0290	     ( 4  5) */		std	%f26,[%l1-64]
+/* 0x0294	     ( 6  9) */		faddd	%f2,%f12,%f2
+/* 0x0298	     ( 6  7) */		std	%f2,[%l1-48]
+/* 0x029c	     ( 7 10) */		faddd	%f14,%f6,%f6
+/* 0x02a0	     ( 8 11) */		faddd	%f8,%f10,%f2
+/* 0x02a4	     (10 13) */		faddd	%f4,%f6,%f4
+/* 0x02a8	     (10 11) */		std	%f4,[%l1-32]
+/* 0x02ac	     (11 14) */		faddd	%f0,%f2,%f0
+/* 0x02b0	     (11 12) */		bg,pn	%icc,.L77000213	! tprob=0.13
+/* 0x02b4	     (11 12) */		std	%f0,[%l1-16]
+                                   .L77000281:		/* frequency 4.0 confidence 0.0 */
+/* 0x02b8	 216 ( 0  3) */		ldd	[%o0],%f0
+                                   .L900000752:		/* frequency 36.6 confidence 0.0 */
+/* 0x02bc	 216 ( 0  3) */		ldd	[%g2],%f4
+/* 0x02c0	     ( 0  3) */		fmuld	%f0,%f20,%f2
+/* 0x02c4	     ( 0  1) */		add	%o1,1,%o1
+/* 0x02c8	     ( 1  4) */		ldd	[%l1],%f0
+/* 0x02cc	     ( 1  2) */		add	%o0,8,%o0
+/* 0x02d0	     ( 1  2) */		add	%g2,8,%g2
+/* 0x02d4	     ( 2  5) */		fmuld	%f4,%f22,%f4
+/* 0x02d8	     ( 2  3) */		cmp	%o1,%g3
+/* 0x02dc	     ( 5  8) */		faddd	%f2,%f4,%f2
+/* 0x02e0	     ( 8 11) */		faddd	%f0,%f2,%f0
+/* 0x02e4	     ( 8  9) */		std	%f0,[%l1]
+/* 0x02e8	     ( 8  9) */		add	%l1,16,%l1
+/* 0x02ec	     ( 8  9) */		ble,a,pt	%icc,.L900000752	! tprob=0.87
+/* 0x02f0	     (10 13) */		ldd	[%o0],%f0
+                                   .L77000213:		/* frequency 5.3 confidence 0.0 */
+/* 0x02f4	     ( 0  1) */		srl	%i5,31,%g2
+                                   .L900000753:		/* frequency 5.3 confidence 0.0 */
+/* 0x02f8	 218 ( 0  1) */		cmp	%l0,30
+/* 0x02fc	     ( 0  1) */		bne,a,pt	%icc,.L900000751	! tprob=0.54
+/* 0x0300	     ( 0  3) */		fdtox	%f18,%f0
+/* 0x0304	     ( 1  2) */		add	%i5,%g2,%g2
+/* 0x0308	     ( 1  2) */		sub	%o3,1,%o2
+/* 0x030c	     ( 2  3) */		sra	%g2,1,%o0
+/* 0x0310	 216 ( 2  5) */		ldd	[%g4],%f0
+/* 0x0314	     ( 3  4) */		add	%o0,1,%g2
+/* 0x0318	     ( 4  5) */		sll	%g2,1,%o0
+/* 0x031c	     ( 4  5) */		fmovd	%f0,%f2
+/* 0x0320	     ( 5  6) */		sll	%g2,4,%o1
+/* 0x0324	     ( 5  6) */		cmp	%o0,%o3
+/* 0x0328	     ( 5  6) */		bge,pt	%icc,.L77000215	! tprob=0.53
+/* 0x032c	     ( 6  7) */		or	%g0,0,%l0
+/* 0x0330	 218 ( 6  7) */		add	%g5,%o1,%o1
+/* 0x0334	 216 ( 7 10) */		ldd	[%o1],%f8
+                                   .L900000750:		/* frequency 32.0 confidence 0.0 */
+/* 0x0338	     ( 0  3) */		fdtox	%f8,%f6
+/* 0x033c	     ( 0  3) */		ldd	[%g4],%f10
+/* 0x0340	     ( 0  1) */		add	%o0,2,%o0
+/* 0x0344	     ( 1  4) */		ldd	[%o1+8],%f4
+/* 0x0348	     ( 1  4) */		fdtox	%f8,%f8
+/* 0x034c	     ( 1  2) */		cmp	%o0,%o2
+/* 0x0350	     ( 5  6) */		fmovs	%f10,%f6
+/* 0x0354	     ( 7 10) */		fxtod	%f6,%f10
+/* 0x0358	     ( 8 11) */		fdtox	%f4,%f6
+/* 0x035c	     ( 9 12) */		fdtox	%f4,%f4
+/* 0x0360	     (10 13) */		faddd	%f10,%f2,%f2
+/* 0x0364	     (10 11) */		std	%f2,[%o1]
+/* 0x0368	     (12 15) */		ldd	[%g4],%f2
+/* 0x036c	     (14 15) */		fmovs	%f2,%f6
+/* 0x0370	     (16 19) */		fxtod	%f6,%f6
+/* 0x0374	     (17 20) */		fitod	%f8,%f2
+/* 0x0378	     (19 22) */		faddd	%f6,%f0,%f0
+/* 0x037c	     (19 20) */		std	%f0,[%o1+8]
+/* 0x0380	     (19 20) */		add	%o1,16,%o1
+/* 0x0384	     (20 23) */		fitod	%f4,%f0
+/* 0x0388	     (20 21) */		ble,a,pt	%icc,.L900000750	! tprob=0.87
+/* 0x038c	     (20 23) */		ldd	[%o1],%f8
+                                   .L77000233:		/* frequency 4.6 confidence 0.0 */
+/* 0x0390	     ( 0  0) */		or	%g0,0,%l0
+                                   .L77000215:		/* frequency 5.3 confidence 0.0 */
+/* 0x0394	     ( 0  3) */		fdtox	%f18,%f0
+                                   .L900000751:		/* frequency 5.3 confidence 0.0 */
+/* 0x0398	     ( 0  3) */		ldd	[%g4],%f6
+/* 0x039c	 220 ( 0  1) */		add	%i5,1,%i5
+/* 0x03a0	     ( 0  1) */		add	%i4,8,%i4
+/* 0x03a4	     ( 1  4) */		ldd	[%g4-8],%f2
+/* 0x03a8	     ( 1  2) */		add	%l0,1,%l0
+/* 0x03ac	     ( 1  2) */		add	%i3,8,%i3
+/* 0x03b0	     ( 2  3) */		fmovs	%f6,%f0
+/* 0x03b4	     ( 2  5) */		ldd	[%g4-16],%f4
+/* 0x03b8	     ( 2  3) */		cmp	%i5,%o5
+/* 0x03bc	     ( 4  7) */		fxtod	%f0,%f0
+/* 0x03c0	     ( 7 10) */		fmuld	%f0,%f16,%f0
+/* 0x03c4	     (10 13) */		fmuld	%f0,%f2,%f2
+/* 0x03c8	     (13 16) */		fdtox	%f2,%f2
+/* 0x03cc	     (16 19) */		fxtod	%f2,%f2
+/* 0x03d0	     (19 22) */		fmuld	%f2,%f4,%f2
+/* 0x03d4	     (22 25) */		fsubd	%f0,%f2,%f22
+/* 0x03d8	     (22 23) */		ble,a,pt	%icc,.L900000749	! tprob=0.89
+/* 0x03dc	     (22 25) */		ldd	[%i2],%f0
+                                   .L900000725:		/* frequency 0.7 confidence 0.0 */
+/* 0x03e0	 220 ( 0  1) */		ba	.L900000748	! tprob=1.00
+/* 0x03e4	     ( 0  1) */		sll	%g1,4,%g2
+
+	
+                                   .L77000289:		/* frequency 0.8 confidence 0.0 */
+/* 0x03e8	 225 ( 0  3) */		ldd	[%o1],%f6
+/* 0x03ec	 242 ( 0  1) */		add	%g4,-8,%g2
+/* 0x03f0	     ( 0  1) */		add	%g4,-16,%g3
+/* 0x03f4	 225 ( 1  4) */		ldd	[%i1],%f2
+/* 0x03f8	 245 ( 1  2) */		or	%g0,0,%o3
+/* 0x03fc	     ( 1  2) */		or	%g0,0,%o0
+/* 0x0400	 225 ( 3  6) */		fmuld	%f2,%f6,%f2
+/* 0x0404	     ( 3  4) */		std	%f2,[%o2]
+/* 0x0408	     ( 4  7) */		ldd	[%g4],%f6
+/* 0x040c	 237 ( 7  8) */		std	%f6,[%o2+8]
+/* 0x0410	     ( 8  9) */		std	%f6,[%o2+16]
+/* 0x0414	     ( 9 10) */		std	%f6,[%o2+24]
+/* 0x0418	     (10 11) */		std	%f6,[%o2+32]
+/* 0x041c	     (11 12) */		std	%f6,[%o2+40]
+/* 0x0420	     (12 13) */		std	%f6,[%o2+48]
+/* 0x0424	     (13 14) */		std	%f6,[%o2+56]
+/* 0x0428	     (14 15) */		std	%f6,[%o2+64]
+/* 0x042c	     (15 16) */		std	%f6,[%o2+72]
+!	prefetch	[%i4],0
+!	prefetch	[%i4+32],0
+!	prefetch	[%i4+64],0
+!	prefetch	[%i4+96],0
+!	prefetch	[%i4+120],0
+!	prefetch	[%i1],0
+!	prefetch	[%i1+32],0
+!	prefetch	[%i1+64],0
+!	prefetch	[%i1+96],0
+!	prefetch	[%i1+120],0
+/* 0x0430	     (16 17) */		std	%f6,[%o2+80]
+/* 0x0434	     (17 18) */		std	%f6,[%o2+88]
+/* 0x0438	     (18 19) */		std	%f6,[%o2+96]
+/* 0x043c	     (19 20) */		std	%f6,[%o2+104]
+/* 0x0440	     (20 21) */		std	%f6,[%o2+112]
+/* 0x0444	     (21 22) */		std	%f6,[%o2+120]
+/* 0x0448	     (22 23) */		std	%f6,[%o2+128]
+/* 0x044c	     (23 24) */		std	%f6,[%o2+136]
+/* 0x0450	     (24 25) */		std	%f6,[%o2+144]
+/* 0x0454	     (25 26) */		std	%f6,[%o2+152]
+/* 0x0458	     (26 27) */		std	%f6,[%o2+160]
+/* 0x045c	     (27 28) */		std	%f6,[%o2+168]
+/* 0x0460	     (27 30) */		fdtox	%f2,%f2
+/* 0x0464	     (28 29) */		std	%f6,[%o2+176]
+/* 0x0468	     (29 30) */		std	%f6,[%o2+184]
+/* 0x046c	     (30 31) */		std	%f6,[%o2+192]
+/* 0x0470	     (31 32) */		std	%f6,[%o2+200]
+/* 0x0474	     (32 33) */		std	%f6,[%o2+208]
+/* 0x0478	     (33 34) */		std	%f6,[%o2+216]
+/* 0x047c	     (34 35) */		std	%f6,[%o2+224]
+/* 0x0480	     (35 36) */		std	%f6,[%o2+232]
+/* 0x0484	     (36 37) */		std	%f6,[%o2+240]
+/* 0x0488	     (37 38) */		std	%f6,[%o2+248]
+/* 0x048c	     (38 39) */		std	%f6,[%o2+256]
+/* 0x0490	     (39 40) */		std	%f6,[%o2+264]
+/* 0x0494	     (40 41) */		std	%f6,[%o2+272]
+/* 0x0498	     (41 42) */		std	%f6,[%o2+280]
+/* 0x049c	     (42 43) */		std	%f6,[%o2+288]
+/* 0x04a0	     (43 44) */		std	%f6,[%o2+296]
+/* 0x04a4	     (44 45) */		std	%f6,[%o2+304]
+/* 0x04a8	     (45 46) */		std	%f6,[%o2+312]
+/* 0x04ac	     (46 47) */		std	%f6,[%o2+320]
+/* 0x04b0	     (47 48) */		std	%f6,[%o2+328]
+/* 0x04b4	     (48 49) */		std	%f6,[%o2+336]
+/* 0x04b8	     (49 50) */		std	%f6,[%o2+344]
+/* 0x04bc	     (50 51) */		std	%f6,[%o2+352]
+/* 0x04c0	     (51 52) */		std	%f6,[%o2+360]
+/* 0x04c4	     (52 53) */		std	%f6,[%o2+368]
+/* 0x04c8	     (53 54) */		std	%f6,[%o2+376]
+/* 0x04cc	     (54 55) */		std	%f6,[%o2+384]
+/* 0x04d0	     (55 56) */		std	%f6,[%o2+392]
+/* 0x04d4	     (56 57) */		std	%f6,[%o2+400]
+/* 0x04d8	     (57 58) */		std	%f6,[%o2+408]
+/* 0x04dc	     (58 59) */		std	%f6,[%o2+416]
+/* 0x04e0	     (59 60) */		std	%f6,[%o2+424]
+/* 0x04e4	     (60 61) */		std	%f6,[%o2+432]
+/* 0x04e8	     (61 62) */		std	%f6,[%o2+440]
+/* 0x04ec	     (62 63) */		std	%f6,[%o2+448]
+/* 0x04f0	     (63 64) */		std	%f6,[%o2+456]
+/* 0x04f4	     (64 65) */		std	%f6,[%o2+464]
+/* 0x04f8	     (65 66) */		std	%f6,[%o2+472]
+/* 0x04fc	     (66 67) */		std	%f6,[%o2+480]
+/* 0x0500	     (67 68) */		std	%f6,[%o2+488]
+/* 0x0504	     (68 69) */		std	%f6,[%o2+496]
+/* 0x0508	     (69 70) */		std	%f6,[%o2+504]
+/* 0x050c	     (70 71) */		std	%f6,[%o2+512]
+/* 0x0510	     (71 72) */		std	%f6,[%o2+520]
+/* 0x0514	 242 (72 75) */		ld	[%g4],%f2 ! dalign
+/* 0x0518	     (73 76) */		ld	[%g2],%f6 ! dalign
+/* 0x051c	     (74 77) */		fxtod	%f2,%f10
+/* 0x0520	     (74 77) */		ld	[%g2+4],%f7
+/* 0x0524	     (75 78) */		ld	[%g3],%f8 ! dalign
+/* 0x0528	     (76 79) */		ld	[%g3+4],%f9
+/* 0x052c	     (77 80) */		fmuld	%f10,%f0,%f0
+/* 0x0530	 239 (77 80) */		ldd	[%i4],%f4
+/* 0x0534	 240 (78 81) */		ldd	[%i1],%f2
+/* 0x0538	     (80 83) */		fmuld	%f0,%f6,%f6
+/* 0x053c	     (83 86) */		fdtox	%f6,%f6
+/* 0x0540	     (86 89) */		fxtod	%f6,%f6
+/* 0x0544	     (89 92) */		fmuld	%f6,%f8,%f6
+/* 0x0548	     (92 95) */		fsubd	%f0,%f6,%f0
+/* 0x054c	 250 (95 98) */		fmuld	%f4,%f0,%f10
+                                   .L900000747:		/* frequency 6.4 confidence 0.0 */
+
+
+	fmovd %f0,%f0
+	fmovd %f16,%f18
+	ldd [%i4],%f2
+	ldd [%o2],%f8
+	ldd [%i1],%f10
+	ldd [%g4-8],%f14
+	ldd [%g4-16],%f16
+	ldd [%o1],%f24
+
+	ldd [%i1+8],%f26
+	ldd [%i1+16],%f40
+	ldd [%i1+48],%f46
+	ldd [%i1+56],%f30
+	ldd [%i1+64],%f54
+	ldd [%i1+104],%f34
+	ldd [%i1+112],%f58
+
+	ldd [%i4+112],%f60
+	ldd [%i4+8],%f28	
+	ldd [%i4+104],%f38
+
+	nop
+	nop
+!
+	.L99999999:
+!1
+!!!
+	ldd	[%i1+24],%f32
+	fmuld	%f0,%f2,%f4
+!2
+!!!
+	ldd	[%i4+24],%f36
+	fmuld	%f26,%f24,%f20
+!3
+!!!
+	ldd	[%i1+40],%f42
+	fmuld	%f28,%f0,%f22
+!4
+!!!
+	ldd	[%i4+40],%f44
+	fmuld	%f32,%f24,%f32
+!5
+!!!
+	ldd	[%o1+8],%f6
+	faddd	%f4,%f8,%f4
+	fmuld	%f36,%f0,%f36
+!6
+!!!
+	add	%o1,8,%o1
+	ldd	[%i4+56],%f50
+	fmuld	%f42,%f24,%f42
+!7
+!!!
+	ldd	[%i1+72],%f52
+	faddd	%f20,%f22,%f20
+	fmuld	%f44,%f0,%f44
+!8
+!!!
+	ldd	[%o2+16],%f22
+	fmuld	%f10,%f6,%f12
+!9
+!!!
+	ldd	[%i4+72],%f56
+	faddd	%f32,%f36,%f32
+	fmuld	%f14,%f4,%f4
+!10
+!!!
+	ldd	[%o2+48],%f36
+	fmuld	%f30,%f24,%f48
+!11
+!!!
+	ldd	[%o2+8],%f8
+	faddd	%f20,%f22,%f20
+	fmuld	%f50,%f0,%f50	
+!12
+!!!
+	std	%f20,[%o2+16]
+	faddd	%f42,%f44,%f42
+	fmuld	%f52,%f24,%f52
+!13
+!!!
+	ldd	[%o2+80],%f44
+	faddd	%f4,%f12,%f4
+	fmuld	%f56,%f0,%f56
+!14
+!!!
+	ldd	[%i1+88],%f20
+	faddd	%f32,%f36,%f32
+!15
+!!!
+	ldd	[%i4+88],%f22
+	faddd	%f48,%f50,%f48
+!16
+!!!
+	ldd	[%o2+112],%f50
+	faddd	%f52,%f56,%f52
+!17
+!!!
+	ldd	[%o2+144],%f56
+	faddd	%f4,%f8,%f8
+	fmuld	%f20,%f24,%f20
+!18
+!!!
+	std	%f32,[%o2+48]
+	faddd	%f42,%f44,%f42
+	fmuld	%f22,%f0,%f22
+!19
+!!!
+	std	%f42,[%o2+80]
+	faddd	%f48,%f50,%f48
+	fmuld	%f34,%f24,%f32
+!20
+!!!
+	std	%f48,[%o2+112]
+	faddd	%f52,%f56,%f52
+	fmuld	%f38,%f0,%f36
+!21
+!!!
+	ldd	[%i1+120],%f42
+	fdtox	%f8,%f4
+!22
+!!!
+	std	%f52,[%o2+144]
+	faddd	%f20,%f22,%f20
+!23
+!!!
+	ldd	[%i4+120],%f44
+!24
+!!!
+	ldd	[%o2+176],%f22
+	faddd	%f32,%f36,%f32
+	fmuld	%f42,%f24,%f42
+!25
+!!!
+	ldd	[%i4+16],%f50
+	fmovs	%f17,%f4
+!26
+!!!
+	ldd	[%i1+32],%f52
+	fmuld	%f44,%f0,%f44
+!27
+!!!
+	ldd	[%i4+32],%f56
+	fmuld	%f40,%f24,%f48
+!28
+!!!
+	ldd	[%o2+208],%f36
+	faddd	%f20,%f22,%f20
+	fmuld	%f50,%f0,%f50
+!29
+!!!
+	std	%f20,[%o2+176]
+	fxtod	%f4,%f4
+	fmuld	%f52,%f24,%f52
+!30
+!!!
+	ldd	[%i4+48],%f22
+	faddd	%f42,%f44,%f42
+	fmuld	%f56,%f0,%f56
+!31
+!!!
+	ldd	[%o2+240],%f44
+	faddd	%f32,%f36,%f32
+!32
+!!!
+	std	%f32,[%o2+208]
+	faddd	%f48,%f50,%f48
+	fmuld	%f46,%f24,%f20
+!33
+!!!
+	ldd	[%o2+32],%f50
+	fmuld	%f4,%f18,%f12
+!34
+!!!
+	ldd	[%i4+64],%f36
+	faddd	%f52,%f56,%f52
+	fmuld	%f22,%f0,%f22
+!35
+!!!
+	ldd	[%o2+64],%f56
+	faddd	%f42,%f44,%f42
+!36
+!!!
+	std	%f42,[%o2+240]
+	faddd	%f48,%f50,%f48
+	fmuld	%f54,%f24,%f32
+!37
+!!!
+	std	%f48,[%o2+32]
+	fmuld	%f12,%f14,%f4
+!38
+!!!
+	ldd	[%i1+80],%f42
+	faddd	%f52,%f56,%f56	! yes, tmp52!
+	fmuld	%f36,%f0,%f36
+!39
+!!!
+	ldd	[%i4+80],%f44
+	faddd	%f20,%f22,%f20
+!40
+!!!
+	ldd	[%i1+96],%f48
+	fmuld	%f58,%f24,%f52
+!41
+!!!
+	ldd	[%i4+96],%f50
+	fdtox	%f4,%f4
+	fmuld	%f42,%f24,%f42
+!42
+!!!
+	std	%f56,[%o2+64]	! yes, tmp52!
+	faddd	%f32,%f36,%f32
+	fmuld	%f44,%f0,%f44
+!43
+!!!
+	ldd	[%o2+96],%f22
+	fmuld	%f48,%f24,%f48
+!44
+!!!
+	ldd	[%o2+128],%f36
+	fmovd	%f6,%f24
+	fmuld	%f50,%f0,%f50
+!45
+!!!
+	fxtod	%f4,%f4
+	fmuld	%f60,%f0,%f56
+!46
+!!!
+	add	%o2,8,%o2
+	faddd	%f42,%f44,%f42
+!47
+!!!
+	ldd	[%o2+160-8],%f44
+	faddd	%f20,%f22,%f20
+!48
+!!!
+	std	%f20,[%o2+96-8]
+	faddd	%f48,%f50,%f48
+!49
+!!!
+	ldd	[%o2+192-8],%f50
+	faddd	%f52,%f56,%f52
+	fmuld	%f4,%f16,%f4
+!50
+!!!
+	ldd	[%o2+224-8],%f56
+	faddd	%f32,%f36,%f32
+!51
+!!!
+	std	%f32,[%o2+128-8]
+	faddd	%f42,%f44,%f42
+!52
+	add	%o3,1,%o3
+	std	%f42,[%o2+160-8]
+	faddd	%f48,%f50,%f48
+!53
+!!!
+	cmp	%o3,31
+	std	%f48,[%o2+192-8]
+	faddd	%f52,%f56,%f52
+!54
+	std	%f52,[%o2+224-8]
+	ble,pt	%icc,.L99999999
+	fsubd	%f12,%f4,%f0
+
+
+
+!55
+	std %f8,[%o2]
+
+	
+	
+	
+	
+	
+	                                   .L77000285:		/* frequency 1.0 confidence 0.0 */
+/* 0x07a8	 279 ( 0  1) */		sll	%g1,4,%g2
+                                   .L900000748:		/* frequency 1.0 confidence 0.0 */
+/* 0x07ac	 279 ( 0  3) */		ldd	[%g5+%g2],%f0
+/* 0x07b0	     ( 0  1) */		add	%g5,%g2,%i1
+/* 0x07b4	     ( 0  1) */		or	%g0,0,%o4
+/* 0x07b8	 206 ( 1  4) */		ld	[%fp+68],%o0
+/* 0x07bc	 279 ( 1  2) */		or	%g0,0,%i0
+/* 0x07c0	     ( 1  2) */		cmp	%g1,0
+/* 0x07c4	     ( 2  5) */		fdtox	%f0,%f0
+/* 0x07c8	     ( 2  3) */		std	%f0,[%sp+120]
+/* 0x07cc	 275 ( 2  3) */		sethi	%hi(0xfc00),%o1
+/* 0x07d0	 206 ( 3  4) */		or	%g0,%o0,%o3
+/* 0x07d4	 275 ( 3  4) */		sub	%g1,1,%g4
+/* 0x07d8	 279 ( 4  7) */		ldd	[%i1+8],%f0
+/* 0x07dc	     ( 4  5) */		or	%g0,%o0,%g5
+/* 0x07e0	     ( 4  5) */		add	%o1,1023,%o1
+/* 0x07e4	     ( 6  9) */		fdtox	%f0,%f0
+/* 0x07e8	     ( 6  7) */		std	%f0,[%sp+112]
+/* 0x07ec	     (10 12) */		ldx	[%sp+112],%o5
+/* 0x07f0	     (11 13) */		ldx	[%sp+120],%o7
+/* 0x07f4	     (11 12) */		ble,pt	%icc,.L900000746	! tprob=0.56
+/* 0x07f8	     (11 12) */		sethi	%hi(0xfc00),%g2
+/* 0x07fc	 275 (12 13) */		or	%g0,-1,%g2
+/* 0x0800	 279 (12 13) */		cmp	%g1,3
+/* 0x0804	 275 (13 14) */		srl	%g2,0,%o2
+/* 0x0808	 279 (13 14) */		bl,pn	%icc,.L77000286	! tprob=0.44
+/* 0x080c	     (13 14) */		or	%g0,%i1,%g2
+/* 0x0810	     (14 17) */		ldd	[%i1+16],%f0
+/* 0x0814	     (14 15) */		and	%o5,%o1,%o0
+/* 0x0818	     (14 15) */		add	%i1,16,%g2
+/* 0x081c	     (15 16) */		sllx	%o0,16,%g3
+/* 0x0820	     (15 16) */		and	%o7,%o2,%o0
+/* 0x0824	     (16 19) */		fdtox	%f0,%f0
+/* 0x0828	     (16 17) */		std	%f0,[%sp+104]
+/* 0x082c	     (16 17) */		add	%o0,%g3,%o4
+/* 0x0830	     (17 20) */		ldd	[%i1+24],%f2
+/* 0x0834	     (17 18) */		srax	%o5,16,%o0
+/* 0x0838	     (17 18) */		add	%o3,4,%g5
+/* 0x083c	     (18 19) */		stx	%o0,[%sp+128]
+/* 0x0840	     (18 19) */		and	%o4,%o2,%o0
+/* 0x0844	     (18 19) */		or	%g0,1,%i0
+/* 0x0848	     (19 20) */		stx	%o0,[%sp+112]
+/* 0x084c	     (19 20) */		srax	%o4,32,%o0
+/* 0x0850	     (19 22) */		fdtox	%f2,%f0
+/* 0x0854	     (20 21) */		stx	%o0,[%sp+136]
+/* 0x0858	     (20 21) */		srax	%o7,32,%o4
+/* 0x085c	     (21 22) */		std	%f0,[%sp+96]
+/* 0x0860	     (22 24) */		ldx	[%sp+136],%o7
+/* 0x0864	     (23 25) */		ldx	[%sp+128],%o0
+/* 0x0868	     (25 27) */		ldx	[%sp+104],%g3
+/* 0x086c	     (25 26) */		add	%o0,%o7,%o0
+/* 0x0870	     (26 28) */		ldx	[%sp+112],%o7
+/* 0x0874	     (26 27) */		add	%o4,%o0,%o4
+/* 0x0878	     (27 29) */		ldx	[%sp+96],%o5
+/* 0x087c	     (28 29) */		st	%o7,[%o3]
+/* 0x0880	     (28 29) */		or	%g0,%g3,%o7
+                                   .L900000730:		/* frequency 64.0 confidence 0.0 */
+/* 0x0884	     (17 19) */		ldd	[%g2+16],%f0
+/* 0x0888	     (17 18) */		add	%i0,1,%i0
+/* 0x088c	     (17 18) */		add	%g5,4,%g5
+/* 0x0890	     (18 18) */		cmp	%i0,%g4
+/* 0x0894	     (18 19) */		add	%g2,16,%g2
+/* 0x0898	     (19 22) */		fdtox	%f0,%f0
+/* 0x089c	     (20 21) */		std	%f0,[%sp+104]
+/* 0x08a0	     (21 23) */		ldd	[%g2+8],%f0
+/* 0x08a4	     (23 26) */		fdtox	%f0,%f0
+/* 0x08a8	     (24 25) */		std	%f0,[%sp+96]
+/* 0x08ac	     (25 26) */		and	%o5,%o1,%g3
+/* 0x08b0	     (26 27) */		sllx	%g3,16,%g3
+/* 0x08b4	     ( 0  0) */		stx	%g3,[%sp+120]
+/* 0x08b8	     (26 27) */		and	%o7,%o2,%g3
+/* 0x08bc	     ( 0  0) */		stx	%o7,[%sp+128]
+/* 0x08c0	     ( 0  0) */		ldx	[%sp+120],%o7
+/* 0x08c4	     (27 27) */		add	%g3,%o7,%g3
+/* 0x08c8	     ( 0  0) */		ldx	[%sp+128],%o7
+/* 0x08cc	     (28 29) */		srax	%o5,16,%o5
+/* 0x08d0	     (28 28) */		add	%g3,%o4,%g3
+/* 0x08d4	     (29 30) */		srax	%g3,32,%o4
+/* 0x08d8	     ( 0  0) */		stx	%o4,[%sp+112]
+/* 0x08dc	     (30 31) */		srax	%o7,32,%o4
+/* 0x08e0	     ( 0  0) */		ldx	[%sp+112],%o7
+/* 0x08e4	     (30 31) */		add	%o5,%o7,%o7
+/* 0x08e8	     (31 33) */		ldx	[%sp+96],%o5
+/* 0x08ec	     (31 32) */		add	%o4,%o7,%o4
+/* 0x08f0	     (32 33) */		and	%g3,%o2,%g3
+/* 0x08f4	     ( 0  0) */		ldx	[%sp+104],%o7
+/* 0x08f8	     (33 34) */		ble,pt	%icc,.L900000730	! tprob=0.50
+/* 0x08fc	     (33 34) */		st	%g3,[%g5-4]
+                                   .L900000733:		/* frequency 8.0 confidence 0.0 */
+/* 0x0900	     ( 0  1) */		ba	.L900000746	! tprob=1.00
+/* 0x0904	     ( 0  1) */		sethi	%hi(0xfc00),%g2
+                                   .L77000286:		/* frequency 0.7 confidence 0.0 */
+/* 0x0908	     ( 0  3) */		ldd	[%g2+16],%f0
+                                   .L900000745:		/* frequency 6.4 confidence 0.0 */
+/* 0x090c	     ( 0  1) */		and	%o7,%o2,%o0
+/* 0x0910	     ( 0  1) */		and	%o5,%o1,%g3
+/* 0x0914	     ( 0  3) */		fdtox	%f0,%f0
+/* 0x0918	     ( 1  2) */		add	%o4,%o0,%o0
+/* 0x091c	     ( 1  2) */		std	%f0,[%sp+104]
+/* 0x0920	     ( 1  2) */		add	%i0,1,%i0
+/* 0x0924	     ( 2  3) */		sllx	%g3,16,%o4
+/* 0x0928	     ( 2  5) */		ldd	[%g2+24],%f2
+/* 0x092c	     ( 2  3) */		add	%g2,16,%g2
+/* 0x0930	     ( 3  4) */		add	%o0,%o4,%o4
+/* 0x0934	     ( 3  4) */		cmp	%i0,%g4
+/* 0x0938	     ( 4  5) */		srax	%o5,16,%o0
+/* 0x093c	     ( 4  5) */		stx	%o0,[%sp+112]
+/* 0x0940	     ( 4  5) */		and	%o4,%o2,%g3
+/* 0x0944	     ( 5  6) */		srax	%o4,32,%o5
+/* 0x0948	     ( 5  8) */		fdtox	%f2,%f0
+/* 0x094c	     ( 5  6) */		std	%f0,[%sp+96]
+/* 0x0950	     ( 6  7) */		srax	%o7,32,%o4
+/* 0x0954	     ( 6  8) */		ldx	[%sp+112],%o7
+/* 0x0958	     ( 8  9) */		add	%o7,%o5,%o7
+/* 0x095c	     ( 9 11) */		ldx	[%sp+104],%o5
+/* 0x0960	     ( 9 10) */		add	%o4,%o7,%o4
+/* 0x0964	     (10 12) */		ldx	[%sp+96],%o0
+/* 0x0968	     (11 12) */		st	%g3,[%g5]
+/* 0x096c	     (11 12) */		or	%g0,%o5,%o7
+/* 0x0970	     (11 12) */		add	%g5,4,%g5
+/* 0x0974	     (12 13) */		or	%g0,%o0,%o5
+/* 0x0978	     (12 13) */		ble,a,pt	%icc,.L900000745	! tprob=0.86
+/* 0x097c	     (12 15) */		ldd	[%g2+16],%f0
+                                   .L77000236:		/* frequency 1.0 confidence 0.0 */
+/* 0x0980	     ( 0  1) */		sethi	%hi(0xfc00),%g2
+                                   .L900000746:		/* frequency 1.0 confidence 0.0 */
+/* 0x0984	     ( 0  1) */		or	%g0,-1,%o0
+/* 0x0988	     ( 0  1) */		add	%g2,1023,%g2
+/* 0x098c	     ( 0  3) */		ld	[%fp+88],%o1
+/* 0x0990	     ( 1  2) */		srl	%o0,0,%g3
+/* 0x0994	     ( 1  2) */		and	%o5,%g2,%g2
+/* 0x0998	     ( 2  3) */		and	%o7,%g3,%g4
+/* 0x099c	 281 ( 2  3) */		or	%g0,-1,%o5
+/* 0x09a0	 275 ( 3  4) */		sllx	%g2,16,%g2
+/* 0x09a4	     ( 3  4) */		add	%o4,%g4,%g4
+/* 0x09a8	     ( 4  5) */		add	%g4,%g2,%g2
+/* 0x09ac	     ( 5  6) */		sll	%i0,2,%g4
+/* 0x09b0	     ( 5  6) */		and	%g2,%g3,%g2
+/* 0x09b4	     ( 6  7) */		st	%g2,[%o3+%g4]
+/* 0x09b8	 281 ( 6  7) */		sll	%g1,2,%g2
+/* 0x09bc	     ( 7 10) */		ld	[%o3+%g2],%g2
+/* 0x09c0	     ( 9 10) */		cmp	%g2,0
+/* 0x09c4	     ( 9 10) */		bleu,pn	%icc,.L77000241	! tprob=0.50
+/* 0x09c8	     ( 9 10) */		or	%g0,%o1,%o2
+/* 0x09cc	     (10 11) */		ba	.L900000744	! tprob=1.00
+/* 0x09d0	     (10 11) */		cmp	%o5,0
+                                   .L77000241:		/* frequency 0.8 confidence 0.0 */
+/* 0x09d4	     ( 0  1) */		subcc	%g1,1,%o5
+/* 0x09d8	     ( 0  1) */		bneg,pt	%icc,.L900000744	! tprob=0.60
+/* 0x09dc	     ( 1  2) */		cmp	%o5,0
+/* 0x09e0	     ( 1  2) */		sll	%o5,2,%g2
+/* 0x09e4	     ( 2  3) */		add	%o1,%g2,%o0
+/* 0x09e8	     ( 2  3) */		add	%o3,%g2,%o4
+/* 0x09ec	     ( 3  6) */		ld	[%o0],%g2
+                                   .L900000743:		/* frequency 5.3 confidence 0.0 */
+/* 0x09f0	     ( 0  3) */		ld	[%o4],%g3
+/* 0x09f4	     ( 0  1) */		add	%o0,4,%o0
+/* 0x09f8	     ( 0  1) */		add	%o4,4,%o4
+/* 0x09fc	     ( 2  3) */		cmp	%g3,%g2
+/* 0x0a00	     ( 2  3) */		bne,pn	%icc,.L77000244	! tprob=0.16
+/* 0x0a04	     ( 2  3) */		nop
+/* 0x0a08	     ( 3  4) */		addcc	%o5,1,%o5
+/* 0x0a0c	     ( 3  4) */		bpos,a,pt	%icc,.L900000743	! tprob=0.84
+/* 0x0a10	     ( 3  6) */		ld	[%o0],%g2
+                                   .L77000244:		/* frequency 1.0 confidence 0.0 */
+/* 0x0a14	     ( 0  1) */		cmp	%o5,0
+                                   .L900000744:		/* frequency 1.0 confidence 0.0 */
+/* 0x0a18	     ( 0  1) */		bl,pn	%icc,.L77000287	! tprob=0.50
+/* 0x0a1c	     ( 0  1) */		sll	%o5,2,%g2
+/* 0x0a20	     ( 1  4) */		ld	[%o2+%g2],%g3
+/* 0x0a24	     ( 2  5) */		ld	[%o3+%g2],%g2
+/* 0x0a28	     ( 4  5) */		cmp	%g2,%g3
+/* 0x0a2c	     ( 4  5) */		bleu,pt	%icc,.L77000224	! tprob=0.56
+/* 0x0a30	     ( 4  5) */		nop
+                                   .L77000287:		/* frequency 0.8 confidence 0.0 */
+/* 0x0a34	     ( 0  1) */		cmp	%g1,0
+/* 0x0a38	     ( 0  1) */		ble,pt	%icc,.L77000224	! tprob=0.60
+/* 0x0a3c	     ( 0  1) */		nop
+/* 0x0a40	 281 ( 1  2) */		sub	%g1,1,%o7
+/* 0x0a44	     ( 1  2) */		or	%g0,-1,%g2
+/* 0x0a48	     ( 2  3) */		srl	%g2,0,%o4
+/* 0x0a4c	     ( 2  3) */		add	%o7,1,%o0
+/* 0x0a50	 279 ( 3  4) */		or	%g0,0,%o5
+/* 0x0a54	     ( 3  4) */		or	%g0,0,%g1
+/* 0x0a58	     ( 4  5) */		cmp	%o0,3
+/* 0x0a5c	     ( 4  5) */		bl,pn	%icc,.L77000288	! tprob=0.40
+/* 0x0a60	     ( 4  5) */		add	%o3,8,%o1
+/* 0x0a64	     ( 5  6) */		add	%o2,4,%o0
+/* 0x0a68	     ( 5  8) */		ld	[%o1-8],%g2
+/* 0x0a6c	   0 ( 5  6) */		or	%g0,%o1,%o3
+/* 0x0a70	 279 ( 6  9) */		ld	[%o0-4],%g3
+/* 0x0a74	   0 ( 6  7) */		or	%g0,%o0,%o2
+/* 0x0a78	 279 ( 6  7) */		or	%g0,2,%g1
+/* 0x0a7c	     ( 7 10) */		ld	[%o3-4],%o0
+/* 0x0a80	     ( 8  9) */		sub	%g2,%g3,%g2
+/* 0x0a84	     ( 9 10) */		or	%g0,%g2,%o5
+/* 0x0a88	     ( 9 10) */		and	%g2,%o4,%g2
+/* 0x0a8c	     ( 9 10) */		st	%g2,[%o3-8]
+/* 0x0a90	     (10 11) */		srax	%o5,32,%o5
+                                   .L900000734:		/* frequency 64.0 confidence 0.0 */
+/* 0x0a94	     (12 20) */		ld	[%o2],%g2
+/* 0x0a98	     (12 13) */		add	%g1,1,%g1
+/* 0x0a9c	     (12 13) */		add	%o2,4,%o2
+/* 0x0aa0	     (13 13) */		cmp	%g1,%o7
+/* 0x0aa4	     (13 14) */		add	%o3,4,%o3
+/* 0x0aa8	     (14 14) */		sub	%o0,%g2,%o0
+/* 0x0aac	     (15 15) */		add	%o0,%o5,%o5
+/* 0x0ab0	     (16 17) */		and	%o5,%o4,%g2
+/* 0x0ab4	     (16 24) */		ld	[%o3-4],%o0
+/* 0x0ab8	     (17 18) */		st	%g2,[%o3-8]
+/* 0x0abc	     (17 18) */		ble,pt	%icc,.L900000734	! tprob=0.50
+/* 0x0ac0	     (17 18) */		srax	%o5,32,%o5
+                                   .L900000737:		/* frequency 8.0 confidence 0.0 */
+/* 0x0ac4	     ( 0  3) */		ld	[%o2],%o1
+/* 0x0ac8	     ( 2  3) */		sub	%o0,%o1,%o0
+/* 0x0acc	     ( 3  4) */		add	%o0,%o5,%o0
+/* 0x0ad0	     ( 4  5) */		and	%o0,%o4,%o1
+/* 0x0ad4	     ( 4  5) */		st	%o1,[%o3-4]
+/* 0x0ad8	     ( 5  7) */		ret	! Result = 
+/* 0x0adc	     ( 7  8) */		restore	%g0,%g0,%g0
+                                   .L77000288:		/* frequency 0.6 confidence 0.0 */
+/* 0x0ae0	     ( 0  3) */		ld	[%o3],%o0
+                                   .L900000742:		/* frequency 5.3 confidence 0.0 */
+/* 0x0ae4	     ( 0  3) */		ld	[%o2],%o1
+/* 0x0ae8	     ( 0  1) */		add	%o5,%o0,%o0
+/* 0x0aec	     ( 0  1) */		add	%g1,1,%g1
+/* 0x0af0	     ( 1  2) */		add	%o2,4,%o2
+/* 0x0af4	     ( 1  2) */		cmp	%g1,%o7
+/* 0x0af8	     ( 2  3) */		sub	%o0,%o1,%o0
+/* 0x0afc	     ( 3  4) */		and	%o0,%o4,%o1
+/* 0x0b00	     ( 3  4) */		st	%o1,[%o3]
+/* 0x0b04	     ( 3  4) */		add	%o3,4,%o3
+/* 0x0b08	     ( 4  5) */		srax	%o0,32,%o5
+/* 0x0b0c	     ( 4  5) */		ble,a,pt	%icc,.L900000742	! tprob=0.84
+/* 0x0b10	     ( 4  7) */		ld	[%o3],%o0
+                                   .L77000224:		/* frequency 1.0 confidence 0.0 */
+/* 0x0b14	     ( 0  2) */		ret	! Result = 
+/* 0x0b18	     ( 2  3) */		restore	%g0,%g0,%g0
+/* 0x0b1c	   0 ( 0  0) */		.type	mont_mulf_noconv,2
+/* 0x0b1c	     ( 0  0) */		.size	mont_mulf_noconv,(.-mont_mulf_noconv)
+
diff --git a/security/nss/lib/freebl/mpi/montmulfv8.il b/security/nss/lib/freebl/mpi/montmulfv8.il
new file mode 100644
index 000000000..4952d0fb8
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulfv8.il
@@ -0,0 +1,108 @@
+!  
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+! upper32: convert x to a 64-bit integer (round toward zero) and return its high 32-bit word as a double.
+! double upper32(double /*frs1*/);
+! The high word is converted with fitod, i.e. it is interpreted as a signed 32-bit integer.
+        .inline upper32,8
+        std     %o0,[%sp+0x48]	! spill the argument from %o0/%o1 to the stack...
+        ldd     [%sp+0x48],%f10	! ...and reload it into the FP pair %f10/%f11
+
+	fdtox	%f10,%f10	! %f10/%f11 = x converted to a 64-bit integer
+	fitod	%f10,%f0	! %f10 holds the high word of that integer; return it as a double
+        .end
+
+! lower32: convert x to a 64-bit integer and return its low 32 bits (as an unsigned value) as a double.
+! double lower32(double /*frs1*/, double /* Zero */);
+! NOTE(review): .inline declares 8 argument bytes but two doubles (%o0-%o3) are read - confirm this is intended.
+        .inline lower32,8
+        std     %o0,[%sp+0x48]	! move x from %o0/%o1 to %f10/%f11 via the stack
+        ldd     [%sp+0x48],%f10
+        std     %o2,[%sp+0x48]	! move the second argument from %o2/%o3 to %f12/%f13 the same way
+        ldd     [%sp+0x48],%f12
+
+	fdtox	%f10,%f10	! %f10/%f11 = x converted to a 64-bit integer
+	fmovs	%f12,%f10	! overwrite the high word with the high word of the second argument (0 when 0.0 is passed)
+	fxtod	%f10,%f0	! convert the remaining 64-bit integer back to a double
+        .end
+
+! mod: return x - m*q, where q is x*(1/m) truncated to an integer via a 64-bit integer round trip.
+! double mod(double /*x*/, double /*1/m*/, double /*m*/);
+! NOTE(review): .inline declares 12 argument bytes but three doubles (%o0-%o5) are read - confirm this is intended.
+        .inline mod,12
+        std     %o0,[%sp+0x48]	! x   -> %f2/%f3 (via the stack)
+        ldd     [%sp+0x48],%f2
+        std     %o2,[%sp+0x48]	! 1/m -> %f4/%f5
+        ldd     [%sp+0x48],%f4
+        std     %o4,[%sp+0x48]	! m   -> %f6/%f7
+        ldd     [%sp+0x48],%f6
+
+	fmuld	%f2,%f4,%f4	! %f4 = x * (1/m)
+	fdtox	%f4,%f4	! truncate the quotient to a 64-bit integer...
+	fxtod	%f4,%f4	! ...and convert it back to a double: q
+	fmuld	%f4,%f6,%f4	! %f4 = q * m
+	fsubd	%f2,%f4,%f0	! result = x - q*m
+        .end
+
+
+! i16_to_d16_and_d32x4: expand four 32-bit source words into doubles, both whole and split into 16-bit halves.
+! void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
+!			    double * /* 0 */,
+!			    double * /*result16*/, double * /* result32 */,
+!			    float *  /*source - should be unsigned int*
+!		            	       converted to float* */);
+! For k=0..3: result32[k]=source[k]; result16[2k]=low half, result16[2k+1]=high half (assuming the constants named above are passed).
+        .inline i16_to_d16_and_d32x4,24
+        ldd     [%o0],%f2  ! 1/(2^16)
+        ldd     [%o1],%f4  ! 2^16
+	ldd	[%o2],%f22	! third argument (0 per the prototype); supplies the zero high word used below
+
+	fmovd	%f22,%f6	! %f6/%f7 = third argument, then the low word is replaced by source[0]
+	ld	[%o5],%f7
+	fmovd	%f22,%f10	! same for source[1]
+	ld	[%o5+4],%f11
+	fmovd	%f22,%f14	! same for source[2]
+	ld	[%o5+8],%f15
+	fmovd	%f22,%f18	! same for source[3]
+	ld	[%o5+12],%f19
+	fxtod	%f6,%f6	! treat each pair as a 64-bit integer and convert it to a double
+	std	%f6,[%o4]	! result32[0]
+	fxtod	%f10,%f10
+	std	%f10,[%o4+8]	! result32[1]
+	fxtod	%f14,%f14
+	std	%f14,[%o4+16]	! result32[2]
+	fxtod	%f18,%f18
+	std	%f18,[%o4+24]	! result32[3]
+	fmuld	%f2,%f6,%f8	! scale each value by the first constant (1/(2^16))
+	fmuld	%f2,%f10,%f12
+	fmuld	%f2,%f14,%f16
+	fmuld	%f2,%f18,%f20
+	fdtox	%f8,%f8	! truncate to an integer: the high half of each value
+	fdtox	%f12,%f12
+	fdtox	%f16,%f16
+	fdtox	%f20,%f20
+	fxtod	%f8,%f8	! back to double and store at the odd result16 slots
+	std	%f8,[%o3+8]	! result16[1]
+	fxtod	%f12,%f12
+	std	%f12,[%o3+24]	! result16[3]
+	fxtod	%f16,%f16
+	std	%f16,[%o3+40]	! result16[5]
+	fxtod	%f20,%f20
+	std	%f20,[%o3+56]	! result16[7]
+	fmuld	%f8,%f4,%f8	! high half times the second constant (2^16)
+	fmuld	%f12,%f4,%f12
+	fmuld	%f16,%f4,%f16
+	fmuld	%f20,%f4,%f20
+	fsubd	%f6,%f8,%f8	! value - high*2^16 = low half, stored at the even result16 slots
+	std	%f8,[%o3]	! result16[0]
+	fsubd	%f10,%f12,%f12
+	std	%f12,[%o3+16]	! result16[2]
+	fsubd	%f14,%f16,%f16
+	std	%f16,[%o3+32]	! result16[4]
+	fsubd	%f18,%f20,%f20
+	std	%f20,[%o3+48]	! result16[6]
+        .end
+
+
diff --git a/security/nss/lib/freebl/mpi/montmulfv8.s b/security/nss/lib/freebl/mpi/montmulfv8.s
new file mode 100644
index 000000000..ca738880f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulfv8.s
@@ -0,0 +1,1818 @@
+!  
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+	.section	".text",#alloc,#execinstr
+	.file	"montmulf.c"
+
+	.section	".rodata",#alloc
+	.global	TwoTo16
+	.align	8
+! TwoTo16: the double 65536.0 (2^16), emitted as two 32-bit words, high word first.
+! CONSTANT POOL
+! 1089470464 = 0x40F00000.
+	.global TwoTo16
+TwoTo16:
+	.word	1089470464
+	.word	0
+	.type	TwoTo16,#object
+	.size	TwoTo16,8
+	.global	TwoToMinus16
+! TwoToMinus16: the double 1.0/65536.0 (2^-16).
+! CONSTANT POOL
+! 1055916032 = 0x3EF00000.
+	.global TwoToMinus16
+TwoToMinus16:
+	.word	1055916032
+	.word	0
+	.type	TwoToMinus16,#object
+	.size	TwoToMinus16,8
+	.global	Zero
+! Zero: the double 0.0 (all bits clear).
+! CONSTANT POOL
+!
+	.global Zero
+Zero:
+	.word	0
+	.word	0
+	.type	Zero,#object
+	.size	Zero,8
+	.global	TwoTo32
+! TwoTo32: the double 65536.0*65536.0 (2^32).
+! CONSTANT POOL
+! 1106247680 = 0x41F00000.
+	.global TwoTo32
+TwoTo32:
+	.word	1106247680
+	.word	0
+	.type	TwoTo32,#object
+	.size	TwoTo32,8
+	.global	TwoToMinus32
+! TwoToMinus32: the double 1.0/(65536.0*65536.0) (2^-32).
+! CONSTANT POOL
+! 1039138816 = 0x3DF00000.
+	.global TwoToMinus32
+TwoToMinus32:
+	.word	1039138816
+	.word	0
+	.type	TwoToMinus32,#object
+	.size	TwoToMinus32,8
+
+	.section	".text",#alloc,#execinstr
+/* 000000	   0 */		.align	4
+! conv_d16_to_i32: compiled form of the C routine listed in the interleaved source comments below (montmulf.c line 109).
+! SUBROUTINE conv_d16_to_i32
+!
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
+
+                       	.global conv_d16_to_i32
+                       conv_d16_to_i32:
+/* 000000	     */		save	%sp,-128,%sp	! new window; [%sp+96..127] is used as scratch for FP<->integer moves
+! FILE montmulf.c
+
+!   36		      !#define RF_INLINE_MACROS
+!   38		      !static const double TwoTo16=65536.0;
+!   39		      !static const double TwoToMinus16=1.0/65536.0;
+!   40		      !static const double Zero=0.0;
+!   41		      !static const double TwoTo32=65536.0*65536.0;
+!   42		      !static const double TwoToMinus32=1.0/(65536.0*65536.0);
+!   44		      !#ifdef RF_INLINE_MACROS
+!   46		      !double upper32(double);
+!   47		      !double lower32(double, double);
+!   48		      !double mod(double, double, double);
+!   50		      !void i16_to_d16_and_d32x4(const double * /*1/(2^16)*/, 
+!   51		      !			  const double * /* 2^16*/,
+!   52		      !			  const double * /* 0 */,
+!   53		      !			  double *       /*result16*/, 
+!   54		      !			  double *       /* result32 */,
+!   55		      !			  float *  /*source - should be unsigned int*
+!   56		      !		          	       converted to float* */);
+!   58		      !#else
+!   60		      !static double upper32(double x)
+!   61		      !{
+!   62		      !  return floor(x*TwoToMinus32);
+!   63		      !}
+!   65		      !static double lower32(double x, double y)
+!   66		      !{
+!   67		      !  return x-TwoTo32*floor(x*TwoToMinus32);
+!   68		      !}
+!   70		      !static double mod(double x, double oneoverm, double m)
+!   71		      !{
+!   72		      !  return x-m*floor(x*oneoverm);
+!   73		      !}
+!   75		      !#endif
+!   78		      !static void cleanup(double *dt, int from, int tlen)
+!   79		      !{
+!   80		      ! int i;
+!   81		      ! double tmp,tmp1,x,x1;
+!   83		      ! tmp=tmp1=Zero;
+!   84		      ! /* original code **
+!   85		      ! for(i=2*from;i<2*tlen-2;i++)
+!   86		      !   {
+!   87		      !     x=dt[i];
+!   88		      !     dt[i]=lower32(x,Zero)+tmp1;
+!   89		      !     tmp1=tmp;
+!   90		      !     tmp=upper32(x);
+!   91		      !   }
+!   92		      ! dt[tlen-2]+=tmp1;
+!   93		      ! dt[tlen-1]+=tmp;
+!   94		      ! **end original code ***/
+!   95		      ! /* new code ***/
+!   96		      ! for(i=2*from;i<2*tlen;i+=2)
+!   97		      !   {
+!   98		      !     x=dt[i];
+!   99		      !     x1=dt[i+1];
+!  100		      !     dt[i]=lower32(x,Zero)+tmp;
+!  101		      !     dt[i+1]=lower32(x1,Zero)+tmp1;
+!  102		      !     tmp=upper32(x);
+!  103		      !     tmp1=upper32(x1);
+!  104		      !   }
+!  105		      !  /** end new code **/
+!  106		      !}
+!  109		      !void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
+!  110		      !{
+!  111		      !int i;
+!  112		      !long long t, t1, a, b, c, d;
+!  114		      ! t1=0;
+!  115		      ! a=(long long)d16[0];
+
+/* 0x0004	 115 */		ldd	[%i1],%f0	! %f0 = d16[0]
+/* 0x0008	 110 */		or	%g0,%i1,%o0	! %o0 = d16
+
+!  116		      ! b=(long long)d16[1];
+!  117		      ! for(i=0; i<ilen-1; i++)
+
+/* 0x000c	 117 */		sub	%i3,1,%g2	! %g2 = ilen-1
+/* 0x0010	     */		cmp	%g2,0	! condition codes consumed by the ble at 0x0050
+/* 0x0014	 114 */		or	%g0,0,%o4	! %o4 = t1 = 0
+/* 0x0018	 115 */		fdtox	%f0,%f0	! a = (long long)d16[0]
+/* 0x001c	     */		std	%f0,[%sp+120]
+/* 0x0020	 117 */		or	%g0,0,%o7	! %o7 = i = 0
+/* 0x0024	 110 */		or	%g0,%i3,%o1	! overwritten by the sethi at 0x0030 before any use
+/* 0x0028	     */		sub	%i3,2,%o2	! %o2 = ilen-2, the loop bound compared against i
+/* 0x002c	 116 */		ldd	[%o0+8],%f0	! %f0 = d16[1]
+/* 0x0030	 110 */		sethi	%hi(0xfc00),%o1	! with the add at 0x0038: %o1 = 0xffff
+/* 0x0034	     */		add	%o2,1,%g3	! %g3 = ilen-1
+/* 0x0038	     */		add	%o1,1023,%o1
+/* 0x003c	     */		or	%g0,%i0,%o5	! %o5 = i32 store pointer
+/* 0x0040	 116 */		fdtox	%f0,%f0	! b = (long long)d16[1]
+/* 0x0044	     */		std	%f0,[%sp+112]
+/* 0x0048	     */		ldx	[%sp+112],%g1	! %g1 = b
+/* 0x004c	 115 */		ldx	[%sp+120],%g4	! %g4 = a
+/* 0x0050	 117 */		ble,pt	%icc,.L900000117	! ilen-1 <= 0: skip the loop, go to the final store
+/* 0x0054	     */		sethi	%hi(0xfc00),%g2	! (delay slot) start of the 0xffff mask finished at .L900000117
+/* 0x0058	 110 */		or	%g0,-1,%g2
+/* 0x005c	 117 */		cmp	%g3,3
+/* 0x0060	 110 */		srl	%g2,0,%o3	! %o3 = 0xffffffff
+/* 0x0064	 117 */		bl,pn	%icc,.L77000134	! fewer than 3 iterations: use the simple loop
+/* 0x0068	     */		or	%g0,%o0,%g2	! (delay slot) %g2 = d16 read pointer
+
+!  118		      !   {
+!  119		      !     c=(long long)d16[2*i+2];
+
+/* 0x006c	 119 */		ldd	[%o0+16],%f0
+
+!  120		      !     t1+=a&0xffffffff;
+!  121		      !     t=(a>>32);
+!  122		      !     d=(long long)d16[2*i+3];
+!  123		      !     t1+=(b&0xffff)<<16;
+!  124		      !     t+=(b>>16)+(t1>>32);
+!  125		      !     i32[i]=t1&0xffffffff;
+!  126		      !     t1=t;
+!  127		      !     a=c;
+!  128		      !     b=d;
+
+/* 0x0070	 128 */		add	%o0,16,%g2
+/* 0x0074	 123 */		and	%g1,%o1,%o0
+/* 0x0078	     */		sllx	%o0,16,%g3
+/* 0x007c	 120 */		and	%g4,%o3,%o0
+/* 0x0080	 117 */		add	%o0,%g3,%o4
+/* 0x0084	 119 */		fdtox	%f0,%f0
+/* 0x0088	     */		std	%f0,[%sp+104]
+/* 0x008c	 125 */		and	%o4,%o3,%g5
+/* 0x0090	 122 */		ldd	[%g2+8],%f2
+/* 0x0094	 128 */		add	%o5,4,%o5
+/* 0x0098	 124 */		srax	%o4,32,%o4
+/* 0x009c	     */		stx	%o4,[%sp+112]
+/* 0x00a0	 122 */		fdtox	%f2,%f0
+/* 0x00a4	     */		std	%f0,[%sp+96]
+/* 0x00a8	 124 */		srax	%g1,16,%o0
+/* 0x00ac	     */		ldx	[%sp+112],%o7
+/* 0x00b0	 121 */		srax	%g4,32,%o4
+/* 0x00b4	 124 */		add	%o0,%o7,%g4
+/* 0x00b8	 128 */		or	%g0,1,%o7	! first iteration was peeled above; i = 1
+/* 0x00bc	 119 */		ldx	[%sp+104],%g3
+/* 0x00c0	 124 */		add	%o4,%g4,%o4
+/* 0x00c4	 122 */		ldx	[%sp+96],%g1
+/* 0x00c8	 125 */		st	%g5,[%o5-4]
+/* 0x00cc	 127 */		or	%g0,%g3,%g4
+                       .L900000112:
+/* 0x00d0	 119 */		ldd	[%g2+16],%f0
+/* 0x00d4	 128 */		add	%o7,1,%o7
+/* 0x00d8	     */		add	%o5,4,%o5
+/* 0x00dc	     */		cmp	%o7,%o2	! condition codes consumed by the ble at 0x0138
+/* 0x00e0	     */		add	%g2,16,%g2
+/* 0x00e4	 119 */		fdtox	%f0,%f0
+/* 0x00e8	     */		std	%f0,[%sp+104]
+/* 0x00ec	 122 */		ldd	[%g2+8],%f0
+/* 0x00f0	     */		fdtox	%f0,%f0
+/* 0x00f4	     */		std	%f0,[%sp+96]
+/* 0x00f8	 123 */		and	%g1,%o1,%g3
+/* 0x00fc	     */		sllx	%g3,16,%g5
+/* 0x0100	 120 */		and	%g4,%o3,%g3
+/* 0x0104	 117 */		add	%g3,%g5,%g3
+/* 0x0108	 124 */		srax	%g1,16,%g1
+/* 0x010c	 117 */		add	%g3,%o4,%g3
+/* 0x0110	 124 */		srax	%g3,32,%o4
+/* 0x0114	     */		stx	%o4,[%sp+112]
+/* 0x0118	 119 */		ldx	[%sp+104],%g5
+/* 0x011c	 121 */		srax	%g4,32,%o4
+/* 0x0120	 124 */		ldx	[%sp+112],%g4
+/* 0x0124	     */		add	%g1,%g4,%g4
+/* 0x0128	 122 */		ldx	[%sp+96],%g1
+/* 0x012c	 124 */		add	%o4,%g4,%o4
+/* 0x0130	 125 */		and	%g3,%o3,%g3
+/* 0x0134	 127 */		or	%g0,%g5,%g4
+/* 0x0138	 128 */		ble,pt	%icc,.L900000112	! loop while i <= ilen-2
+/* 0x013c	     */		st	%g3,[%o5-4]	! (delay slot) store the 32-bit word just computed
+                       .L900000115:
+/* 0x0140	 128 */		ba	.L900000117
+/* 0x0144	     */		sethi	%hi(0xfc00),%g2	! (delay slot) start of the 0xffff mask finished at .L900000117
+                       .L77000134:
+/* 0x0148	 119 */		ldd	[%g2+16],%f0
+                       .L900000116:
+/* 0x014c	 120 */		and	%g4,%o3,%o0
+/* 0x0150	 123 */		and	%g1,%o1,%g3
+/* 0x0154	 119 */		fdtox	%f0,%f0
+/* 0x0158	 120 */		add	%o4,%o0,%o0
+/* 0x015c	 119 */		std	%f0,[%sp+104]
+/* 0x0160	 128 */		add	%o7,1,%o7
+/* 0x0164	 123 */		sllx	%g3,16,%o4
+/* 0x0168	 122 */		ldd	[%g2+24],%f2
+/* 0x016c	 128 */		add	%g2,16,%g2
+/* 0x0170	 123 */		add	%o0,%o4,%o0
+/* 0x0174	 128 */		cmp	%o7,%o2	! condition codes consumed by the ble,a at 0x01b8
+/* 0x0178	 125 */		and	%o0,%o3,%g3
+/* 0x017c	 122 */		fdtox	%f2,%f0
+/* 0x0180	     */		std	%f0,[%sp+96]
+/* 0x0184	 124 */		srax	%o0,32,%o0
+/* 0x0188	     */		stx	%o0,[%sp+112]
+/* 0x018c	 121 */		srax	%g4,32,%o4
+/* 0x0190	 122 */		ldx	[%sp+96],%o0
+/* 0x0194	 124 */		srax	%g1,16,%g5
+/* 0x0198	     */		ldx	[%sp+112],%g4
+/* 0x019c	 119 */		ldx	[%sp+104],%g1
+/* 0x01a0	 125 */		st	%g3,[%o5]
+/* 0x01a4	 124 */		add	%g5,%g4,%g4
+/* 0x01a8	 128 */		add	%o5,4,%o5
+/* 0x01ac	 124 */		add	%o4,%g4,%o4
+/* 0x01b0	 127 */		or	%g0,%g1,%g4
+/* 0x01b4	 128 */		or	%g0,%o0,%g1
+/* 0x01b8	     */		ble,a,pt	%icc,.L900000116	! loop while i <= ilen-2
+/* 0x01bc	     */		ldd	[%g2+16],%f0	! (annulled delay slot) next load, executed only when the branch is taken
+                       .L77000127:
+
+!  129		      !   }
+!  130		      !     t1+=a&0xffffffff;
+!  131		      !     t=(a>>32);
+!  132		      !     t1+=(b&0xffff)<<16;
+!  133		      !     i32[i]=t1&0xffffffff;
+
+/* 0x01c0	 133 */		sethi	%hi(0xfc00),%g2
+                       .L900000117:
+/* 0x01c4	 133 */		or	%g0,-1,%g3
+/* 0x01c8	     */		add	%g2,1023,%g2	! %g2 = 0xffff
+/* 0x01cc	     */		srl	%g3,0,%g3	! %g3 = 0xffffffff
+/* 0x01d0	     */		and	%g1,%g2,%g2	! b & 0xffff
+/* 0x01d4	     */		and	%g4,%g3,%g4	! a & 0xffffffff
+/* 0x01d8	     */		sllx	%g2,16,%g2	! (b & 0xffff) << 16
+/* 0x01dc	     */		add	%o4,%g4,%g4	! t1 + (a & 0xffffffff)
+/* 0x01e0	     */		add	%g4,%g2,%g2
+/* 0x01e4	     */		sll	%o7,2,%g4	! byte offset of element %o7
+/* 0x01e8	     */		and	%g2,%g3,%g2	! keep the low 32 bits
+/* 0x01ec	     */		st	%g2,[%i0+%g4]	! final store into i32[]
+/* 0x01f0	     */		ret	! Result = 
+/* 0x01f4	     */		restore	%g0,%g0,%g0
+/* 0x01f8	   0 */		.type	conv_d16_to_i32,2
+/* 0x01f8	     */		.size	conv_d16_to_i32,(.-conv_d16_to_i32)
+
+	.section	".text",#alloc,#execinstr
+/* 000000	   0 */		.align	8
+! Bias constant for conv_i32_to_d32: 1127219200 = 0x43300000, so the two words form the double 2^52.
+! CONSTANT POOL
+!
+                       .L_const_seg_900000201:
+/* 000000	   0 */		.word	1127219200,0
+/* 0x0008	   0 */		.align	4
+/* 0x0008	     */		.skip	16
+! conv_i32_to_d32: d32[i] = (double)i32[i] for i in [0,len), per the interleaved C source below.
+! SUBROUTINE conv_i32_to_d32
+! Each word is placed in the low half of a register pair whose high half is 0x43300000; subtracting 2^52 leaves the value.
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
+
+                       	.global conv_i32_to_d32
+                       conv_i32_to_d32:
+/* 000000	     */		or	%g0,%o7,%g2	! save the return address; the call below overwrites %o7
+
+!  135		      !}
+!  137		      !void conv_i32_to_d32(double *d32, unsigned int *i32, int len)
+!  138		      !{
+!  139		      !int i;
+!  141		      !#pragma pipeloop(0)
+!  142		      ! for(i=0;i<len;i++) d32[i]=(double)(i32[i]);
+
+/* 0x0004	 142 */		cmp	%o2,0	! condition codes consumed by the ble at 0x0020
+                       .L900000210:
+/* 0x0008	     */		call	.+8	! position-independent code: %o7 = address of this call
+/* 0x000c	     */		sethi	/*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000210-.)),%g4
+/* 0x0010	 142 */		or	%g0,0,%o5	! %o5 = i = 0
+/* 0x0014	 138 */		add	%g4,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000210-.)),%g4
+/* 0x0018	     */		or	%g0,%o0,%g5	! %g5 = d32 store pointer
+/* 0x001c	     */		add	%g4,%o7,%g1	! %g1 = base used for the GOT lookup at 0x0038
+/* 0x0020	 142 */		ble,pt	%icc,.L77000140	! len <= 0: nothing to do
+/* 0x0024	     */		or	%g0,%g2,%o7	! (delay slot) restore the return address
+/* 0x0028	     */		sethi	%hi(.L_const_seg_900000201),%g2
+/* 0x002c	 138 */		or	%g0,%o1,%g4	! %g4 = i32 load pointer
+/* 0x0030	 142 */		add	%g2,%lo(.L_const_seg_900000201),%g2
+/* 0x0034	     */		sub	%o2,1,%g3	! %g3 = len-1
+/* 0x0038	     */		ld	[%g1+%g2],%g2	! %g2 = pointer read from the table; presumably the address of the 2^52 constant
+/* 0x003c	     */		cmp	%o2,9
+/* 0x0040	     */		bl,pn	%icc,.L77000144	! len < 9: use the one-element-per-iteration loop
+/* 0x0044	     */		ldd	[%g2],%f8	! (delay slot) %f8/%f9 = bias constant
+/* 0x0048	     */		add	%o1,16,%g4
+/* 0x004c	     */		sub	%o2,5,%g1	! %g1 = len-5, bound for the unrolled loop
+/* 0x0050	     */		ld	[%o1],%f7	! preload i32[0..3] into the low words of %f6, %f4, %f2, %f0 pairs
+/* 0x0054	     */		or	%g0,4,%o5	! i = 4
+/* 0x0058	     */		ld	[%o1+4],%f5
+/* 0x005c	     */		ld	[%o1+8],%f3
+/* 0x0060	     */		fmovs	%f8,%f6	! high word of the bias -> %f6/%f7 = 2^52 + i32[0]
+/* 0x0064	     */		ld	[%o1+12],%f1
+                       .L900000205:
+/* 0x0068	     */		ld	[%g4],%f11
+/* 0x006c	     */		add	%o5,5,%o5	! five elements per iteration
+/* 0x0070	     */		add	%g4,20,%g4
+/* 0x0074	     */		fsubd	%f6,%f8,%f6	! remove the bias, leaving the integer value as a double
+/* 0x0078	     */		std	%f6,[%g5]
+/* 0x007c	     */		cmp	%o5,%g1	! condition codes consumed by the ble at 0x00c4
+/* 0x0080	     */		add	%g5,40,%g5
+/* 0x0084	     */		fmovs	%f8,%f4
+/* 0x0088	     */		ld	[%g4-16],%f7
+/* 0x008c	     */		fsubd	%f4,%f8,%f12
+/* 0x0090	     */		fmovs	%f8,%f2
+/* 0x0094	     */		std	%f12,[%g5-32]
+/* 0x0098	     */		ld	[%g4-12],%f5
+/* 0x009c	     */		fsubd	%f2,%f8,%f12
+/* 0x00a0	     */		fmovs	%f8,%f0
+/* 0x00a4	     */		std	%f12,[%g5-24]
+/* 0x00a8	     */		ld	[%g4-8],%f3
+/* 0x00ac	     */		fsubd	%f0,%f8,%f12
+/* 0x00b0	     */		fmovs	%f8,%f10
+/* 0x00b4	     */		std	%f12,[%g5-16]
+/* 0x00b8	     */		ld	[%g4-4],%f1
+/* 0x00bc	     */		fsubd	%f10,%f8,%f10
+/* 0x00c0	     */		fmovs	%f8,%f6
+/* 0x00c4	     */		ble,pt	%icc,.L900000205	! loop while i <= len-5
+/* 0x00c8	     */		std	%f10,[%g5-8]	! (delay slot) store the fifth result of this iteration
+                       .L900000208:
+/* 0x00cc	     */		fmovs	%f8,%f4	! drain: convert the four words already loaded
+/* 0x00d0	     */		add	%g5,32,%g5
+/* 0x00d4	     */		cmp	%o5,%g3	! condition codes consumed by the bg at 0x00fc
+/* 0x00d8	     */		fmovs	%f8,%f2
+/* 0x00dc	     */		fmovs	%f8,%f0
+/* 0x00e0	     */		fsubd	%f6,%f8,%f6
+/* 0x00e4	     */		std	%f6,[%g5-32]
+/* 0x00e8	     */		fsubd	%f4,%f8,%f4
+/* 0x00ec	     */		std	%f4,[%g5-24]
+/* 0x00f0	     */		fsubd	%f2,%f8,%f2
+/* 0x00f4	     */		std	%f2,[%g5-16]
+/* 0x00f8	     */		fsubd	%f0,%f8,%f0
+/* 0x00fc	     */		bg,pn	%icc,.L77000140	! i > len-1: all elements done
+/* 0x0100	     */		std	%f0,[%g5-8]
+                       .L77000144:
+/* 0x0104	     */		ld	[%g4],%f1
+                       .L900000211:
+/* 0x0108	     */		ldd	[%g2],%f8	! reload the bias constant
+/* 0x010c	     */		add	%o5,1,%o5
+/* 0x0110	     */		add	%g4,4,%g4
+/* 0x0114	     */		cmp	%o5,%g3	! condition codes consumed by the ble,a at 0x0128
+/* 0x0118	     */		fmovs	%f8,%f0	! %f0/%f1 = 2^52 + word
+/* 0x011c	     */		fsubd	%f0,%f8,%f0
+/* 0x0120	     */		std	%f0,[%g5]
+/* 0x0124	     */		add	%g5,8,%g5
+/* 0x0128	     */		ble,a,pt	%icc,.L900000211	! loop while i <= len-1
+/* 0x012c	     */		ld	[%g4],%f1	! (annulled delay slot) next word, loaded only when the branch is taken
+                       .L77000140:
+/* 0x0130	     */		retl	! Result = 
+/* 0x0134	     */		nop
+/* 0x0138	   0 */		.type	conv_i32_to_d32,2
+/* 0x0138	     */		.size	conv_i32_to_d32,(.-conv_i32_to_d32)
+
+	.section	".text",#alloc,#execinstr
+/* 000000	   0 */		.align	8
+!
+! CONSTANT POOL
+! Words 0x43300000,0 form the IEEE-754 double 2^52, the bias for the int->double trick below.
+                       .L_const_seg_900000301:
+/* 000000	   0 */		.word	1127219200,0
+/* 0x0008	   0 */		.align	4
+!
+! SUBROUTINE conv_i32_to_d16
+! For each i in [0,len): d16[2*i] = (double)(i32[i] & 0xffff), d16[2*i+1] = (double)(i32[i] >> 16).
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
+
+                       	.global conv_i32_to_d16
+                       conv_i32_to_d16:
+/* 000000	     */		save	%sp,-104,%sp	! new register window; [%sp+92] and [%sp+96] serve as scratch words below
+/* 0x0004	     */		or	%g0,%i2,%o0	! %o0 = len
+
+!  143		      !}
+!  146		      !void conv_i32_to_d16(double *d16, unsigned int *i32, int len)
+!  147		      !{
+!  148		      !int i;
+!  149		      !unsigned int a;
+!  151		      !#pragma pipeloop(0)
+!  152		      ! for(i=0;i<len;i++)
+!  153		      !   {
+!  154		      !     a=i32[i];
+!  155		      !     d16[2*i]=(double)(a&0xffff);
+!  156		      !     d16[2*i+1]=(double)(a>>16);
+
+/* 0x0008	 156 */		sethi	%hi(.L_const_seg_900000301),%g2
+                       .L900000310:
+/* 0x000c	     */		call	.+8	! PIC idiom: leaves the address of this call in %o7
+/* 0x0010	     */		sethi	/*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3
+/* 0x0014	 152 */		cmp	%o0,0
+/* 0x0018	 147 */		add	%g3,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3
+/* 0x001c	 152 */		ble,pt	%icc,.L77000150	! len <= 0: nothing to convert
+/* 0x0020	     */		add	%g3,%o7,%o2	! (delay slot) %o2 = GOT base
+/* 0x0024	     */		sub	%i2,1,%o5	! %o5 = len-1, the loop bound
+/* 0x0028	 156 */		add	%g2,%lo(.L_const_seg_900000301),%o1
+/* 0x002c	 152 */		sethi	%hi(0xfc00),%o0
+/* 0x0030	     */		ld	[%o2+%o1],%o3	! %o3 = address of the 2^52 constant, fetched through the GOT
+/* 0x0034	     */		add	%o5,1,%g2	! %g2 = len again
+/* 0x0038	     */		or	%g0,0,%g1	! i = 0
+/* 0x003c	     */		cmp	%g2,3
+/* 0x0040	     */		or	%g0,%i1,%o7	! %o7 = i32 read pointer
+/* 0x0044	     */		add	%o0,1023,%o4	! %o4 = 0xfc00 + 0x3ff = 0xffff mask
+/* 0x0048	     */		or	%g0,%i0,%g3	! %g3 = d16 write pointer
+/* 0x004c	     */		bl,pn	%icc,.L77000154	! len < 3: take the simple one-element loop
+/* 0x0050	     */		add	%o7,4,%o0
+/* 0x0054	 155 */		ldd	[%o3],%f0	! %f0:%f1 = 2^52, kept for the whole pipelined loop
+/* 0x0058	 156 */		or	%g0,1,%g1
+/* 0x005c	 154 */		ld	[%o0-4],%o1	! a = i32[0], preloaded for the first iteration
+/* 0x0060	   0 */		or	%g0,%o0,%o7
+/* 0x0064	 155 */		and	%o1,%o4,%o0	! a & 0xffff
+                       .L900000306:
+/* 0x0068	 155 */		st	%o0,[%sp+96]	! integer goes through memory to reach an FP register
+/* 0x006c	 156 */		add	%g1,1,%g1
+/* 0x0070	     */		add	%g3,16,%g3	! advance d16 by two doubles
+/* 0x0074	     */		cmp	%g1,%o5
+/* 0x0078	     */		add	%o7,4,%o7
+/* 0x007c	 155 */		ld	[%sp+96],%f3	! low word of %f2:%f3 = the integer
+/* 0x0080	     */		fmovs	%f0,%f2	! high word = 0x43300000, so %f2:%f3 = 2^52 + integer
+/* 0x0084	     */		fsubd	%f2,%f0,%f2	! minus 2^52 leaves (double)(a & 0xffff)
+/* 0x0088	 156 */		srl	%o1,16,%o0	! a >> 16
+/* 0x008c	 155 */		std	%f2,[%g3-16]	! d16[2*i]
+/* 0x0090	 156 */		st	%o0,[%sp+92]
+/* 0x0094	     */		ld	[%sp+92],%f3
+/* 0x0098	 154 */		ld	[%o7-4],%o1	! load the next a while this one is still being converted
+/* 0x009c	 156 */		fmovs	%f0,%f2
+/* 0x00a0	     */		fsubd	%f2,%f0,%f2	! (double)(a >> 16)
+/* 0x00a4	 155 */		and	%o1,%o4,%o0
+/* 0x00a8	 156 */		ble,pt	%icc,.L900000306
+/* 0x00ac	     */		std	%f2,[%g3-8]	! (delay slot) d16[2*i+1]
+                       .L900000309:
+/* 0x00b0	 155 */		st	%o0,[%sp+96]	! epilogue: convert the last word loaded by the loop
+/* 0x00b4	     */		fmovs	%f0,%f2
+/* 0x00b8	 156 */		add	%g3,16,%g3
+/* 0x00bc	     */		srl	%o1,16,%o0
+/* 0x00c0	 155 */		ld	[%sp+96],%f3
+/* 0x00c4	     */		fsubd	%f2,%f0,%f2
+/* 0x00c8	     */		std	%f2,[%g3-16]
+/* 0x00cc	 156 */		st	%o0,[%sp+92]
+/* 0x00d0	     */		fmovs	%f0,%f2
+/* 0x00d4	     */		ld	[%sp+92],%f3
+/* 0x00d8	     */		fsubd	%f2,%f0,%f0
+/* 0x00dc	     */		std	%f0,[%g3-8]
+/* 0x00e0	     */		ret	! Result = 
+/* 0x00e4	     */		restore	%g0,%g0,%g0
+                       .L77000154:
+/* 0x00e8	 154 */		ld	[%o7],%o0	! simple loop entry: a = i32[0]
+                       .L900000311:
+/* 0x00ec	 155 */		and	%o0,%o4,%o1	! a & 0xffff
+/* 0x00f0	     */		st	%o1,[%sp+96]
+/* 0x00f4	 156 */		add	%g1,1,%g1	! i++
+/* 0x00f8	 155 */		ldd	[%o3],%f0	! reload 2^52 (%f0 is overwritten at the end of each pass)
+/* 0x00fc	 156 */		srl	%o0,16,%o0	! a >> 16
+/* 0x0100	     */		add	%o7,4,%o7
+/* 0x0104	     */		cmp	%g1,%o5	! continue while i <= len-1
+/* 0x0108	 155 */		fmovs	%f0,%f2
+/* 0x010c	     */		ld	[%sp+96],%f3
+/* 0x0110	     */		fsubd	%f2,%f0,%f2
+/* 0x0114	     */		std	%f2,[%g3]	! d16[2*i]
+/* 0x0118	 156 */		st	%o0,[%sp+92]
+/* 0x011c	     */		fmovs	%f0,%f2
+/* 0x0120	     */		ld	[%sp+92],%f3
+/* 0x0124	     */		fsubd	%f2,%f0,%f0
+/* 0x0128	     */		std	%f0,[%g3+8]	! d16[2*i+1]
+/* 0x012c	     */		add	%g3,16,%g3
+/* 0x0130	     */		ble,a,pt	%icc,.L900000311
+/* 0x0134	     */		ld	[%o7],%o0	! (annulled delay slot) next a, loaded only when the loop repeats
+                       .L77000150:
+/* 0x0138	     */		ret	! Result = 
+/* 0x013c	     */		restore	%g0,%g0,%g0
+/* 0x0140	   0 */		.type	conv_i32_to_d16,2
+/* 0x0140	     */		.size	conv_i32_to_d16,(.-conv_i32_to_d16)
+
+	.section	".text",#alloc,#execinstr
+/* 000000	   0 */		.align	8
+!
+! CONSTANT POOL
+! Words 0x43300000,0 form the IEEE-754 double 2^52, the bias used by the scalar tail loop.
+                       .L_const_seg_900000401:
+/* 000000	   0 */		.word	1127219200,0
+/* 0x0008	   0 */		.align	4
+/* 0x0008	     */		.skip	16
+!
+! SUBROUTINE conv_i32_to_d32_and_d16
+! Per word: d32[i] = (double)i32[i]; d16[2*i] = low 16 bits, d16[2*i+1] = high 16 bits, as doubles.
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
+
+                       	.global conv_i32_to_d32_and_d16
+                       conv_i32_to_d32_and_d16:
+/* 000000	     */		save	%sp,-120,%sp	! new register window; frame slots at %sp+92..%sp+119 are used as scratch
+                       .L900000415:
+/* 0x0004	     */		call	.+8	! PIC idiom: leaves the address of this call in %o7
+/* 0x0008	     */		sethi	/*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g4
+
+!  157		      !   }
+!  158		      !}
+!  161		      !void conv_i32_to_d32_and_d16(double *d32, double *d16, 
+!  162		      !			     unsigned int *i32, int len)
+!  163		      !{
+!  164		      !int i = 0;
+!  165		      !unsigned int a;
+!  167		      !#pragma pipeloop(0)
+!  168		      !#ifdef RF_INLINE_MACROS
+!  169		      ! for(;i<len-3;i+=4)
+
+/* 0x000c	 169 */		sub	%i3,3,%g2	! len-3
+/* 0x0010	     */		cmp	%g2,0
+/* 0x0014	 163 */		add	%g4,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g4
+
+!  170		      !   {
+!  171		      !     i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
+!  172		      !			  &(d16[2*i]), &(d32[i]), (float *)(&(i32[i])));
+
+/* 0x0018	 172 */		sethi	%hi(Zero),%g2
+/* 0x001c	 163 */		add	%g4,%o7,%o4	! %o4 = GOT base
+/* 0x0020	 172 */		add	%g2,%lo(Zero),%g2
+/* 0x0024	     */		sethi	%hi(TwoToMinus16),%g3
+/* 0x0028	     */		ld	[%o4+%g2],%o1	! %o1 = &Zero (value defined elsewhere; presumably 0.0)
+/* 0x002c	     */		sethi	%hi(TwoTo16),%g4
+/* 0x0030	     */		add	%g3,%lo(TwoToMinus16),%g2
+/* 0x0034	     */		ld	[%o4+%g2],%o3	! %o3 = &TwoToMinus16 (presumably 2^-16 -- confirm at its definition)
+/* 0x0038	 164 */		or	%g0,0,%g5	! i = 0
+/* 0x003c	 172 */		add	%g4,%lo(TwoTo16),%g3
+/* 0x0040	     */		ld	[%o4+%g3],%o2	! %o2 = &TwoTo16 (presumably 2^16 -- confirm at its definition)
+/* 0x0044	 163 */		or	%g0,%i0,%i4	! %i4 = d32
+/* 0x0048	 169 */		or	%g0,%i2,%o7	! %o7 = i32 read pointer
+/* 0x004c	     */		ble,pt	%icc,.L900000418	! len-3 <= 0: skip the 4-wide loop
+/* 0x0050	     */		cmp	%g5,%i3	! (delay slot) sets the flags tested at .L900000418
+/* 0x0054	 172 */		stx	%o7,[%sp+104]	! spill the i32 pointer; reloaded into %o0 inside the loop
+/* 0x0058	 169 */		sub	%i3,4,%o5	! %o5 = len-4, the 4-wide loop bound
+/* 0x005c	     */		or	%g0,0,%g4	! %g4 = byte offset into d32
+/* 0x0060	     */		or	%g0,0,%g1	! %g1 = byte offset into d16
+                       .L900000417:
+/* 0x0064	     */		ldd	[%o1],%f2	! %f2:%f3 = Zero
+/* 0x0068	 172 */		add	%i4,%g4,%g2	! %g2 = &d32[i]
+/* 0x006c	     */		add	%i1,%g1,%g3	! %g3 = &d16[2*i]
+/* 0x0070	     */		ldd	[%o3],%f0	! %f0:%f1 = TwoToMinus16
+/* 0x0074	     */		add	%g5,4,%g5	! i += 4
+/* 0x0078	     */		fmovd	%f2,%f14
+/* 0x007c	     */		ld	[%o7],%f15	! i32[i] becomes the low word of a 64-bit integer whose high word comes from Zero
+/* 0x0080	     */		cmp	%g5,%o5
+/* 0x0084	     */		fmovd	%f2,%f10
+/* 0x0088	     */		ld	[%o7+4],%f11	! i32[i+1]
+/* 0x008c	     */		add	%o7,16,%o7
+/* 0x0090	     */		ldx	[%sp+104],%o0	! %o0 = i32 pointer as it was at the top of this iteration
+/* 0x0094	     */		fmovd	%f2,%f6
+/* 0x0098	     */		stx	%o7,[%sp+112]
+/* 0x009c	     */		fxtod	%f14,%f14	! 64-bit integer -> double: (double)i32[i]
+/* 0x00a0	     */		ld	[%o0+8],%f7	! i32[i+2]
+/* 0x00a4	     */		fxtod	%f10,%f10
+/* 0x00a8	     */		ld	[%o0+12],%f3	! i32[i+3]
+/* 0x00ac	     */		fxtod	%f6,%f6
+/* 0x00b0	     */		ldd	[%o2],%f16	! %f16 = TwoTo16
+/* 0x00b4	     */		fmuld	%f0,%f14,%f12	! x * TwoToMinus16
+/* 0x00b8	     */		fxtod	%f2,%f2
+/* 0x00bc	     */		fmuld	%f0,%f10,%f8
+/* 0x00c0	     */		std	%f14,[%i4+%g4]	! d32[i]
+/* 0x00c4	     */		ldx	[%sp+112],%o7
+/* 0x00c8	     */		add	%g4,32,%g4
+/* 0x00cc	     */		fmuld	%f0,%f6,%f4
+/* 0x00d0	     */		fdtox	%f12,%f12	! to a 64-bit integer: drops the fractional part
+/* 0x00d4	     */		std	%f10,[%g2+8]	! d32[i+1]
+/* 0x00d8	     */		fmuld	%f0,%f2,%f0
+/* 0x00dc	     */		fdtox	%f8,%f8
+/* 0x00e0	     */		std	%f6,[%g2+16]	! d32[i+2]
+/* 0x00e4	     */		std	%f2,[%g2+24]	! d32[i+3]
+/* 0x00e8	     */		fdtox	%f4,%f4
+/* 0x00ec	     */		fdtox	%f0,%f0
+/* 0x00f0	     */		fxtod	%f12,%f12	! back to double: the high part of x
+/* 0x00f4	     */		std	%f12,[%g3+8]	! d16[2*i+1]
+/* 0x00f8	     */		fxtod	%f8,%f8
+/* 0x00fc	     */		std	%f8,[%g3+24]	! d16[2*(i+1)+1]
+/* 0x0100	     */		fxtod	%f4,%f4
+/* 0x0104	     */		std	%f4,[%g3+40]	! d16[2*(i+2)+1]
+/* 0x0108	     */		fxtod	%f0,%f0
+/* 0x010c	     */		std	%f0,[%g3+56]	! d16[2*(i+3)+1]
+/* 0x0110	     */		fmuld	%f12,%f16,%f12	! high part * TwoTo16
+/* 0x0114	     */		fmuld	%f8,%f16,%f8
+/* 0x0118	     */		fmuld	%f4,%f16,%f4
+/* 0x011c	     */		fsubd	%f14,%f12,%f12	! x - high*TwoTo16 = the low part of x
+/* 0x0120	     */		std	%f12,[%i1+%g1]	! d16[2*i]
+/* 0x0124	     */		fmuld	%f0,%f16,%f0
+/* 0x0128	     */		fsubd	%f10,%f8,%f8
+/* 0x012c	     */		std	%f8,[%g3+16]	! d16[2*(i+1)]
+/* 0x0130	     */		add	%g1,64,%g1
+/* 0x0134	     */		fsubd	%f6,%f4,%f4
+/* 0x0138	     */		std	%f4,[%g3+32]	! d16[2*(i+2)]
+/* 0x013c	     */		fsubd	%f2,%f0,%f0
+/* 0x0140	     */		std	%f0,[%g3+48]	! d16[2*(i+3)]
+/* 0x0144	     */		ble,a,pt	%icc,.L900000417	! repeat while i <= len-4
+/* 0x0148	     */		stx	%o7,[%sp+104]	! (annulled delay slot) runs only when the loop repeats
+                       .L77000159:
+
+!  173		      !   }
+!  174		      !#endif
+!  175		      ! for(;i<len;i++)
+
+/* 0x014c	 175 */		cmp	%g5,%i3
+                       .L900000418:
+/* 0x0150	 175 */		bge,pt	%icc,.L77000164	! i >= len: no tail elements left
+/* 0x0154	     */		nop
+
+!  176		      !   {
+!  177		      !     a=i32[i];
+!  178		      !     d32[i]=(double)(i32[i]);
+!  179		      !     d16[2*i]=(double)(a&0xffff);
+!  180		      !     d16[2*i+1]=(double)(a>>16);
+
+/* 0x0158	 180 */		sethi	%hi(.L_const_seg_900000401),%g2
+/* 0x015c	     */		add	%g2,%lo(.L_const_seg_900000401),%o1
+/* 0x0160	 175 */		sethi	%hi(0xfc00),%o0
+/* 0x0164	     */		ld	[%o4+%o1],%o2	! %o2 = address of the 2^52 constant, fetched through the GOT
+/* 0x0168	     */		sll	%g5,2,%o3	! i*4
+/* 0x016c	     */		sub	%i3,%g5,%g3	! %g3 = len-i, the number of elements remaining
+/* 0x0170	     */		sll	%g5,3,%g2	! i*8
+/* 0x0174	     */		add	%o0,1023,%o4	! %o4 = 0xffff mask (the GOT base is no longer needed)
+/* 0x0178	 178 */		ldd	[%o2],%f0	! %f0:%f1 = 2^52
+/* 0x017c	     */		add	%i2,%o3,%o0	! %o0 = &i32[i]
+/* 0x0180	 175 */		cmp	%g3,3
+/* 0x0184	     */		add	%i4,%g2,%o3	! %o3 = &d32[i]
+/* 0x0188	     */		sub	%i3,1,%o1	! %o1 = len-1, bound for the 2x unrolled loop
+/* 0x018c	     */		sll	%g5,4,%g4	! i*16
+/* 0x0190	     */		bl,pn	%icc,.L77000161	! fewer than 3 remaining: use the simple loop
+/* 0x0194	     */		add	%i1,%g4,%o5	! (delay slot) %o5 = &d16[2*i]
+/* 0x0198	 178 */		ld	[%o0],%f3	! peeled iteration: i32[i] as the low word next to 0x43300000
+/* 0x019c	 180 */		add	%o3,8,%o3
+/* 0x01a0	 177 */		ld	[%o0],%o7	! a = i32[i]
+/* 0x01a4	 180 */		add	%o5,16,%o5
+/* 0x01a8	     */		add	%g5,1,%g5
+/* 0x01ac	 178 */		fmovs	%f0,%f2
+/* 0x01b0	 180 */		add	%o0,4,%o0
+/* 0x01b4	 179 */		and	%o7,%o4,%g1	! a & 0xffff
+/* 0x01b8	 178 */		fsubd	%f2,%f0,%f2	! (2^52 + x) - 2^52 = (double)x
+/* 0x01bc	     */		std	%f2,[%o3-8]	! d32[i]
+/* 0x01c0	 180 */		srl	%o7,16,%o7	! a >> 16
+/* 0x01c4	 179 */		st	%g1,[%sp+96]	! integer goes through memory to reach an FP register
+/* 0x01c8	     */		fmovs	%f0,%f2
+/* 0x01cc	     */		ld	[%sp+96],%f3
+/* 0x01d0	     */		fsubd	%f2,%f0,%f2
+/* 0x01d4	     */		std	%f2,[%o5-16]	! d16[2*i]
+/* 0x01d8	 180 */		st	%o7,[%sp+92]
+/* 0x01dc	     */		fmovs	%f0,%f2
+/* 0x01e0	     */		ld	[%sp+92],%f3
+/* 0x01e4	     */		fsubd	%f2,%f0,%f2
+/* 0x01e8	     */		std	%f2,[%o5-8]	! d16[2*i+1]
+                       .L900000411:
+/* 0x01ec	 178 */		ld	[%o0],%f3	! unrolled loop: two elements per pass
+/* 0x01f0	 180 */		add	%g5,2,%g5
+/* 0x01f4	     */		add	%o5,32,%o5
+/* 0x01f8	 177 */		ld	[%o0],%o7
+/* 0x01fc	 180 */		cmp	%g5,%o1
+/* 0x0200	     */		add	%o3,16,%o3
+/* 0x0204	 178 */		fmovs	%f0,%f2
+/* 0x0208	     */		fsubd	%f2,%f0,%f2
+/* 0x020c	     */		std	%f2,[%o3-16]	! d32, first element of the pair
+/* 0x0210	 179 */		and	%o7,%o4,%g1
+/* 0x0214	     */		st	%g1,[%sp+96]
+/* 0x0218	     */		ld	[%sp+96],%f3
+/* 0x021c	     */		fmovs	%f0,%f2
+/* 0x0220	     */		fsubd	%f2,%f0,%f2
+/* 0x0224	 180 */		srl	%o7,16,%o7
+/* 0x0228	 179 */		std	%f2,[%o5-32]	! d16 low half, first element
+/* 0x022c	 180 */		st	%o7,[%sp+92]
+/* 0x0230	     */		ld	[%sp+92],%f3
+/* 0x0234	     */		fmovs	%f0,%f2
+/* 0x0238	     */		fsubd	%f2,%f0,%f2
+/* 0x023c	     */		std	%f2,[%o5-24]	! d16 high half, first element
+/* 0x0240	     */		add	%o0,4,%o0
+/* 0x0244	 178 */		ld	[%o0],%f3
+/* 0x0248	 177 */		ld	[%o0],%o7
+/* 0x024c	 178 */		fmovs	%f0,%f2
+/* 0x0250	     */		fsubd	%f2,%f0,%f2
+/* 0x0254	     */		std	%f2,[%o3-8]	! d32, second element of the pair
+/* 0x0258	 179 */		and	%o7,%o4,%g1
+/* 0x025c	     */		st	%g1,[%sp+96]
+/* 0x0260	     */		ld	[%sp+96],%f3
+/* 0x0264	     */		fmovs	%f0,%f2
+/* 0x0268	     */		fsubd	%f2,%f0,%f2
+/* 0x026c	 180 */		srl	%o7,16,%o7
+/* 0x0270	 179 */		std	%f2,[%o5-16]	! d16 low half, second element
+/* 0x0274	 180 */		st	%o7,[%sp+92]
+/* 0x0278	     */		ld	[%sp+92],%f3
+/* 0x027c	     */		fmovs	%f0,%f2
+/* 0x0280	     */		fsubd	%f2,%f0,%f2
+/* 0x0284	     */		std	%f2,[%o5-8]	! d16 high half, second element
+/* 0x0288	     */		bl,pt	%icc,.L900000411	! repeat while i < len-1
+/* 0x028c	     */		add	%o0,4,%o0
+                       .L900000414:
+/* 0x0290	 180 */		cmp	%g5,%i3
+/* 0x0294	     */		bge,pn	%icc,.L77000164	! i >= len: done
+/* 0x0298	     */		nop
+                       .L77000161:
+/* 0x029c	 178 */		ld	[%o0],%f3	! simple loop entry: one element per pass
+                       .L900000416:
+/* 0x02a0	 178 */		ldd	[%o2],%f0	! reload 2^52 (%f0 is overwritten at the end of each pass)
+/* 0x02a4	 180 */		add	%g5,1,%g5	! i++
+/* 0x02a8	 177 */		ld	[%o0],%o1	! a = i32[i]
+/* 0x02ac	 180 */		add	%o0,4,%o0
+/* 0x02b0	     */		cmp	%g5,%i3	! continue while i < len
+/* 0x02b4	 178 */		fmovs	%f0,%f2
+/* 0x02b8	 179 */		and	%o1,%o4,%o7	! a & 0xffff
+/* 0x02bc	 178 */		fsubd	%f2,%f0,%f2
+/* 0x02c0	     */		std	%f2,[%o3]	! d32[i]
+/* 0x02c4	 180 */		srl	%o1,16,%o1	! a >> 16
+/* 0x02c8	 179 */		st	%o7,[%sp+96]
+/* 0x02cc	 180 */		add	%o3,8,%o3
+/* 0x02d0	 179 */		fmovs	%f0,%f2
+/* 0x02d4	     */		ld	[%sp+96],%f3
+/* 0x02d8	     */		fsubd	%f2,%f0,%f2
+/* 0x02dc	     */		std	%f2,[%o5]	! d16[2*i]
+/* 0x02e0	 180 */		st	%o1,[%sp+92]
+/* 0x02e4	     */		fmovs	%f0,%f2
+/* 0x02e8	     */		ld	[%sp+92],%f3
+/* 0x02ec	     */		fsubd	%f2,%f0,%f0
+/* 0x02f0	     */		std	%f0,[%o5+8]	! d16[2*i+1]
+/* 0x02f4	     */		add	%o5,16,%o5
+/* 0x02f8	     */		bl,a,pt	%icc,.L900000416
+/* 0x02fc	     */		ld	[%o0],%f3	! (annulled delay slot) runs only when the loop repeats
+                       .L77000164:
+/* 0x0300	     */		ret	! Result = 
+/* 0x0304	     */		restore	%g0,%g0,%g0
+/* 0x0308	   0 */		.type	conv_i32_to_d32_and_d16,2
+/* 0x0308	     */		.size	conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
+
+	.section	".text",#alloc,#execinstr
+/* 000000	   0 */		.align	4
+! Leaf routine (no save/restore). In: %o0 = i32, %o1 = nint, %o2 = len.
+! SUBROUTINE adjust_montf_result
+! Subtracts nint[0..len-1] from i32[0..len-1] in place when i32[len] != 0 or i32 >= nint word-wise from the top.
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
+
+                       	.global adjust_montf_result
+                       adjust_montf_result:
+/* 000000	     */		or	%g0,%o2,%g5	! %g5 = len, kept for the subtraction loop
+
+!  181		      !   }
+!  182		      !}
+!  185		      !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
+!  186		      !{
+!  187		      !long long acc;
+!  188		      !int i;
+!  190		      ! if(i32[len]>0) i=-1;
+
+/* 0x0004	 190 */		or	%g0,-1,%g4	! i = -1, the value used when i32[len] > 0
+/* 0x0008	     */		sll	%o2,2,%g1	! len*4
+/* 0x000c	     */		ld	[%o0+%g1],%g1	! i32[len]
+/* 0x0010	     */		cmp	%g1,0
+/* 0x0014	     */		bleu,pn	%icc,.L77000175	! unsigned <= 0, i.e. i32[len] == 0: go compare the words
+/* 0x0018	     */		or	%g0,%o1,%o3	! (delay slot) %o3 = nint read pointer for the subtraction loop
+/* 0x001c	     */		ba	.L900000511
+/* 0x0020	     */		cmp	%g4,0	! (delay slot) sets the flags for the i < 0 test
+                       .L77000175:
+
+!  191		      ! else
+!  192		      !   {
+!  193		      !     for(i=len-1; i>=0; i--)
+
+/* 0x0024	 193 */		sub	%o2,1,%g4	! i = len-1
+/* 0x0028	     */		sll	%g4,2,%g1
+/* 0x002c	     */		cmp	%g4,0
+/* 0x0030	     */		bl,pt	%icc,.L900000511	! len-1 < 0: no words to compare
+/* 0x0034	     */		cmp	%g4,0
+/* 0x0038	     */		add	%o1,%g1,%g2	! %g2 = &nint[i]
+
+!  194		      !       {
+!  195		      !	 if(i32[i]!=nint[i]) break;
+
+/* 0x003c	 195 */		ld	[%g2],%o5	! nint[i]
+/* 0x0040	 193 */		add	%o0,%g1,%g3	! %g3 = &i32[i]
+                       .L900000510:
+/* 0x0044	 195 */		ld	[%g3],%o2	! i32[i]
+/* 0x0048	     */		sub	%g4,1,%g1	! candidate i-1
+/* 0x004c	     */		sub	%g2,4,%g2
+/* 0x0050	     */		sub	%g3,4,%g3
+/* 0x0054	     */		cmp	%o2,%o5
+/* 0x0058	     */		bne,pn	%icc,.L77000182	! words differ: break with i still in %g4
+/* 0x005c	     */		nop
+/* 0x0060	   0 */		or	%g0,%g1,%g4	! i--
+/* 0x0064	 195 */		cmp	%g1,0
+/* 0x0068	     */		bge,a,pt	%icc,.L900000510
+/* 0x006c	     */		ld	[%g2],%o5	! (annulled delay slot) next nint[i], only when the loop repeats
+                       .L77000182:
+
+!  196		      !       }
+!  197		      !   }
+!  198		      ! if((i<0)||(i32[i]>nint[i]))
+
+/* 0x0070	 198 */		cmp	%g4,0
+                       .L900000511:
+/* 0x0074	 198 */		bl,pn	%icc,.L77000198	! i < 0: subtract
+/* 0x0078	     */		sll	%g4,2,%g2
+/* 0x007c	     */		ld	[%o1+%g2],%g1	! nint[i]
+/* 0x0080	     */		ld	[%o0+%g2],%g2	! i32[i]
+/* 0x0084	     */		cmp	%g2,%g1
+/* 0x0088	     */		bleu,pt	%icc,.L77000191	! i32[i] <= nint[i] (unsigned): nothing to do
+/* 0x008c	     */		nop
+                       .L77000198:
+
+!  199		      !   {
+!  200		      !     acc=0;
+!  201		      !     for(i=0;i<len;i++)
+
+/* 0x0090	 201 */		cmp	%g5,0
+/* 0x0094	     */		ble,pt	%icc,.L77000191	! len <= 0: the loop body never runs
+/* 0x0098	     */		nop
+/* 0x009c	     */		or	%g0,%g5,%g1
+/* 0x00a0	 198 */		or	%g0,-1,%g2
+/* 0x00a4	     */		srl	%g2,0,%g3	! %g3 = 0xffffffff: srl by 0 clears the upper 32 bits of the 64-bit register
+/* 0x00a8	     */		sub	%g5,1,%g4	! %g4 = len-1, the loop bound
+/* 0x00ac	 200 */		or	%g0,0,%g5	! acc = 0
+/* 0x00b0	 201 */		or	%g0,0,%o5	! i = 0
+/* 0x00b4	 198 */		or	%g0,%o0,%o4	! %o4 = i32 pointer
+/* 0x00b8	     */		cmp	%g1,3
+/* 0x00bc	 201 */		bl,pn	%icc,.L77000199	! len < 3: use the simple loop
+/* 0x00c0	     */		add	%o0,8,%g1
+/* 0x00c4	     */		add	%o1,4,%g2
+
+!  202		      !       {
+!  203		      !	 acc=acc+(unsigned long long)(i32[i])-(unsigned long long)(nint[i]);
+
+/* 0x00c8	 203 */		ld	[%o0],%o2	! peeled first iteration: i32[0]
+/* 0x00cc	     */		ld	[%o1],%o1	! nint[0]
+/* 0x00d0	   0 */		or	%g0,%g1,%o4	! %o4 = i32+8
+/* 0x00d4	     */		or	%g0,%g2,%o3	! %o3 = nint+4
+/* 0x00d8	 203 */		ld	[%o0+4],%g1	! preload i32[1]
+
+!  204		      !	 i32[i]=acc&0xffffffff;
+!  205		      !	 acc=acc>>32;
+
+/* 0x00dc	 205 */		or	%g0,2,%o5
+/* 0x00e0	 201 */		sub	%o2,%o1,%o2	! i32[0] - nint[0] (acc starts at 0)
+/* 0x00e4	     */		or	%g0,%o2,%g5
+/* 0x00e8	 204 */		and	%o2,%g3,%o2	! keep the low 32 bits
+/* 0x00ec	     */		st	%o2,[%o0]	! i32[0]
+/* 0x00f0	 205 */		srax	%g5,32,%g5	! acc >>= 32 as a 64-bit arithmetic shift, so a borrow stays negative
+                       .L900000505:
+/* 0x00f4	 203 */		ld	[%o3],%o2	! nint[i]
+/* 0x00f8	 205 */		add	%o5,1,%o5
+/* 0x00fc	     */		add	%o3,4,%o3
+/* 0x0100	     */		cmp	%o5,%g4
+/* 0x0104	     */		add	%o4,4,%o4
+/* 0x0108	 201 */		sub	%g1,%o2,%g1	! i32[i] - nint[i]
+/* 0x010c	     */		add	%g1,%g5,%g5	! plus the carried-in acc
+/* 0x0110	 204 */		and	%g5,%g3,%o2
+/* 0x0114	 203 */		ld	[%o4-4],%g1	! preload the next i32 word
+/* 0x0118	 204 */		st	%o2,[%o4-8]	! i32[i] = acc & 0xffffffff
+/* 0x011c	 205 */		ble,pt	%icc,.L900000505
+/* 0x0120	     */		srax	%g5,32,%g5	! (delay slot) acc >>= 32
+                       .L900000508:
+/* 0x0124	 203 */		ld	[%o3],%g2	! epilogue: last word, using the i32 value preloaded above
+/* 0x0128	 201 */		sub	%g1,%g2,%g1
+/* 0x012c	     */		add	%g1,%g5,%g1
+/* 0x0130	 204 */		and	%g1,%g3,%g2
+/* 0x0134	     */		retl	! Result = 
+/* 0x0138	     */		st	%g2,[%o4-4]	! (delay slot) final store happens as the routine returns
+                       .L77000199:
+/* 0x013c	 203 */		ld	[%o4],%g1	! simple loop entry: i32[0]
+                       .L900000509:
+/* 0x0140	 203 */		ld	[%o3],%g2	! nint[i]
+/* 0x0144	     */		add	%g5,%g1,%g1	! acc + i32[i]
+/* 0x0148	 205 */		add	%o5,1,%o5	! i++
+/* 0x014c	     */		add	%o3,4,%o3
+/* 0x0150	     */		cmp	%o5,%g4	! continue while i <= len-1
+/* 0x0154	 203 */		sub	%g1,%g2,%g1	! - nint[i]
+/* 0x0158	 204 */		and	%g1,%g3,%g2
+/* 0x015c	     */		st	%g2,[%o4]	! i32[i] = acc & 0xffffffff
+/* 0x0160	 205 */		add	%o4,4,%o4
+/* 0x0164	     */		srax	%g1,32,%g5	! acc >>= 32
+/* 0x0168	     */		ble,a,pt	%icc,.L900000509
+/* 0x016c	     */		ld	[%o4],%g1	! (annulled delay slot) next i32 word, only when the loop repeats
+                       .L77000191:
+/* 0x0170	     */		retl	! Result = 
+/* 0x0174	     */		nop
+/* 0x0178	   0 */		.type	adjust_montf_result,2
+/* 0x0178	     */		.size	adjust_montf_result,(.-adjust_montf_result)
+
+	.section	".text",#alloc,#execinstr
+/* 000000	   0 */		.align	4
+/* 000000	     */		.skip	16
+!
+! SUBROUTINE mont_mulf_noconv
+!
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
+
+                       	.global mont_mulf_noconv
+                       mont_mulf_noconv:
+/* 000000	     */		save	%sp,-144,%sp
+                       .L900000646:
+/* 0x0004	     */		call	.+8
+/* 0x0008	     */		sethi	/*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000646-.)),%g5
+
+!  206		      !       }
+!  207		      !   }
+!  208		      !}
+!  213		      !/*
+!  214		      !** the lengths of the input arrays should be at least the following:
+!  215		      !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
+!  216		      !** all of them should be different from one another
+!  217		      !**
+!  218		      !*/
+!  219		      !void mont_mulf_noconv(unsigned int *result,
+!  220		      !		     double *dm1, double *dm2, double *dt,
+!  221		      !		     double *dn, unsigned int *nint,
+!  222		      !		     int nlen, double dn0)
+!  223		      !{
+!  224		      ! int i, j, jj;
+!  225		      ! int tmp;
+!  226		      ! double digit, m2j, nextm2j, a, b;
+!  227		      ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
+!  229		      ! pdm1=&(dm1[0]);
+!  230		      ! pdm2=&(dm2[0]);
+!  231		      ! pdn=&(dn[0]);
+!  232		      ! pdm2[2*nlen]=Zero;
+
+/* 0x000c	 232 */		ld	[%fp+92],%o1
+/* 0x0010	     */		sethi	%hi(Zero),%g2
+/* 0x0014	 223 */		ldd	[%fp+96],%f2
+/* 0x0018	     */		add	%g5,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000646-.)),%g5
+/* 0x001c	 232 */		add	%g2,%lo(Zero),%g2
+/* 0x0020	 223 */		st	%i0,[%fp+68]
+/* 0x0024	     */		add	%g5,%o7,%o3
+
+!  234		      ! if (nlen!=16)
+!  235		      !   {
+!  236		      !     for(i=0;i<4*nlen+2;i++) dt[i]=Zero;
+!  238		      !     a=dt[0]=pdm1[0]*pdm2[0];
+!  239		      !     digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+
+/* 0x0028	 239 */		sethi	%hi(TwoToMinus16),%g3
+/* 0x002c	 232 */		ld	[%o3+%g2],%l0
+/* 0x0030	 239 */		sethi	%hi(TwoTo16),%g4
+/* 0x0034	 223 */		or	%g0,%i2,%o2
+/* 0x0038	     */		fmovd	%f2,%f16
+/* 0x003c	     */		st	%i5,[%fp+88]
+/* 0x0040	 239 */		add	%g3,%lo(TwoToMinus16),%g2
+/* 0x0044	 223 */		or	%g0,%i1,%i2
+/* 0x0048	 232 */		ldd	[%l0],%f0
+/* 0x004c	 239 */		add	%g4,%lo(TwoTo16),%g3
+/* 0x0050	 223 */		or	%g0,%i3,%o0
+/* 0x0054	 232 */		sll	%o1,4,%g4
+/* 0x0058	 239 */		ld	[%o3+%g2],%g5
+/* 0x005c	 223 */		or	%g0,%i3,%i1
+/* 0x0060	 239 */		ld	[%o3+%g3],%g1
+/* 0x0064	 232 */		or	%g0,%o1,%i0
+/* 0x0068	     */		or	%g0,%o2,%i3
+/* 0x006c	 234 */		cmp	%o1,16
+/* 0x0070	     */		be,pn	%icc,.L77000279
+/* 0x0074	     */		std	%f0,[%o2+%g4]
+/* 0x0078	 236 */		sll	%o1,2,%g2
+/* 0x007c	     */		or	%g0,%o0,%o3
+/* 0x0080	 232 */		sll	%o1,1,%o1
+/* 0x0084	 236 */		add	%g2,2,%o2
+/* 0x0088	     */		cmp	%o2,0
+/* 0x008c	     */		ble,a,pt	%icc,.L900000660
+/* 0x0090	     */		ldd	[%i2],%f0
+
+!  241		      !     pdtj=&(dt[0]);
+!  242		      !     for(j=jj=0;j<2*nlen;j++,jj++,pdtj++)
+!  243		      !       {
+!  244		      !	 m2j=pdm2[j];
+!  245		      !	 a=pdtj[0]+pdn[0]*digit;
+!  246		      !	 b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16;
+!  247		      !	 pdtj[1]=b;
+!  249		      !#pragma pipeloop(0)
+!  250		      !	 for(i=1;i<nlen;i++)
+!  251		      !	   {
+!  252		      !	     pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+!  253		      !	   }
+!  254		      ! 	 if((jj==30)) {cleanup(dt,j/2+1,2*nlen+1); jj=0;}
+!  255		      !	 
+!  256		      !	 digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+!  257		      !       }
+!  258		      !   }
+!  259		      ! else
+!  260		      !   {
+!  261		      !     a=dt[0]=pdm1[0]*pdm2[0];
+!  263		      !     dt[65]=     dt[64]=     dt[63]=     dt[62]=     dt[61]=     dt[60]=
+!  264		      !     dt[59]=     dt[58]=     dt[57]=     dt[56]=     dt[55]=     dt[54]=
+!  265		      !     dt[53]=     dt[52]=     dt[51]=     dt[50]=     dt[49]=     dt[48]=
+!  266		      !     dt[47]=     dt[46]=     dt[45]=     dt[44]=     dt[43]=     dt[42]=
+!  267		      !     dt[41]=     dt[40]=     dt[39]=     dt[38]=     dt[37]=     dt[36]=
+!  268		      !     dt[35]=     dt[34]=     dt[33]=     dt[32]=     dt[31]=     dt[30]=
+!  269		      !     dt[29]=     dt[28]=     dt[27]=     dt[26]=     dt[25]=     dt[24]=
+!  270		      !     dt[23]=     dt[22]=     dt[21]=     dt[20]=     dt[19]=     dt[18]=
+!  271		      !     dt[17]=     dt[16]=     dt[15]=     dt[14]=     dt[13]=     dt[12]=
+!  272		      !     dt[11]=     dt[10]=     dt[ 9]=     dt[ 8]=     dt[ 7]=     dt[ 6]=
+!  273		      !     dt[ 5]=     dt[ 4]=     dt[ 3]=     dt[ 2]=     dt[ 1]=Zero;
+!  275		      !     pdn_0=pdn[0];
+!  276		      !     pdm1_0=pdm1[0];
+!  278		      !     digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+!  279		      !     pdtj=&(dt[0]);
+!  281		      !     for(j=0;j<32;j++,pdtj++)
+
+/* 0x0094	 281 */		add	%g2,2,%o0
+/* 0x0098	 236 */		add	%g2,1,%o2
+/* 0x009c	 281 */		cmp	%o0,3
+/* 0x00a0	     */		bl,pn	%icc,.L77000280
+/* 0x00a4	     */		or	%g0,1,%o0
+/* 0x00a8	     */		add	%o3,8,%o3
+/* 0x00ac	     */		or	%g0,1,%o4
+/* 0x00b0	     */		std	%f0,[%o3-8]
+                       .L900000630:
+/* 0x00b4	     */		std	%f0,[%o3]
+/* 0x00b8	     */		add	%o4,2,%o4
+/* 0x00bc	     */		add	%o3,16,%o3
+/* 0x00c0	     */		cmp	%o4,%g2
+/* 0x00c4	     */		ble,pt	%icc,.L900000630
+/* 0x00c8	     */		std	%f0,[%o3-8]
+                       .L900000633:
+/* 0x00cc	     */		cmp	%o4,%o2
+/* 0x00d0	     */		bg,pn	%icc,.L77000285
+/* 0x00d4	     */		add	%o4,1,%o0
+                       .L77000280:
+/* 0x00d8	     */		std	%f0,[%o3]
+                       .L900000659:
+/* 0x00dc	     */		ldd	[%l0],%f0
+/* 0x00e0	     */		cmp	%o0,%o2
+/* 0x00e4	     */		add	%o3,8,%o3
+/* 0x00e8	     */		add	%o0,1,%o0
+/* 0x00ec	     */		ble,a,pt	%icc,.L900000659
+/* 0x00f0	     */		std	%f0,[%o3]
+                       .L77000285:
+/* 0x00f4	 238 */		ldd	[%i2],%f0
+                       .L900000660:
+/* 0x00f8	 238 */		ldd	[%i3],%f2
+/* 0x00fc	     */		add	%o1,1,%o2
+/* 0x0100	 242 */		cmp	%o1,0
+/* 0x0104	     */		sll	%o2,1,%o0
+/* 0x0108	     */		sub	%o1,1,%o1
+/* 0x010c	 238 */		fmuld	%f0,%f2,%f0
+/* 0x0110	     */		std	%f0,[%i1]
+/* 0x0114	   0 */		or	%g0,0,%l1
+/* 0x0118	     */		ldd	[%l0],%f6
+/* 0x011c	     */		or	%g0,0,%g4
+/* 0x0120	     */		or	%g0,%o2,%i5
+/* 0x0124	     */		ldd	[%g5],%f2
+/* 0x0128	     */		or	%g0,%o1,%g3
+/* 0x012c	     */		or	%g0,%o0,%o3
+/* 0x0130	     */		fdtox	%f0,%f0
+/* 0x0134	     */		ldd	[%g1],%f4
+/* 0x0138	 246 */		add	%i3,8,%o4
+/* 0x013c	     */		or	%g0,0,%l2
+/* 0x0140	     */		or	%g0,%i1,%o5
+/* 0x0144	     */		sub	%i0,1,%o7
+/* 0x0148	     */		fmovs	%f6,%f0
+/* 0x014c	     */		fxtod	%f0,%f0
+/* 0x0150	 239 */		fmuld	%f0,%f16,%f0
+/* 0x0154	     */		fmuld	%f0,%f2,%f2
+/* 0x0158	     */		fdtox	%f2,%f2
+/* 0x015c	     */		fxtod	%f2,%f2
+/* 0x0160	     */		fmuld	%f2,%f4,%f2
+/* 0x0164	     */		fsubd	%f0,%f2,%f22
+/* 0x0168	 242 */		ble,pt	%icc,.L900000653
+/* 0x016c	     */		sll	%i0,4,%g2
+/* 0x0170	 246 */		ldd	[%i4],%f0
+                       .L900000654:
+/* 0x0174	 246 */		fmuld	%f0,%f22,%f8
+/* 0x0178	     */		ldd	[%i2],%f0
+/* 0x017c	 250 */		cmp	%i0,1
+/* 0x0180	 246 */		ldd	[%o4+%l2],%f6
+/* 0x0184	     */		add	%i2,8,%o0
+/* 0x0188	 250 */		or	%g0,1,%o1
+/* 0x018c	 246 */		ldd	[%o5],%f2
+/* 0x0190	     */		add	%o5,16,%l3
+/* 0x0194	     */		fmuld	%f0,%f6,%f6
+/* 0x0198	     */		ldd	[%g5],%f4
+/* 0x019c	     */		faddd	%f2,%f8,%f2
+/* 0x01a0	     */		ldd	[%o5+8],%f0
+/* 0x01a4	 244 */		ldd	[%i3+%l2],%f20
+/* 0x01a8	 246 */		faddd	%f0,%f6,%f0
+/* 0x01ac	     */		fmuld	%f2,%f4,%f2
+/* 0x01b0	     */		faddd	%f0,%f2,%f18
+/* 0x01b4	 247 */		std	%f18,[%o5+8]
+/* 0x01b8	 250 */		ble,pt	%icc,.L900000658
+/* 0x01bc	     */		srl	%g4,31,%g2
+/* 0x01c0	     */		cmp	%o7,7
+/* 0x01c4	 246 */		add	%i4,8,%g2
+/* 0x01c8	 250 */		bl,pn	%icc,.L77000284
+/* 0x01cc	     */		add	%g2,24,%o2
+/* 0x01d0	 252 */		ldd	[%o0+24],%f12
+/* 0x01d4	     */		add	%o5,48,%l3
+/* 0x01d8	     */		ldd	[%o0],%f2
+/* 0x01dc	   0 */		or	%g0,%o2,%g2
+/* 0x01e0	 250 */		sub	%o7,2,%o2
+/* 0x01e4	 252 */		ldd	[%g2-24],%f0
+/* 0x01e8	     */		or	%g0,5,%o1
+/* 0x01ec	     */		ldd	[%o0+8],%f6
+/* 0x01f0	     */		fmuld	%f2,%f20,%f2
+/* 0x01f4	     */		ldd	[%o0+16],%f14
+/* 0x01f8	     */		fmuld	%f0,%f22,%f4
+/* 0x01fc	     */		add	%o0,32,%o0
+/* 0x0200	     */		ldd	[%g2-16],%f8
+/* 0x0204	     */		fmuld	%f6,%f20,%f10
+/* 0x0208	     */		ldd	[%o5+16],%f0
+/* 0x020c	     */		ldd	[%g2-8],%f6
+/* 0x0210	     */		faddd	%f2,%f4,%f4
+/* 0x0214	     */		ldd	[%o5+32],%f2
+                       .L900000642:
+/* 0x0218	 252 */		ldd	[%g2],%f24
+/* 0x021c	     */		add	%o1,3,%o1
+/* 0x0220	     */		add	%g2,24,%g2
+/* 0x0224	     */		fmuld	%f8,%f22,%f8
+/* 0x0228	     */		ldd	[%l3],%f28
+/* 0x022c	     */		cmp	%o1,%o2
+/* 0x0230	     */		add	%o0,24,%o0
+/* 0x0234	     */		ldd	[%o0-24],%f26
+/* 0x0238	     */		faddd	%f0,%f4,%f0
+/* 0x023c	     */		add	%l3,48,%l3
+/* 0x0240	     */		faddd	%f10,%f8,%f10
+/* 0x0244	     */		fmuld	%f14,%f20,%f4
+/* 0x0248	     */		std	%f0,[%l3-80]
+/* 0x024c	     */		ldd	[%g2-16],%f8
+/* 0x0250	     */		fmuld	%f6,%f22,%f6
+/* 0x0254	     */		ldd	[%l3-32],%f0
+/* 0x0258	     */		ldd	[%o0-16],%f14
+/* 0x025c	     */		faddd	%f2,%f10,%f2
+/* 0x0260	     */		faddd	%f4,%f6,%f10
+/* 0x0264	     */		fmuld	%f12,%f20,%f4
+/* 0x0268	     */		std	%f2,[%l3-64]
+/* 0x026c	     */		ldd	[%g2-8],%f6
+/* 0x0270	     */		fmuld	%f24,%f22,%f24
+/* 0x0274	     */		ldd	[%l3-16],%f2
+/* 0x0278	     */		ldd	[%o0-8],%f12
+/* 0x027c	     */		faddd	%f28,%f10,%f10
+/* 0x0280	     */		std	%f10,[%l3-48]
+/* 0x0284	     */		fmuld	%f26,%f20,%f10
+/* 0x0288	     */		ble,pt	%icc,.L900000642
+/* 0x028c	     */		faddd	%f4,%f24,%f4
+                       .L900000645:
+/* 0x0290	 252 */		fmuld	%f8,%f22,%f28
+/* 0x0294	     */		ldd	[%g2],%f24
+/* 0x0298	     */		faddd	%f0,%f4,%f26
+/* 0x029c	     */		fmuld	%f12,%f20,%f8
+/* 0x02a0	     */		add	%l3,32,%l3
+/* 0x02a4	     */		cmp	%o1,%o7
+/* 0x02a8	     */		fmuld	%f14,%f20,%f14
+/* 0x02ac	     */		ldd	[%l3-32],%f4
+/* 0x02b0	     */		add	%g2,8,%g2
+/* 0x02b4	     */		faddd	%f10,%f28,%f12
+/* 0x02b8	     */		fmuld	%f6,%f22,%f6
+/* 0x02bc	     */		ldd	[%l3-16],%f0
+/* 0x02c0	     */		fmuld	%f24,%f22,%f10
+/* 0x02c4	     */		std	%f26,[%l3-64]
+/* 0x02c8	     */		faddd	%f2,%f12,%f2
+/* 0x02cc	     */		std	%f2,[%l3-48]
+/* 0x02d0	     */		faddd	%f14,%f6,%f6
+/* 0x02d4	     */		faddd	%f8,%f10,%f2
+/* 0x02d8	     */		faddd	%f4,%f6,%f4
+/* 0x02dc	     */		std	%f4,[%l3-32]
+/* 0x02e0	     */		faddd	%f0,%f2,%f0
+/* 0x02e4	     */		bg,pn	%icc,.L77000213
+/* 0x02e8	     */		std	%f0,[%l3-16]
+                       .L77000284:
+/* 0x02ec	 252 */		ldd	[%o0],%f0
+                       .L900000657:
+/* 0x02f0	 252 */		ldd	[%g2],%f4
+/* 0x02f4	     */		fmuld	%f0,%f20,%f2
+/* 0x02f8	     */		add	%o1,1,%o1
+/* 0x02fc	     */		ldd	[%l3],%f0
+/* 0x0300	     */		add	%o0,8,%o0
+/* 0x0304	     */		add	%g2,8,%g2
+/* 0x0308	     */		fmuld	%f4,%f22,%f4
+/* 0x030c	     */		cmp	%o1,%o7
+/* 0x0310	     */		faddd	%f2,%f4,%f2
+/* 0x0314	     */		faddd	%f0,%f2,%f0
+/* 0x0318	     */		std	%f0,[%l3]
+/* 0x031c	     */		add	%l3,16,%l3
+/* 0x0320	     */		ble,a,pt	%icc,.L900000657
+/* 0x0324	     */		ldd	[%o0],%f0
+                       .L77000213:
+/* 0x0328	     */		srl	%g4,31,%g2
+                       .L900000658:
+/* 0x032c	 254 */		cmp	%l1,30
+/* 0x0330	     */		bne,a,pt	%icc,.L900000656
+/* 0x0334	     */		fdtox	%f18,%f0
+/* 0x0338	     */		add	%g4,%g2,%g2
+/* 0x033c	     */		sra	%g2,1,%o0
+/* 0x0340	 281 */		ldd	[%l0],%f0
+/* 0x0344	     */		sll	%i5,1,%o2
+/* 0x0348	     */		add	%o0,1,%g2
+/* 0x034c	     */		sll	%g2,1,%o0
+/* 0x0350	 254 */		sub	%o2,1,%o2
+/* 0x0354	 281 */		fmovd	%f0,%f2
+/* 0x0358	     */		sll	%g2,4,%o1
+/* 0x035c	     */		cmp	%o0,%o3
+/* 0x0360	     */		bge,pt	%icc,.L77000215
+/* 0x0364	     */		or	%g0,0,%l1
+/* 0x0368	 254 */		add	%i1,%o1,%o1
+/* 0x036c	 281 */		ldd	[%o1],%f6
+                       .L900000655:
+/* 0x0370	     */		fdtox	%f6,%f10
+/* 0x0374	     */		ldd	[%o1+8],%f4
+/* 0x0378	     */		add	%o0,2,%o0
+/* 0x037c	     */		ldd	[%l0],%f12
+/* 0x0380	     */		fdtox	%f6,%f6
+/* 0x0384	     */		cmp	%o0,%o2
+/* 0x0388	     */		fdtox	%f4,%f8
+/* 0x038c	     */		fdtox	%f4,%f4
+/* 0x0390	     */		fmovs	%f12,%f10
+/* 0x0394	     */		fmovs	%f12,%f8
+/* 0x0398	     */		fxtod	%f10,%f10
+/* 0x039c	     */		fxtod	%f8,%f8
+/* 0x03a0	     */		faddd	%f10,%f2,%f2
+/* 0x03a4	     */		std	%f2,[%o1]
+/* 0x03a8	     */		faddd	%f8,%f0,%f0
+/* 0x03ac	     */		std	%f0,[%o1+8]
+/* 0x03b0	     */		add	%o1,16,%o1
+/* 0x03b4	     */		fitod	%f6,%f2
+/* 0x03b8	     */		fitod	%f4,%f0
+/* 0x03bc	     */		ble,a,pt	%icc,.L900000655
+/* 0x03c0	     */		ldd	[%o1],%f6
+                       .L77000233:
+/* 0x03c4	     */		or	%g0,0,%l1
+                       .L77000215:
+/* 0x03c8	     */		fdtox	%f18,%f0
+                       .L900000656:
+/* 0x03cc	     */		ldd	[%l0],%f6
+/* 0x03d0	 256 */		add	%g4,1,%g4
+/* 0x03d4	     */		add	%l2,8,%l2
+/* 0x03d8	     */		ldd	[%g5],%f2
+/* 0x03dc	     */		add	%l1,1,%l1
+/* 0x03e0	     */		add	%o5,8,%o5
+/* 0x03e4	     */		fmovs	%f6,%f0
+/* 0x03e8	     */		ldd	[%g1],%f4
+/* 0x03ec	     */		cmp	%g4,%g3
+/* 0x03f0	     */		fxtod	%f0,%f0
+/* 0x03f4	     */		fmuld	%f0,%f16,%f0
+/* 0x03f8	     */		fmuld	%f0,%f2,%f2
+/* 0x03fc	     */		fdtox	%f2,%f2
+/* 0x0400	     */		fxtod	%f2,%f2
+/* 0x0404	     */		fmuld	%f2,%f4,%f2
+/* 0x0408	     */		fsubd	%f0,%f2,%f22
+/* 0x040c	     */		ble,a,pt	%icc,.L900000654
+/* 0x0410	     */		ldd	[%i4],%f0
+                       .L900000629:
+/* 0x0414	 256 */		ba	.L900000653
+/* 0x0418	     */		sll	%i0,4,%g2
+                       .L77000279:
+/* 0x041c	 261 */		ldd	[%o2],%f6
+/* 0x0420	 279 */		or	%g0,%o0,%o4
+/* 0x0424	 281 */		or	%g0,0,%o3
+/* 0x0428	 261 */		ldd	[%i2],%f4
+/* 0x042c	 273 */		std	%f0,[%o0+8]
+/* 0x0430	     */		std	%f0,[%o0+16]
+/* 0x0434	 261 */		fmuld	%f4,%f6,%f4
+/* 0x0438	     */		std	%f4,[%o0]
+/* 0x043c	 273 */		std	%f0,[%o0+24]
+/* 0x0440	     */		std	%f0,[%o0+32]
+/* 0x0444	     */		fdtox	%f4,%f4
+/* 0x0448	     */		std	%f0,[%o0+40]
+/* 0x044c	     */		std	%f0,[%o0+48]
+/* 0x0450	     */		std	%f0,[%o0+56]
+/* 0x0454	     */		std	%f0,[%o0+64]
+/* 0x0458	     */		std	%f0,[%o0+72]
+/* 0x045c	     */		std	%f0,[%o0+80]
+/* 0x0460	     */		std	%f0,[%o0+88]
+/* 0x0464	     */		std	%f0,[%o0+96]
+/* 0x0468	     */		std	%f0,[%o0+104]
+/* 0x046c	     */		std	%f0,[%o0+112]
+/* 0x0470	     */		std	%f0,[%o0+120]
+/* 0x0474	     */		std	%f0,[%o0+128]
+/* 0x0478	     */		std	%f0,[%o0+136]
+/* 0x047c	     */		std	%f0,[%o0+144]
+/* 0x0480	     */		std	%f0,[%o0+152]
+/* 0x0484	     */		std	%f0,[%o0+160]
+/* 0x0488	     */		std	%f0,[%o0+168]
+/* 0x048c	     */		fmovs	%f0,%f4
+/* 0x0490	     */		std	%f0,[%o0+176]
+/* 0x0494	 281 */		or	%g0,0,%o1
+/* 0x0498	 273 */		std	%f0,[%o0+184]
+/* 0x049c	     */		fxtod	%f4,%f4
+/* 0x04a0	     */		std	%f0,[%o0+192]
+/* 0x04a4	     */		std	%f0,[%o0+200]
+/* 0x04a8	     */		std	%f0,[%o0+208]
+/* 0x04ac	 278 */		fmuld	%f4,%f2,%f2
+/* 0x04b0	 273 */		std	%f0,[%o0+216]
+/* 0x04b4	     */		std	%f0,[%o0+224]
+/* 0x04b8	     */		std	%f0,[%o0+232]
+/* 0x04bc	     */		std	%f0,[%o0+240]
+/* 0x04c0	     */		std	%f0,[%o0+248]
+/* 0x04c4	     */		std	%f0,[%o0+256]
+/* 0x04c8	     */		std	%f0,[%o0+264]
+/* 0x04cc	     */		std	%f0,[%o0+272]
+/* 0x04d0	     */		std	%f0,[%o0+280]
+/* 0x04d4	     */		std	%f0,[%o0+288]
+/* 0x04d8	     */		std	%f0,[%o0+296]
+/* 0x04dc	     */		std	%f0,[%o0+304]
+/* 0x04e0	     */		std	%f0,[%o0+312]
+/* 0x04e4	     */		std	%f0,[%o0+320]
+/* 0x04e8	     */		std	%f0,[%o0+328]
+/* 0x04ec	     */		std	%f0,[%o0+336]
+/* 0x04f0	     */		std	%f0,[%o0+344]
+/* 0x04f4	     */		std	%f0,[%o0+352]
+/* 0x04f8	     */		std	%f0,[%o0+360]
+/* 0x04fc	     */		std	%f0,[%o0+368]
+/* 0x0500	     */		std	%f0,[%o0+376]
+/* 0x0504	     */		std	%f0,[%o0+384]
+/* 0x0508	     */		std	%f0,[%o0+392]
+/* 0x050c	     */		std	%f0,[%o0+400]
+/* 0x0510	     */		std	%f0,[%o0+408]
+/* 0x0514	     */		std	%f0,[%o0+416]
+/* 0x0518	     */		std	%f0,[%o0+424]
+/* 0x051c	     */		std	%f0,[%o0+432]
+/* 0x0520	     */		std	%f0,[%o0+440]
+/* 0x0524	     */		std	%f0,[%o0+448]
+/* 0x0528	     */		std	%f0,[%o0+456]
+/* 0x052c	     */		std	%f0,[%o0+464]
+/* 0x0530	     */		std	%f0,[%o0+472]
+/* 0x0534	     */		std	%f0,[%o0+480]
+/* 0x0538	     */		std	%f0,[%o0+488]
+/* 0x053c	     */		std	%f0,[%o0+496]
+/* 0x0540	     */		std	%f0,[%o0+504]
+/* 0x0544	     */		std	%f0,[%o0+512]
+/* 0x0548	     */		std	%f0,[%o0+520]
+/* 0x054c	     */		ldd	[%g5],%f0
+/* 0x0550	     */		ldd	[%g1],%f8
+/* 0x0554	     */		fmuld	%f2,%f0,%f6
+/* 0x0558	 275 */		ldd	[%i4],%f4
+/* 0x055c	 276 */		ldd	[%i2],%f0
+/* 0x0560	     */		fdtox	%f6,%f6
+/* 0x0564	     */		fxtod	%f6,%f6
+/* 0x0568	     */		fmuld	%f6,%f8,%f6
+/* 0x056c	     */		fsubd	%f2,%f6,%f2
+/* 0x0570	 286 */		fmuld	%f4,%f2,%f12
+
+!  282		      !       {
+!  284		      !	 m2j=pdm2[j];
+!  285		      !	 a=pdtj[0]+pdn_0*digit;
+!  286		      !	 b=pdtj[1]+pdm1_0*pdm2[j+1]+a*TwoToMinus16;
+
+!  287		      !	 pdtj[1]=b;
+!  289		      !	 /**** this loop will be fully unrolled:
+!  290		      !	 for(i=1;i<16;i++)
+!  291		      !	   {
+!  292		      !	     pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+!  293		      !	   }
+!  294		      !	 *************************************/
+!  295		      !	     pdtj[2]+=pdm1[1]*m2j+pdn[1]*digit;
+!  296		      !	     pdtj[4]+=pdm1[2]*m2j+pdn[2]*digit;
+!  297		      !	     pdtj[6]+=pdm1[3]*m2j+pdn[3]*digit;
+!  298		      !	     pdtj[8]+=pdm1[4]*m2j+pdn[4]*digit;
+!  299		      !	     pdtj[10]+=pdm1[5]*m2j+pdn[5]*digit;
+!  300		      !	     pdtj[12]+=pdm1[6]*m2j+pdn[6]*digit;
+!  301		      !	     pdtj[14]+=pdm1[7]*m2j+pdn[7]*digit;
+!  302		      !	     pdtj[16]+=pdm1[8]*m2j+pdn[8]*digit;
+!  303		      !	     pdtj[18]+=pdm1[9]*m2j+pdn[9]*digit;
+!  304		      !	     pdtj[20]+=pdm1[10]*m2j+pdn[10]*digit;
+!  305		      !	     pdtj[22]+=pdm1[11]*m2j+pdn[11]*digit;
+!  306		      !	     pdtj[24]+=pdm1[12]*m2j+pdn[12]*digit;
+!  307		      !	     pdtj[26]+=pdm1[13]*m2j+pdn[13]*digit;
+!  308		      !	     pdtj[28]+=pdm1[14]*m2j+pdn[14]*digit;
+!  309		      !	     pdtj[30]+=pdm1[15]*m2j+pdn[15]*digit;
+!  310		      !	 /* no need for cleanup, cannot overflow */
+!  311		      !	 digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+
+	fmovd %f2,%f0		! hand modified
+	fmovd %f16,%f18			! hand modified
+	ldd [%i4],%f2
+	ldd [%o4],%f8
+	ldd [%i2],%f10
+	ldd [%g5],%f14		! hand modified
+	ldd [%g1],%f16		! hand modified
+	ldd [%i3],%f24
+
+	ldd [%i2+8],%f26
+	ldd [%i2+16],%f40
+	ldd [%i2+48],%f46
+	ldd [%i2+56],%f30
+	ldd [%i2+64],%f54
+	ldd [%i2+104],%f34
+	ldd [%i2+112],%f58
+
+	ldd [%i4+8],%f28	
+	ldd [%i4+104],%f38
+	ldd [%i4+112],%f60
+
+	.L99999999: 			!1
+	ldd	[%i2+24],%f32
+	fmuld	%f0,%f2,%f4 	!2
+	ldd	[%i4+24],%f36
+	fmuld	%f26,%f24,%f20 	!3
+	ldd	[%i2+40],%f42
+	fmuld	%f28,%f0,%f22 	!4
+	ldd	[%i4+40],%f44
+	fmuld	%f32,%f24,%f32 	!5
+	ldd	[%i3+8],%f6
+	faddd	%f4,%f8,%f4
+	fmuld	%f36,%f0,%f36 	!6
+	add	%i3,8,%i3
+	ldd	[%i4+56],%f50
+	fmuld	%f42,%f24,%f42 	!7
+	ldd	[%i2+72],%f52
+	faddd	%f20,%f22,%f20
+	fmuld	%f44,%f0,%f44 	!8
+	ldd	[%o4+16],%f22
+	fmuld	%f10,%f6,%f12 	!9
+	ldd	[%i4+72],%f56
+	faddd	%f32,%f36,%f32
+	fmuld	%f14,%f4,%f4 !10
+	ldd	[%o4+48],%f36
+	fmuld	%f30,%f24,%f48 	!11
+	ldd	[%o4+8],%f8
+	faddd	%f20,%f22,%f20
+	fmuld	%f50,%f0,%f50	!12
+	std	%f20,[%o4+16]
+	faddd	%f42,%f44,%f42
+	fmuld	%f52,%f24,%f52 	!13
+	ldd	[%o4+80],%f44
+	faddd	%f4,%f12,%f4
+	fmuld	%f56,%f0,%f56 	!14
+	ldd	[%i2+88],%f20
+	faddd	%f32,%f36,%f32 	!15
+	ldd	[%i4+88],%f22
+	faddd	%f48,%f50,%f48 	!16
+	ldd	[%o4+112],%f50
+	faddd	%f52,%f56,%f52 	!17
+	ldd	[%o4+144],%f56
+	faddd	%f4,%f8,%f8
+	fmuld	%f20,%f24,%f20 	!18
+	std	%f32,[%o4+48]
+	faddd	%f42,%f44,%f42
+	fmuld	%f22,%f0,%f22 	!19
+	std	%f42,[%o4+80]
+	faddd	%f48,%f50,%f48
+	fmuld	%f34,%f24,%f32 	!20
+	std	%f48,[%o4+112]
+	faddd	%f52,%f56,%f52
+	fmuld	%f38,%f0,%f36 	!21
+	ldd	[%i2+120],%f42
+	fdtox	%f8,%f4 		!22
+	std	%f52,[%o4+144]
+	faddd	%f20,%f22,%f20 	!23
+	ldd	[%i4+120],%f44 	!24
+	ldd	[%o4+176],%f22
+	faddd	%f32,%f36,%f32
+	fmuld	%f42,%f24,%f42 	!25
+	ldd	[%i4+16],%f50
+	fmovs	%f17,%f4 	!26
+	ldd	[%i2+32],%f52
+	fmuld	%f44,%f0,%f44 	!27
+	ldd	[%i4+32],%f56
+	fmuld	%f40,%f24,%f48 	!28
+	ldd	[%o4+208],%f36
+	faddd	%f20,%f22,%f20
+	fmuld	%f50,%f0,%f50 	!29
+	std	%f20,[%o4+176]
+	fxtod	%f4,%f4
+	fmuld	%f52,%f24,%f52 	!30
+	ldd	[%i4+48],%f22
+	faddd	%f42,%f44,%f42
+	fmuld	%f56,%f0,%f56 	!31
+	ldd	[%o4+240],%f44
+	faddd	%f32,%f36,%f32 	!32
+	std	%f32,[%o4+208]
+	faddd	%f48,%f50,%f48
+	fmuld	%f46,%f24,%f20 	!33
+	ldd	[%o4+32],%f50
+	fmuld	%f4,%f18,%f12 	!34
+	ldd	[%i4+64],%f36
+	faddd	%f52,%f56,%f52
+	fmuld	%f22,%f0,%f22 	!35
+	ldd	[%o4+64],%f56
+	faddd	%f42,%f44,%f42 	!36
+	std	%f42,[%o4+240]
+	faddd	%f48,%f50,%f48
+	fmuld	%f54,%f24,%f32 	!37
+	std	%f48,[%o4+32]
+	fmuld	%f12,%f14,%f4 !38
+	ldd	[%i2+80],%f42
+	faddd	%f52,%f56,%f56	! yes, tmp52!
+	fmuld	%f36,%f0,%f36 	!39
+	ldd	[%i4+80],%f44
+	faddd	%f20,%f22,%f20 	!40
+	ldd	[%i2+96],%f48
+	fmuld	%f58,%f24,%f52 	!41
+	ldd	[%i4+96],%f50
+	fdtox	%f4,%f4
+	fmuld	%f42,%f24,%f42 	!42
+	std	%f56,[%o4+64]	! yes, tmp52!
+	faddd	%f32,%f36,%f32
+	fmuld	%f44,%f0,%f44 	!43
+	ldd	[%o4+96],%f22
+	fmuld	%f48,%f24,%f48 	!44
+	ldd	[%o4+128],%f36
+	fmovd	%f6,%f24
+	fmuld	%f50,%f0,%f50 	!45
+	fxtod	%f4,%f4
+	fmuld	%f60,%f0,%f56 	!46
+	add	%o4,8,%o4
+	faddd	%f42,%f44,%f42 	!47
+	ldd	[%o4+160-8],%f44
+	faddd	%f20,%f22,%f20 	!48
+	std	%f20,[%o4+96-8]
+	faddd	%f48,%f50,%f48 	!49
+	ldd	[%o4+192-8],%f50
+	faddd	%f52,%f56,%f52
+	fmuld	%f4,%f16,%f4 	!50
+	ldd	[%o4+224-8],%f56
+	faddd	%f32,%f36,%f32 	!51
+	std	%f32,[%o4+128-8]
+	faddd	%f42,%f44,%f42 	!52
+	add	%o3,1,%o3
+	std	%f42,[%o4+160-8]
+	faddd	%f48,%f50,%f48 	!53
+	cmp	%o3,31
+	std	%f48,[%o4+192-8]
+	fsubd	%f12,%f4,%f0 	!54
+	faddd	%f52,%f56,%f52
+	ble,pt	%icc,.L99999999
+	std	%f52,[%o4+224-8] 	!55
+	std %f8,[%o4]
+
+!  312		      !       }
+!  313		      !   }
+!  315		      ! conv_d16_to_i32(result,dt+2*nlen,(long long *)dt,nlen+1);
+
+/* 0x07c8	 315 */		sll	%i0,4,%g2
+                       .L900000653:
+/* 0x07cc	 315 */		add	%i1,%g2,%i1
+/* 0x07d0	 242 */		ld	[%fp+68],%o0
+/* 0x07d4	 315 */		or	%g0,0,%o4
+/* 0x07d8	     */		ldd	[%i1],%f0
+/* 0x07dc	     */		or	%g0,0,%g5
+/* 0x07e0	     */		cmp	%i0,0
+/* 0x07e4	 242 */		or	%g0,%o0,%o3
+/* 0x07e8	 311 */		sub	%i0,1,%g1
+/* 0x07ec	 315 */		fdtox	%f0,%f0
+/* 0x07f0	     */		std	%f0,[%sp+120]
+/* 0x07f4	 311 */		sethi	%hi(0xfc00),%o1
+/* 0x07f8	     */		add	%g1,1,%g3
+/* 0x07fc	     */		or	%g0,%o0,%g4
+/* 0x0800	 315 */		ldd	[%i1+8],%f0
+/* 0x0804	     */		add	%o1,1023,%o1
+/* 0x0808	     */		fdtox	%f0,%f0
+/* 0x080c	     */		std	%f0,[%sp+112]
+/* 0x0810	     */		ldx	[%sp+112],%o5
+/* 0x0814	     */		ldx	[%sp+120],%o7
+/* 0x0818	     */		ble,pt	%icc,.L900000651
+/* 0x081c	     */		sethi	%hi(0xfc00),%g2
+/* 0x0820	 311 */		or	%g0,-1,%g2
+/* 0x0824	 315 */		cmp	%g3,3
+/* 0x0828	 311 */		srl	%g2,0,%o2
+/* 0x082c	 315 */		bl,pn	%icc,.L77000287
+/* 0x0830	     */		or	%g0,%i1,%g2
+/* 0x0834	     */		ldd	[%i1+16],%f0
+/* 0x0838	     */		and	%o5,%o1,%o0
+/* 0x083c	     */		add	%i1,16,%g2
+/* 0x0840	     */		sllx	%o0,16,%g3
+/* 0x0844	     */		and	%o7,%o2,%o0
+/* 0x0848	     */		fdtox	%f0,%f0
+/* 0x084c	     */		std	%f0,[%sp+104]
+/* 0x0850	     */		add	%o0,%g3,%o4
+/* 0x0854	     */		ldd	[%i1+24],%f2
+/* 0x0858	     */		srax	%o5,16,%o0
+/* 0x085c	     */		add	%o3,4,%g4
+/* 0x0860	     */		stx	%o0,[%sp+128]
+/* 0x0864	     */		and	%o4,%o2,%o0
+/* 0x0868	     */		stx	%o0,[%sp+112]
+/* 0x086c	     */		srax	%o4,32,%o0
+/* 0x0870	     */		fdtox	%f2,%f0
+/* 0x0874	     */		stx	%o0,[%sp+136]
+/* 0x0878	     */		srax	%o7,32,%o4
+/* 0x087c	     */		std	%f0,[%sp+96]
+/* 0x0880	     */		ldx	[%sp+128],%g5
+/* 0x0884	     */		ldx	[%sp+136],%o7
+/* 0x0888	     */		ldx	[%sp+104],%g3
+/* 0x088c	     */		add	%g5,%o7,%o0
+/* 0x0890	     */		or	%g0,1,%g5
+/* 0x0894	     */		ldx	[%sp+112],%o7
+/* 0x0898	     */		add	%o4,%o0,%o4
+/* 0x089c	     */		ldx	[%sp+96],%o5
+/* 0x08a0	     */		st	%o7,[%o3]
+/* 0x08a4	     */		or	%g0,%g3,%o7
+                       .L900000634:
+/* 0x08a8	     */		ldd	[%g2+16],%f0
+/* 0x08ac	     */		add	%g5,1,%g5
+/* 0x08b0	     */		add	%g4,4,%g4
+/* 0x08b4	     */		cmp	%g5,%g1
+/* 0x08b8	     */		add	%g2,16,%g2
+/* 0x08bc	     */		fdtox	%f0,%f0
+/* 0x08c0	     */		std	%f0,[%sp+104]
+/* 0x08c4	     */		ldd	[%g2+8],%f0
+/* 0x08c8	     */		fdtox	%f0,%f0
+/* 0x08cc	     */		std	%f0,[%sp+96]
+/* 0x08d0	     */		and	%o5,%o1,%g3
+/* 0x08d4	     */		sllx	%g3,16,%g3
+/* 0x08d8	     */		stx	%g3,[%sp+120]
+/* 0x08dc	     */		and	%o7,%o2,%g3
+/* 0x08e0	     */		stx	%o7,[%sp+128]
+/* 0x08e4	     */		ldx	[%sp+120],%o7
+/* 0x08e8	     */		add	%g3,%o7,%g3
+/* 0x08ec	     */		ldx	[%sp+128],%o7
+/* 0x08f0	     */		srax	%o5,16,%o5
+/* 0x08f4	     */		add	%g3,%o4,%g3
+/* 0x08f8	     */		srax	%g3,32,%o4
+/* 0x08fc	     */		stx	%o4,[%sp+112]
+/* 0x0900	     */		srax	%o7,32,%o4
+/* 0x0904	     */		ldx	[%sp+112],%o7
+/* 0x0908	     */		add	%o5,%o7,%o7
+/* 0x090c	     */		ldx	[%sp+96],%o5
+/* 0x0910	     */		add	%o4,%o7,%o4
+/* 0x0914	     */		and	%g3,%o2,%g3
+/* 0x0918	     */		ldx	[%sp+104],%o7
+/* 0x091c	     */		ble,pt	%icc,.L900000634
+/* 0x0920	     */		st	%g3,[%g4-4]
+                       .L900000637:
+/* 0x0924	     */		ba	.L900000651
+/* 0x0928	     */		sethi	%hi(0xfc00),%g2
+                       .L77000287:
+/* 0x092c	     */		ldd	[%g2+16],%f0
+                       .L900000650:
+/* 0x0930	     */		and	%o7,%o2,%o0
+/* 0x0934	     */		and	%o5,%o1,%g3
+/* 0x0938	     */		fdtox	%f0,%f0
+/* 0x093c	     */		add	%o4,%o0,%o0
+/* 0x0940	     */		std	%f0,[%sp+104]
+/* 0x0944	     */		add	%g5,1,%g5
+/* 0x0948	     */		sllx	%g3,16,%o4
+/* 0x094c	     */		ldd	[%g2+24],%f2
+/* 0x0950	     */		add	%g2,16,%g2
+/* 0x0954	     */		add	%o0,%o4,%o4
+/* 0x0958	     */		cmp	%g5,%g1
+/* 0x095c	     */		srax	%o5,16,%o0
+/* 0x0960	     */		stx	%o0,[%sp+112]
+/* 0x0964	     */		and	%o4,%o2,%g3
+/* 0x0968	     */		srax	%o4,32,%o5
+/* 0x096c	     */		fdtox	%f2,%f0
+/* 0x0970	     */		std	%f0,[%sp+96]
+/* 0x0974	     */		srax	%o7,32,%o4
+/* 0x0978	     */		ldx	[%sp+112],%o7
+/* 0x097c	     */		add	%o7,%o5,%o7
+/* 0x0980	     */		ldx	[%sp+104],%o5
+/* 0x0984	     */		add	%o4,%o7,%o4
+/* 0x0988	     */		ldx	[%sp+96],%o0
+/* 0x098c	     */		st	%g3,[%g4]
+/* 0x0990	     */		or	%g0,%o5,%o7
+/* 0x0994	     */		add	%g4,4,%g4
+/* 0x0998	     */		or	%g0,%o0,%o5
+/* 0x099c	     */		ble,a,pt	%icc,.L900000650
+/* 0x09a0	     */		ldd	[%g2+16],%f0
+                       .L77000236:
+/* 0x09a4	     */		sethi	%hi(0xfc00),%g2
+                       .L900000651:
+/* 0x09a8	     */		or	%g0,-1,%o0
+/* 0x09ac	     */		add	%g2,1023,%g2
+/* 0x09b0	     */		ld	[%fp+88],%o1
+/* 0x09b4	     */		srl	%o0,0,%g3
+/* 0x09b8	     */		and	%o5,%g2,%g2
+/* 0x09bc	     */		and	%o7,%g3,%g4
+
+!  317		      ! adjust_montf_result(result,nint,nlen); 
+
+/* 0x09c0	 317 */		or	%g0,-1,%o5
+/* 0x09c4	 311 */		sllx	%g2,16,%g2
+/* 0x09c8	     */		add	%o4,%g4,%g4
+/* 0x09cc	     */		add	%g4,%g2,%g2
+/* 0x09d0	     */		sll	%g5,2,%g4
+/* 0x09d4	     */		and	%g2,%g3,%g2
+/* 0x09d8	     */		st	%g2,[%o3+%g4]
+/* 0x09dc	 317 */		sll	%i0,2,%g2
+/* 0x09e0	     */		ld	[%o3+%g2],%g2
+/* 0x09e4	     */		cmp	%g2,0
+/* 0x09e8	     */		bleu,pn	%icc,.L77000241
+/* 0x09ec	     */		or	%g0,%o1,%o2
+/* 0x09f0	     */		ba	.L900000649
+/* 0x09f4	     */		cmp	%o5,0
+                       .L77000241:
+/* 0x09f8	     */		sub	%i0,1,%o5
+/* 0x09fc	     */		sll	%o5,2,%g2
+/* 0x0a00	     */		cmp	%o5,0
+/* 0x0a04	     */		bl,pt	%icc,.L900000649
+/* 0x0a08	     */		cmp	%o5,0
+/* 0x0a0c	     */		add	%o1,%g2,%o1
+/* 0x0a10	     */		add	%o3,%g2,%o4
+/* 0x0a14	     */		ld	[%o1],%g2
+                       .L900000648:
+/* 0x0a18	     */		ld	[%o4],%g3
+/* 0x0a1c	     */		sub	%o5,1,%o0
+/* 0x0a20	     */		sub	%o1,4,%o1
+/* 0x0a24	     */		sub	%o4,4,%o4
+/* 0x0a28	     */		cmp	%g3,%g2
+/* 0x0a2c	     */		bne,pn	%icc,.L77000244
+/* 0x0a30	     */		nop
+/* 0x0a34	   0 */		or	%g0,%o0,%o5
+/* 0x0a38	 317 */		cmp	%o0,0
+/* 0x0a3c	     */		bge,a,pt	%icc,.L900000648
+/* 0x0a40	     */		ld	[%o1],%g2
+                       .L77000244:
+/* 0x0a44	     */		cmp	%o5,0
+                       .L900000649:
+/* 0x0a48	     */		bl,pn	%icc,.L77000288
+/* 0x0a4c	     */		sll	%o5,2,%g2
+/* 0x0a50	     */		ld	[%o2+%g2],%g3
+/* 0x0a54	     */		ld	[%o3+%g2],%g2
+/* 0x0a58	     */		cmp	%g2,%g3
+/* 0x0a5c	     */		bleu,pt	%icc,.L77000224
+/* 0x0a60	     */		nop
+                       .L77000288:
+/* 0x0a64	     */		cmp	%i0,0
+/* 0x0a68	     */		ble,pt	%icc,.L77000224
+/* 0x0a6c	     */		nop
+/* 0x0a70	 317 */		sub	%i0,1,%o7
+/* 0x0a74	     */		or	%g0,-1,%g2
+/* 0x0a78	     */		srl	%g2,0,%o4
+/* 0x0a7c	     */		add	%o7,1,%o0
+/* 0x0a80	 315 */		or	%g0,0,%o5
+/* 0x0a84	     */		or	%g0,0,%g1
+/* 0x0a88	     */		cmp	%o0,3
+/* 0x0a8c	     */		bl,pn	%icc,.L77000289
+/* 0x0a90	     */		add	%o3,8,%o1
+/* 0x0a94	     */		add	%o2,4,%o0
+/* 0x0a98	     */		ld	[%o1-8],%g2
+/* 0x0a9c	   0 */		or	%g0,%o1,%o3
+/* 0x0aa0	 315 */		ld	[%o0-4],%g3
+/* 0x0aa4	   0 */		or	%g0,%o0,%o2
+/* 0x0aa8	 315 */		or	%g0,2,%g1
+/* 0x0aac	     */		ld	[%o3-4],%o0
+/* 0x0ab0	     */		sub	%g2,%g3,%g2
+/* 0x0ab4	     */		or	%g0,%g2,%o5
+/* 0x0ab8	     */		and	%g2,%o4,%g2
+/* 0x0abc	     */		st	%g2,[%o3-8]
+/* 0x0ac0	     */		srax	%o5,32,%o5
+                       .L900000638:
+/* 0x0ac4	     */		ld	[%o2],%g2
+/* 0x0ac8	     */		add	%g1,1,%g1
+/* 0x0acc	     */		add	%o2,4,%o2
+/* 0x0ad0	     */		cmp	%g1,%o7
+/* 0x0ad4	     */		add	%o3,4,%o3
+/* 0x0ad8	     */		sub	%o0,%g2,%o0
+/* 0x0adc	     */		add	%o0,%o5,%o5
+/* 0x0ae0	     */		and	%o5,%o4,%g2
+/* 0x0ae4	     */		ld	[%o3-4],%o0
+/* 0x0ae8	     */		st	%g2,[%o3-8]
+/* 0x0aec	     */		ble,pt	%icc,.L900000638
+/* 0x0af0	     */		srax	%o5,32,%o5
+                       .L900000641:
+/* 0x0af4	     */		ld	[%o2],%o1
+/* 0x0af8	     */		sub	%o0,%o1,%o0
+/* 0x0afc	     */		add	%o0,%o5,%o0
+/* 0x0b00	     */		and	%o0,%o4,%o1
+/* 0x0b04	     */		st	%o1,[%o3-4]
+/* 0x0b08	     */		ret	! Result = 
+/* 0x0b0c	     */		restore	%g0,%g0,%g0
+                       .L77000289:
+/* 0x0b10	     */		ld	[%o3],%o0
+                       .L900000647:
+/* 0x0b14	     */		ld	[%o2],%o1
+/* 0x0b18	     */		add	%o5,%o0,%o0
+/* 0x0b1c	     */		add	%g1,1,%g1
+/* 0x0b20	     */		add	%o2,4,%o2
+/* 0x0b24	     */		cmp	%g1,%o7
+/* 0x0b28	     */		sub	%o0,%o1,%o0
+/* 0x0b2c	     */		and	%o0,%o4,%o1
+/* 0x0b30	     */		st	%o1,[%o3]
+/* 0x0b34	     */		add	%o3,4,%o3
+/* 0x0b38	     */		srax	%o0,32,%o5
+/* 0x0b3c	     */		ble,a,pt	%icc,.L900000647
+/* 0x0b40	     */		ld	[%o3],%o0
+                       .L77000224:
+/* 0x0b44	     */		ret	! Result = 
+/* 0x0b48	     */		restore	%g0,%g0,%g0
+/* 0x0b4c	   0 */		.type	mont_mulf_noconv,2
+/* 0x0b4c	     */		.size	mont_mulf_noconv,(.-mont_mulf_noconv)
+
diff --git a/security/nss/lib/freebl/mpi/montmulfv9.il b/security/nss/lib/freebl/mpi/montmulfv9.il
new file mode 100644
index 000000000..006f47431
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulfv9.il
@@ -0,0 +1,93 @@
+!  
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+! Inline template: converts the argument to a 64-bit integer and returns the word left in %f10 (the upper 32 bits) as a double.
+! double upper32(double /*frs1*/);
+!
+        .inline upper32,8
+	fdtox	%f0,%f10	! %f10/%f11 = argument converted to a 64-bit integer
+	fitod	%f10,%f0	! result = (double) of the 32-bit integer word held in %f10
+        .end
+
+! Inline template: converts the first argument to a 64-bit integer, replaces its upper word with the upper word of the second argument, and converts back.
+! double lower32(double /*frs1*/, double /* Zero */);
+! With the second argument equal to 0.0 (as the prototype indicates) this leaves only the low 32 bits of the integer value.
+        .inline lower32,8
+	fdtox	%f0,%f10	! %f10/%f11 = first argument converted to a 64-bit integer
+	fmovs	%f2,%f10	! overwrite the upper word with the first word of the second argument (Zero)
+	fxtod	%f10,%f0	! result = (double) of the resulting 64-bit integer
+        .end
+
+! Inline template: returns x - m*q, where q is x*(1/m) converted to an integer (fdtox) and back to double.
+! double mod(double /*x*/, double /*1/m*/, double /*m*/);
+!
+        .inline mod,12
+	fmuld	%f0,%f2,%f2	! %f2 = x * (1/m)
+	fdtox	%f2,%f2	! convert the quotient to a 64-bit integer (drops the fraction)
+	fxtod	%f2,%f2	! q = that integer as a double
+	fmuld	%f2,%f4,%f2	! %f2 = q * m
+	fsubd	%f0,%f2,%f0	! result = x - q*m
+        .end
+
+
+! Inline template: expands four 32-bit source words into doubles, both whole (result32) and split into 16-bit halves (result16).
+! void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
+!			    double * /* 0 */,
+!			    double * /*result16*/, double * /* result32 */,
+!			    float *  /*source - should be unsigned int*
+!		            	       converted to float* */);
+! For k=0..3: result32[k] = source[k]; result16[2k+1] = integer part of source[k]/2^16; result16[2k] = source[k] - 2^16*result16[2k+1].
+        .inline i16_to_d16_and_d32x4,24
+        ldd     [%o0],%f2  ! 1/(2^16)
+        ldd     [%o1],%f4  ! 2^16
+	ldd	[%o2],%f22	! 0.0, used to clear the upper word of each 64-bit integer below
+
+	fmovd	%f22,%f6	! clear %f6/%f7 ...
+	ld	[%o5],%f7	! ... then load source word 0 into the low half
+	fmovd	%f22,%f10	! same for source word 1
+	ld	[%o5+4],%f11
+	fmovd	%f22,%f14	! same for source word 2
+	ld	[%o5+8],%f15
+	fmovd	%f22,%f18	! same for source word 3
+	ld	[%o5+12],%f19
+	fxtod	%f6,%f6	! zero-extended 64-bit integer -> double
+	std	%f6,[%o4]	! result32[0]
+	fxtod	%f10,%f10
+	std	%f10,[%o4+8]	! result32[1]
+	fxtod	%f14,%f14
+	std	%f14,[%o4+16]	! result32[2]
+	fxtod	%f18,%f18
+	std	%f18,[%o4+24]	! result32[3]
+	fmuld	%f2,%f6,%f8	! value * 1/(2^16), four lanes
+	fmuld	%f2,%f10,%f12
+	fmuld	%f2,%f14,%f16
+	fmuld	%f2,%f18,%f20
+	fdtox	%f8,%f8	! convert to integer: drops the fractional (low 16-bit) part
+	fdtox	%f12,%f12
+	fdtox	%f16,%f16
+	fdtox	%f20,%f20
+	fxtod	%f8,%f8	! high part of word 0 as a double
+	std	%f8,[%o3+8]	! result16[1]
+	fxtod	%f12,%f12
+	std	%f12,[%o3+24]	! result16[3]
+	fxtod	%f16,%f16
+	std	%f16,[%o3+40]	! result16[5]
+	fxtod	%f20,%f20
+	std	%f20,[%o3+56]	! result16[7]
+	fmuld	%f8,%f4,%f8	! high part * 2^16, four lanes
+	fmuld	%f12,%f4,%f12
+	fmuld	%f16,%f4,%f16
+	fmuld	%f20,%f4,%f20
+	fsubd	%f6,%f8,%f8	! low part = value - high part * 2^16
+	std	%f8,[%o3]	! result16[0]
+	fsubd	%f10,%f12,%f12
+	std	%f12,[%o3+16]	! result16[2]
+	fsubd	%f14,%f16,%f16
+	std	%f16,[%o3+32]	! result16[4]
+	fsubd	%f18,%f20,%f20
+	std	%f20,[%o3+48]	! result16[6]
+        .end
+
+
diff --git a/security/nss/lib/freebl/mpi/montmulfv9.s b/security/nss/lib/freebl/mpi/montmulfv9.s
new file mode 100644
index 000000000..560e47f7b
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulfv9.s
@@ -0,0 +1,2346 @@
+!  
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+	.section	".text",#alloc,#execinstr
+	.file	"montmulf.c"
+
+	.section	".rodata",#alloc
+	.global	TwoTo16
+	.align	8
+!
+! CONSTANT POOL: TwoTo16 = 65536.0, stored as two 32-bit words, most significant word first
+!
+	.global TwoTo16
+TwoTo16:
+	.word	1089470464	! 0x40F00000: exponent field 0x40F = 1023+16, i.e. 2^16
+	.word	0	! low word of the mantissa
+	.type	TwoTo16,#object
+	.size	TwoTo16,8
+	.global	TwoToMinus16
+!
+! CONSTANT POOL: TwoToMinus16 = 1.0/65536.0
+!
+	.global TwoToMinus16
+TwoToMinus16:
+	.word	1055916032	! 0x3EF00000: exponent field 0x3EF = 1023-16, i.e. 2^-16
+	.word	0
+	.type	TwoToMinus16,#object
+	.size	TwoToMinus16,8
+	.global	Zero
+!
+! CONSTANT POOL: Zero = 0.0
+!
+	.global Zero
+Zero:
+	.word	0	! all-zero bit pattern is +0.0
+	.word	0
+	.type	Zero,#object
+	.size	Zero,8
+	.global	TwoTo32
+!
+! CONSTANT POOL: TwoTo32 = 65536.0*65536.0
+!
+	.global TwoTo32
+TwoTo32:
+	.word	1106247680	! 0x41F00000: exponent field 0x41F = 1023+32, i.e. 2^32
+	.word	0
+	.type	TwoTo32,#object
+	.size	TwoTo32,8
+	.global	TwoToMinus32
+!
+! CONSTANT POOL: TwoToMinus32 = 1.0/(65536.0*65536.0)
+!
+	.global TwoToMinus32
+TwoToMinus32:
+	.word	1039138816	! 0x3DF00000: exponent field 0x3DF = 1023-32, i.e. 2^-32
+	.word	0
+	.type	TwoToMinus32,#object
+	.size	TwoToMinus32,8
+
+	.section	".text",#alloc,#execinstr
+/* 000000	   0 */		.register	%g3,#scratch
+/* 000000	     */		.register	%g2,#scratch
+/* 000000	   0 */		.align	8
+! conv_d16_to_i32(i32, d16, tmp, ilen): the corresponding montmulf.c source is quoted in the "!" comments below.
+! SUBROUTINE conv_d16_to_i32
+! Packs each pair d16[2i], d16[2i+1] (converted to 64-bit integers) into the 32-bit word i32[i], carrying the excess into the next word.
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
+
+                       	.global conv_d16_to_i32
+                       conv_d16_to_i32:
+/* 000000	     */		save	%sp,-208,%sp	! used below: %i0 = i32, %i1 = d16, %i3 = ilen; %i2 (tmp) is overwritten without being read
+! FILE montmulf.c
+
+!    1		      !/*
+!    2		      ! * The contents of this file are subject to the Mozilla Public
+!    3		      ! * License Version 1.1 (the "License"); you may not use this file
+!    4		      ! * except in compliance with the License. You may obtain a copy of
+!    5		      ! * the License at http://www.mozilla.org/MPL/
+!    6		      ! * 
+!    7		      ! * Software distributed under the License is distributed on an "AS
+!    8		      ! * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+!    9		      ! * implied. See the License for the specific language governing
+!   10		      ! * rights and limitations under the License.
+!   11		      ! * 
+!   12		      ! * The Original Code is SPARC optimized Montgomery multiply functions.
+!   13		      ! *
+!   14		      ! * The Initial Developer of the Original Code is Sun Microsystems Inc.
+!   15		      ! * Portions created by Sun Microsystems Inc. are 
+!   16		      ! * Copyright (C) 1999-2000 Sun Microsystems Inc.  All Rights Reserved.
+!   17		      ! * 
+!   18		      ! * Contributor(s):
+!   19		      ! *	Netscape Communications Corporation
+!   20		      ! * 
+!   21		      ! * Alternatively, the contents of this file may be used under the
+!   22		      ! * terms of the GNU General Public License Version 2 or later (the
+!   23		      ! * "GPL"), in which case the provisions of the GPL are applicable 
+!   24		      ! * instead of those above.	If you wish to allow use of your 
+!   25		      ! * version of this file only under the terms of the GPL and not to
+!   26		      ! * allow others to use your version of this file under the MPL,
+!   27		      ! * indicate your decision by deleting the provisions above and
+!   28		      ! * replace them with the notice and other provisions required by
+!   29		      ! * the GPL.  If you do not delete the provisions above, a recipient
+!   30		      ! * may use your version of this file under either the MPL or the
+!   31		      ! * GPL.
+!   34		      ! */
+!   36		      !#define RF_INLINE_MACROS
+!   38		      !static const double TwoTo16=65536.0;
+!   39		      !static const double TwoToMinus16=1.0/65536.0;
+!   40		      !static const double Zero=0.0;
+!   41		      !static const double TwoTo32=65536.0*65536.0;
+!   42		      !static const double TwoToMinus32=1.0/(65536.0*65536.0);
+!   44		      !#ifdef RF_INLINE_MACROS
+!   46		      !double upper32(double);
+!   47		      !double lower32(double, double);
+!   48		      !double mod(double, double, double);
+!   50		      !void i16_to_d16_and_d32x4(const double * /*1/(2^16)*/, 
+!   51		      !			  const double * /* 2^16*/,
+!   52		      !			  const double * /* 0 */,
+!   53		      !			  double *       /*result16*/, 
+!   54		      !			  double *       /* result32 */,
+!   55		      !			  float *  /*source - should be unsigned int*
+!   56		      !		          	       converted to float* */);
+!   58		      !#else
+!   60		      !static double upper32(double x)
+!   61		      !{
+!   62		      !  return floor(x*TwoToMinus32);
+!   63		      !}
+!   65		      !static double lower32(double x, double y)
+!   66		      !{
+!   67		      !  return x-TwoTo32*floor(x*TwoToMinus32);
+!   68		      !}
+!   70		      !static double mod(double x, double oneoverm, double m)
+!   71		      !{
+!   72		      !  return x-m*floor(x*oneoverm);
+!   73		      !}
+!   75		      !#endif
+!   78		      !static void cleanup(double *dt, int from, int tlen)
+!   79		      !{
+!   80		      ! int i;
+!   81		      ! double tmp,tmp1,x,x1;
+!   83		      ! tmp=tmp1=Zero;
+!   84		      ! /* original code **
+!   85		      ! for(i=2*from;i<2*tlen-2;i++)
+!   86		      !   {
+!   87		      !     x=dt[i];
+!   88		      !     dt[i]=lower32(x,Zero)+tmp1;
+!   89		      !     tmp1=tmp;
+!   90		      !     tmp=upper32(x);
+!   91		      !   }
+!   92		      ! dt[tlen-2]+=tmp1;
+!   93		      ! dt[tlen-1]+=tmp;
+!   94		      ! **end original code ***/
+!   95		      ! /* new code ***/
+!   96		      ! for(i=2*from;i<2*tlen;i+=2)
+!   97		      !   {
+!   98		      !     x=dt[i];
+!   99		      !     x1=dt[i+1];
+!  100		      !     dt[i]=lower32(x,Zero)+tmp;
+!  101		      !     dt[i+1]=lower32(x1,Zero)+tmp1;
+!  102		      !     tmp=upper32(x);
+!  103		      !     tmp1=upper32(x1);
+!  104		      !   }
+!  105		      !  /** end new code **/
+!  106		      !}
+!  109		      !void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
+!  110		      !{
+!  111		      !int i;
+!  112		      !long long t, t1, a, b, c, d;
+!  114		      ! t1=0;
+!  115		      ! a=(long long)d16[0];
+
+/* 0x0004	 115 */		ldd	[%i1],%f2	! %f2 = d16[0]
+
+!  116		      ! b=(long long)d16[1];
+!  117		      ! for(i=0; i<ilen-1; i++)
+
+/* 0x0008	 117 */		sub	%i3,1,%o1	! %o1 = ilen-1
+/* 0x000c	 110 */		or	%g0,%i0,%g1	! %g1 = i32 (base of the output words)
+/* 0x0010	 116 */		ldd	[%i1+8],%f4	! %f4 = d16[1]
+/* 0x0014	 117 */		cmp	%o1,0
+/* 0x0018	 114 */		or	%g0,0,%g5	! %g5 = t1 = 0
+/* 0x001c	 115 */		fdtox	%f2,%f2	! a = (long long)d16[0], still in an FP register
+/* 0x0020	     */		std	%f2,[%sp+2247]	! stored so that a can be reloaded into an integer register
+/* 0x0024	 117 */		or	%g0,0,%o0	! %o0 = i = 0
+/* 0x0028	 116 */		fdtox	%f4,%f2	! b = (long long)d16[1]
+/* 0x002c	     */		std	%f2,[%sp+2239]
+/* 0x0030	 110 */		sub	%o1,1,%o7	! %o7 = ilen-2, the last value of i handled inside the loop
+/* 0x0034	     */		or	%g0,%i1,%o4
+/* 0x0038	     */		sethi	%hi(0xfc00),%o3
+/* 0x003c	     */		or	%g0,-1,%o1
+/* 0x0040	     */		or	%g0,2,%i1	! %i1 = element index 2*i+2, starting at 2
+/* 0x0044	     */		srl	%o1,0,%g3	! %g3 = 0xffffffff mask (-1 with the upper 32 bits cleared)
+/* 0x0048	     */		or	%g0,%o4,%g4	! %g4 = d16 base
+/* 0x004c	 116 */		ldx	[%sp+2239],%i2	! %i2 = b as an integer
+/* 0x0050	     */		add	%o3,1023,%o5	! %o5 = 0xfc00 + 1023 = 0xffff mask
+/* 0x0054	 117 */		sub	%o7,1,%o2
+/* 0x0058	 115 */		ldx	[%sp+2247],%i3	! %i3 = a as an integer
+/* 0x005c	 117 */		ble,pt	%icc,.L900000113	! ilen-1 <= 0: no loop iterations, go straight to the final word
+/* 0x0060	     */		sethi	%hi(0xfc00),%g2
+/* 0x0064	     */		add	%o7,1,%g2	! %g2 = ilen-1 = number of loop iterations
+
+!  118		      !   {
+!  119		      !     c=(long long)d16[2*i+2];
+!  120		      !     t1+=a&0xffffffff;
+!  121		      !     t=(a>>32);
+!  122		      !     d=(long long)d16[2*i+3];
+!  123		      !     t1+=(b&0xffff)<<16;
+
+/* 0x0068	 123 */		and	%i2,%o5,%i4
+/* 0x006c	     */		sllx	%i4,16,%o1
+/* 0x0070	 117 */		cmp	%g2,6
+/* 0x0074	     */		bl,pn	%icc,.L77000134	! fewer than 6 iterations: use the one-word-per-pass loop
+/* 0x0078	     */		or	%g0,3,%i0	! %i0 = element index 2*i+3, starting at 3
+/* 0x007c	 119 */		ldd	[%o4+16],%f0
+/* 0x0080	 120 */		and	%i3,%g3,%o3
+
+!  124		      !     t+=(b>>16)+(t1>>32);
+
+/* 0x0084	 124 */		srax	%i2,16,%i5
+/* 0x0088	 117 */		add	%o3,%o1,%i4
+/* 0x008c	 121 */		srax	%i3,32,%i3
+/* 0x0090	 119 */		fdtox	%f0,%f0
+/* 0x0094	     */		std	%f0,[%sp+2231]
+
+!  125		      !     i32[i]=t1&0xffffffff;
+
+/* 0x0098	 125 */		and	%i4,%g3,%l0
+/* 0x009c	 117 */		or	%g0,72,%o3
+/* 0x00a0	 122 */		ldd	[%g4+24],%f0
+/* 0x00a4	 117 */		or	%g0,64,%o4
+/* 0x00a8	     */		or	%g0,4,%o1
+
+!  126		      !     t1=t;
+!  127		      !     a=c;
+!  128		      !     b=d;
+
+/* 0x00ac	 128 */		or	%g0,5,%i0
+/* 0x00b0	     */		or	%g0,4,%i1
+/* 0x00b4	 119 */		ldx	[%sp+2231],%g2
+/* 0x00b8	 122 */		fdtox	%f0,%f0
+/* 0x00bc	 128 */		or	%g0,4,%o0
+/* 0x00c0	 122 */		std	%f0,[%sp+2223]
+/* 0x00c4	     */		ldd	[%g4+40],%f2
+/* 0x00c8	 120 */		and	%g2,%g3,%i2
+/* 0x00cc	 119 */		ldd	[%g4+32],%f0
+/* 0x00d0	 121 */		srax	%g2,32,%g2
+/* 0x00d4	 122 */		ldd	[%g4+56],%f4
+/* 0x00d8	     */		fdtox	%f2,%f2
+/* 0x00dc	     */		ldx	[%sp+2223],%g5
+/* 0x00e0	 119 */		fdtox	%f0,%f0
+/* 0x00e4	 125 */		st	%l0,[%g1]	! i32[0]
+/* 0x00e8	 124 */		srax	%i4,32,%l0
+/* 0x00ec	 122 */		fdtox	%f4,%f4
+/* 0x00f0	     */		std	%f2,[%sp+2223]
+/* 0x00f4	 123 */		and	%g5,%o5,%i4
+/* 0x00f8	 124 */		add	%i5,%l0,%i5
+/* 0x00fc	 119 */		std	%f0,[%sp+2231]
+/* 0x0100	 123 */		sllx	%i4,16,%i4
+/* 0x0104	 124 */		add	%i3,%i5,%i3
+/* 0x0108	 119 */		ldd	[%g4+48],%f2
+/* 0x010c	 124 */		srax	%g5,16,%g5
+/* 0x0110	 117 */		add	%i2,%i4,%i2
+/* 0x0114	 122 */		ldd	[%g4+72],%f0
+/* 0x0118	 117 */		add	%i2,%i3,%i4
+/* 0x011c	 124 */		srax	%i4,32,%i5
+/* 0x0120	 119 */		fdtox	%f2,%f2
+/* 0x0124	 125 */		and	%i4,%g3,%i4
+/* 0x0128	 122 */		ldx	[%sp+2223],%i2
+/* 0x012c	 124 */		add	%g5,%i5,%g5
+/* 0x0130	 119 */		ldx	[%sp+2231],%i3
+/* 0x0134	 124 */		add	%g2,%g5,%g5
+/* 0x0138	 119 */		std	%f2,[%sp+2231]
+/* 0x013c	 122 */		std	%f4,[%sp+2223]
+/* 0x0140	 119 */		ldd	[%g4+64],%f2
+/* 0x0144	 125 */		st	%i4,[%g1+4]	! i32[1]
+                       .L900000108:	! main loop: two words of i32 are stored per pass (%o0 advances by 2)
+/* 0x0148	 122 */		ldx	[%sp+2223],%i4
+/* 0x014c	 128 */		add	%o0,2,%o0
+/* 0x0150	     */		add	%i0,4,%i0
+/* 0x0154	 119 */		ldx	[%sp+2231],%l0
+/* 0x0158	 117 */		add	%o3,16,%o3
+/* 0x015c	 123 */		and	%i2,%o5,%g2
+/* 0x0160	     */		sllx	%g2,16,%i5
+/* 0x0164	 120 */		and	%i3,%g3,%g2
+/* 0x0168	 122 */		ldd	[%g4+%o3],%f4
+/* 0x016c	     */		fdtox	%f0,%f0
+/* 0x0170	     */		std	%f0,[%sp+2223]
+/* 0x0174	 124 */		srax	%i2,16,%i2
+/* 0x0178	 117 */		add	%g2,%i5,%g2
+/* 0x017c	 119 */		fdtox	%f2,%f0
+/* 0x0180	 117 */		add	%o4,16,%o4
+/* 0x0184	 119 */		std	%f0,[%sp+2231]
+/* 0x0188	 117 */		add	%g2,%g5,%g2
+/* 0x018c	 119 */		ldd	[%g4+%o4],%f2
+/* 0x0190	 124 */		srax	%g2,32,%i5
+/* 0x0194	 128 */		cmp	%o0,%o2
+/* 0x0198	 121 */		srax	%i3,32,%g5
+/* 0x019c	 124 */		add	%i2,%i5,%i2
+/* 0x01a0	     */		add	%g5,%i2,%i5
+/* 0x01a4	 117 */		add	%o1,4,%o1
+/* 0x01a8	 125 */		and	%g2,%g3,%g2
+/* 0x01ac	 127 */		or	%g0,%l0,%g5
+/* 0x01b0	 125 */		st	%g2,[%g1+%o1]
+/* 0x01b4	 128 */		add	%i1,4,%i1
+/* 0x01b8	 122 */		ldx	[%sp+2223],%i2
+/* 0x01bc	 119 */		ldx	[%sp+2231],%i3
+/* 0x01c0	 117 */		add	%o3,16,%o3
+/* 0x01c4	 123 */		and	%i4,%o5,%g2
+/* 0x01c8	     */		sllx	%g2,16,%l0
+/* 0x01cc	 120 */		and	%g5,%g3,%g2
+/* 0x01d0	 122 */		ldd	[%g4+%o3],%f0
+/* 0x01d4	     */		fdtox	%f4,%f4
+/* 0x01d8	     */		std	%f4,[%sp+2223]
+/* 0x01dc	 124 */		srax	%i4,16,%i4
+/* 0x01e0	 117 */		add	%g2,%l0,%g2
+/* 0x01e4	 119 */		fdtox	%f2,%f2
+/* 0x01e8	 117 */		add	%o4,16,%o4
+/* 0x01ec	 119 */		std	%f2,[%sp+2231]
+/* 0x01f0	 117 */		add	%g2,%i5,%g2
+/* 0x01f4	 119 */		ldd	[%g4+%o4],%f2
+/* 0x01f8	 124 */		srax	%g2,32,%i5
+/* 0x01fc	 121 */		srax	%g5,32,%g5
+/* 0x0200	 124 */		add	%i4,%i5,%i4
+/* 0x0204	     */		add	%g5,%i4,%g5
+/* 0x0208	 117 */		add	%o1,4,%o1
+/* 0x020c	 125 */		and	%g2,%g3,%g2
+/* 0x0210	 128 */		ble,pt	%icc,.L900000108
+/* 0x0214	     */		st	%g2,[%g1+%o1]
+                       .L900000111:
+/* 0x0218	 122 */		ldx	[%sp+2223],%o2
+/* 0x021c	 123 */		and	%i2,%o5,%i4
+/* 0x0220	 120 */		and	%i3,%g3,%g2
+/* 0x0224	 123 */		sllx	%i4,16,%i4
+/* 0x0228	 119 */		ldx	[%sp+2231],%i5
+/* 0x022c	 128 */		cmp	%o0,%o7
+/* 0x0230	 124 */		srax	%i2,16,%i2
+/* 0x0234	 117 */		add	%g2,%i4,%g2
+/* 0x0238	 122 */		fdtox	%f0,%f4
+/* 0x023c	     */		std	%f4,[%sp+2223]
+/* 0x0240	 117 */		add	%g2,%g5,%g5
+/* 0x0244	 123 */		and	%o2,%o5,%l0
+/* 0x0248	 124 */		srax	%g5,32,%l1
+/* 0x024c	 120 */		and	%i5,%g3,%i4
+/* 0x0250	 119 */		fdtox	%f2,%f0
+/* 0x0254	 121 */		srax	%i3,32,%g2
+/* 0x0258	 119 */		std	%f0,[%sp+2231]
+/* 0x025c	 124 */		add	%i2,%l1,%i2
+/* 0x0260	 123 */		sllx	%l0,16,%i3
+/* 0x0264	 124 */		add	%g2,%i2,%i2
+/* 0x0268	     */		srax	%o2,16,%o2
+/* 0x026c	 117 */		add	%o1,4,%g2
+/* 0x0270	     */		add	%i4,%i3,%o1
+/* 0x0274	 125 */		and	%g5,%g3,%g5
+/* 0x0278	     */		st	%g5,[%g1+%g2]
+/* 0x027c	 119 */		ldx	[%sp+2231],%i3
+/* 0x0280	 117 */		add	%o1,%i2,%o1
+/* 0x0284	     */		add	%g2,4,%g2
+/* 0x0288	 124 */		srax	%o1,32,%i4
+/* 0x028c	 122 */		ldx	[%sp+2223],%i2
+/* 0x0290	 125 */		and	%o1,%g3,%g5
+/* 0x0294	 121 */		srax	%i5,32,%o1
+/* 0x0298	 124 */		add	%o2,%i4,%o2
+/* 0x029c	 125 */		st	%g5,[%g1+%g2]
+/* 0x02a0	 128 */		bg,pn	%icc,.L77000127	! done if i > ilen-2 (cmp %o0,%o7 above); otherwise fall into the loop below
+/* 0x02a4	     */		add	%o1,%o2,%g5
+/* 0x02a8	     */		add	%i0,6,%i0
+/* 0x02ac	     */		add	%i1,6,%i1
+                       .L77000134:
+/* 0x02b0	 119 */		sra	%i1,0,%o2
+                       .L900000112:	! simple loop: one word of i32 per pass
+/* 0x02b4	 119 */		sllx	%o2,3,%o3	! byte offset of d16[2*i+2]
+/* 0x02b8	 120 */		and	%i3,%g3,%o1	! a & 0xffffffff
+/* 0x02bc	 119 */		ldd	[%g4+%o3],%f0	! d16[2*i+2]
+/* 0x02c0	 122 */		sra	%i0,0,%o3
+/* 0x02c4	 123 */		and	%i2,%o5,%o2	! b & 0xffff
+/* 0x02c8	 122 */		sllx	%o3,3,%o3	! byte offset of d16[2*i+3]
+/* 0x02cc	 120 */		add	%g5,%o1,%o1	! t1 += a & 0xffffffff
+/* 0x02d0	 119 */		fdtox	%f0,%f0	! c = (long long)d16[2*i+2]
+/* 0x02d4	     */		std	%f0,[%sp+2231]
+/* 0x02d8	 123 */		sllx	%o2,16,%o2
+/* 0x02dc	     */		add	%o1,%o2,%o2	! t1 += (b & 0xffff) << 16
+/* 0x02e0	 128 */		add	%i1,2,%i1
+/* 0x02e4	 122 */		ldd	[%g4+%o3],%f0	! d16[2*i+3]
+/* 0x02e8	 124 */		srax	%o2,32,%g2	! t1 >> 32
+/* 0x02ec	 125 */		and	%o2,%g3,%o3	! t1 & 0xffffffff, the word to store
+/* 0x02f0	 124 */		srax	%i2,16,%o1	! b >> 16
+/* 0x02f4	 128 */		add	%i0,2,%i0
+/* 0x02f8	 122 */		fdtox	%f0,%f0	! d = (long long)d16[2*i+3]
+/* 0x02fc	     */		std	%f0,[%sp+2223]
+/* 0x0300	 125 */		sra	%o0,0,%o2
+/* 0x0304	     */		sllx	%o2,2,%o2	! byte offset of i32[i]
+/* 0x0308	 124 */		add	%o1,%g2,%g5	! (b >> 16) + (t1 >> 32)
+/* 0x030c	 121 */		srax	%i3,32,%g2	! t = a >> 32
+/* 0x0310	 128 */		add	%o0,1,%o0	! i++
+/* 0x0314	 124 */		add	%g2,%g5,%g5	! next t1 = t + (b >> 16) + (t1 >> 32)
+/* 0x0318	 128 */		cmp	%o0,%o7
+/* 0x031c	 119 */		ldx	[%sp+2231],%o4	! reload c as an integer
+/* 0x0320	 122 */		ldx	[%sp+2223],%i2	! b = d
+/* 0x0324	 125 */		st	%o3,[%g1+%o2]	! i32[i] = t1 & 0xffffffff
+/* 0x0328	 127 */		or	%g0,%o4,%i3	! a = c
+/* 0x032c	 128 */		ble,pt	%icc,.L900000112	! repeat while i <= ilen-2
+/* 0x0330	     */		sra	%i1,0,%o2
+                       .L77000127:
+
+!  129		      !   }
+!  130		      !     t1+=a&0xffffffff;
+!  131		      !     t=(a>>32);
+!  132		      !     t1+=(b&0xffff)<<16;
+!  133		      !     i32[i]=t1&0xffffffff;
+
+/* 0x0334	 133 */		sethi	%hi(0xfc00),%g2
+                       .L900000113:
+/* 0x0338	 133 */		or	%g0,-1,%g3
+/* 0x033c	     */		add	%g2,1023,%g2	! %g2 = 0xffff mask
+/* 0x0340	     */		srl	%g3,0,%g3	! %g3 = 0xffffffff mask
+/* 0x0344	     */		and	%i2,%g2,%g2	! b & 0xffff
+/* 0x0348	     */		and	%i3,%g3,%g4	! a & 0xffffffff
+/* 0x034c	     */		sllx	%g2,16,%g2
+/* 0x0350	     */		add	%g5,%g4,%g4	! t1 += a & 0xffffffff
+/* 0x0354	     */		sra	%o0,0,%g5
+/* 0x0358	     */		add	%g4,%g2,%g4	! t1 += (b & 0xffff) << 16
+/* 0x035c	     */		sllx	%g5,2,%g2	! byte offset of i32[i]
+/* 0x0360	     */		and	%g4,%g3,%g3
+/* 0x0364	     */		st	%g3,[%g1+%g2]	! final word: i32[i] = t1 & 0xffffffff
+/* 0x0368	     */		ret	! Result = 
+/* 0x036c	     */		restore	%g0,%g0,%g0
+/* 0x0370	   0 */		.type	conv_d16_to_i32,2
+/* 0x0370	     */		.size	conv_d16_to_i32,(.-conv_d16_to_i32)
+
+	.section	".text",#alloc,#execinstr
+/* 000000	   0 */		.align	8
+!
+! CONSTANT POOL: one double, 2^52 (high word 0x43300000, low word 0), used as the bias for the int->double conversion below
+!
+                       .L_const_seg_900000201:
+/* 000000	   0 */		.word	1127219200,0	! 1127219200 = 0x43300000
+/* 0x0008	   0 */		.align	8
+/* 0x0008	     */		.skip	24
+!
+! SUBROUTINE conv_i32_to_d32 -- d32[i] = (double)i32[i] for 0 <= i < len; %o0 = d32, %o1 = i32, %o2 = len (leaf routine, no register window)
+!
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
+
+                       	.global conv_i32_to_d32
+                       conv_i32_to_d32:
+/* 000000	     */		or	%g0,%o7,%g3	! save the return address: the call below overwrites %o7
+
+!  135		      !}
+!  137		      !void conv_i32_to_d32(double *d32, unsigned int *i32, int len)
+!  138		      !{
+!  139		      !int i;
+!  141		      !#pragma pipeloop(0)
+!  142		      ! for(i=0;i<len;i++) d32[i]=(double)(i32[i]);
+
+/* 0x0004	 142 */		cmp	%o2,0	! len <= 0 ? (tested by the ble at 0x0020)
+                       .L900000210:
+/* 0x0008	     */		call	.+8	! position-independent code: leaves the current PC in %o7
+/* 0x000c	     */		sethi	/*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000210-.)),%g4
+/* 0x0010	 142 */		or	%g0,0,%o3	! i = 0
+/* 0x0014	 138 */		add	%g4,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000210-.)),%g4
+/* 0x0018	 142 */		sub	%o2,1,%o4	! %o4 = len-1
+/* 0x001c	 138 */		add	%g4,%o7,%g1	! %g1 = GOT base
+/* 0x0020	 142 */		ble,pt	%icc,.L77000140	! nothing to convert when len <= 0
+/* 0x0024	     */		or	%g0,%g3,%o7	! delay slot: restore the return address
+/* 0x0028	     */		sethi	%hi(.L_const_seg_900000201),%g3
+/* 0x002c	     */		cmp	%o2,12	! fewer than 12 elements -> scalar loop only
+/* 0x0030	     */		add	%g3,%lo(.L_const_seg_900000201),%g2
+/* 0x0034	     */		or	%g0,%o1,%g5	! %g5 = i32 base (used by the scalar loop)
+/* 0x0038	     */		ldx	[%g1+%g2],%g4	! %g4 = address of the 2^52 constant, via the GOT
+/* 0x003c	     */		or	%g0,0,%g1
+/* 0x0040	     */		or	%g0,24,%g2
+/* 0x0044	     */		bl,pn	%icc,.L77000144
+/* 0x0048	     */		or	%g0,0,%g3
+/* 0x004c	     */		ld	[%o1],%f13	! i32[0] -> low word of the double in %f12
+/* 0x0050	     */		or	%g0,7,%o3
+/* 0x0054	     */		ldd	[%g4],%f8	! %f8 = 2^52
+/* 0x0058	     */		sub	%o2,5,%g3	! %g3 = len-5, bound of the unrolled loop
+/* 0x005c	     */		or	%g0,8,%g1
+/* 0x0060	     */		ld	[%o1+4],%f11
+/* 0x0064	     */		ld	[%o1+8],%f7
+/* 0x0068	     */		fmovs	%f8,%f12	! high word 0x43300000 -> %f12 now holds 2^52 + i32[0]
+/* 0x006c	     */		ld	[%o1+12],%f5
+/* 0x0070	     */		fmovs	%f8,%f10
+/* 0x0074	     */		ld	[%o1+16],%f3
+/* 0x0078	     */		fmovs	%f8,%f6
+/* 0x007c	     */		ld	[%o1+20],%f1
+/* 0x0080	     */		fsubd	%f12,%f8,%f12	! (2^52 + x) - 2^52 = (double)x
+/* 0x0084	     */		std	%f12,[%o0]	! d32[0]
+/* 0x0088	     */		fsubd	%f10,%f8,%f10
+/* 0x008c	     */		std	%f10,[%o0+8]	! d32[1]
+                       .L900000205:	! unrolled, software-pipelined loop: 5 elements per iteration
+/* 0x0090	     */		ld	[%o1+%g2],%f11
+/* 0x0094	     */		add	%g1,8,%g1
+/* 0x0098	     */		add	%o3,5,%o3
+/* 0x009c	     */		fsubd	%f6,%f8,%f6
+/* 0x00a0	     */		add	%g2,4,%g2
+/* 0x00a4	     */		std	%f6,[%o0+%g1]
+/* 0x00a8	     */		cmp	%o3,%g3	! compare against len-5 (tested by the ble at 0x010c)
+/* 0x00ac	     */		fmovs	%f8,%f4
+/* 0x00b0	     */		ld	[%o1+%g2],%f7
+/* 0x00b4	     */		fsubd	%f4,%f8,%f12
+/* 0x00b8	     */		add	%g1,8,%g1
+/* 0x00bc	     */		add	%g2,4,%g2
+/* 0x00c0	     */		fmovs	%f8,%f2
+/* 0x00c4	     */		std	%f12,[%o0+%g1]
+/* 0x00c8	     */		ld	[%o1+%g2],%f5
+/* 0x00cc	     */		fsubd	%f2,%f8,%f12
+/* 0x00d0	     */		add	%g1,8,%g1
+/* 0x00d4	     */		add	%g2,4,%g2
+/* 0x00d8	     */		fmovs	%f8,%f0
+/* 0x00dc	     */		std	%f12,[%o0+%g1]
+/* 0x00e0	     */		ld	[%o1+%g2],%f3
+/* 0x00e4	     */		fsubd	%f0,%f8,%f12
+/* 0x00e8	     */		add	%g1,8,%g1
+/* 0x00ec	     */		add	%g2,4,%g2
+/* 0x00f0	     */		fmovs	%f8,%f10
+/* 0x00f4	     */		std	%f12,[%o0+%g1]
+/* 0x00f8	     */		ld	[%o1+%g2],%f1
+/* 0x00fc	     */		fsubd	%f10,%f8,%f10
+/* 0x0100	     */		add	%g1,8,%g1
+/* 0x0104	     */		add	%g2,4,%g2
+/* 0x0108	     */		std	%f10,[%o0+%g1]
+/* 0x010c	     */		ble,pt	%icc,.L900000205
+/* 0x0110	     */		fmovs	%f8,%f6
+                       .L900000208:	! pipeline drain: finish the elements already loaded
+/* 0x0114	     */		fmovs	%f8,%f4
+/* 0x0118	     */		ld	[%o1+%g2],%f11
+/* 0x011c	     */		add	%g1,8,%g3
+/* 0x0120	     */		fmovs	%f8,%f2
+/* 0x0124	     */		add	%g1,16,%g1
+/* 0x0128	     */		cmp	%o3,%o4	! i > len-1 ? (tested by the bg at 0x016c)
+/* 0x012c	     */		fmovs	%f8,%f0
+/* 0x0130	     */		add	%g1,8,%o1
+/* 0x0134	     */		add	%g1,16,%o2
+/* 0x0138	     */		fmovs	%f8,%f10
+/* 0x013c	     */		add	%g1,24,%g2
+/* 0x0140	     */		fsubd	%f6,%f8,%f6
+/* 0x0144	     */		std	%f6,[%o0+%g3]
+/* 0x0148	     */		fsubd	%f4,%f8,%f4
+/* 0x014c	     */		std	%f4,[%o0+%g1]
+/* 0x0150	     */		sra	%o3,0,%g1	! %g1 = i for the scalar loop
+/* 0x0154	     */		fsubd	%f2,%f8,%f2
+/* 0x0158	     */		std	%f2,[%o0+%o1]
+/* 0x015c	     */		sllx	%g1,2,%g3	! %g3 = 4*i, byte offset into i32
+/* 0x0160	     */		fsubd	%f0,%f8,%f0
+/* 0x0164	     */		std	%f0,[%o0+%o2]
+/* 0x0168	     */		fsubd	%f10,%f8,%f0
+/* 0x016c	     */		bg,pn	%icc,.L77000140	! no elements left
+/* 0x0170	     */		std	%f0,[%o0+%g2]
+                       .L77000144:	! scalar loop: one element per iteration
+/* 0x0174	     */		ldd	[%g4],%f8	! %f8 = 2^52
+                       .L900000211:
+/* 0x0178	     */		ld	[%g5+%g3],%f13	! i32[i] -> low word of %f12
+/* 0x017c	     */		sllx	%g1,3,%g2	! %g2 = 8*i, byte offset into d32
+/* 0x0180	     */		add	%o3,1,%o3	! i++
+/* 0x0184	     */		sra	%o3,0,%g1
+/* 0x0188	     */		cmp	%o3,%o4	! i <= len-1 ?
+/* 0x018c	     */		fmovs	%f8,%f12
+/* 0x0190	     */		sllx	%g1,2,%g3
+/* 0x0194	     */		fsubd	%f12,%f8,%f0
+/* 0x0198	     */		std	%f0,[%o0+%g2]	! d32[i]
+/* 0x019c	     */		ble,a,pt	%icc,.L900000211	! annulled branch: the delay slot runs only when the branch is taken
+/* 0x01a0	     */		ldd	[%g4],%f8
+                       .L77000140:
+/* 0x01a4	     */		retl	! Result = 
+/* 0x01a8	     */		nop
+/* 0x01ac	   0 */		.type	conv_i32_to_d32,2
+/* 0x01ac	     */		.size	conv_i32_to_d32,(.-conv_i32_to_d32)
+
+	.section	".text",#alloc,#execinstr
+/* 000000	   0 */		.align	8
+!
+! CONSTANT POOL: one double, 2^52 (high word 0x43300000, low word 0), used as the bias for the int->double conversion below
+!
+                       .L_const_seg_900000301:
+/* 000000	   0 */		.word	1127219200,0	! 1127219200 = 0x43300000
+/* 0x0008	   0 */		.align	8
+/* 0x0008	     */		.skip	24
+!
+! SUBROUTINE conv_i32_to_d16 -- splits each i32[i] into two doubles: d16[2*i] = low 16 bits, d16[2*i+1] = high 16 bits; %i0 = d16, %i1 = i32, %i2 = len
+!
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
+
+                       	.global conv_i32_to_d16
+                       conv_i32_to_d16:
+/* 000000	     */		save	%sp,-192,%sp	! new register window and 192-byte frame
+                       .L900000310:
+/* 0x0004	     */		call	.+8	! position-independent code: leaves the current PC in %o7
+/* 0x0008	     */		sethi	/*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3
+
+!  143		      !}
+!  146		      !void conv_i32_to_d16(double *d16, unsigned int *i32, int len)
+!  147		      !{
+!  148		      !int i;
+!  149		      !unsigned int a;
+!  151		      !#pragma pipeloop(0)
+!  152		      ! for(i=0;i<len;i++)
+
+/* 0x000c	 152 */		cmp	%i2,0	! len <= 0 ?
+/* 0x0010	 147 */		add	%g3,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3
+/* 0x0014	 152 */		ble,pt	%icc,.L77000150	! nothing to convert when len <= 0
+/* 0x0018	     */		add	%g3,%o7,%o0	! delay slot: %o0 = GOT base
+
+!  153		      !   {
+!  154		      !     a=i32[i];
+!  155		      !     d16[2*i]=(double)(a&0xffff);
+!  156		      !     d16[2*i+1]=(double)(a>>16);
+
+/* 0x001c	 156 */		sethi	%hi(.L_const_seg_900000301),%g2
+/* 0x0020	 147 */		or	%g0,%i2,%o1	! %o1 = len
+/* 0x0024	 152 */		sethi	%hi(0xfc00),%g3
+/* 0x0028	 156 */		add	%g2,%lo(.L_const_seg_900000301),%g2
+/* 0x002c	 152 */		or	%g0,%o1,%g4
+/* 0x0030	 156 */		ldx	[%o0+%g2],%o5	! %o5 = address of the 2^52 constant, via the GOT
+/* 0x0034	 152 */		add	%g3,1023,%g1	! %g1 = 0xfc00 + 0x3ff = 0xffff mask
+/* 0x0038	 147 */		or	%g0,%i1,%o7	! %o7 = i32 base
+/* 0x003c	 152 */		or	%g0,0,%i2	! i = 0
+/* 0x0040	     */		sub	%o1,1,%g5	! %g5 = len-1
+/* 0x0044	     */		or	%g0,0,%g3	! %g3 = 2*i   (index of the low-half output)
+/* 0x0048	     */		or	%g0,1,%g2	! %g2 = 2*i+1 (index of the high-half output)
+/* 0x004c	 154 */		or	%g0,0,%o2	! %o2 = 4*i, byte offset into i32
+/* 0x0050	     */		cmp	%g4,6	! fewer than 6 elements -> scalar loop only
+/* 0x0054	 152 */		bl,pn	%icc,.L77000154
+/* 0x0058	     */		ldd	[%o5],%f0	! %f0 = 2^52
+/* 0x005c	     */		sub	%o1,2,%o3	! %o3 = len-2, bound of the unrolled loop
+/* 0x0060	     */		or	%g0,16,%o2
+/* 0x0064	 154 */		ld	[%i1],%o4	! a = i32[0]
+/* 0x0068	 156 */		or	%g0,3,%g2
+/* 0x006c	     */		or	%g0,2,%g3
+/* 0x0070	 155 */		fmovs	%f0,%f2	! high word 0x43300000 for the biased double
+/* 0x0074	 156 */		or	%g0,4,%i2
+/* 0x0078	 155 */		and	%o4,%g1,%o0	! a & 0xffff
+/* 0x007c	     */		st	%o0,[%sp+2227]	! integer -> FP register transfer goes through a stack scratch slot
+/* 0x0080	     */		fmovs	%f0,%f4
+/* 0x0084	 156 */		srl	%o4,16,%i4	! a >> 16
+/* 0x0088	 152 */		or	%g0,12,%o4
+/* 0x008c	     */		or	%g0,24,%o0
+/* 0x0090	 155 */		ld	[%sp+2227],%f3	! low word: %f2 now holds 2^52 + (a & 0xffff)
+/* 0x0094	     */		fsubd	%f2,%f0,%f2	! subtract 2^52 -> (double)(a & 0xffff)
+/* 0x0098	     */		std	%f2,[%i0]	! d16[0]
+/* 0x009c	 156 */		st	%i4,[%sp+2223]
+/* 0x00a0	 154 */		ld	[%o7+4],%o1
+/* 0x00a4	 156 */		fmovs	%f0,%f2
+/* 0x00a8	 155 */		and	%o1,%g1,%i1
+/* 0x00ac	 156 */		ld	[%sp+2223],%f3
+/* 0x00b0	     */		srl	%o1,16,%o1
+/* 0x00b4	     */		fsubd	%f2,%f0,%f2
+/* 0x00b8	     */		std	%f2,[%i0+8]	! d16[1]
+/* 0x00bc	     */		st	%o1,[%sp+2223]
+/* 0x00c0	 155 */		st	%i1,[%sp+2227]
+/* 0x00c4	 154 */		ld	[%o7+8],%o1
+/* 0x00c8	 156 */		fmovs	%f0,%f2
+/* 0x00cc	 155 */		and	%o1,%g1,%g4
+/* 0x00d0	     */		ld	[%sp+2227],%f5
+/* 0x00d4	 156 */		srl	%o1,16,%o1
+/* 0x00d8	     */		ld	[%sp+2223],%f3
+/* 0x00dc	     */		st	%o1,[%sp+2223]
+/* 0x00e0	 155 */		fsubd	%f4,%f0,%f4
+/* 0x00e4	     */		st	%g4,[%sp+2227]
+/* 0x00e8	 156 */		fsubd	%f2,%f0,%f2
+/* 0x00ec	 154 */		ld	[%o7+12],%o1
+/* 0x00f0	 155 */		std	%f4,[%i0+16]
+/* 0x00f4	 156 */		std	%f2,[%i0+24]
+                       .L900000306:	! unrolled, software-pipelined loop: 2 input words (4 output doubles) per iteration
+/* 0x00f8	 155 */		ld	[%sp+2227],%f5
+/* 0x00fc	 156 */		add	%i2,2,%i2
+/* 0x0100	     */		add	%g2,4,%g2
+/* 0x0104	     */		ld	[%sp+2223],%f3
+/* 0x0108	     */		cmp	%i2,%o3	! compare against len-2 (tested by the ble at 0x0184)
+/* 0x010c	     */		add	%g3,4,%g3
+/* 0x0110	 155 */		and	%o1,%g1,%g4
+/* 0x0114	 156 */		srl	%o1,16,%o1
+/* 0x0118	 155 */		st	%g4,[%sp+2227]
+/* 0x011c	 156 */		st	%o1,[%sp+2223]
+/* 0x0120	 152 */		add	%o4,4,%o1
+/* 0x0124	 154 */		ld	[%o7+%o1],%o4
+/* 0x0128	 156 */		fmovs	%f0,%f2
+/* 0x012c	 155 */		fmovs	%f0,%f4
+/* 0x0130	     */		fsubd	%f4,%f0,%f4
+/* 0x0134	 152 */		add	%o2,16,%o2
+/* 0x0138	 156 */		fsubd	%f2,%f0,%f2
+/* 0x013c	 155 */		std	%f4,[%i0+%o2]
+/* 0x0140	 152 */		add	%o0,16,%o0
+/* 0x0144	 156 */		std	%f2,[%i0+%o0]
+/* 0x0148	 155 */		ld	[%sp+2227],%f5
+/* 0x014c	 156 */		ld	[%sp+2223],%f3
+/* 0x0150	 155 */		and	%o4,%g1,%g4
+/* 0x0154	 156 */		srl	%o4,16,%o4
+/* 0x0158	 155 */		st	%g4,[%sp+2227]
+/* 0x015c	 156 */		st	%o4,[%sp+2223]
+/* 0x0160	 152 */		add	%o1,4,%o4
+/* 0x0164	 154 */		ld	[%o7+%o4],%o1
+/* 0x0168	 156 */		fmovs	%f0,%f2
+/* 0x016c	 155 */		fmovs	%f0,%f4
+/* 0x0170	     */		fsubd	%f4,%f0,%f4
+/* 0x0174	 152 */		add	%o2,16,%o2
+/* 0x0178	 156 */		fsubd	%f2,%f0,%f2
+/* 0x017c	 155 */		std	%f4,[%i0+%o2]
+/* 0x0180	 152 */		add	%o0,16,%o0
+/* 0x0184	 156 */		ble,pt	%icc,.L900000306
+/* 0x0188	     */		std	%f2,[%i0+%o0]
+                       .L900000309:	! pipeline drain: finish the elements already loaded
+/* 0x018c	 155 */		ld	[%sp+2227],%f5
+/* 0x0190	 156 */		fmovs	%f0,%f2
+/* 0x0194	     */		srl	%o1,16,%o3
+/* 0x0198	     */		ld	[%sp+2223],%f3
+/* 0x019c	 155 */		and	%o1,%g1,%i1
+/* 0x01a0	 152 */		add	%o2,16,%g4
+/* 0x01a4	 155 */		fmovs	%f0,%f4
+/* 0x01a8	     */		st	%i1,[%sp+2227]
+/* 0x01ac	 152 */		add	%o0,16,%o2
+/* 0x01b0	 156 */		st	%o3,[%sp+2223]
+/* 0x01b4	 154 */		sra	%i2,0,%o3
+/* 0x01b8	 152 */		add	%g4,16,%o1
+/* 0x01bc	 155 */		fsubd	%f4,%f0,%f4
+/* 0x01c0	     */		std	%f4,[%i0+%g4]
+/* 0x01c4	 152 */		add	%o0,32,%o0
+/* 0x01c8	 156 */		fsubd	%f2,%f0,%f2
+/* 0x01cc	     */		std	%f2,[%i0+%o2]
+/* 0x01d0	     */		sllx	%o3,2,%o2	! %o2 = 4*i for the scalar loop
+/* 0x01d4	 155 */		ld	[%sp+2227],%f5
+/* 0x01d8	 156 */		cmp	%i2,%g5	! i > len-1 ? (tested by the bg at 0x01fc)
+/* 0x01dc	     */		add	%g2,6,%g2
+/* 0x01e0	     */		ld	[%sp+2223],%f3
+/* 0x01e4	     */		add	%g3,6,%g3
+/* 0x01e8	 155 */		fmovs	%f0,%f4
+/* 0x01ec	 156 */		fmovs	%f0,%f2
+/* 0x01f0	 155 */		fsubd	%f4,%f0,%f4
+/* 0x01f4	     */		std	%f4,[%i0+%o1]
+/* 0x01f8	 156 */		fsubd	%f2,%f0,%f0
+/* 0x01fc	     */		bg,pn	%icc,.L77000150	! no elements left
+/* 0x0200	     */		std	%f0,[%i0+%o0]
+                       .L77000154:	! scalar loop: one input word per iteration
+/* 0x0204	 155 */		ldd	[%o5],%f0	! %f0 = 2^52
+                       .L900000311:
+/* 0x0208	 154 */		ld	[%o7+%o2],%o0	! a = i32[i]
+/* 0x020c	 155 */		sra	%g3,0,%o1
+/* 0x0210	     */		fmovs	%f0,%f2
+/* 0x0214	     */		sllx	%o1,3,%o2	! byte offset of d16[2*i]
+/* 0x0218	 156 */		add	%i2,1,%i2	! i++
+/* 0x021c	 155 */		and	%o0,%g1,%o1	! a & 0xffff
+/* 0x0220	     */		st	%o1,[%sp+2227]
+/* 0x0224	 156 */		add	%g3,2,%g3
+/* 0x0228	     */		srl	%o0,16,%o1	! a >> 16
+/* 0x022c	     */		cmp	%i2,%g5	! i <= len-1 ?
+/* 0x0230	     */		sra	%g2,0,%o0
+/* 0x0234	     */		add	%g2,2,%g2
+/* 0x0238	     */		sllx	%o0,3,%o0	! byte offset of d16[2*i+1]
+/* 0x023c	 155 */		ld	[%sp+2227],%f3
+/* 0x0240	 154 */		sra	%i2,0,%o3
+/* 0x0244	 155 */		fsubd	%f2,%f0,%f2
+/* 0x0248	     */		std	%f2,[%i0+%o2]	! d16[2*i]
+/* 0x024c	     */		sllx	%o3,2,%o2	! %o2 = 4*i for the next load
+/* 0x0250	 156 */		st	%o1,[%sp+2223]
+/* 0x0254	     */		fmovs	%f0,%f2
+/* 0x0258	     */		ld	[%sp+2223],%f3
+/* 0x025c	     */		fsubd	%f2,%f0,%f0
+/* 0x0260	     */		std	%f0,[%i0+%o0]	! d16[2*i+1]
+/* 0x0264	     */		ble,a,pt	%icc,.L900000311	! annulled branch: the delay slot runs only when the branch is taken
+/* 0x0268	     */		ldd	[%o5],%f0	! reload 2^52 (%f0 was overwritten by the last fsubd)
+                       .L77000150:
+/* 0x026c	     */		ret	! Result = 
+/* 0x0270	     */		restore	%g0,%g0,%g0
+/* 0x0274	   0 */		.type	conv_i32_to_d16,2
+/* 0x0274	     */		.size	conv_i32_to_d16,(.-conv_i32_to_d16)
+
+	.section	".text",#alloc,#execinstr
+/* 000000	   0 */		.align	8
+!
+! CONSTANT POOL: one double, 2^52 (high word 0x43300000, low word 0), used as the bias for the int->double conversion in the tail loop
+!
+                       .L_const_seg_900000401:
+/* 000000	   0 */		.word	1127219200,0	! 1127219200 = 0x43300000
+/* 0x0008	   0 */		.align	8
+/* 0x0008	     */		.skip	24
+!
+! SUBROUTINE conv_i32_to_d32_and_d16 -- for each i32[i] writes d32[i], d16[2*i] (low 16 bits) and d16[2*i+1] (high 16 bits); %i0 = d32, %i1 = d16, %i2 = i32, %i3 = len
+!
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
+
+                       	.global conv_i32_to_d32_and_d16
+                       conv_i32_to_d32_and_d16:
+/* 000000	     */		save	%sp,-192,%sp	! new register window and 192-byte frame
+                       .L900000415:
+/* 0x0004	     */		call	.+8	! position-independent code: leaves the current PC in %o7
+/* 0x0008	     */		sethi	/*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g3
+
+!  157		      !   }
+!  158		      !}
+!  161		      !void conv_i32_to_d32_and_d16(double *d32, double *d16, 
+!  162		      !			     unsigned int *i32, int len)
+!  163		      !{
+!  164		      !int i = 0;
+!  165		      !unsigned int a;
+!  167		      !#pragma pipeloop(0)
+!  168		      !#ifdef RF_INLINE_MACROS
+!  169		      ! for(;i<len-3;i+=4)
+!  170		      !   {
+!  171		      !     i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
+!  172		      !			  &(d16[2*i]), &(d32[i]), (float *)(&(i32[i])));
+
+/* 0x000c	 172 */		sethi	%hi(Zero),%g2
+/* 0x0010	 163 */		add	%g3,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g3
+/* 0x0014	     */		or	%g0,%i3,%g5	! %g5 = len
+/* 0x0018	     */		add	%g3,%o7,%o3	! %o3 = GOT base
+/* 0x001c	 172 */		add	%g2,%lo(Zero),%g2
+/* 0x0020	     */		ldx	[%o3+%g2],%o0	! %o0 = &Zero, via the GOT
+/* 0x0024	     */		sethi	%hi(TwoToMinus16),%g3
+/* 0x0028	 163 */		or	%g0,%i0,%i3	! %i3 = d32 (registers are reshuffled from here on)
+/* 0x002c	 169 */		sub	%g5,3,%o1	! len-3
+/* 0x0030	 172 */		sethi	%hi(TwoTo16),%g4
+/* 0x0034	 163 */		or	%g0,%i2,%i0	! %i0 = i32
+/* 0x0038	 172 */		add	%g3,%lo(TwoToMinus16),%g2
+/* 0x003c	     */		ldx	[%o3+%g2],%o2	! %o2 = &TwoToMinus16, via the GOT
+/* 0x0040	 169 */		cmp	%o1,0
+/* 0x0044	 164 */		or	%g0,0,%i2	! i = 0
+/* 0x0048	 169 */		ble,pt	%icc,.L900000418	! len-3 <= 0: skip the 4-wide loop
+/* 0x004c	     */		cmp	%i2,%g5	! delay slot: i vs len, consumed by the bge at .L900000418
+/* 0x0050	     */		ldd	[%o0],%f2	! %f2 = Zero
+/* 0x0054	 172 */		add	%g4,%lo(TwoTo16),%g3
+/* 0x0058	     */		ldx	[%o3+%g3],%o1	! %o1 = &TwoTo16, via the GOT
+/* 0x005c	 169 */		sub	%g5,4,%o4	! %o4 = len-4, bound of the 4-wide loop
+/* 0x0060	     */		or	%g0,0,%o5	! %o5 = 2*i, index into d16
+                       .L900000417:	! inlined i16_to_d16_and_d32x4: 4 input words per iteration
+/* 0x0064	 172 */		sra	%i2,0,%g2
+/* 0x0068	     */		fmovd	%f2,%f14	! start from Zero; presumably all-zero bits, so the high word is 0 -- TODO confirm
+/* 0x006c	     */		ldd	[%o2],%f0	! %f0 = TwoToMinus16
+/* 0x0070	     */		sllx	%g2,2,%g3	! %g3 = 4*i, byte offset into i32
+/* 0x0074	     */		fmovd	%f2,%f10
+/* 0x0078	     */		ldd	[%o1],%f16	! %f16 = TwoTo16
+/* 0x007c	     */		ld	[%g3+%i0],%f15	! i32[i] -> low word of %f14
+/* 0x0080	     */		add	%i0,%g3,%g3	! %g3 = &i32[i]
+/* 0x0084	     */		fmovd	%f2,%f6
+/* 0x0088	     */		ld	[%g3+4],%f11	! i32[i+1] -> low word of %f10
+/* 0x008c	     */		sra	%o5,0,%g4
+/* 0x0090	     */		add	%i2,4,%i2	! i += 4
+/* 0x0094	     */		ld	[%g3+8],%f7	! i32[i+2] -> low word of %f6
+/* 0x0098	     */		fxtod	%f14,%f14	! 64-bit integer -> double: x0
+/* 0x009c	     */		sllx	%g2,3,%g2	! byte offset of d32[i]
+/* 0x00a0	     */		ld	[%g3+12],%f3	! i32[i+3] -> low word of %f2
+/* 0x00a4	     */		fxtod	%f10,%f10	! x1
+/* 0x00a8	     */		sllx	%g4,3,%g3	! byte offset of d16[2*i]
+/* 0x00ac	     */		fxtod	%f6,%f6	! x2
+/* 0x00b0	     */		std	%f14,[%g2+%i3]	! d32[i]
+/* 0x00b4	     */		add	%i3,%g2,%g4	! %g4 = &d32[i]
+/* 0x00b8	     */		fxtod	%f2,%f2	! x3
+/* 0x00bc	     */		fmuld	%f0,%f14,%f12	! x0 * TwoToMinus16
+/* 0x00c0	     */		std	%f2,[%g4+24]	! d32[i+3]
+/* 0x00c4	     */		fmuld	%f0,%f10,%f8
+/* 0x00c8	     */		std	%f10,[%g4+8]	! d32[i+1]
+/* 0x00cc	     */		add	%i1,%g3,%g2	! %g2 = &d16[2*i]
+/* 0x00d0	     */		fmuld	%f0,%f6,%f4
+/* 0x00d4	     */		std	%f6,[%g4+16]	! d32[i+2]
+/* 0x00d8	     */		cmp	%i2,%o4	! compare against len-4 (tested by the ble at 0x0144)
+/* 0x00dc	     */		fmuld	%f0,%f2,%f0
+/* 0x00e0	     */		fdtox	%f12,%f12	! double -> 64-bit integer, discarding the fraction
+/* 0x00e4	     */		add	%o5,8,%o5	! 8 d16 entries consumed per iteration
+/* 0x00e8	     */		fdtox	%f8,%f8
+/* 0x00ec	     */		fdtox	%f4,%f4
+/* 0x00f0	     */		fdtox	%f0,%f0
+/* 0x00f4	     */		fxtod	%f12,%f12	! back to double: the high part of x0
+/* 0x00f8	     */		std	%f12,[%g2+8]	! d16[2*i+1]
+/* 0x00fc	     */		fxtod	%f8,%f8
+/* 0x0100	     */		std	%f8,[%g2+24]	! d16[2*i+3]
+/* 0x0104	     */		fxtod	%f4,%f4
+/* 0x0108	     */		std	%f4,[%g2+40]	! d16[2*i+5]
+/* 0x010c	     */		fxtod	%f0,%f0
+/* 0x0110	     */		std	%f0,[%g2+56]	! d16[2*i+7]
+/* 0x0114	     */		fmuld	%f12,%f16,%f12	! high part * TwoTo16
+/* 0x0118	     */		fmuld	%f8,%f16,%f8
+/* 0x011c	     */		fmuld	%f4,%f16,%f4
+/* 0x0120	     */		fsubd	%f14,%f12,%f12	! x0 - high*TwoTo16 = the low part of x0
+/* 0x0124	     */		std	%f12,[%g3+%i1]	! d16[2*i]
+/* 0x0128	     */		fmuld	%f0,%f16,%f0
+/* 0x012c	     */		fsubd	%f10,%f8,%f8
+/* 0x0130	     */		std	%f8,[%g2+16]	! d16[2*i+2]
+/* 0x0134	     */		fsubd	%f6,%f4,%f4
+/* 0x0138	     */		std	%f4,[%g2+32]	! d16[2*i+4]
+/* 0x013c	     */		fsubd	%f2,%f0,%f0
+/* 0x0140	     */		std	%f0,[%g2+48]	! d16[2*i+6]
+/* 0x0144	     */		ble,a,pt	%icc,.L900000417	! annulled branch: the delay slot runs only when the branch is taken
+/* 0x0148	     */		ldd	[%o0],%f2	! reload Zero for the next iteration
+                       .L77000159:
+
+!  173		      !   }
+!  174		      !#endif
+!  175		      ! for(;i<len;i++)
+
+/* 0x014c	 175 */		cmp	%i2,%g5	! i >= len ?
+                       .L900000418:
+/* 0x0150	 175 */		bge,pt	%icc,.L77000164	! no leftover elements
+/* 0x0154	     */		nop
+
+!  176		      !   {
+!  177		      !     a=i32[i];
+!  178		      !     d32[i]=(double)(i32[i]);
+!  179		      !     d16[2*i]=(double)(a&0xffff);
+!  180		      !     d16[2*i+1]=(double)(a>>16);
+
+/* 0x0158	 180 */		sethi	%hi(.L_const_seg_900000401),%g2
+/* 0x015c	     */		add	%g2,%lo(.L_const_seg_900000401),%g2
+/* 0x0160	 175 */		sethi	%hi(0xfc00),%g3
+/* 0x0164	 180 */		ldx	[%o3+%g2],%g1	! %g1 = address of the 2^52 constant, via the GOT
+/* 0x0168	 175 */		sll	%i2,1,%i4	! %i4 = 2*i   (index of the low-half output)
+/* 0x016c	     */		sub	%g5,%i2,%g4	! %g4 = number of elements left
+/* 0x0170	 177 */		sra	%i2,0,%o3
+/* 0x0174	 175 */		add	%g3,1023,%g3	! %g3 = 0xfc00 + 0x3ff = 0xffff mask
+/* 0x0178	 178 */		ldd	[%g1],%f2	! %f2 = 2^52
+/* 0x017c	     */		sllx	%o3,2,%o2	! %o2 = 4*i, byte offset into i32
+/* 0x0180	 175 */		add	%i4,1,%g2	! %g2 = 2*i+1 (index of the high-half output)
+/* 0x0184	 177 */		or	%g0,%o3,%o1
+/* 0x0188	     */		cmp	%g4,6	! fewer than 6 left -> scalar loop only
+/* 0x018c	 175 */		bl,pn	%icc,.L77000161
+/* 0x0190	     */		sra	%i2,0,%o3
+/* 0x0194	 177 */		or	%g0,%o2,%o0
+/* 0x0198	 178 */		ld	[%i0+%o2],%f5	! i32[i] -> low word of %f4
+/* 0x019c	 179 */		fmovs	%f2,%f8
+/* 0x01a0	 175 */		add	%o0,4,%o3
+/* 0x01a4	 177 */		ld	[%i0+%o0],%o7	! a = i32[i]
+/* 0x01a8	 180 */		fmovs	%f2,%f6
+/* 0x01ac	 178 */		fmovs	%f2,%f4	! high word 0x43300000 -> %f4 holds 2^52 + i32[i]
+/* 0x01b0	     */		sllx	%o1,3,%o2
+/* 0x01b4	 175 */		add	%o3,4,%o5
+/* 0x01b8	 179 */		sra	%i4,0,%o0
+/* 0x01bc	 175 */		add	%o3,8,%o4
+/* 0x01c0	 178 */		fsubd	%f4,%f2,%f4	! subtract 2^52 -> (double)i32[i]
+/* 0x01c4	     */		std	%f4,[%i3+%o2]	! d32[i]
+/* 0x01c8	 179 */		sllx	%o0,3,%i5
+/* 0x01cc	     */		and	%o7,%g3,%o0	! a & 0xffff
+/* 0x01d0	     */		st	%o0,[%sp+2227]	! integer -> FP register transfer goes through a stack scratch slot
+/* 0x01d4	 175 */		add	%i5,16,%o1
+/* 0x01d8	 180 */		srl	%o7,16,%g4	! a >> 16
+/* 0x01dc	     */		add	%i2,1,%i2
+/* 0x01e0	     */		sra	%g2,0,%o0
+/* 0x01e4	 175 */		add	%o2,8,%o2
+/* 0x01e8	 179 */		fmovs	%f2,%f4
+/* 0x01ec	 180 */		sllx	%o0,3,%l0
+/* 0x01f0	     */		add	%i4,3,%g2
+/* 0x01f4	 179 */		ld	[%sp+2227],%f5
+/* 0x01f8	 175 */		add	%l0,16,%o0
+/* 0x01fc	 180 */		add	%i4,2,%i4
+/* 0x0200	 175 */		sub	%g5,1,%o7	! %o7 = len-1, bound of the unrolled loop
+/* 0x0204	 180 */		add	%i2,3,%i2
+/* 0x0208	 179 */		fsubd	%f4,%f2,%f4
+/* 0x020c	     */		std	%f4,[%i1+%i5]	! d16[2*i]
+/* 0x0210	 180 */		st	%g4,[%sp+2223]
+/* 0x0214	 177 */		ld	[%i0+%o3],%i5
+/* 0x0218	 180 */		fmovs	%f2,%f4
+/* 0x021c	     */		srl	%i5,16,%g4
+/* 0x0220	 179 */		and	%i5,%g3,%i5
+/* 0x0224	 180 */		ld	[%sp+2223],%f5
+/* 0x0228	     */		fsubd	%f4,%f2,%f4
+/* 0x022c	     */		std	%f4,[%i1+%l0]	! d16[2*i+1]
+/* 0x0230	     */		st	%g4,[%sp+2223]
+/* 0x0234	 177 */		ld	[%i0+%o5],%g4
+/* 0x0238	 179 */		st	%i5,[%sp+2227]
+/* 0x023c	 178 */		fmovs	%f2,%f4
+/* 0x0240	 180 */		srl	%g4,16,%i5
+/* 0x0244	 179 */		and	%g4,%g3,%g4
+/* 0x0248	 180 */		ld	[%sp+2223],%f7
+/* 0x024c	     */		st	%i5,[%sp+2223]
+/* 0x0250	 178 */		ld	[%i0+%o3],%f5
+/* 0x0254	 180 */		fsubd	%f6,%f2,%f6
+/* 0x0258	 177 */		ld	[%i0+%o4],%o3
+/* 0x025c	 178 */		fsubd	%f4,%f2,%f4
+/* 0x0260	 179 */		ld	[%sp+2227],%f9
+/* 0x0264	 180 */		ld	[%sp+2223],%f1
+/* 0x0268	 179 */		st	%g4,[%sp+2227]
+/* 0x026c	     */		fsubd	%f8,%f2,%f8
+/* 0x0270	     */		std	%f8,[%i1+%o1]
+/* 0x0274	 180 */		std	%f6,[%i1+%o0]
+/* 0x0278	 178 */		std	%f4,[%i3+%o2]
+                       .L900000411:	! unrolled, software-pipelined tail loop: 2 input words per iteration
+/* 0x027c	 179 */		ld	[%sp+2227],%f13
+/* 0x0280	 180 */		srl	%o3,16,%g4
+/* 0x0284	     */		add	%i2,2,%i2
+/* 0x0288	     */		st	%g4,[%sp+2223]
+/* 0x028c	     */		cmp	%i2,%o7	! compare against len-1 (tested by the bl at 0x0330)
+/* 0x0290	     */		add	%g2,4,%g2
+/* 0x0294	 178 */		ld	[%i0+%o5],%f11
+/* 0x0298	 180 */		add	%i4,4,%i4
+/* 0x029c	 175 */		add	%o4,4,%o5
+/* 0x02a0	 177 */		ld	[%i0+%o5],%g4
+/* 0x02a4	 179 */		and	%o3,%g3,%o3
+/* 0x02a8	     */		st	%o3,[%sp+2227]
+/* 0x02ac	 180 */		fmovs	%f2,%f0
+/* 0x02b0	 179 */		fmovs	%f2,%f12
+/* 0x02b4	 180 */		fsubd	%f0,%f2,%f8
+/* 0x02b8	 179 */		fsubd	%f12,%f2,%f4
+/* 0x02bc	 175 */		add	%o1,16,%o1
+/* 0x02c0	 180 */		ld	[%sp+2223],%f7
+/* 0x02c4	 178 */		fmovs	%f2,%f10
+/* 0x02c8	 179 */		std	%f4,[%i1+%o1]
+/* 0x02cc	 175 */		add	%o0,16,%o0
+/* 0x02d0	 178 */		fsubd	%f10,%f2,%f4
+/* 0x02d4	 175 */		add	%o2,8,%o2
+/* 0x02d8	 180 */		std	%f8,[%i1+%o0]
+/* 0x02dc	 178 */		std	%f4,[%i3+%o2]
+/* 0x02e0	 179 */		ld	[%sp+2227],%f9
+/* 0x02e4	 180 */		srl	%g4,16,%o3
+/* 0x02e8	     */		st	%o3,[%sp+2223]
+/* 0x02ec	 178 */		ld	[%i0+%o4],%f5
+/* 0x02f0	 175 */		add	%o4,8,%o4
+/* 0x02f4	 177 */		ld	[%i0+%o4],%o3
+/* 0x02f8	 179 */		and	%g4,%g3,%g4
+/* 0x02fc	     */		st	%g4,[%sp+2227]
+/* 0x0300	 180 */		fmovs	%f2,%f6
+/* 0x0304	 179 */		fmovs	%f2,%f8
+/* 0x0308	 180 */		fsubd	%f6,%f2,%f6
+/* 0x030c	 179 */		fsubd	%f8,%f2,%f8
+/* 0x0310	 175 */		add	%o1,16,%o1
+/* 0x0314	 180 */		ld	[%sp+2223],%f1
+/* 0x0318	 178 */		fmovs	%f2,%f4
+/* 0x031c	 179 */		std	%f8,[%i1+%o1]
+/* 0x0320	 175 */		add	%o0,16,%o0
+/* 0x0324	 178 */		fsubd	%f4,%f2,%f4
+/* 0x0328	 175 */		add	%o2,8,%o2
+/* 0x032c	 180 */		std	%f6,[%i1+%o0]
+/* 0x0330	     */		bl,pt	%icc,.L900000411
+/* 0x0334	     */		std	%f4,[%i3+%o2]
+                       .L900000414:	! pipeline drain: finish the elements already loaded
+/* 0x0338	 180 */		srl	%o3,16,%o7
+/* 0x033c	     */		st	%o7,[%sp+2223]
+/* 0x0340	 179 */		fmovs	%f2,%f12
+/* 0x0344	 178 */		ld	[%i0+%o5],%f11
+/* 0x0348	 180 */		fmovs	%f2,%f0
+/* 0x034c	 179 */		and	%o3,%g3,%g4
+/* 0x0350	 180 */		fmovs	%f2,%f6
+/* 0x0354	 175 */		add	%o1,16,%o3
+/* 0x0358	     */		add	%o0,16,%o7
+/* 0x035c	 178 */		fmovs	%f2,%f10
+/* 0x0360	 175 */		add	%o2,8,%o2
+/* 0x0364	     */		add	%o1,32,%o5
+/* 0x0368	 179 */		ld	[%sp+2227],%f13
+/* 0x036c	 178 */		fmovs	%f2,%f4
+/* 0x0370	 175 */		add	%o0,32,%o1
+/* 0x0374	 180 */		ld	[%sp+2223],%f7
+/* 0x0378	 175 */		add	%o2,8,%o0
+/* 0x037c	 180 */		cmp	%i2,%g5	! i >= len ? (tested by the bge at 0x03cc)
+/* 0x0380	 179 */		st	%g4,[%sp+2227]
+/* 0x0384	     */		fsubd	%f12,%f2,%f8
+/* 0x0388	 180 */		add	%g2,6,%g2
+/* 0x038c	 179 */		std	%f8,[%i1+%o3]
+/* 0x0390	 180 */		fsubd	%f0,%f2,%f0
+/* 0x0394	 177 */		sra	%i2,0,%o3	! %o3 = i for the scalar loop
+/* 0x0398	 180 */		std	%f0,[%i1+%o7]
+/* 0x039c	 178 */		fsubd	%f10,%f2,%f0
+/* 0x03a0	 180 */		add	%i4,6,%i4
+/* 0x03a4	 178 */		std	%f0,[%i3+%o2]
+/* 0x03a8	     */		sllx	%o3,2,%o2	! %o2 = 4*i for the scalar loop
+/* 0x03ac	 179 */		ld	[%sp+2227],%f9
+/* 0x03b0	 178 */		ld	[%i0+%o4],%f5
+/* 0x03b4	 179 */		fmovs	%f2,%f8
+/* 0x03b8	     */		fsubd	%f8,%f2,%f0
+/* 0x03bc	     */		std	%f0,[%i1+%o5]
+/* 0x03c0	 180 */		fsubd	%f6,%f2,%f0
+/* 0x03c4	     */		std	%f0,[%i1+%o1]
+/* 0x03c8	 178 */		fsubd	%f4,%f2,%f0
+/* 0x03cc	 180 */		bge,pn	%icc,.L77000164	! no elements left
+/* 0x03d0	     */		std	%f0,[%i3+%o0]
+                       .L77000161:	! scalar loop: one input word per iteration
+/* 0x03d4	 178 */		ldd	[%g1],%f2	! %f2 = 2^52
+                       .L900000416:
+/* 0x03d8	 178 */		ld	[%i0+%o2],%f5	! i32[i] -> low word of %f4 (for d32)
+/* 0x03dc	 179 */		sra	%i4,0,%o0
+/* 0x03e0	 180 */		add	%i2,1,%i2	! i++
+/* 0x03e4	 177 */		ld	[%i0+%o2],%o1	! a = i32[i] (integer copy for the mask and shift)
+/* 0x03e8	 178 */		sllx	%o3,3,%o3	! byte offset of d32[i]
+/* 0x03ec	 180 */		add	%i4,2,%i4
+/* 0x03f0	 178 */		fmovs	%f2,%f4
+/* 0x03f4	 179 */		sllx	%o0,3,%o4	! byte offset of d16[2*i]
+/* 0x03f8	 180 */		cmp	%i2,%g5	! i < len ?
+/* 0x03fc	 179 */		and	%o1,%g3,%o0	! a & 0xffff
+/* 0x0400	 178 */		fsubd	%f4,%f2,%f0
+/* 0x0404	     */		std	%f0,[%i3+%o3]	! d32[i]
+/* 0x0408	 180 */		srl	%o1,16,%o1	! a >> 16
+/* 0x040c	 179 */		st	%o0,[%sp+2227]
+/* 0x0410	 180 */		sra	%g2,0,%o0
+/* 0x0414	     */		add	%g2,2,%g2
+/* 0x0418	 177 */		sra	%i2,0,%o3
+/* 0x041c	 180 */		sllx	%o0,3,%o0	! byte offset of d16[2*i+1]
+/* 0x0420	 179 */		fmovs	%f2,%f4
+/* 0x0424	     */		sllx	%o3,2,%o2	! %o2 = 4*i for the next load
+/* 0x0428	     */		ld	[%sp+2227],%f5
+/* 0x042c	     */		fsubd	%f4,%f2,%f0
+/* 0x0430	     */		std	%f0,[%i1+%o4]	! d16[2*i]
+/* 0x0434	 180 */		st	%o1,[%sp+2223]
+/* 0x0438	     */		fmovs	%f2,%f4
+/* 0x043c	     */		ld	[%sp+2223],%f5
+/* 0x0440	     */		fsubd	%f4,%f2,%f0
+/* 0x0444	     */		std	%f0,[%i1+%o0]	! d16[2*i+1]
+/* 0x0448	     */		bl,a,pt	%icc,.L900000416	! annulled branch: the delay slot runs only when the branch is taken
+/* 0x044c	     */		ldd	[%g1],%f2
+                       .L77000164:
+/* 0x0450	     */		ret	! Result = 
+/* 0x0454	     */		restore	%g0,%g0,%g0
+/* 0x0458	   0 */		.type	conv_i32_to_d32_and_d16,2
+/* 0x0458	     */		.size	conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
+
+	.section	".text",#alloc,#execinstr
+/* 000000	   0 */		.align	8
+!
+! SUBROUTINE adjust_montf_result -- if i32[len] != 0 or i32[0..len-1] >= nint[0..len-1] (compared from the top word down), subtracts nint from i32 in place; %i0 = i32, %i1 = nint, %i2 = len
+!
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
+
+                       	.global adjust_montf_result
+                       adjust_montf_result:
+/* 000000	     */		save	%sp,-176,%sp	! new register window and 176-byte frame
+/* 0x0004	     */		or	%g0,%i2,%o1	! %o1 = len
+/* 0x0008	     */		or	%g0,%i0,%i2	! %i2 = i32 base
+
+!  181		      !   }
+!  182		      !}
+!  185		      !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
+!  186		      !{
+!  187		      !long long acc;
+!  188		      !int i;
+!  190		      ! if(i32[len]>0) i=-1;
+
+/* 0x000c	 190 */		sra	%o1,0,%g2
+/* 0x0010	     */		or	%g0,-1,%o2	! i = -1 (kept when i32[len] > 0)
+/* 0x0014	     */		sllx	%g2,2,%g2	! byte offset 4*len
+/* 0x0018	     */		ld	[%i2+%g2],%g2	! i32[len]
+/* 0x001c	     */		cmp	%g2,0
+/* 0x0020	     */		bleu,pn	%icc,.L77000175	! unsigned <= 0, i.e. i32[len] == 0: go compare word by word
+/* 0x0024	     */		or	%g0,%i1,%i0	! delay slot: %i0 = nint base (a second copy of %i1)
+/* 0x0028	     */		ba	.L900000511
+/* 0x002c	     */		cmp	%o2,0	! delay slot: i = -1 < 0, so the bl at .L900000511 is taken
+                       .L77000175:
+
+!  191		      ! else
+!  192		      !   {
+!  193		      !     for(i=len-1; i>=0; i--)
+
+/* 0x0030	 193 */		sub	%o1,1,%o2	! i = len-1
+/* 0x0034	     */		cmp	%o2,0
+/* 0x0038	     */		bl,pn	%icc,.L77000182	! len-1 < 0: nothing to scan
+/* 0x003c	     */		sra	%o2,0,%g2
+                       .L900000510:	! scan from the most significant word for the first difference
+
+!  194		      !       {
+!  195		      !	 if(i32[i]!=nint[i]) break;
+
+/* 0x0040	 195 */		sllx	%g2,2,%g2	! byte offset 4*i
+/* 0x0044	     */		sub	%o2,1,%o0	! i-1
+/* 0x0048	     */		ld	[%i1+%g2],%g3	! nint[i]
+/* 0x004c	     */		ld	[%i2+%g2],%g2	! i32[i]
+/* 0x0050	     */		cmp	%g2,%g3
+/* 0x0054	     */		bne,pn	%icc,.L77000182	! break on the first differing word
+/* 0x0058	     */		nop
+/* 0x005c	   0 */		or	%g0,%o0,%o2	! i--
+/* 0x0060	 195 */		cmp	%o0,0
+/* 0x0064	     */		bge,pt	%icc,.L900000510	! continue while i >= 0
+/* 0x0068	     */		sra	%o2,0,%g2
+                       .L77000182:
+
+!  196		      !       }
+!  197		      !   }
+!  198		      ! if((i<0)||(i32[i]>nint[i]))
+
+/* 0x006c	 198 */		cmp	%o2,0
+                       .L900000511:
+/* 0x0070	 198 */		bl,pn	%icc,.L77000198	! i < 0: subtract
+/* 0x0074	     */		sra	%o2,0,%g2
+/* 0x0078	     */		sllx	%g2,2,%g2
+/* 0x007c	     */		ld	[%i1+%g2],%g3	! nint[i]
+/* 0x0080	     */		ld	[%i2+%g2],%g2	! i32[i]
+/* 0x0084	     */		cmp	%g2,%g3
+/* 0x0088	     */		bleu,pt	%icc,.L77000191	! i32[i] <= nint[i] (unsigned): return without subtracting
+/* 0x008c	     */		nop
+                       .L77000198:
+
+!  199		      !   {
+!  200		      !     acc=0;
+!  201		      !     for(i=0;i<len;i++)
+
+/* 0x0090	 201 */		cmp	%o1,0
+/* 0x0094	     */		ble,pt	%icc,.L77000191	! len <= 0: nothing to subtract
+/* 0x0098	     */		nop
+/* 0x009c	 198 */		or	%g0,-1,%g2
+/* 0x00a0	 201 */		or	%g0,%o1,%g3
+/* 0x00a4	 198 */		srl	%g2,0,%g2	! %g2 = 0xffffffff mask (srl by 0 clears the upper 32 bits)
+/* 0x00a8	     */		sub	%o1,1,%g4	! %g4 = len-1
+/* 0x00ac	     */		cmp	%o1,9	! fewer than 9 words -> scalar loop only (tested by the bl at 0x00bc)
+/* 0x00b0	 201 */		or	%g0,0,%i1	! i = 0 (%i1 is reused as the index; nint stays in %i0)
+/* 0x00b4	 200 */		or	%g0,0,%g5	! acc = 0
+
+!  202		      !       {
+!  203		      !	 acc=acc+(unsigned long long)(i32[i])-(unsigned long long)(nint[i]);
+
+/* 0x00b8	 203 */		or	%g0,0,%o1	! byte offset 4*i = 0
+/* 0x00bc	 201 */		bl,pn	%icc,.L77000199
+/* 0x00c0	     */		sub	%g3,4,%o7	! delay slot: %o7 = len-4, bound of the unrolled loop
+/* 0x00c4	 203 */		ld	[%i2],%o1	! i32[0]
+
+!  204		      !	 i32[i]=acc&0xffffffff;
+!  205		      !	 acc=acc>>32;
+
+/* 0x00c8	 205 */		or	%g0,5,%i1
+/* 0x00cc	 203 */		ld	[%i0],%o2	! nint[0]
+/* 0x00d0	 201 */		or	%g0,8,%o5
+/* 0x00d4	     */		or	%g0,12,%o4
+/* 0x00d8	 203 */		ld	[%i0+4],%o3
+/* 0x00dc	 201 */		or	%g0,16,%g1
+/* 0x00e0	 203 */		ld	[%i2+4],%o0
+/* 0x00e4	 201 */		sub	%o1,%o2,%o1	! acc = i32[0] - nint[0] (acc starts at 0)
+/* 0x00e8	 203 */		ld	[%i0+8],%i3
+/* 0x00ec	 204 */		and	%o1,%g2,%g5
+/* 0x00f0	     */		st	%g5,[%i2]	! i32[0] = low 32 bits of acc
+/* 0x00f4	 205 */		srax	%o1,32,%g5	! arithmetic shift: the borrow carries over as -1
+/* 0x00f8	 201 */		sub	%o0,%o3,%o0
+/* 0x00fc	 203 */		ld	[%i0+12],%o2
+/* 0x0100	 201 */		add	%o0,%g5,%o0
+/* 0x0104	 204 */		and	%o0,%g2,%g5
+/* 0x0108	     */		st	%g5,[%i2+4]
+/* 0x010c	 205 */		srax	%o0,32,%o0
+/* 0x0110	 203 */		ld	[%i2+8],%o1
+/* 0x0114	     */		ld	[%i2+12],%o3
+/* 0x0118	 201 */		sub	%o1,%i3,%o1
+                       .L900000505:	! unrolled, software-pipelined loop: 4 words per iteration
+/* 0x011c	     */		add	%g1,4,%g3
+/* 0x0120	 203 */		ld	[%g1+%i2],%g5
+/* 0x0124	 201 */		add	%o1,%o0,%o0
+/* 0x0128	 203 */		ld	[%i0+%g1],%i3
+/* 0x012c	 201 */		sub	%o3,%o2,%o1
+/* 0x0130	 204 */		and	%o0,%g2,%o2
+/* 0x0134	     */		st	%o2,[%o5+%i2]
+/* 0x0138	 205 */		srax	%o0,32,%o2
+/* 0x013c	     */		add	%i1,4,%i1
+/* 0x0140	 201 */		add	%g1,8,%o5
+/* 0x0144	 203 */		ld	[%g3+%i2],%o0
+/* 0x0148	 201 */		add	%o1,%o2,%o1
+/* 0x014c	 203 */		ld	[%i0+%g3],%o3
+/* 0x0150	 201 */		sub	%g5,%i3,%o2
+/* 0x0154	 204 */		and	%o1,%g2,%g5
+/* 0x0158	     */		st	%g5,[%o4+%i2]
+/* 0x015c	 205 */		srax	%o1,32,%g5
+/* 0x0160	     */		cmp	%i1,%o7	! compare against len-4 (tested by the ble at 0x01a0)
+/* 0x0164	 201 */		add	%g1,12,%o4
+/* 0x0168	 203 */		ld	[%o5+%i2],%o1
+/* 0x016c	 201 */		add	%o2,%g5,%o2
+/* 0x0170	 203 */		ld	[%i0+%o5],%i3
+/* 0x0174	 201 */		sub	%o0,%o3,%o0
+/* 0x0178	 204 */		and	%o2,%g2,%o3
+/* 0x017c	     */		st	%o3,[%g1+%i2]
+/* 0x0180	 205 */		srax	%o2,32,%g5
+/* 0x0184	 203 */		ld	[%o4+%i2],%o3
+/* 0x0188	 201 */		add	%g1,16,%g1
+/* 0x018c	     */		add	%o0,%g5,%o0
+/* 0x0190	 203 */		ld	[%i0+%o4],%o2
+/* 0x0194	 201 */		sub	%o1,%i3,%o1
+/* 0x0198	 204 */		and	%o0,%g2,%g5
+/* 0x019c	     */		st	%g5,[%g3+%i2]
+/* 0x01a0	 205 */		ble,pt	%icc,.L900000505
+/* 0x01a4	     */		srax	%o0,32,%o0
+                       .L900000508:	! pipeline drain: finish the words already loaded
+/* 0x01a8	     */		add	%o1,%o0,%g3
+/* 0x01ac	     */		sub	%o3,%o2,%o1
+/* 0x01b0	 203 */		ld	[%g1+%i2],%o0
+/* 0x01b4	     */		ld	[%i0+%g1],%o2
+/* 0x01b8	 205 */		srax	%g3,32,%o7
+/* 0x01bc	 204 */		and	%g3,%g2,%o3
+/* 0x01c0	 201 */		add	%o1,%o7,%o1
+/* 0x01c4	 204 */		st	%o3,[%o5+%i2]
+/* 0x01c8	 205 */		cmp	%i1,%g4	! i > len-1 ? (tested by the bg at 0x01f0)
+/* 0x01cc	 201 */		sub	%o0,%o2,%o0
+/* 0x01d0	 204 */		and	%o1,%g2,%o2
+/* 0x01d4	     */		st	%o2,[%o4+%i2]
+/* 0x01d8	 205 */		srax	%o1,32,%o1
+/* 0x01dc	 203 */		sra	%i1,0,%o2
+/* 0x01e0	 201 */		add	%o0,%o1,%o0
+/* 0x01e4	 205 */		srax	%o0,32,%g5	! acc carried into the scalar loop
+/* 0x01e8	 204 */		and	%o0,%g2,%o1
+/* 0x01ec	     */		st	%o1,[%g1+%i2]
+/* 0x01f0	 205 */		bg,pn	%icc,.L77000191	! no words left
+/* 0x01f4	     */		sllx	%o2,2,%o1	! delay slot: byte offset 4*i for the scalar loop
+                       .L77000199:	! scalar loop: one word per iteration
+/* 0x01f8	   0 */		or	%g0,%o1,%g1	! %g1 = byte offset used for the store
+                       .L900000509:
+/* 0x01fc	 203 */		ld	[%o1+%i2],%o0	! i32[i]
+/* 0x0200	 205 */		add	%i1,1,%i1	! i++
+/* 0x0204	 203 */		ld	[%i0+%o1],%o1	! nint[i]
+/* 0x0208	     */		sra	%i1,0,%o2
+/* 0x020c	 205 */		cmp	%i1,%g4	! i <= len-1 ?
+/* 0x0210	 203 */		add	%g5,%o0,%o0	! acc + i32[i]
+/* 0x0214	     */		sub	%o0,%o1,%o0	! ... - nint[i]
+/* 0x0218	 205 */		srax	%o0,32,%g5	! acc >>= 32 (arithmetic, so the borrow stays negative)
+/* 0x021c	 204 */		and	%o0,%g2,%o1
+/* 0x0220	     */		st	%o1,[%g1+%i2]	! i32[i] = low 32 bits
+/* 0x0224	     */		sllx	%o2,2,%o1	! byte offset of the next word
+/* 0x0228	 205 */		ble,pt	%icc,.L900000509
+/* 0x022c	     */		or	%g0,%o1,%g1
+                       .L77000191:
+/* 0x0230	     */		ret	! Result = 
+/* 0x0234	     */		restore	%g0,%g0,%g0
+/* 0x0238	   0 */		.type	adjust_montf_result,2
+/* 0x0238	     */		.size	adjust_montf_result,(.-adjust_montf_result)
+
+	.section	".text",#alloc,#execinstr
+/* 000000	   0 */		.align	8
+/* 000000	     */		.skip	24
+!
+! SUBROUTINE mont_mulf_noconv
+!
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
+
+                       	.global mont_mulf_noconv
+                       mont_mulf_noconv:
+/* 000000	     */		save	%sp,-224,%sp
+                       .L900000643:
+/* 0x0004	     */		call	.+8
+/* 0x0008	     */		sethi	/*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000643-.)),%g5
+/* 0x000c	     */		ldx	[%fp+2223],%l0
+
+!  206		      !       }
+!  207		      !   }
+!  208		      !}
+!  213		      !/*
+!  214		      !** the lengths of the input arrays should be at least the following:
+!  215		      !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
+!  216		      !** all of them should be different from one another
+!  217		      !**
+!  218		      !*/
+!  219		      !void mont_mulf_noconv(unsigned int *result,
+!  220		      !		     double *dm1, double *dm2, double *dt,
+!  221		      !		     double *dn, unsigned int *nint,
+!  222		      !		     int nlen, double dn0)
+!  223		      !{
+!  224		      ! int i, j, jj;
+!  225		      ! int tmp;
+!  226		      ! double digit, m2j, nextm2j, a, b;
+!  227		      ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
+!  229		      ! pdm1=&(dm1[0]);
+!  230		      ! pdm2=&(dm2[0]);
+!  231		      ! pdn=&(dn[0]);
+!  232		      ! pdm2[2*nlen]=Zero;
+
+/* 0x0010	 232 */		sethi	%hi(Zero),%g2
+/* 0x0014	 223 */		fmovd	%f14,%f30
+/* 0x0018	     */		add	%g5,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000643-.)),%g5
+/* 0x001c	 232 */		add	%g2,%lo(Zero),%g2
+/* 0x0020	     */		sll	%l0,1,%o3
+/* 0x0024	 223 */		add	%g5,%o7,%o4
+/* 0x0028	 232 */		sra	%o3,0,%g5
+/* 0x002c	     */		ldx	[%o4+%g2],%o7
+
+!  234		      ! if (nlen!=16)
+!  235		      !   {
+!  236		      !     for(i=0;i<4*nlen+2;i++) dt[i]=Zero;
+!  238		      !     a=dt[0]=pdm1[0]*pdm2[0];
+!  239		      !     digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+
+/* 0x0030	 239 */		sethi	%hi(TwoToMinus16),%g3
+/* 0x0034	     */		sethi	%hi(TwoTo16),%g4
+/* 0x0038	     */		add	%g3,%lo(TwoToMinus16),%g2
+/* 0x003c	 232 */		ldd	[%o7],%f0
+/* 0x0040	 239 */		add	%g4,%lo(TwoTo16),%g3
+/* 0x0044	 223 */		or	%g0,%i4,%o0
+/* 0x0048	 232 */		sllx	%g5,3,%g4
+/* 0x004c	 239 */		ldx	[%o4+%g2],%o5
+/* 0x0050	 223 */		or	%g0,%i5,%l3
+/* 0x0054	     */		or	%g0,%i0,%l2
+/* 0x0058	 239 */		ldx	[%o4+%g3],%o4
+/* 0x005c	 234 */		cmp	%l0,16
+/* 0x0060	 232 */		std	%f0,[%i2+%g4]
+/* 0x0064	 234 */		be,pn	%icc,.L77000279
+/* 0x0068	     */		or	%g0,%i3,%l4
+/* 0x006c	 236 */		sll	%l0,2,%g2
+/* 0x0070	 223 */		or	%g0,%o0,%i5
+/* 0x0074	 236 */		add	%g2,2,%o0
+/* 0x0078	 223 */		or	%g0,%i1,%i4
+/* 0x007c	 236 */		cmp	%o0,0
+/* 0x0080	 223 */		or	%g0,%i2,%l1
+/* 0x0084	 236 */		ble,a,pt	%icc,.L900000657
+/* 0x0088	     */		ldd	[%i1],%f6
+
+!  241		      !     pdtj=&(dt[0]);
+!  242		      !     for(j=jj=0;j<2*nlen;j++,jj++,pdtj++)
+!  243		      !       {
+!  244		      !	 m2j=pdm2[j];
+!  245		      !	 a=pdtj[0]+pdn[0]*digit;
+!  246		      !	 b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16;
+!  247		      !	 pdtj[1]=b;
+!  249		      !#pragma pipeloop(0)
+!  250		      !	 for(i=1;i<nlen;i++)
+!  251		      !	   {
+!  252		      !	     pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+!  253		      !	   }
+!  254		      ! 	 if((jj==30)) {cleanup(dt,j/2+1,2*nlen+1); jj=0;}
+!  255		      !	 
+!  256		      !	 digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+!  257		      !       }
+!  258		      !   }
+!  259		      ! else
+!  260		      !   {
+!  261		      !     a=dt[0]=pdm1[0]*pdm2[0];
+!  263		      !     dt[65]=     dt[64]=     dt[63]=     dt[62]=     dt[61]=     dt[60]=
+!  264		      !     dt[59]=     dt[58]=     dt[57]=     dt[56]=     dt[55]=     dt[54]=
+!  265		      !     dt[53]=     dt[52]=     dt[51]=     dt[50]=     dt[49]=     dt[48]=
+!  266		      !     dt[47]=     dt[46]=     dt[45]=     dt[44]=     dt[43]=     dt[42]=
+!  267		      !     dt[41]=     dt[40]=     dt[39]=     dt[38]=     dt[37]=     dt[36]=
+!  268		      !     dt[35]=     dt[34]=     dt[33]=     dt[32]=     dt[31]=     dt[30]=
+!  269		      !     dt[29]=     dt[28]=     dt[27]=     dt[26]=     dt[25]=     dt[24]=
+!  270		      !     dt[23]=     dt[22]=     dt[21]=     dt[20]=     dt[19]=     dt[18]=
+!  271		      !     dt[17]=     dt[16]=     dt[15]=     dt[14]=     dt[13]=     dt[12]=
+!  272		      !     dt[11]=     dt[10]=     dt[ 9]=     dt[ 8]=     dt[ 7]=     dt[ 6]=
+!  273		      !     dt[ 5]=     dt[ 4]=     dt[ 3]=     dt[ 2]=     dt[ 1]=Zero;
+!  275		      !     pdn_0=pdn[0];
+!  276		      !     pdm1_0=pdm1[0];
+!  278		      !     digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+!  279		      !     pdtj=&(dt[0]);
+!  281		      !     for(j=0;j<32;j++,pdtj++)
+
+/* 0x008c	 281 */		or	%g0,%o0,%o1
+/* 0x0090	 236 */		sub	%o0,1,%g1
+/* 0x0094	     */		or	%g0,0,%g2
+/* 0x0098	 281 */		cmp	%o1,5
+/* 0x009c	     */		bl,pn	%icc,.L77000280
+/* 0x00a0	     */		or	%g0,8,%o0
+/* 0x00a4	     */		std	%f0,[%i3]
+/* 0x00a8	     */		or	%g0,2,%g2
+/* 0x00ac	     */		sub	%g1,2,%o1
+                       .L900000627:
+/* 0x00b0	     */		add	%o0,8,%g3
+/* 0x00b4	     */		std	%f0,[%i3+%o0]
+/* 0x00b8	     */		add	%g2,3,%g2
+/* 0x00bc	     */		add	%o0,16,%o2
+/* 0x00c0	     */		std	%f0,[%i3+%g3]
+/* 0x00c4	     */		cmp	%g2,%o1
+/* 0x00c8	     */		add	%o0,24,%o0
+/* 0x00cc	     */		ble,pt	%icc,.L900000627
+/* 0x00d0	     */		std	%f0,[%i3+%o2]
+                       .L900000630:
+/* 0x00d4	     */		cmp	%g2,%g1
+/* 0x00d8	     */		bg,pn	%icc,.L77000285
+/* 0x00dc	     */		std	%f0,[%i3+%o0]
+                       .L77000280:
+/* 0x00e0	     */		ldd	[%o7],%f0
+                       .L900000656:
+/* 0x00e4	     */		sra	%g2,0,%o0
+/* 0x00e8	     */		add	%g2,1,%g2
+/* 0x00ec	     */		sllx	%o0,3,%o0
+/* 0x00f0	     */		cmp	%g2,%g1
+/* 0x00f4	     */		std	%f0,[%i3+%o0]
+/* 0x00f8	     */		ble,a,pt	%icc,.L900000656
+/* 0x00fc	     */		ldd	[%o7],%f0
+                       .L77000285:
+/* 0x0100	 238 */		ldd	[%i1],%f6
+                       .L900000657:
+/* 0x0104	 238 */		ldd	[%i2],%f8
+/* 0x0108	 242 */		cmp	%o3,0
+/* 0x010c	     */		sub	%o3,1,%o1
+/* 0x0110	 239 */		ldd	[%o7],%f10
+/* 0x0114	     */		add	%o3,1,%o2
+/* 0x0118	   0 */		or	%g0,0,%i2
+/* 0x011c	 238 */		fmuld	%f6,%f8,%f6
+/* 0x0120	     */		std	%f6,[%i3]
+/* 0x0124	   0 */		or	%g0,0,%g3
+/* 0x0128	 239 */		ldd	[%o5],%f8
+/* 0x012c	   0 */		or	%g0,%o2,%g1
+/* 0x0130	 236 */		sub	%l0,1,%i1
+/* 0x0134	 239 */		ldd	[%o4],%f12
+/* 0x0138	 236 */		or	%g0,1,%g4
+/* 0x013c	     */		fdtox	%f6,%f0
+/* 0x0140	     */		fmovs	%f10,%f0
+/* 0x0144	     */		fxtod	%f0,%f6
+/* 0x0148	 239 */		fmuld	%f6,%f14,%f6
+/* 0x014c	     */		fmuld	%f6,%f8,%f8
+/* 0x0150	     */		fdtox	%f8,%f8
+/* 0x0154	     */		fxtod	%f8,%f8
+/* 0x0158	     */		fmuld	%f8,%f12,%f8
+/* 0x015c	     */		fsubd	%f6,%f8,%f20
+/* 0x0160	 242 */		ble,pt	%icc,.L900000650
+/* 0x0164	     */		sllx	%g5,3,%g2
+/* 0x0168	   0 */		st	%o1,[%sp+2223]
+/* 0x016c	 246 */		ldd	[%i5],%f6
+                       .L900000651:
+/* 0x0170	 246 */		sra	%g4,0,%g2
+/* 0x0174	     */		fmuld	%f6,%f20,%f6
+/* 0x0178	     */		ldd	[%i3],%f12
+/* 0x017c	     */		sllx	%g2,3,%g2
+/* 0x0180	     */		ldd	[%i4],%f8
+/* 0x0184	 250 */		cmp	%l0,1
+/* 0x0188	 246 */		ldd	[%l1+%g2],%f10
+/* 0x018c	 244 */		sra	%i2,0,%g2
+/* 0x0190	     */		add	%i2,1,%i0
+/* 0x0194	 246 */		faddd	%f12,%f6,%f6
+/* 0x0198	     */		ldd	[%o5],%f12
+/* 0x019c	 244 */		sllx	%g2,3,%g2
+/* 0x01a0	 246 */		fmuld	%f8,%f10,%f8
+/* 0x01a4	     */		ldd	[%i3+8],%f10
+/* 0x01a8	     */		srl	%i2,31,%o3
+/* 0x01ac	 244 */		ldd	[%l1+%g2],%f18
+/* 0x01b0	   0 */		or	%g0,1,%l5
+/* 0x01b4	 236 */		or	%g0,2,%g2
+/* 0x01b8	 246 */		fmuld	%f6,%f12,%f6
+/* 0x01bc	 250 */		or	%g0,32,%o1
+/* 0x01c0	     */		or	%g0,48,%o2
+/* 0x01c4	 246 */		faddd	%f10,%f8,%f8
+/* 0x01c8	     */		faddd	%f8,%f6,%f16
+/* 0x01cc	 250 */		ble,pn	%icc,.L77000213
+/* 0x01d0	     */		std	%f16,[%i3+8]
+/* 0x01d4	     */		cmp	%i1,8
+/* 0x01d8	     */		sub	%l0,3,%o3
+/* 0x01dc	     */		bl,pn	%icc,.L77000284
+/* 0x01e0	     */		or	%g0,8,%o0
+/* 0x01e4	 252 */		ldd	[%i5+8],%f0
+/* 0x01e8	     */		or	%g0,6,%l5
+/* 0x01ec	     */		ldd	[%i4+8],%f2
+/* 0x01f0	     */		or	%g0,4,%g2
+/* 0x01f4	 250 */		or	%g0,40,%o0
+/* 0x01f8	 252 */		ldd	[%i5+16],%f8
+/* 0x01fc	     */		fmuld	%f0,%f20,%f10
+/* 0x0200	     */		ldd	[%i4+16],%f4
+/* 0x0204	     */		fmuld	%f2,%f18,%f2
+/* 0x0208	     */		ldd	[%i3+16],%f0
+/* 0x020c	     */		fmuld	%f8,%f20,%f12
+/* 0x0210	     */		ldd	[%i4+24],%f6
+/* 0x0214	     */		fmuld	%f4,%f18,%f4
+/* 0x0218	     */		ldd	[%i5+24],%f8
+/* 0x021c	     */		faddd	%f2,%f10,%f2
+/* 0x0220	     */		ldd	[%i4+32],%f14
+/* 0x0224	     */		fmuld	%f6,%f18,%f10
+/* 0x0228	     */		ldd	[%i5+32],%f6
+/* 0x022c	     */		faddd	%f4,%f12,%f4
+/* 0x0230	     */		ldd	[%i4+40],%f12
+/* 0x0234	     */		faddd	%f0,%f2,%f0
+/* 0x0238	     */		std	%f0,[%i3+16]
+/* 0x023c	     */		ldd	[%i3+32],%f0
+/* 0x0240	     */		ldd	[%i3+48],%f2
+                       .L900000639:
+/* 0x0244	     */		add	%o2,16,%l6
+/* 0x0248	 252 */		ldd	[%i5+%o0],%f22
+/* 0x024c	     */		add	%l5,3,%l5
+/* 0x0250	     */		fmuld	%f8,%f20,%f8
+/* 0x0254	 250 */		add	%o0,8,%o0
+/* 0x0258	 252 */		ldd	[%l6+%i3],%f26
+/* 0x025c	     */		cmp	%l5,%o3
+/* 0x0260	     */		ldd	[%i4+%o0],%f24
+/* 0x0264	     */		faddd	%f0,%f4,%f0
+/* 0x0268	     */		add	%g2,6,%g2
+/* 0x026c	     */		faddd	%f10,%f8,%f10
+/* 0x0270	     */		fmuld	%f14,%f18,%f4
+/* 0x0274	     */		std	%f0,[%o1+%i3]
+/* 0x0278	 250 */		add	%o2,32,%o1
+/* 0x027c	 252 */		ldd	[%i5+%o0],%f8
+/* 0x0280	     */		fmuld	%f6,%f20,%f6
+/* 0x0284	 250 */		add	%o0,8,%o0
+/* 0x0288	 252 */		ldd	[%o1+%i3],%f0
+/* 0x028c	     */		ldd	[%i4+%o0],%f14
+/* 0x0290	     */		faddd	%f2,%f10,%f2
+/* 0x0294	     */		faddd	%f4,%f6,%f10
+/* 0x0298	     */		fmuld	%f12,%f18,%f4
+/* 0x029c	     */		std	%f2,[%o2+%i3]
+/* 0x02a0	 250 */		add	%o2,48,%o2
+/* 0x02a4	 252 */		ldd	[%i5+%o0],%f6
+/* 0x02a8	     */		fmuld	%f22,%f20,%f22
+/* 0x02ac	 250 */		add	%o0,8,%o0
+/* 0x02b0	 252 */		ldd	[%o2+%i3],%f2
+/* 0x02b4	     */		ldd	[%i4+%o0],%f12
+/* 0x02b8	     */		faddd	%f26,%f10,%f10
+/* 0x02bc	     */		std	%f10,[%l6+%i3]
+/* 0x02c0	     */		fmuld	%f24,%f18,%f10
+/* 0x02c4	     */		ble,pt	%icc,.L900000639
+/* 0x02c8	     */		faddd	%f4,%f22,%f4
+                       .L900000642:
+/* 0x02cc	 252 */		fmuld	%f8,%f20,%f24
+/* 0x02d0	     */		faddd	%f0,%f4,%f8
+/* 0x02d4	 250 */		add	%o2,16,%o3
+/* 0x02d8	 252 */		ldd	[%o3+%i3],%f4
+/* 0x02dc	     */		fmuld	%f14,%f18,%f0
+/* 0x02e0	     */		cmp	%l5,%i1
+/* 0x02e4	     */		std	%f8,[%o1+%i3]
+/* 0x02e8	     */		fmuld	%f12,%f18,%f8
+/* 0x02ec	 250 */		add	%o2,32,%o1
+/* 0x02f0	 252 */		faddd	%f10,%f24,%f12
+/* 0x02f4	     */		ldd	[%i5+%o0],%f22
+/* 0x02f8	     */		fmuld	%f6,%f20,%f6
+/* 0x02fc	     */		add	%g2,8,%g2
+/* 0x0300	     */		fmuld	%f22,%f20,%f10
+/* 0x0304	     */		faddd	%f2,%f12,%f2
+/* 0x0308	     */		faddd	%f0,%f6,%f6
+/* 0x030c	     */		ldd	[%o1+%i3],%f0
+/* 0x0310	     */		std	%f2,[%o2+%i3]
+/* 0x0314	     */		faddd	%f8,%f10,%f2
+/* 0x0318	     */		sra	%l5,0,%o2
+/* 0x031c	     */		sllx	%o2,3,%o0
+/* 0x0320	     */		faddd	%f4,%f6,%f4
+/* 0x0324	     */		std	%f4,[%o3+%i3]
+/* 0x0328	     */		faddd	%f0,%f2,%f0
+/* 0x032c	     */		std	%f0,[%o1+%i3]
+/* 0x0330	     */		bg,a,pn	%icc,.L77000213
+/* 0x0334	     */		srl	%i2,31,%o3
+                       .L77000284:
+/* 0x0338	 252 */		ldd	[%i4+%o0],%f2
+                       .L900000655:
+/* 0x033c	 252 */		ldd	[%i5+%o0],%f0
+/* 0x0340	     */		fmuld	%f2,%f18,%f2
+/* 0x0344	     */		sra	%g2,0,%o0
+/* 0x0348	     */		sllx	%o0,3,%o1
+/* 0x034c	     */		add	%l5,1,%l5
+/* 0x0350	     */		fmuld	%f0,%f20,%f4
+/* 0x0354	     */		ldd	[%o1+%i3],%f0
+/* 0x0358	     */		sra	%l5,0,%o2
+/* 0x035c	     */		sllx	%o2,3,%o0
+/* 0x0360	     */		add	%g2,2,%g2
+/* 0x0364	     */		cmp	%l5,%i1
+/* 0x0368	     */		faddd	%f2,%f4,%f2
+/* 0x036c	     */		faddd	%f0,%f2,%f0
+/* 0x0370	     */		std	%f0,[%o1+%i3]
+/* 0x0374	     */		ble,a,pt	%icc,.L900000655
+/* 0x0378	     */		ldd	[%i4+%o0],%f2
+                       .L900000626:
+/* 0x037c	     */		srl	%i2,31,%o3
+/* 0x0380	 252 */		ba	.L900000654
+/* 0x0384	     */		cmp	%g3,30
+                       .L77000213:
+/* 0x0388	 254 */		cmp	%g3,30
+                       .L900000654:
+/* 0x038c	     */		add	%i2,%o3,%o0
+/* 0x0390	 254 */		bne,a,pt	%icc,.L900000653
+/* 0x0394	     */		fdtox	%f16,%f0
+/* 0x0398	 281 */		sra	%o0,1,%g2
+/* 0x039c	     */		add	%g2,1,%g2
+/* 0x03a0	     */		ldd	[%o7],%f0
+/* 0x03a4	     */		sll	%g2,1,%o1
+/* 0x03a8	     */		sll	%g1,1,%g2
+/* 0x03ac	     */		or	%g0,%o1,%o2
+/* 0x03b0	     */		fmovd	%f0,%f2
+/* 0x03b4	     */		or	%g0,%g2,%o0
+/* 0x03b8	     */		cmp	%o1,%o0
+/* 0x03bc	     */		sub	%g2,1,%o0
+/* 0x03c0	     */		bge,pt	%icc,.L77000215
+/* 0x03c4	     */		or	%g0,0,%g3
+/* 0x03c8	 254 */		add	%o1,1,%o1
+/* 0x03cc	 281 */		sra	%o2,0,%g2
+                       .L900000652:
+/* 0x03d0	     */		sllx	%g2,3,%g2
+/* 0x03d4	     */		ldd	[%o7],%f6
+/* 0x03d8	     */		add	%o2,2,%o2
+/* 0x03dc	     */		sra	%o1,0,%g3
+/* 0x03e0	     */		ldd	[%g2+%l4],%f8
+/* 0x03e4	     */		cmp	%o2,%o0
+/* 0x03e8	     */		sllx	%g3,3,%g3
+/* 0x03ec	     */		add	%o1,2,%o1
+/* 0x03f0	     */		ldd	[%l4+%g3],%f10
+/* 0x03f4	     */		fdtox	%f8,%f12
+/* 0x03f8	     */		fdtox	%f10,%f4
+/* 0x03fc	     */		fmovd	%f12,%f8
+/* 0x0400	     */		fmovs	%f6,%f12
+/* 0x0404	     */		fmovs	%f6,%f4
+/* 0x0408	     */		fxtod	%f12,%f6
+/* 0x040c	     */		fxtod	%f4,%f12
+/* 0x0410	     */		fdtox	%f10,%f4
+/* 0x0414	     */		faddd	%f6,%f2,%f6
+/* 0x0418	     */		std	%f6,[%g2+%l4]
+/* 0x041c	     */		faddd	%f12,%f0,%f6
+/* 0x0420	     */		std	%f6,[%l4+%g3]
+/* 0x0424	     */		fitod	%f8,%f2
+/* 0x0428	     */		fitod	%f4,%f0
+/* 0x042c	     */		ble,pt	%icc,.L900000652
+/* 0x0430	     */		sra	%o2,0,%g2
+                       .L77000233:
+/* 0x0434	     */		or	%g0,0,%g3
+                       .L77000215:
+/* 0x0438	     */		fdtox	%f16,%f0
+                       .L900000653:
+/* 0x043c	 256 */		ldd	[%o7],%f6
+/* 0x0440	     */		add	%g4,1,%g4
+/* 0x0444	     */		or	%g0,%i0,%i2
+/* 0x0448	     */		ldd	[%o5],%f8
+/* 0x044c	     */		add	%g3,1,%g3
+/* 0x0450	     */		add	%i3,8,%i3
+/* 0x0454	     */		fmovs	%f6,%f0
+/* 0x0458	     */		ldd	[%o4],%f10
+/* 0x045c	     */		ld	[%sp+2223],%o0
+/* 0x0460	     */		fxtod	%f0,%f6
+/* 0x0464	     */		cmp	%i0,%o0
+/* 0x0468	     */		fmuld	%f6,%f30,%f6
+/* 0x046c	     */		fmuld	%f6,%f8,%f8
+/* 0x0470	     */		fdtox	%f8,%f8
+/* 0x0474	     */		fxtod	%f8,%f8
+/* 0x0478	     */		fmuld	%f8,%f10,%f8
+/* 0x047c	     */		fsubd	%f6,%f8,%f20
+/* 0x0480	     */		ble,a,pt	%icc,.L900000651
+/* 0x0484	     */		ldd	[%i5],%f6
+                       .L900000625:
+/* 0x0488	 256 */		ba	.L900000650
+/* 0x048c	     */		sllx	%g5,3,%g2
+                       .L77000279:
+/* 0x0490	 261 */		ldd	[%i1],%f4
+/* 0x0494	     */		ldd	[%i2],%f6
+/* 0x0498	 273 */		std	%f0,[%i3+8]
+/* 0x049c	     */		std	%f0,[%i3+16]
+/* 0x04a0	 261 */		fmuld	%f4,%f6,%f6
+/* 0x04a4	     */		std	%f6,[%i3]
+/* 0x04a8	 273 */		std	%f0,[%i3+24]
+/* 0x04ac	     */		std	%f0,[%i3+32]
+/* 0x04b0	     */		fdtox	%f6,%f2
+/* 0x04b4	     */		std	%f0,[%i3+40]
+/* 0x04b8	     */		std	%f0,[%i3+48]
+/* 0x04bc	     */		std	%f0,[%i3+56]
+/* 0x04c0	     */		std	%f0,[%i3+64]
+/* 0x04c4	     */		fmovs	%f0,%f2
+/* 0x04c8	     */		std	%f0,[%i3+72]
+/* 0x04cc	     */		std	%f0,[%i3+80]
+/* 0x04d0	     */		std	%f0,[%i3+88]
+/* 0x04d4	     */		std	%f0,[%i3+96]
+/* 0x04d8	     */		std	%f0,[%i3+104]
+/* 0x04dc	     */		std	%f0,[%i3+112]
+/* 0x04e0	     */		std	%f0,[%i3+120]
+/* 0x04e4	     */		std	%f0,[%i3+128]
+/* 0x04e8	     */		std	%f0,[%i3+136]
+/* 0x04ec	     */		std	%f0,[%i3+144]
+/* 0x04f0	     */		std	%f0,[%i3+152]
+/* 0x04f4	     */		std	%f0,[%i3+160]
+/* 0x04f8	     */		std	%f0,[%i3+168]
+/* 0x04fc	     */		fxtod	%f2,%f6
+/* 0x0500	     */		std	%f0,[%i3+176]
+/* 0x0504	 281 */		or	%g0,1,%o2
+/* 0x0508	 273 */		std	%f0,[%i3+184]
+
+!  282		      !       {
+!  284		      !	 m2j=pdm2[j];
+!  285		      !	 a=pdtj[0]+pdn_0*digit;
+!  286		      !	 b=pdtj[1]+pdm1_0*pdm2[j+1]+a*TwoToMinus16;
+
+/* 0x050c	 286 */		sra	%o2,0,%g2
+/* 0x0510	 279 */		or	%g0,%i3,%o3
+/* 0x0514	 273 */		std	%f0,[%i3+192]
+/* 0x0518	 278 */		fmuld	%f6,%f14,%f6
+/* 0x051c	 281 */		or	%g0,0,%g1
+/* 0x0520	 273 */		std	%f0,[%i3+200]
+/* 0x0524	     */		std	%f0,[%i3+208]
+/* 0x0528	     */		std	%f0,[%i3+216]
+/* 0x052c	     */		std	%f0,[%i3+224]
+/* 0x0530	     */		std	%f0,[%i3+232]
+/* 0x0534	     */		std	%f0,[%i3+240]
+/* 0x0538	     */		std	%f0,[%i3+248]
+/* 0x053c	     */		std	%f0,[%i3+256]
+/* 0x0540	     */		std	%f0,[%i3+264]
+/* 0x0544	     */		std	%f0,[%i3+272]
+/* 0x0548	     */		std	%f0,[%i3+280]
+/* 0x054c	     */		std	%f0,[%i3+288]
+/* 0x0550	     */		std	%f0,[%i3+296]
+/* 0x0554	     */		std	%f0,[%i3+304]
+/* 0x0558	     */		std	%f0,[%i3+312]
+/* 0x055c	     */		std	%f0,[%i3+320]
+/* 0x0560	     */		std	%f0,[%i3+328]
+/* 0x0564	     */		std	%f0,[%i3+336]
+/* 0x0568	     */		std	%f0,[%i3+344]
+/* 0x056c	     */		std	%f0,[%i3+352]
+/* 0x0570	     */		std	%f0,[%i3+360]
+/* 0x0574	     */		std	%f0,[%i3+368]
+/* 0x0578	     */		std	%f0,[%i3+376]
+/* 0x057c	     */		std	%f0,[%i3+384]
+/* 0x0580	     */		std	%f0,[%i3+392]
+/* 0x0584	     */		std	%f0,[%i3+400]
+/* 0x0588	     */		std	%f0,[%i3+408]
+/* 0x058c	     */		std	%f0,[%i3+416]
+/* 0x0590	     */		std	%f0,[%i3+424]
+/* 0x0594	     */		std	%f0,[%i3+432]
+/* 0x0598	     */		std	%f0,[%i3+440]
+/* 0x059c	     */		std	%f0,[%i3+448]
+/* 0x05a0	     */		std	%f0,[%i3+456]
+/* 0x05a4	     */		std	%f0,[%i3+464]
+/* 0x05a8	     */		std	%f0,[%i3+472]
+/* 0x05ac	     */		std	%f0,[%i3+480]
+/* 0x05b0	     */		std	%f0,[%i3+488]
+/* 0x05b4	     */		std	%f0,[%i3+496]
+/* 0x05b8	 278 */		ldd	[%o5],%f8
+/* 0x05bc	     */		ldd	[%o4],%f10
+/* 0x05c0	     */		fmuld	%f6,%f8,%f8
+/* 0x05c4	 273 */		std	%f0,[%i3+504]
+/* 0x05c8	     */		std	%f0,[%i3+512]
+/* 0x05cc	     */		std	%f0,[%i3+520]
+/* 0x05d0	     */		fdtox	%f8,%f8
+/* 0x05d4	 275 */		ldd	[%o0],%f0
+/* 0x05d8	     */		fxtod	%f8,%f8
+/* 0x05dc	     */		fmuld	%f8,%f10,%f8
+/* 0x05e0	     */		fsubd	%f6,%f8,%f2
+
+!  287		      !	 pdtj[1]=b;
+!  289		      !	 /**** this loop will be fully unrolled:
+!  290		      !	 for(i=1;i<16;i++)
+!  291		      !	   {
+!  292		      !	     pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+!  293		      !	   }
+!  294		      !	 *************************************/
+!  295		      !	     pdtj[2]+=pdm1[1]*m2j+pdn[1]*digit;
+!  296		      !	     pdtj[4]+=pdm1[2]*m2j+pdn[2]*digit;
+!  297		      !	     pdtj[6]+=pdm1[3]*m2j+pdn[3]*digit;
+!  298		      !	     pdtj[8]+=pdm1[4]*m2j+pdn[4]*digit;
+!  299		      !	     pdtj[10]+=pdm1[5]*m2j+pdn[5]*digit;
+!  300		      !	     pdtj[12]+=pdm1[6]*m2j+pdn[6]*digit;
+!  301		      !	     pdtj[14]+=pdm1[7]*m2j+pdn[7]*digit;
+!  302		      !	     pdtj[16]+=pdm1[8]*m2j+pdn[8]*digit;
+!  303		      !	     pdtj[18]+=pdm1[9]*m2j+pdn[9]*digit;
+!  304		      !	     pdtj[20]+=pdm1[10]*m2j+pdn[10]*digit;
+!  305		      !	     pdtj[22]+=pdm1[11]*m2j+pdn[11]*digit;
+!  306		      !	     pdtj[24]+=pdm1[12]*m2j+pdn[12]*digit;
+!  307		      !	     pdtj[26]+=pdm1[13]*m2j+pdn[13]*digit;
+!  308		      !	     pdtj[28]+=pdm1[14]*m2j+pdn[14]*digit;
+!  309		      !	     pdtj[30]+=pdm1[15]*m2j+pdn[15]*digit;
+!  310		      !	 /* no need for cleanup, cannot overflow */
+!  311		      !	 digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+
+
+	fmovd %f2,%f0		! hand modified
+	fmovd %f30,%f18		! hand modified
+	ldd [%o0],%f2
+	ldd [%o3],%f8
+	ldd [%i1],%f10
+	ldd [%o5],%f14		! hand modified
+	ldd [%o4],%f16		! hand modified
+	ldd [%i2],%f24
+
+	ldd [%i1+8],%f26
+	ldd [%i1+16],%f40
+	ldd [%i1+48],%f46
+	ldd [%i1+56],%f30
+	ldd [%i1+64],%f54
+	ldd [%i1+104],%f34
+	ldd [%i1+112],%f58
+
+	ldd [%o0+8],%f28	
+	ldd [%o0+104],%f38
+	ldd [%o0+112],%f60
+
+	.L99999999: 			!1
+	ldd	[%i1+24],%f32
+	fmuld	%f0,%f2,%f4 	!2
+	ldd	[%o0+24],%f36
+	fmuld	%f26,%f24,%f20 	!3
+	ldd	[%i1+40],%f42
+	fmuld	%f28,%f0,%f22 	!4
+	ldd	[%o0+40],%f44
+	fmuld	%f32,%f24,%f32 	!5
+	ldd	[%i2+8],%f6
+	faddd	%f4,%f8,%f4
+	fmuld	%f36,%f0,%f36 	!6
+	add	%i2,8,%i2
+	ldd	[%o0+56],%f50
+	fmuld	%f42,%f24,%f42 	!7
+	ldd	[%i1+72],%f52
+	faddd	%f20,%f22,%f20
+	fmuld	%f44,%f0,%f44 	!8
+	ldd	[%o3+16],%f22
+	fmuld	%f10,%f6,%f12 	!9
+	ldd	[%o0+72],%f56
+	faddd	%f32,%f36,%f32
+	fmuld	%f14,%f4,%f4 !10
+	ldd	[%o3+48],%f36
+	fmuld	%f30,%f24,%f48 	!11
+	ldd	[%o3+8],%f8
+	faddd	%f20,%f22,%f20
+	fmuld	%f50,%f0,%f50	!12
+	std	%f20,[%o3+16]
+	faddd	%f42,%f44,%f42
+	fmuld	%f52,%f24,%f52 	!13
+	ldd	[%o3+80],%f44
+	faddd	%f4,%f12,%f4
+	fmuld	%f56,%f0,%f56 	!14
+	ldd	[%i1+88],%f20
+	faddd	%f32,%f36,%f32 	!15
+	ldd	[%o0+88],%f22
+	faddd	%f48,%f50,%f48 	!16
+	ldd	[%o3+112],%f50
+	faddd	%f52,%f56,%f52 	!17
+	ldd	[%o3+144],%f56
+	faddd	%f4,%f8,%f8
+	fmuld	%f20,%f24,%f20 	!18
+	std	%f32,[%o3+48]
+	faddd	%f42,%f44,%f42
+	fmuld	%f22,%f0,%f22 	!19
+	std	%f42,[%o3+80]
+	faddd	%f48,%f50,%f48
+	fmuld	%f34,%f24,%f32 	!20
+	std	%f48,[%o3+112]
+	faddd	%f52,%f56,%f52
+	fmuld	%f38,%f0,%f36 	!21
+	ldd	[%i1+120],%f42
+	fdtox	%f8,%f4 		!22
+	std	%f52,[%o3+144]
+	faddd	%f20,%f22,%f20 	!23
+	ldd	[%o0+120],%f44 	!24
+	ldd	[%o3+176],%f22
+	faddd	%f32,%f36,%f32
+	fmuld	%f42,%f24,%f42 	!25
+	ldd	[%o0+16],%f50
+	fmovs	%f17,%f4 	!26
+	ldd	[%i1+32],%f52
+	fmuld	%f44,%f0,%f44 	!27
+	ldd	[%o0+32],%f56
+	fmuld	%f40,%f24,%f48 	!28
+	ldd	[%o3+208],%f36
+	faddd	%f20,%f22,%f20
+	fmuld	%f50,%f0,%f50 	!29
+	std	%f20,[%o3+176]
+	fxtod	%f4,%f4
+	fmuld	%f52,%f24,%f52 	!30
+	ldd	[%o0+48],%f22
+	faddd	%f42,%f44,%f42
+	fmuld	%f56,%f0,%f56 	!31
+	ldd	[%o3+240],%f44
+	faddd	%f32,%f36,%f32 	!32
+	std	%f32,[%o3+208]
+	faddd	%f48,%f50,%f48
+	fmuld	%f46,%f24,%f20 	!33
+	ldd	[%o3+32],%f50
+	fmuld	%f4,%f18,%f12 	!34
+	ldd	[%o0+64],%f36
+	faddd	%f52,%f56,%f52
+	fmuld	%f22,%f0,%f22 	!35
+	ldd	[%o3+64],%f56
+	faddd	%f42,%f44,%f42 	!36
+	std	%f42,[%o3+240]
+	faddd	%f48,%f50,%f48
+	fmuld	%f54,%f24,%f32 	!37
+	std	%f48,[%o3+32]
+	fmuld	%f12,%f14,%f4 !38
+	ldd	[%i1+80],%f42
+	faddd	%f52,%f56,%f56	! yes, tmp52!
+	fmuld	%f36,%f0,%f36 	!39
+	ldd	[%o0+80],%f44
+	faddd	%f20,%f22,%f20 	!40
+	ldd	[%i1+96],%f48
+	fmuld	%f58,%f24,%f52 	!41
+	ldd	[%o0+96],%f50
+	fdtox	%f4,%f4
+	fmuld	%f42,%f24,%f42 	!42
+	std	%f56,[%o3+64]	! yes, tmp52!
+	faddd	%f32,%f36,%f32
+	fmuld	%f44,%f0,%f44 	!43
+	ldd	[%o3+96],%f22
+	fmuld	%f48,%f24,%f48 	!44
+	ldd	[%o3+128],%f36
+	fmovd	%f6,%f24
+	fmuld	%f50,%f0,%f50 	!45
+	fxtod	%f4,%f4
+	fmuld	%f60,%f0,%f56 	!46
+	add	%o3,8,%o3
+	faddd	%f42,%f44,%f42 	!47
+	ldd	[%o3+160-8],%f44
+	faddd	%f20,%f22,%f20 	!48
+	std	%f20,[%o3+96-8]
+	faddd	%f48,%f50,%f48 	!49
+	ldd	[%o3+192-8],%f50
+	faddd	%f52,%f56,%f52
+	fmuld	%f4,%f16,%f4 	!50
+	ldd	[%o3+224-8],%f56
+	faddd	%f32,%f36,%f32 	!51
+	std	%f32,[%o3+128-8]
+	faddd	%f42,%f44,%f42 	!52
+	add	%g1,1,%g1
+	std	%f42,[%o3+160-8]
+	faddd	%f48,%f50,%f48 	!53
+	cmp	%g1,31
+	std	%f48,[%o3+192-8]
+	fsubd	%f12,%f4,%f0 	!54
+	faddd	%f52,%f56,%f52
+	ble,pt	%icc,.L99999999
+	std	%f52,[%o3+224-8] 	!55
+	std %f8,[%o3]
+!  312		      !       }
+!  313		      !   }
+!  315		      ! conv_d16_to_i32(result,dt+2*nlen,(long long *)dt,nlen+1);
+
+/* 0x0844	 315 */		sllx	%g5,3,%g2
+                       .L900000650:
+/* 0x0848	 315 */		ldd	[%g2+%l4],%f2
+/* 0x084c	     */		add	%l4,%g2,%o0
+/* 0x0850	     */		or	%g0,0,%g1
+/* 0x0854	     */		ldd	[%o0+8],%f4
+/* 0x0858	     */		or	%g0,0,%i2
+/* 0x085c	     */		cmp	%l0,0
+/* 0x0860	     */		fdtox	%f2,%f2
+/* 0x0864	     */		std	%f2,[%sp+2255]
+/* 0x0868	 311 */		sethi	%hi(0xfc00),%o3
+/* 0x086c	 315 */		fdtox	%f4,%f2
+/* 0x0870	     */		std	%f2,[%sp+2247]
+/* 0x0874	 311 */		or	%g0,-1,%o2
+/* 0x0878	     */		srl	%o2,0,%o5
+/* 0x087c	     */		or	%g0,2,%g5
+/* 0x0880	     */		sub	%l0,1,%g3
+/* 0x0884	     */		or	%g0,%o0,%o7
+/* 0x0888	     */		add	%o3,1023,%o4
+/* 0x088c	 315 */		or	%g0,64,%o3
+/* 0x0890	     */		ldx	[%sp+2255],%i0
+/* 0x0894	     */		sub	%l0,2,%o1
+/* 0x0898	     */		ldx	[%sp+2247],%i1
+/* 0x089c	     */		ble,pt	%icc,.L900000648
+/* 0x08a0	     */		sethi	%hi(0xfc00),%g2
+/* 0x08a4	     */		cmp	%l0,6
+/* 0x08a8	     */		and	%i0,%o5,%o2
+/* 0x08ac	     */		bl,pn	%icc,.L77000287
+/* 0x08b0	     */		or	%g0,3,%g4
+/* 0x08b4	     */		ldd	[%o7+16],%f0
+/* 0x08b8	     */		and	%i1,%o4,%i3
+/* 0x08bc	     */		sllx	%i3,16,%o0
+/* 0x08c0	     */		or	%g0,5,%g4
+/* 0x08c4	     */		srax	%i1,16,%i4
+/* 0x08c8	     */		fdtox	%f0,%f0
+/* 0x08cc	     */		std	%f0,[%sp+2239]
+/* 0x08d0	     */		srax	%i0,32,%i1
+/* 0x08d4	     */		add	%o2,%o0,%i5
+/* 0x08d8	     */		ldd	[%o7+24],%f0
+/* 0x08dc	     */		and	%i5,%o5,%l1
+/* 0x08e0	     */		or	%g0,72,%o2
+/* 0x08e4	     */		or	%g0,4,%o0
+/* 0x08e8	     */		or	%g0,4,%g5
+/* 0x08ec	     */		ldx	[%sp+2239],%g1
+/* 0x08f0	     */		fdtox	%f0,%f0
+/* 0x08f4	     */		or	%g0,4,%i2
+/* 0x08f8	     */		std	%f0,[%sp+2231]
+/* 0x08fc	     */		ldd	[%o7+40],%f2
+/* 0x0900	     */		and	%g1,%o5,%i3
+/* 0x0904	     */		ldd	[%o7+32],%f0
+/* 0x0908	     */		srax	%g1,32,%g1
+/* 0x090c	     */		ldd	[%o7+56],%f4
+/* 0x0910	     */		fdtox	%f2,%f2
+/* 0x0914	     */		ldx	[%sp+2231],%g2
+/* 0x0918	     */		fdtox	%f0,%f0
+/* 0x091c	     */		st	%l1,[%l2]
+/* 0x0920	     */		srax	%i5,32,%l1
+/* 0x0924	     */		fdtox	%f4,%f4
+/* 0x0928	     */		std	%f2,[%sp+2231]
+/* 0x092c	     */		and	%g2,%o4,%i5
+/* 0x0930	     */		add	%i4,%l1,%i4
+/* 0x0934	     */		std	%f0,[%sp+2239]
+/* 0x0938	     */		sllx	%i5,16,%i0
+/* 0x093c	     */		add	%i1,%i4,%i1
+/* 0x0940	     */		ldd	[%o7+48],%f2
+/* 0x0944	     */		srax	%g2,16,%g2
+/* 0x0948	     */		add	%i3,%i0,%i0
+/* 0x094c	     */		ldd	[%o7+72],%f0
+/* 0x0950	     */		add	%i0,%i1,%i3
+/* 0x0954	     */		srax	%i3,32,%i4
+/* 0x0958	     */		fdtox	%f2,%f2
+/* 0x095c	     */		and	%i3,%o5,%i3
+/* 0x0960	     */		ldx	[%sp+2231],%i1
+/* 0x0964	     */		add	%g2,%i4,%g2
+/* 0x0968	     */		ldx	[%sp+2239],%i0
+/* 0x096c	     */		add	%g1,%g2,%g1
+/* 0x0970	     */		std	%f2,[%sp+2239]
+/* 0x0974	     */		std	%f4,[%sp+2231]
+/* 0x0978	     */		ldd	[%o7+64],%f2
+/* 0x097c	     */		st	%i3,[%l2+4]
+                       .L900000631:
+/* 0x0980	     */		ldx	[%sp+2231],%i3
+/* 0x0984	     */		add	%i2,2,%i2
+/* 0x0988	     */		add	%g4,4,%g4
+/* 0x098c	     */		ldx	[%sp+2239],%i5
+/* 0x0990	     */		add	%o2,16,%o2
+/* 0x0994	     */		and	%i1,%o4,%g2
+/* 0x0998	     */		sllx	%g2,16,%i4
+/* 0x099c	     */		and	%i0,%o5,%g2
+/* 0x09a0	     */		ldd	[%o7+%o2],%f4
+/* 0x09a4	     */		fdtox	%f0,%f0
+/* 0x09a8	     */		std	%f0,[%sp+2231]
+/* 0x09ac	     */		srax	%i1,16,%i1
+/* 0x09b0	     */		add	%g2,%i4,%g2
+/* 0x09b4	     */		fdtox	%f2,%f0
+/* 0x09b8	     */		add	%o3,16,%o3
+/* 0x09bc	     */		std	%f0,[%sp+2239]
+/* 0x09c0	     */		add	%g2,%g1,%g1
+/* 0x09c4	     */		ldd	[%o7+%o3],%f2
+/* 0x09c8	     */		srax	%g1,32,%i4
+/* 0x09cc	     */		cmp	%i2,%o1
+/* 0x09d0	     */		srax	%i0,32,%g2
+/* 0x09d4	     */		add	%i1,%i4,%i0
+/* 0x09d8	     */		add	%g2,%i0,%i4
+/* 0x09dc	     */		add	%o0,4,%o0
+/* 0x09e0	     */		and	%g1,%o5,%g2
+/* 0x09e4	     */		or	%g0,%i5,%g1
+/* 0x09e8	     */		st	%g2,[%l2+%o0]
+/* 0x09ec	     */		add	%g5,4,%g5
+/* 0x09f0	     */		ldx	[%sp+2231],%i1
+/* 0x09f4	     */		ldx	[%sp+2239],%i0
+/* 0x09f8	     */		add	%o2,16,%o2
+/* 0x09fc	     */		and	%i3,%o4,%g2
+/* 0x0a00	     */		sllx	%g2,16,%i5
+/* 0x0a04	     */		and	%g1,%o5,%g2
+/* 0x0a08	     */		ldd	[%o7+%o2],%f0
+/* 0x0a0c	     */		fdtox	%f4,%f4
+/* 0x0a10	     */		std	%f4,[%sp+2231]
+/* 0x0a14	     */		srax	%i3,16,%i3
+/* 0x0a18	     */		add	%g2,%i5,%g2
+/* 0x0a1c	     */		fdtox	%f2,%f2
+/* 0x0a20	     */		add	%o3,16,%o3
+/* 0x0a24	     */		std	%f2,[%sp+2239]
+/* 0x0a28	     */		add	%g2,%i4,%g2
+/* 0x0a2c	     */		ldd	[%o7+%o3],%f2
+/* 0x0a30	     */		srax	%g2,32,%i4
+/* 0x0a34	     */		srax	%g1,32,%g1
+/* 0x0a38	     */		add	%i3,%i4,%i3
+/* 0x0a3c	     */		add	%g1,%i3,%g1
+/* 0x0a40	     */		add	%o0,4,%o0
+/* 0x0a44	     */		and	%g2,%o5,%g2
+/* 0x0a48	     */		ble,pt	%icc,.L900000631
+/* 0x0a4c	     */		st	%g2,[%l2+%o0]
+                       .L900000634:
+/* 0x0a50	     */		srax	%i1,16,%i5
+/* 0x0a54	     */		ldx	[%sp+2231],%o1
+/* 0x0a58	     */		and	%i1,%o4,%i3
+/* 0x0a5c	     */		sllx	%i3,16,%i3
+/* 0x0a60	     */		ldx	[%sp+2239],%i4
+/* 0x0a64	     */		and	%i0,%o5,%g2
+/* 0x0a68	     */		add	%g2,%i3,%g2
+/* 0x0a6c	     */		and	%o1,%o4,%i3
+/* 0x0a70	     */		fdtox	%f0,%f4
+/* 0x0a74	     */		sllx	%i3,16,%i3
+/* 0x0a78	     */		std	%f4,[%sp+2231]
+/* 0x0a7c	     */		add	%g2,%g1,%g2
+/* 0x0a80	     */		srax	%g2,32,%l1
+/* 0x0a84	     */		and	%i4,%o5,%i1
+/* 0x0a88	     */		fdtox	%f2,%f0
+/* 0x0a8c	     */		srax	%i0,32,%g1
+/* 0x0a90	     */		std	%f0,[%sp+2239]
+/* 0x0a94	     */		add	%i5,%l1,%i0
+/* 0x0a98	     */		srax	%o1,16,%o1
+/* 0x0a9c	     */		add	%g1,%i0,%i0
+/* 0x0aa0	     */		add	%o0,4,%g1
+/* 0x0aa4	     */		add	%i1,%i3,%o0
+/* 0x0aa8	     */		and	%g2,%o5,%g2
+/* 0x0aac	     */		st	%g2,[%l2+%g1]
+/* 0x0ab0	     */		add	%o0,%i0,%o0
+/* 0x0ab4	     */		srax	%o0,32,%i3
+/* 0x0ab8	     */		ldx	[%sp+2231],%i1
+/* 0x0abc	     */		add	%g1,4,%g1
+/* 0x0ac0	     */		ldx	[%sp+2239],%i0
+/* 0x0ac4	     */		and	%o0,%o5,%g2
+/* 0x0ac8	     */		add	%o1,%i3,%o1
+/* 0x0acc	     */		srax	%i4,32,%o0
+/* 0x0ad0	     */		cmp	%i2,%g3
+/* 0x0ad4	     */		st	%g2,[%l2+%g1]
+/* 0x0ad8	     */		bg,pn	%icc,.L77000236
+/* 0x0adc	     */		add	%o0,%o1,%g1
+/* 0x0ae0	     */		add	%g4,6,%g4
+/* 0x0ae4	     */		add	%g5,6,%g5
+                       .L77000287:
+/* 0x0ae8	     */		sra	%g5,0,%o1
+                       .L900000647:
+/* 0x0aec	     */		sllx	%o1,3,%o2
+/* 0x0af0	     */		and	%i0,%o5,%o0
+/* 0x0af4	     */		ldd	[%o7+%o2],%f0
+/* 0x0af8	     */		sra	%g4,0,%o2
+/* 0x0afc	     */		and	%i1,%o4,%o1
+/* 0x0b00	     */		sllx	%o2,3,%o2
+/* 0x0b04	     */		add	%g1,%o0,%o0
+/* 0x0b08	     */		fdtox	%f0,%f0
+/* 0x0b0c	     */		std	%f0,[%sp+2239]
+/* 0x0b10	     */		sllx	%o1,16,%o1
+/* 0x0b14	     */		add	%o0,%o1,%o1
+/* 0x0b18	     */		add	%g5,2,%g5
+/* 0x0b1c	     */		ldd	[%o7+%o2],%f0
+/* 0x0b20	     */		srax	%o1,32,%g1
+/* 0x0b24	     */		and	%o1,%o5,%o2
+/* 0x0b28	     */		srax	%i1,16,%o0
+/* 0x0b2c	     */		add	%g4,2,%g4
+/* 0x0b30	     */		fdtox	%f0,%f0
+/* 0x0b34	     */		std	%f0,[%sp+2231]
+/* 0x0b38	     */		sra	%i2,0,%o1
+/* 0x0b3c	     */		sllx	%o1,2,%o1
+/* 0x0b40	     */		add	%o0,%g1,%g2
+/* 0x0b44	     */		srax	%i0,32,%g1
+/* 0x0b48	     */		add	%i2,1,%i2
+/* 0x0b4c	     */		add	%g1,%g2,%g1
+/* 0x0b50	     */		cmp	%i2,%g3
+/* 0x0b54	     */		ldx	[%sp+2239],%o3
+/* 0x0b58	     */		ldx	[%sp+2231],%i1
+/* 0x0b5c	     */		st	%o2,[%l2+%o1]
+/* 0x0b60	     */		or	%g0,%o3,%i0
+/* 0x0b64	     */		ble,pt	%icc,.L900000647
+/* 0x0b68	     */		sra	%g5,0,%o1
+                       .L77000236:
+/* 0x0b6c	     */		sethi	%hi(0xfc00),%g2
+                       .L900000648:
+/* 0x0b70	     */		or	%g0,-1,%o0
+/* 0x0b74	     */		add	%g2,1023,%g2
+/* 0x0b78	     */		srl	%o0,0,%g3
+/* 0x0b7c	     */		and	%i1,%g2,%g2
+/* 0x0b80	     */		and	%i0,%g3,%g4
+/* 0x0b84	     */		sllx	%g2,16,%g2
+/* 0x0b88	     */		add	%g1,%g4,%g4
+/* 0x0b8c	     */		sra	%i2,0,%g5
+/* 0x0b90	     */		add	%g4,%g2,%g4
+/* 0x0b94	     */		sllx	%g5,2,%g2
+/* 0x0b98	     */		and	%g4,%g3,%g3
+/* 0x0b9c	     */		st	%g3,[%l2+%g2]
+
+!  317		      ! adjust_montf_result(result,nint,nlen); 
+
+/* 0x0ba0	 317 */		sra	%l0,0,%g4
+/* 0x0ba4	     */		sllx	%g4,2,%g2
+/* 0x0ba8	     */		ld	[%l2+%g2],%g2
+/* 0x0bac	     */		cmp	%g2,0
+/* 0x0bb0	     */		bleu,pn	%icc,.L77000241
+/* 0x0bb4	     */		or	%g0,-1,%o1
+/* 0x0bb8	     */		ba	.L900000646
+/* 0x0bbc	     */		cmp	%o1,0
+                       .L77000241:
+/* 0x0bc0	     */		sub	%l0,1,%o1
+/* 0x0bc4	     */		cmp	%o1,0
+/* 0x0bc8	     */		bl,pn	%icc,.L77000244
+/* 0x0bcc	     */		sra	%o1,0,%g2
+                       .L900000645:
+/* 0x0bd0	     */		sllx	%g2,2,%g2
+/* 0x0bd4	     */		sub	%o1,1,%o0
+/* 0x0bd8	     */		ld	[%l3+%g2],%g3
+/* 0x0bdc	     */		ld	[%l2+%g2],%g2
+/* 0x0be0	     */		cmp	%g2,%g3
+/* 0x0be4	     */		bne,pn	%icc,.L77000244
+/* 0x0be8	     */		nop
+/* 0x0bec	   0 */		or	%g0,%o0,%o1
+/* 0x0bf0	 317 */		cmp	%o0,0
+/* 0x0bf4	     */		bge,pt	%icc,.L900000645
+/* 0x0bf8	     */		sra	%o1,0,%g2
+                       .L77000244:
+/* 0x0bfc	     */		cmp	%o1,0
+                       .L900000646:
+/* 0x0c00	     */		bl,pn	%icc,.L77000288
+/* 0x0c04	     */		sra	%o1,0,%g2
+/* 0x0c08	     */		sllx	%g2,2,%g2
+/* 0x0c0c	     */		ld	[%l3+%g2],%g3
+/* 0x0c10	     */		ld	[%l2+%g2],%g2
+/* 0x0c14	     */		cmp	%g2,%g3
+/* 0x0c18	     */		bleu,pt	%icc,.L77000224
+/* 0x0c1c	     */		nop
+                       .L77000288:
+/* 0x0c20	     */		cmp	%l0,0
+/* 0x0c24	     */		ble,pt	%icc,.L77000224
+/* 0x0c28	     */		nop
+/* 0x0c2c	 317 */		or	%g0,-1,%g2
+/* 0x0c30	 315 */		or	%g0,0,%i0
+/* 0x0c34	 317 */		srl	%g2,0,%g2
+/* 0x0c38	 315 */		or	%g0,0,%g4
+/* 0x0c3c	     */		or	%g0,0,%o1
+/* 0x0c40	 317 */		sub	%l0,1,%g5
+/* 0x0c44	     */		cmp	%l0,9
+/* 0x0c48	 315 */		or	%g0,8,%o5
+/* 0x0c4c	     */		bl,pn	%icc,.L77000289
+/* 0x0c50	     */		sub	%l0,4,%o7
+/* 0x0c54	     */		ld	[%l2],%o1
+/* 0x0c58	     */		or	%g0,5,%i0
+/* 0x0c5c	     */		ld	[%l3],%o2
+/* 0x0c60	     */		or	%g0,12,%o4
+/* 0x0c64	     */		or	%g0,16,%g1
+/* 0x0c68	     */		ld	[%l3+4],%o3
+/* 0x0c6c	     */		ld	[%l2+4],%o0
+/* 0x0c70	     */		sub	%o1,%o2,%o1
+/* 0x0c74	     */		ld	[%l3+8],%i1
+/* 0x0c78	     */		and	%o1,%g2,%g4
+/* 0x0c7c	     */		st	%g4,[%l2]
+/* 0x0c80	     */		srax	%o1,32,%g4
+/* 0x0c84	     */		sub	%o0,%o3,%o0
+/* 0x0c88	     */		ld	[%l3+12],%o2
+/* 0x0c8c	     */		add	%o0,%g4,%o0
+/* 0x0c90	     */		and	%o0,%g2,%g4
+/* 0x0c94	     */		st	%g4,[%l2+4]
+/* 0x0c98	     */		srax	%o0,32,%o0
+/* 0x0c9c	     */		ld	[%l2+8],%o1
+/* 0x0ca0	     */		ld	[%l2+12],%o3
+/* 0x0ca4	     */		sub	%o1,%i1,%o1
+                       .L900000635:
+/* 0x0ca8	     */		add	%g1,4,%g3
+/* 0x0cac	     */		ld	[%g1+%l2],%g4
+/* 0x0cb0	     */		add	%o1,%o0,%o0
+/* 0x0cb4	     */		ld	[%l3+%g1],%i1
+/* 0x0cb8	     */		sub	%o3,%o2,%o1
+/* 0x0cbc	     */		and	%o0,%g2,%o2
+/* 0x0cc0	     */		st	%o2,[%o5+%l2]
+/* 0x0cc4	     */		srax	%o0,32,%o2
+/* 0x0cc8	     */		add	%i0,4,%i0
+/* 0x0ccc	     */		add	%g1,8,%o5
+/* 0x0cd0	     */		ld	[%g3+%l2],%o0
+/* 0x0cd4	     */		add	%o1,%o2,%o1
+/* 0x0cd8	     */		ld	[%l3+%g3],%o3
+/* 0x0cdc	     */		sub	%g4,%i1,%o2
+/* 0x0ce0	     */		and	%o1,%g2,%g4
+/* 0x0ce4	     */		st	%g4,[%o4+%l2]
+/* 0x0ce8	     */		srax	%o1,32,%g4
+/* 0x0cec	     */		cmp	%i0,%o7
+/* 0x0cf0	     */		add	%g1,12,%o4
+/* 0x0cf4	     */		ld	[%o5+%l2],%o1
+/* 0x0cf8	     */		add	%o2,%g4,%o2
+/* 0x0cfc	     */		ld	[%l3+%o5],%i1
+/* 0x0d00	     */		sub	%o0,%o3,%o0
+/* 0x0d04	     */		and	%o2,%g2,%o3
+/* 0x0d08	     */		st	%o3,[%g1+%l2]
+/* 0x0d0c	     */		srax	%o2,32,%g4
+/* 0x0d10	     */		ld	[%o4+%l2],%o3
+/* 0x0d14	     */		add	%g1,16,%g1
+/* 0x0d18	     */		add	%o0,%g4,%o0
+/* 0x0d1c	     */		ld	[%l3+%o4],%o2
+/* 0x0d20	     */		sub	%o1,%i1,%o1
+/* 0x0d24	     */		and	%o0,%g2,%g4
+/* 0x0d28	     */		st	%g4,[%g3+%l2]
+/* 0x0d2c	     */		ble,pt	%icc,.L900000635
+/* 0x0d30	     */		srax	%o0,32,%o0
+                       .L900000638:
+/* 0x0d34	     */		add	%o1,%o0,%g3
+/* 0x0d38	     */		sub	%o3,%o2,%o1
+/* 0x0d3c	     */		ld	[%g1+%l2],%o0
+/* 0x0d40	     */		ld	[%l3+%g1],%o2
+/* 0x0d44	     */		srax	%g3,32,%o7
+/* 0x0d48	     */		and	%g3,%g2,%o3
+/* 0x0d4c	     */		add	%o1,%o7,%o1
+/* 0x0d50	     */		st	%o3,[%o5+%l2]
+/* 0x0d54	     */		cmp	%i0,%g5
+/* 0x0d58	     */		sub	%o0,%o2,%o0
+/* 0x0d5c	     */		and	%o1,%g2,%o2
+/* 0x0d60	     */		st	%o2,[%o4+%l2]
+/* 0x0d64	     */		srax	%o1,32,%o1
+/* 0x0d68	     */		sra	%i0,0,%o2
+/* 0x0d6c	     */		add	%o0,%o1,%o0
+/* 0x0d70	     */		srax	%o0,32,%g4
+/* 0x0d74	     */		and	%o0,%g2,%o1
+/* 0x0d78	     */		st	%o1,[%g1+%l2]
+/* 0x0d7c	     */		bg,pn	%icc,.L77000224
+/* 0x0d80	     */		sllx	%o2,2,%o1
+                       .L77000289:
+/* 0x0d84	   0 */		or	%g0,%o1,%g1
+                       .L900000644:
+/* 0x0d88	     */		ld	[%o1+%l2],%o0
+/* 0x0d8c	     */		add	%i0,1,%i0
+/* 0x0d90	     */		ld	[%l3+%o1],%o1
+/* 0x0d94	     */		sra	%i0,0,%o2
+/* 0x0d98	     */		cmp	%i0,%g5
+/* 0x0d9c	     */		add	%g4,%o0,%o0
+/* 0x0da0	     */		sub	%o0,%o1,%o0
+/* 0x0da4	     */		srax	%o0,32,%g4
+/* 0x0da8	     */		and	%o0,%g2,%o1
+/* 0x0dac	     */		st	%o1,[%g1+%l2]
+/* 0x0db0	     */		sllx	%o2,2,%o1
+/* 0x0db4	     */		ble,pt	%icc,.L900000644
+/* 0x0db8	     */		or	%g0,%o1,%g1
+                       .L77000224:
+/* 0x0dbc	     */		ret	! Result = 
+/* 0x0dc0	     */		restore	%g0,%g0,%g0
+/* 0x0dc4	   0 */		.type	mont_mulf_noconv,2
+/* 0x0dc4	     */		.size	mont_mulf_noconv,(.-mont_mulf_noconv)
+
diff --git a/security/nss/lib/freebl/mpi/mp_comba.c b/security/nss/lib/freebl/mpi/mp_comba.c
new file mode 100644
index 000000000..3b4937b98
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_comba.c
@@ -0,0 +1,3235 @@
+/*
+ * This file is derived from TFM v0.03.
+ * It contains code from fp_mul_comba.c and
+ * fp_sqr_comba.c, which contained the following license.
+ *
+ * Right now, the assembly in this file limits
+ * this code to AMD 64.
+ *
+ * This file is public domain.
+ */
+
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short.  That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+
+#include "mpi-priv.h"
+
+/* clamp digits: strip zero digits from the top (highest index) of (a); if none remain, force sign to ZPOS. (a) is evaluated more than once. */
+#define mp_clamp(a)                                      \
+    {                                                    \
+        while ((a)->used && (a)->dp[(a)->used - 1] == 0) \
+            --((a)->used);                               \
+        (a)->sign = (a)->used ? (a)->sign : ZPOS;        \
+    }
+
+/* anything you need at the start (expands to nothing here) */
+#define COMBA_START
+
+/* clear the chaining variables, i.e. the three-word accumulator c2:c1:c0 (expansion already ends in ';') */
+#define COMBA_CLEAR \
+    c0 = c1 = c2 = 0;
+
+/* forward the carry to the next digit: c0 <- c1, c1 <- c2, c2 <- 0 (expansion already ends in ';') */
+#define COMBA_FORWARD \
+    do {              \
+        c0 = c1;      \
+        c1 = c2;      \
+        c2 = 0;       \
+    } while (0);
+
+/* anything you need at the end (expands to nothing here) */
+#define COMBA_FINI
+
+/* c2:c1:c0 += i * j; j feeds mulq, which has no immediate form, so it is constrained to "rm" instead of "g" */
+#define MULADD(i, j)                                 \
+    __asm__(                                         \
+        "movq  %6,%%rax     \n\t"                    \
+        "mulq  %7           \n\t"                    \
+        "addq  %%rax,%0     \n\t"                    \
+        "adcq  %%rdx,%1     \n\t"                    \
+        "adcq  $0,%2        \n\t"                    \
+        : "=r"(c0), "=r"(c1), "=r"(c2)               \
+        : "0"(c0), "1"(c1), "2"(c2), "g"(i), "rm"(j) \
+        : "%rax", "%rdx", "cc");
+
+/* sqr macros only, except that COMBA_STORE and COMBA_STORE2 are also used by the s_mp_mul_comba_* routines */
+#define CLEAR_CARRY \
+    c0 = c1 = c2 = 0;
+
+#define COMBA_STORE(x) /* x = low accumulator word c0 */ \
+    x = c0;
+
+#define COMBA_STORE2(x) /* x = middle accumulator word c1 */ \
+    x = c1;
+
+#define CARRY_FORWARD /* c0 <- c1, c1 <- c2, c2 <- 0 */ \
+    do {              \
+        c0 = c1;      \
+        c1 = c2;      \
+        c2 = 0;       \
+    } while (0);
+
+#define COMBA_FINI
+
+#define SQRADD(i, j) /* c2:c1:c0 += i * i; parameter j is not referenced */ \
+    __asm__(                                \
+        "movq  %6,%%rax     \n\t"           \
+        "mulq  %%rax        \n\t"           \
+        "addq  %%rax,%0     \n\t"           \
+        "adcq  %%rdx,%1     \n\t"           \
+        "adcq  $0,%2        \n\t"           \
+        : "=r"(c0), "=r"(c1), "=r"(c2)      \
+        : "0"(c0), "1"(c1), "2"(c2), "g"(i) \
+        : "%rax", "%rdx", "cc");
+
+#define SQRADD2(i, j) /* c2:c1:c0 += 2 * i * j; j feeds mulq (no immediate form), hence "rm" rather than "g" */ \
+    __asm__(                                         \
+        "movq  %6,%%rax     \n\t"                    \
+        "mulq  %7           \n\t"                    \
+        "addq  %%rax,%0     \n\t"                    \
+        "adcq  %%rdx,%1     \n\t"                    \
+        "adcq  $0,%2        \n\t"                    \
+        "addq  %%rax,%0     \n\t"                    \
+        "adcq  %%rdx,%1     \n\t"                    \
+        "adcq  $0,%2        \n\t"                    \
+        : "=r"(c0), "=r"(c1), "=r"(c2)               \
+        : "0"(c0), "1"(c1), "2"(c2), "g"(i), "rm"(j) \
+        : "%rax", "%rdx", "cc");
+
+#define SQRADDSC(i, j) /* sc1:sc0 = i * j, sc2 = 0; j feeds mulq (no immediate form), hence "rm" rather than "g" */ \
+    __asm__(                              \
+        "movq  %3,%%rax     \n\t"         \
+        "mulq  %4           \n\t"         \
+        "movq  %%rax,%0     \n\t"         \
+        "movq  %%rdx,%1     \n\t"         \
+        "xorq  %2,%2        \n\t"         \
+        : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
+        : "g"(i), "rm"(j)                 \
+        : "%rax", "%rdx", "cc");
+
+#define SQRADDAC(i, j) /* sc2:sc1:sc0 += i * j; j feeds mulq (no immediate form), hence "rm" rather than "g" */ \
+    __asm__(                                            \
+        "movq  %6,%%rax     \n\t"                       \
+        "mulq  %7           \n\t"                       \
+        "addq  %%rax,%0     \n\t"                       \
+        "adcq  %%rdx,%1     \n\t"                       \
+        "adcq  $0,%2        \n\t"                       \
+        : "=r"(sc0), "=r"(sc1), "=r"(sc2)               \
+        : "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "rm"(j) \
+        : "%rax", "%rdx", "cc");
+
+#define SQRADDDB /* c2:c1:c0 += 2 * sc2:sc1:sc0, done as two identical add-with-carry passes */ \
+    __asm__(                                                      \
+        "addq %6,%0         \n\t"                                 \
+        "adcq %7,%1         \n\t"                                 \
+        "adcq %8,%2         \n\t"                                 \
+        "addq %6,%0         \n\t"                                 \
+        "adcq %7,%1         \n\t"                                 \
+        "adcq %8,%2         \n\t"                                 \
+        : "=&r"(c0), "=&r"(c1), "=&r"(c2)                         \
+        : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) \
+        : "cc");
+
+void
+s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C)
+{
+    mp_digit c0, c1, c2, at[8]; /* at[] = 4 digits of A then 4 digits of B; c2:c1:c0 = running column sum */
+
+    memcpy(at, A->dp, 4 * sizeof(mp_digit));
+    memcpy(at + 4, B->dp, 4 * sizeof(mp_digit));
+    COMBA_START;
+
+    COMBA_CLEAR;
+    /* 0: C = A * B by columns; column k sums at[i] * at[4 + j] over all i + j == k, low word -> C->dp[k] */
+    MULADD(at[0], at[4]);
+    COMBA_STORE(C->dp[0]);
+    /* 1 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[5]);
+    MULADD(at[1], at[4]);
+    COMBA_STORE(C->dp[1]);
+    /* 2 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[6]);
+    MULADD(at[1], at[5]);
+    MULADD(at[2], at[4]);
+    COMBA_STORE(C->dp[2]);
+    /* 3 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[7]);
+    MULADD(at[1], at[6]);
+    MULADD(at[2], at[5]);
+    MULADD(at[3], at[4]);
+    COMBA_STORE(C->dp[3]);
+    /* 4 */
+    COMBA_FORWARD;
+    MULADD(at[1], at[7]);
+    MULADD(at[2], at[6]);
+    MULADD(at[3], at[5]);
+    COMBA_STORE(C->dp[4]);
+    /* 5 */
+    COMBA_FORWARD;
+    MULADD(at[2], at[7]);
+    MULADD(at[3], at[6]);
+    COMBA_STORE(C->dp[5]);
+    /* 6 */
+    COMBA_FORWARD;
+    MULADD(at[3], at[7]);
+    COMBA_STORE(C->dp[6]);
+    COMBA_STORE2(C->dp[7]); /* top digit is the carry word c1 left over from column 6 */
+    C->used = 8; /* C->dp[0..7] were all written above; nothing here checks or grows C's allocation */
+    C->sign = A->sign ^ B->sign;
+    mp_clamp(C); /* strip high-order zero digits */
+    COMBA_FINI;
+}
+
+void
+s_mp_mul_comba_8(const mp_int *A, const mp_int *B, mp_int *C)
+{
+    mp_digit c0, c1, c2, at[16]; /* at[] = 8 digits of A then 8 digits of B; c2:c1:c0 = running column sum */
+
+    memcpy(at, A->dp, 8 * sizeof(mp_digit));
+    memcpy(at + 8, B->dp, 8 * sizeof(mp_digit));
+    COMBA_START;
+
+    COMBA_CLEAR;
+    /* 0: C = A * B by columns; column k sums at[i] * at[8 + j] over all i + j == k, low word -> C->dp[k] */
+    MULADD(at[0], at[8]);
+    COMBA_STORE(C->dp[0]);
+    /* 1 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[9]);
+    MULADD(at[1], at[8]);
+    COMBA_STORE(C->dp[1]);
+    /* 2 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[10]);
+    MULADD(at[1], at[9]);
+    MULADD(at[2], at[8]);
+    COMBA_STORE(C->dp[2]);
+    /* 3 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[11]);
+    MULADD(at[1], at[10]);
+    MULADD(at[2], at[9]);
+    MULADD(at[3], at[8]);
+    COMBA_STORE(C->dp[3]);
+    /* 4 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[12]);
+    MULADD(at[1], at[11]);
+    MULADD(at[2], at[10]);
+    MULADD(at[3], at[9]);
+    MULADD(at[4], at[8]);
+    COMBA_STORE(C->dp[4]);
+    /* 5 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[13]);
+    MULADD(at[1], at[12]);
+    MULADD(at[2], at[11]);
+    MULADD(at[3], at[10]);
+    MULADD(at[4], at[9]);
+    MULADD(at[5], at[8]);
+    COMBA_STORE(C->dp[5]);
+    /* 6 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[14]);
+    MULADD(at[1], at[13]);
+    MULADD(at[2], at[12]);
+    MULADD(at[3], at[11]);
+    MULADD(at[4], at[10]);
+    MULADD(at[5], at[9]);
+    MULADD(at[6], at[8]);
+    COMBA_STORE(C->dp[6]);
+    /* 7 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[15]);
+    MULADD(at[1], at[14]);
+    MULADD(at[2], at[13]);
+    MULADD(at[3], at[12]);
+    MULADD(at[4], at[11]);
+    MULADD(at[5], at[10]);
+    MULADD(at[6], at[9]);
+    MULADD(at[7], at[8]);
+    COMBA_STORE(C->dp[7]);
+    /* 8 */
+    COMBA_FORWARD;
+    MULADD(at[1], at[15]);
+    MULADD(at[2], at[14]);
+    MULADD(at[3], at[13]);
+    MULADD(at[4], at[12]);
+    MULADD(at[5], at[11]);
+    MULADD(at[6], at[10]);
+    MULADD(at[7], at[9]);
+    COMBA_STORE(C->dp[8]);
+    /* 9 */
+    COMBA_FORWARD;
+    MULADD(at[2], at[15]);
+    MULADD(at[3], at[14]);
+    MULADD(at[4], at[13]);
+    MULADD(at[5], at[12]);
+    MULADD(at[6], at[11]);
+    MULADD(at[7], at[10]);
+    COMBA_STORE(C->dp[9]);
+    /* 10 */
+    COMBA_FORWARD;
+    MULADD(at[3], at[15]);
+    MULADD(at[4], at[14]);
+    MULADD(at[5], at[13]);
+    MULADD(at[6], at[12]);
+    MULADD(at[7], at[11]);
+    COMBA_STORE(C->dp[10]);
+    /* 11 */
+    COMBA_FORWARD;
+    MULADD(at[4], at[15]);
+    MULADD(at[5], at[14]);
+    MULADD(at[6], at[13]);
+    MULADD(at[7], at[12]);
+    COMBA_STORE(C->dp[11]);
+    /* 12 */
+    COMBA_FORWARD;
+    MULADD(at[5], at[15]);
+    MULADD(at[6], at[14]);
+    MULADD(at[7], at[13]);
+    COMBA_STORE(C->dp[12]);
+    /* 13 */
+    COMBA_FORWARD;
+    MULADD(at[6], at[15]);
+    MULADD(at[7], at[14]);
+    COMBA_STORE(C->dp[13]);
+    /* 14 */
+    COMBA_FORWARD;
+    MULADD(at[7], at[15]);
+    COMBA_STORE(C->dp[14]);
+    COMBA_STORE2(C->dp[15]); /* top digit is the carry word c1 left over from column 14 */
+    C->used = 16; /* C->dp[0..15] were all written above; nothing here checks or grows C's allocation */
+    C->sign = A->sign ^ B->sign;
+    mp_clamp(C); /* strip high-order zero digits */
+    COMBA_FINI;
+}
+
+void
+s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C)
+{
+    mp_digit c0, c1, c2, at[32]; /* at[] = 16 digits of A then 16 digits of B; c2:c1:c0 = running column sum */
+
+    memcpy(at, A->dp, 16 * sizeof(mp_digit));
+    memcpy(at + 16, B->dp, 16 * sizeof(mp_digit));
+    COMBA_START;
+
+    COMBA_CLEAR;
+    /* 0: C = A * B by columns; column k sums at[i] * at[16 + j] over all i + j == k, low word -> C->dp[k] */
+    MULADD(at[0], at[16]);
+    COMBA_STORE(C->dp[0]);
+    /* 1 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[17]);
+    MULADD(at[1], at[16]);
+    COMBA_STORE(C->dp[1]);
+    /* 2 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[18]);
+    MULADD(at[1], at[17]);
+    MULADD(at[2], at[16]);
+    COMBA_STORE(C->dp[2]);
+    /* 3 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[19]);
+    MULADD(at[1], at[18]);
+    MULADD(at[2], at[17]);
+    MULADD(at[3], at[16]);
+    COMBA_STORE(C->dp[3]);
+    /* 4 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[20]);
+    MULADD(at[1], at[19]);
+    MULADD(at[2], at[18]);
+    MULADD(at[3], at[17]);
+    MULADD(at[4], at[16]);
+    COMBA_STORE(C->dp[4]);
+    /* 5 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[21]);
+    MULADD(at[1], at[20]);
+    MULADD(at[2], at[19]);
+    MULADD(at[3], at[18]);
+    MULADD(at[4], at[17]);
+    MULADD(at[5], at[16]);
+    COMBA_STORE(C->dp[5]);
+    /* 6 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[22]);
+    MULADD(at[1], at[21]);
+    MULADD(at[2], at[20]);
+    MULADD(at[3], at[19]);
+    MULADD(at[4], at[18]);
+    MULADD(at[5], at[17]);
+    MULADD(at[6], at[16]);
+    COMBA_STORE(C->dp[6]);
+    /* 7 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[23]);
+    MULADD(at[1], at[22]);
+    MULADD(at[2], at[21]);
+    MULADD(at[3], at[20]);
+    MULADD(at[4], at[19]);
+    MULADD(at[5], at[18]);
+    MULADD(at[6], at[17]);
+    MULADD(at[7], at[16]);
+    COMBA_STORE(C->dp[7]);
+    /* 8 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[24]);
+    MULADD(at[1], at[23]);
+    MULADD(at[2], at[22]);
+    MULADD(at[3], at[21]);
+    MULADD(at[4], at[20]);
+    MULADD(at[5], at[19]);
+    MULADD(at[6], at[18]);
+    MULADD(at[7], at[17]);
+    MULADD(at[8], at[16]);
+    COMBA_STORE(C->dp[8]);
+    /* 9 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[25]);
+    MULADD(at[1], at[24]);
+    MULADD(at[2], at[23]);
+    MULADD(at[3], at[22]);
+    MULADD(at[4], at[21]);
+    MULADD(at[5], at[20]);
+    MULADD(at[6], at[19]);
+    MULADD(at[7], at[18]);
+    MULADD(at[8], at[17]);
+    MULADD(at[9], at[16]);
+    COMBA_STORE(C->dp[9]);
+    /* 10 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[26]);
+    MULADD(at[1], at[25]);
+    MULADD(at[2], at[24]);
+    MULADD(at[3], at[23]);
+    MULADD(at[4], at[22]);
+    MULADD(at[5], at[21]);
+    MULADD(at[6], at[20]);
+    MULADD(at[7], at[19]);
+    MULADD(at[8], at[18]);
+    MULADD(at[9], at[17]);
+    MULADD(at[10], at[16]);
+    COMBA_STORE(C->dp[10]);
+    /* 11 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[27]);
+    MULADD(at[1], at[26]);
+    MULADD(at[2], at[25]);
+    MULADD(at[3], at[24]);
+    MULADD(at[4], at[23]);
+    MULADD(at[5], at[22]);
+    MULADD(at[6], at[21]);
+    MULADD(at[7], at[20]);
+    MULADD(at[8], at[19]);
+    MULADD(at[9], at[18]);
+    MULADD(at[10], at[17]);
+    MULADD(at[11], at[16]);
+    COMBA_STORE(C->dp[11]);
+    /* 12 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[28]);
+    MULADD(at[1], at[27]);
+    MULADD(at[2], at[26]);
+    MULADD(at[3], at[25]);
+    MULADD(at[4], at[24]);
+    MULADD(at[5], at[23]);
+    MULADD(at[6], at[22]);
+    MULADD(at[7], at[21]);
+    MULADD(at[8], at[20]);
+    MULADD(at[9], at[19]);
+    MULADD(at[10], at[18]);
+    MULADD(at[11], at[17]);
+    MULADD(at[12], at[16]);
+    COMBA_STORE(C->dp[12]);
+    /* 13 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[29]);
+    MULADD(at[1], at[28]);
+    MULADD(at[2], at[27]);
+    MULADD(at[3], at[26]);
+    MULADD(at[4], at[25]);
+    MULADD(at[5], at[24]);
+    MULADD(at[6], at[23]);
+    MULADD(at[7], at[22]);
+    MULADD(at[8], at[21]);
+    MULADD(at[9], at[20]);
+    MULADD(at[10], at[19]);
+    MULADD(at[11], at[18]);
+    MULADD(at[12], at[17]);
+    MULADD(at[13], at[16]);
+    COMBA_STORE(C->dp[13]);
+    /* 14 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[30]);
+    MULADD(at[1], at[29]);
+    MULADD(at[2], at[28]);
+    MULADD(at[3], at[27]);
+    MULADD(at[4], at[26]);
+    MULADD(at[5], at[25]);
+    MULADD(at[6], at[24]);
+    MULADD(at[7], at[23]);
+    MULADD(at[8], at[22]);
+    MULADD(at[9], at[21]);
+    MULADD(at[10], at[20]);
+    MULADD(at[11], at[19]);
+    MULADD(at[12], at[18]);
+    MULADD(at[13], at[17]);
+    MULADD(at[14], at[16]);
+    COMBA_STORE(C->dp[14]);
+    /* 15 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[31]);
+    MULADD(at[1], at[30]);
+    MULADD(at[2], at[29]);
+    MULADD(at[3], at[28]);
+    MULADD(at[4], at[27]);
+    MULADD(at[5], at[26]);
+    MULADD(at[6], at[25]);
+    MULADD(at[7], at[24]);
+    MULADD(at[8], at[23]);
+    MULADD(at[9], at[22]);
+    MULADD(at[10], at[21]);
+    MULADD(at[11], at[20]);
+    MULADD(at[12], at[19]);
+    MULADD(at[13], at[18]);
+    MULADD(at[14], at[17]);
+    MULADD(at[15], at[16]);
+    COMBA_STORE(C->dp[15]);
+    /* 16 */
+    COMBA_FORWARD;
+    MULADD(at[1], at[31]);
+    MULADD(at[2], at[30]);
+    MULADD(at[3], at[29]);
+    MULADD(at[4], at[28]);
+    MULADD(at[5], at[27]);
+    MULADD(at[6], at[26]);
+    MULADD(at[7], at[25]);
+    MULADD(at[8], at[24]);
+    MULADD(at[9], at[23]);
+    MULADD(at[10], at[22]);
+    MULADD(at[11], at[21]);
+    MULADD(at[12], at[20]);
+    MULADD(at[13], at[19]);
+    MULADD(at[14], at[18]);
+    MULADD(at[15], at[17]);
+    COMBA_STORE(C->dp[16]);
+    /* 17 */
+    COMBA_FORWARD;
+    MULADD(at[2], at[31]);
+    MULADD(at[3], at[30]);
+    MULADD(at[4], at[29]);
+    MULADD(at[5], at[28]);
+    MULADD(at[6], at[27]);
+    MULADD(at[7], at[26]);
+    MULADD(at[8], at[25]);
+    MULADD(at[9], at[24]);
+    MULADD(at[10], at[23]);
+    MULADD(at[11], at[22]);
+    MULADD(at[12], at[21]);
+    MULADD(at[13], at[20]);
+    MULADD(at[14], at[19]);
+    MULADD(at[15], at[18]);
+    COMBA_STORE(C->dp[17]);
+    /* 18 */
+    COMBA_FORWARD;
+    MULADD(at[3], at[31]);
+    MULADD(at[4], at[30]);
+    MULADD(at[5], at[29]);
+    MULADD(at[6], at[28]);
+    MULADD(at[7], at[27]);
+    MULADD(at[8], at[26]);
+    MULADD(at[9], at[25]);
+    MULADD(at[10], at[24]);
+    MULADD(at[11], at[23]);
+    MULADD(at[12], at[22]);
+    MULADD(at[13], at[21]);
+    MULADD(at[14], at[20]);
+    MULADD(at[15], at[19]);
+    COMBA_STORE(C->dp[18]);
+    /* 19 */
+    COMBA_FORWARD;
+    MULADD(at[4], at[31]);
+    MULADD(at[5], at[30]);
+    MULADD(at[6], at[29]);
+    MULADD(at[7], at[28]);
+    MULADD(at[8], at[27]);
+    MULADD(at[9], at[26]);
+    MULADD(at[10], at[25]);
+    MULADD(at[11], at[24]);
+    MULADD(at[12], at[23]);
+    MULADD(at[13], at[22]);
+    MULADD(at[14], at[21]);
+    MULADD(at[15], at[20]);
+    COMBA_STORE(C->dp[19]);
+    /* 20 */
+    COMBA_FORWARD;
+    MULADD(at[5], at[31]);
+    MULADD(at[6], at[30]);
+    MULADD(at[7], at[29]);
+    MULADD(at[8], at[28]);
+    MULADD(at[9], at[27]);
+    MULADD(at[10], at[26]);
+    MULADD(at[11], at[25]);
+    MULADD(at[12], at[24]);
+    MULADD(at[13], at[23]);
+    MULADD(at[14], at[22]);
+    MULADD(at[15], at[21]);
+    COMBA_STORE(C->dp[20]);
+    /* 21 */
+    COMBA_FORWARD;
+    MULADD(at[6], at[31]);
+    MULADD(at[7], at[30]);
+    MULADD(at[8], at[29]);
+    MULADD(at[9], at[28]);
+    MULADD(at[10], at[27]);
+    MULADD(at[11], at[26]);
+    MULADD(at[12], at[25]);
+    MULADD(at[13], at[24]);
+    MULADD(at[14], at[23]);
+    MULADD(at[15], at[22]);
+    COMBA_STORE(C->dp[21]);
+    /* 22 */
+    COMBA_FORWARD;
+    MULADD(at[7], at[31]);
+    MULADD(at[8], at[30]);
+    MULADD(at[9], at[29]);
+    MULADD(at[10], at[28]);
+    MULADD(at[11], at[27]);
+    MULADD(at[12], at[26]);
+    MULADD(at[13], at[25]);
+    MULADD(at[14], at[24]);
+    MULADD(at[15], at[23]);
+    COMBA_STORE(C->dp[22]);
+    /* 23 */
+    COMBA_FORWARD;
+    MULADD(at[8], at[31]);
+    MULADD(at[9], at[30]);
+    MULADD(at[10], at[29]);
+    MULADD(at[11], at[28]);
+    MULADD(at[12], at[27]);
+    MULADD(at[13], at[26]);
+    MULADD(at[14], at[25]);
+    MULADD(at[15], at[24]);
+    COMBA_STORE(C->dp[23]);
+    /* 24 */
+    COMBA_FORWARD;
+    MULADD(at[9], at[31]);
+    MULADD(at[10], at[30]);
+    MULADD(at[11], at[29]);
+    MULADD(at[12], at[28]);
+    MULADD(at[13], at[27]);
+    MULADD(at[14], at[26]);
+    MULADD(at[15], at[25]);
+    COMBA_STORE(C->dp[24]);
+    /* 25 */
+    COMBA_FORWARD;
+    MULADD(at[10], at[31]);
+    MULADD(at[11], at[30]);
+    MULADD(at[12], at[29]);
+    MULADD(at[13], at[28]);
+    MULADD(at[14], at[27]);
+    MULADD(at[15], at[26]);
+    COMBA_STORE(C->dp[25]);
+    /* 26 */
+    COMBA_FORWARD;
+    MULADD(at[11], at[31]);
+    MULADD(at[12], at[30]);
+    MULADD(at[13], at[29]);
+    MULADD(at[14], at[28]);
+    MULADD(at[15], at[27]);
+    COMBA_STORE(C->dp[26]);
+    /* 27 */
+    COMBA_FORWARD;
+    MULADD(at[12], at[31]);
+    MULADD(at[13], at[30]);
+    MULADD(at[14], at[29]);
+    MULADD(at[15], at[28]);
+    COMBA_STORE(C->dp[27]);
+    /* 28 */
+    COMBA_FORWARD;
+    MULADD(at[13], at[31]);
+    MULADD(at[14], at[30]);
+    MULADD(at[15], at[29]);
+    COMBA_STORE(C->dp[28]);
+    /* 29 */
+    COMBA_FORWARD;
+    MULADD(at[14], at[31]);
+    MULADD(at[15], at[30]);
+    COMBA_STORE(C->dp[29]);
+    /* 30 */
+    COMBA_FORWARD;
+    MULADD(at[15], at[31]);
+    COMBA_STORE(C->dp[30]);
+    COMBA_STORE2(C->dp[31]); /* top digit is the carry word c1 left over from column 30 */
+    C->used = 32; /* C->dp[0..31] were all written above; nothing here checks or grows C's allocation */
+    C->sign = A->sign ^ B->sign;
+    mp_clamp(C); /* strip high-order zero digits */
+    COMBA_FINI;
+}
+
+void
+s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C)
+{
+    mp_digit c0, c1, c2, at[64];
+
+    memcpy(at, A->dp, 32 * sizeof(mp_digit));
+    memcpy(at + 32, B->dp, 32 * sizeof(mp_digit));
+    COMBA_START;
+
+    COMBA_CLEAR;
+    /* 0 */
+    MULADD(at[0], at[32]);
+    COMBA_STORE(C->dp[0]);
+    /* 1 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[33]);
+    MULADD(at[1], at[32]);
+    COMBA_STORE(C->dp[1]);
+    /* 2 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[34]);
+    MULADD(at[1], at[33]);
+    MULADD(at[2], at[32]);
+    COMBA_STORE(C->dp[2]);
+    /* 3 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[35]);
+    MULADD(at[1], at[34]);
+    MULADD(at[2], at[33]);
+    MULADD(at[3], at[32]);
+    COMBA_STORE(C->dp[3]);
+    /* 4 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[36]);
+    MULADD(at[1], at[35]);
+    MULADD(at[2], at[34]);
+    MULADD(at[3], at[33]);
+    MULADD(at[4], at[32]);
+    COMBA_STORE(C->dp[4]);
+    /* 5 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[37]);
+    MULADD(at[1], at[36]);
+    MULADD(at[2], at[35]);
+    MULADD(at[3], at[34]);
+    MULADD(at[4], at[33]);
+    MULADD(at[5], at[32]);
+    COMBA_STORE(C->dp[5]);
+    /* 6 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[38]);
+    MULADD(at[1], at[37]);
+    MULADD(at[2], at[36]);
+    MULADD(at[3], at[35]);
+    MULADD(at[4], at[34]);
+    MULADD(at[5], at[33]);
+    MULADD(at[6], at[32]);
+    COMBA_STORE(C->dp[6]);
+    /* 7 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[39]);
+    MULADD(at[1], at[38]);
+    MULADD(at[2], at[37]);
+    MULADD(at[3], at[36]);
+    MULADD(at[4], at[35]);
+    MULADD(at[5], at[34]);
+    MULADD(at[6], at[33]);
+    MULADD(at[7], at[32]);
+    COMBA_STORE(C->dp[7]);
+    /* 8 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[40]);
+    MULADD(at[1], at[39]);
+    MULADD(at[2], at[38]);
+    MULADD(at[3], at[37]);
+    MULADD(at[4], at[36]);
+    MULADD(at[5], at[35]);
+    MULADD(at[6], at[34]);
+    MULADD(at[7], at[33]);
+    MULADD(at[8], at[32]);
+    COMBA_STORE(C->dp[8]);
+    /* 9 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[41]);
+    MULADD(at[1], at[40]);
+    MULADD(at[2], at[39]);
+    MULADD(at[3], at[38]);
+    MULADD(at[4], at[37]);
+    MULADD(at[5], at[36]);
+    MULADD(at[6], at[35]);
+    MULADD(at[7], at[34]);
+    MULADD(at[8], at[33]);
+    MULADD(at[9], at[32]);
+    COMBA_STORE(C->dp[9]);
+    /* 10 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[42]);
+    MULADD(at[1], at[41]);
+    MULADD(at[2], at[40]);
+    MULADD(at[3], at[39]);
+    MULADD(at[4], at[38]);
+    MULADD(at[5], at[37]);
+    MULADD(at[6], at[36]);
+    MULADD(at[7], at[35]);
+    MULADD(at[8], at[34]);
+    MULADD(at[9], at[33]);
+    MULADD(at[10], at[32]);
+    COMBA_STORE(C->dp[10]);
+    /* 11 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[43]);
+    MULADD(at[1], at[42]);
+    MULADD(at[2], at[41]);
+    MULADD(at[3], at[40]);
+    MULADD(at[4], at[39]);
+    MULADD(at[5], at[38]);
+    MULADD(at[6], at[37]);
+    MULADD(at[7], at[36]);
+    MULADD(at[8], at[35]);
+    MULADD(at[9], at[34]);
+    MULADD(at[10], at[33]);
+    MULADD(at[11], at[32]);
+    COMBA_STORE(C->dp[11]);
+    /* 12 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[44]);
+    MULADD(at[1], at[43]);
+    MULADD(at[2], at[42]);
+    MULADD(at[3], at[41]);
+    MULADD(at[4], at[40]);
+    MULADD(at[5], at[39]);
+    MULADD(at[6], at[38]);
+    MULADD(at[7], at[37]);
+    MULADD(at[8], at[36]);
+    MULADD(at[9], at[35]);
+    MULADD(at[10], at[34]);
+    MULADD(at[11], at[33]);
+    MULADD(at[12], at[32]);
+    COMBA_STORE(C->dp[12]);
+    /* 13 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[45]);
+    MULADD(at[1], at[44]);
+    MULADD(at[2], at[43]);
+    MULADD(at[3], at[42]);
+    MULADD(at[4], at[41]);
+    MULADD(at[5], at[40]);
+    MULADD(at[6], at[39]);
+    MULADD(at[7], at[38]);
+    MULADD(at[8], at[37]);
+    MULADD(at[9], at[36]);
+    MULADD(at[10], at[35]);
+    MULADD(at[11], at[34]);
+    MULADD(at[12], at[33]);
+    MULADD(at[13], at[32]);
+    COMBA_STORE(C->dp[13]);
+    /* 14 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[46]);
+    MULADD(at[1], at[45]);
+    MULADD(at[2], at[44]);
+    MULADD(at[3], at[43]);
+    MULADD(at[4], at[42]);
+    MULADD(at[5], at[41]);
+    MULADD(at[6], at[40]);
+    MULADD(at[7], at[39]);
+    MULADD(at[8], at[38]);
+    MULADD(at[9], at[37]);
+    MULADD(at[10], at[36]);
+    MULADD(at[11], at[35]);
+    MULADD(at[12], at[34]);
+    MULADD(at[13], at[33]);
+    MULADD(at[14], at[32]);
+    COMBA_STORE(C->dp[14]);
+    /* 15 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[47]);
+    MULADD(at[1], at[46]);
+    MULADD(at[2], at[45]);
+    MULADD(at[3], at[44]);
+    MULADD(at[4], at[43]);
+    MULADD(at[5], at[42]);
+    MULADD(at[6], at[41]);
+    MULADD(at[7], at[40]);
+    MULADD(at[8], at[39]);
+    MULADD(at[9], at[38]);
+    MULADD(at[10], at[37]);
+    MULADD(at[11], at[36]);
+    MULADD(at[12], at[35]);
+    MULADD(at[13], at[34]);
+    MULADD(at[14], at[33]);
+    MULADD(at[15], at[32]);
+    COMBA_STORE(C->dp[15]);
+    /* 16 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[48]);
+    MULADD(at[1], at[47]);
+    MULADD(at[2], at[46]);
+    MULADD(at[3], at[45]);
+    MULADD(at[4], at[44]);
+    MULADD(at[5], at[43]);
+    MULADD(at[6], at[42]);
+    MULADD(at[7], at[41]);
+    MULADD(at[8], at[40]);
+    MULADD(at[9], at[39]);
+    MULADD(at[10], at[38]);
+    MULADD(at[11], at[37]);
+    MULADD(at[12], at[36]);
+    MULADD(at[13], at[35]);
+    MULADD(at[14], at[34]);
+    MULADD(at[15], at[33]);
+    MULADD(at[16], at[32]);
+    COMBA_STORE(C->dp[16]);
+    /* 17 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[49]);
+    MULADD(at[1], at[48]);
+    MULADD(at[2], at[47]);
+    MULADD(at[3], at[46]);
+    MULADD(at[4], at[45]);
+    MULADD(at[5], at[44]);
+    MULADD(at[6], at[43]);
+    MULADD(at[7], at[42]);
+    MULADD(at[8], at[41]);
+    MULADD(at[9], at[40]);
+    MULADD(at[10], at[39]);
+    MULADD(at[11], at[38]);
+    MULADD(at[12], at[37]);
+    MULADD(at[13], at[36]);
+    MULADD(at[14], at[35]);
+    MULADD(at[15], at[34]);
+    MULADD(at[16], at[33]);
+    MULADD(at[17], at[32]);
+    COMBA_STORE(C->dp[17]);
+    /* 18 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[50]);
+    MULADD(at[1], at[49]);
+    MULADD(at[2], at[48]);
+    MULADD(at[3], at[47]);
+    MULADD(at[4], at[46]);
+    MULADD(at[5], at[45]);
+    MULADD(at[6], at[44]);
+    MULADD(at[7], at[43]);
+    MULADD(at[8], at[42]);
+    MULADD(at[9], at[41]);
+    MULADD(at[10], at[40]);
+    MULADD(at[11], at[39]);
+    MULADD(at[12], at[38]);
+    MULADD(at[13], at[37]);
+    MULADD(at[14], at[36]);
+    MULADD(at[15], at[35]);
+    MULADD(at[16], at[34]);
+    MULADD(at[17], at[33]);
+    MULADD(at[18], at[32]);
+    COMBA_STORE(C->dp[18]);
+    /* 19 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[51]);
+    MULADD(at[1], at[50]);
+    MULADD(at[2], at[49]);
+    MULADD(at[3], at[48]);
+    MULADD(at[4], at[47]);
+    MULADD(at[5], at[46]);
+    MULADD(at[6], at[45]);
+    MULADD(at[7], at[44]);
+    MULADD(at[8], at[43]);
+    MULADD(at[9], at[42]);
+    MULADD(at[10], at[41]);
+    MULADD(at[11], at[40]);
+    MULADD(at[12], at[39]);
+    MULADD(at[13], at[38]);
+    MULADD(at[14], at[37]);
+    MULADD(at[15], at[36]);
+    MULADD(at[16], at[35]);
+    MULADD(at[17], at[34]);
+    MULADD(at[18], at[33]);
+    MULADD(at[19], at[32]);
+    COMBA_STORE(C->dp[19]);
+    /* 20 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[52]);
+    MULADD(at[1], at[51]);
+    MULADD(at[2], at[50]);
+    MULADD(at[3], at[49]);
+    MULADD(at[4], at[48]);
+    MULADD(at[5], at[47]);
+    MULADD(at[6], at[46]);
+    MULADD(at[7], at[45]);
+    MULADD(at[8], at[44]);
+    MULADD(at[9], at[43]);
+    MULADD(at[10], at[42]);
+    MULADD(at[11], at[41]);
+    MULADD(at[12], at[40]);
+    MULADD(at[13], at[39]);
+    MULADD(at[14], at[38]);
+    MULADD(at[15], at[37]);
+    MULADD(at[16], at[36]);
+    MULADD(at[17], at[35]);
+    MULADD(at[18], at[34]);
+    MULADD(at[19], at[33]);
+    MULADD(at[20], at[32]);
+    COMBA_STORE(C->dp[20]);
+    /* 21 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[53]);
+    MULADD(at[1], at[52]);
+    MULADD(at[2], at[51]);
+    MULADD(at[3], at[50]);
+    MULADD(at[4], at[49]);
+    MULADD(at[5], at[48]);
+    MULADD(at[6], at[47]);
+    MULADD(at[7], at[46]);
+    MULADD(at[8], at[45]);
+    MULADD(at[9], at[44]);
+    MULADD(at[10], at[43]);
+    MULADD(at[11], at[42]);
+    MULADD(at[12], at[41]);
+    MULADD(at[13], at[40]);
+    MULADD(at[14], at[39]);
+    MULADD(at[15], at[38]);
+    MULADD(at[16], at[37]);
+    MULADD(at[17], at[36]);
+    MULADD(at[18], at[35]);
+    MULADD(at[19], at[34]);
+    MULADD(at[20], at[33]);
+    MULADD(at[21], at[32]);
+    COMBA_STORE(C->dp[21]);
+    /* 22 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[54]);
+    MULADD(at[1], at[53]);
+    MULADD(at[2], at[52]);
+    MULADD(at[3], at[51]);
+    MULADD(at[4], at[50]);
+    MULADD(at[5], at[49]);
+    MULADD(at[6], at[48]);
+    MULADD(at[7], at[47]);
+    MULADD(at[8], at[46]);
+    MULADD(at[9], at[45]);
+    MULADD(at[10], at[44]);
+    MULADD(at[11], at[43]);
+    MULADD(at[12], at[42]);
+    MULADD(at[13], at[41]);
+    MULADD(at[14], at[40]);
+    MULADD(at[15], at[39]);
+    MULADD(at[16], at[38]);
+    MULADD(at[17], at[37]);
+    MULADD(at[18], at[36]);
+    MULADD(at[19], at[35]);
+    MULADD(at[20], at[34]);
+    MULADD(at[21], at[33]);
+    MULADD(at[22], at[32]);
+    COMBA_STORE(C->dp[22]);
+    /* 23 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[55]);
+    MULADD(at[1], at[54]);
+    MULADD(at[2], at[53]);
+    MULADD(at[3], at[52]);
+    MULADD(at[4], at[51]);
+    MULADD(at[5], at[50]);
+    MULADD(at[6], at[49]);
+    MULADD(at[7], at[48]);
+    MULADD(at[8], at[47]);
+    MULADD(at[9], at[46]);
+    MULADD(at[10], at[45]);
+    MULADD(at[11], at[44]);
+    MULADD(at[12], at[43]);
+    MULADD(at[13], at[42]);
+    MULADD(at[14], at[41]);
+    MULADD(at[15], at[40]);
+    MULADD(at[16], at[39]);
+    MULADD(at[17], at[38]);
+    MULADD(at[18], at[37]);
+    MULADD(at[19], at[36]);
+    MULADD(at[20], at[35]);
+    MULADD(at[21], at[34]);
+    MULADD(at[22], at[33]);
+    MULADD(at[23], at[32]);
+    COMBA_STORE(C->dp[23]);
+    /* 24 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[56]);
+    MULADD(at[1], at[55]);
+    MULADD(at[2], at[54]);
+    MULADD(at[3], at[53]);
+    MULADD(at[4], at[52]);
+    MULADD(at[5], at[51]);
+    MULADD(at[6], at[50]);
+    MULADD(at[7], at[49]);
+    MULADD(at[8], at[48]);
+    MULADD(at[9], at[47]);
+    MULADD(at[10], at[46]);
+    MULADD(at[11], at[45]);
+    MULADD(at[12], at[44]);
+    MULADD(at[13], at[43]);
+    MULADD(at[14], at[42]);
+    MULADD(at[15], at[41]);
+    MULADD(at[16], at[40]);
+    MULADD(at[17], at[39]);
+    MULADD(at[18], at[38]);
+    MULADD(at[19], at[37]);
+    MULADD(at[20], at[36]);
+    MULADD(at[21], at[35]);
+    MULADD(at[22], at[34]);
+    MULADD(at[23], at[33]);
+    MULADD(at[24], at[32]);
+    COMBA_STORE(C->dp[24]);
+    /* 25 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[57]);
+    MULADD(at[1], at[56]);
+    MULADD(at[2], at[55]);
+    MULADD(at[3], at[54]);
+    MULADD(at[4], at[53]);
+    MULADD(at[5], at[52]);
+    MULADD(at[6], at[51]);
+    MULADD(at[7], at[50]);
+    MULADD(at[8], at[49]);
+    MULADD(at[9], at[48]);
+    MULADD(at[10], at[47]);
+    MULADD(at[11], at[46]);
+    MULADD(at[12], at[45]);
+    MULADD(at[13], at[44]);
+    MULADD(at[14], at[43]);
+    MULADD(at[15], at[42]);
+    MULADD(at[16], at[41]);
+    MULADD(at[17], at[40]);
+    MULADD(at[18], at[39]);
+    MULADD(at[19], at[38]);
+    MULADD(at[20], at[37]);
+    MULADD(at[21], at[36]);
+    MULADD(at[22], at[35]);
+    MULADD(at[23], at[34]);
+    MULADD(at[24], at[33]);
+    MULADD(at[25], at[32]);
+    COMBA_STORE(C->dp[25]);
+    /* 26 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[58]);
+    MULADD(at[1], at[57]);
+    MULADD(at[2], at[56]);
+    MULADD(at[3], at[55]);
+    MULADD(at[4], at[54]);
+    MULADD(at[5], at[53]);
+    MULADD(at[6], at[52]);
+    MULADD(at[7], at[51]);
+    MULADD(at[8], at[50]);
+    MULADD(at[9], at[49]);
+    MULADD(at[10], at[48]);
+    MULADD(at[11], at[47]);
+    MULADD(at[12], at[46]);
+    MULADD(at[13], at[45]);
+    MULADD(at[14], at[44]);
+    MULADD(at[15], at[43]);
+    MULADD(at[16], at[42]);
+    MULADD(at[17], at[41]);
+    MULADD(at[18], at[40]);
+    MULADD(at[19], at[39]);
+    MULADD(at[20], at[38]);
+    MULADD(at[21], at[37]);
+    MULADD(at[22], at[36]);
+    MULADD(at[23], at[35]);
+    MULADD(at[24], at[34]);
+    MULADD(at[25], at[33]);
+    MULADD(at[26], at[32]);
+    COMBA_STORE(C->dp[26]);
+    /* 27 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[59]);
+    MULADD(at[1], at[58]);
+    MULADD(at[2], at[57]);
+    MULADD(at[3], at[56]);
+    MULADD(at[4], at[55]);
+    MULADD(at[5], at[54]);
+    MULADD(at[6], at[53]);
+    MULADD(at[7], at[52]);
+    MULADD(at[8], at[51]);
+    MULADD(at[9], at[50]);
+    MULADD(at[10], at[49]);
+    MULADD(at[11], at[48]);
+    MULADD(at[12], at[47]);
+    MULADD(at[13], at[46]);
+    MULADD(at[14], at[45]);
+    MULADD(at[15], at[44]);
+    MULADD(at[16], at[43]);
+    MULADD(at[17], at[42]);
+    MULADD(at[18], at[41]);
+    MULADD(at[19], at[40]);
+    MULADD(at[20], at[39]);
+    MULADD(at[21], at[38]);
+    MULADD(at[22], at[37]);
+    MULADD(at[23], at[36]);
+    MULADD(at[24], at[35]);
+    MULADD(at[25], at[34]);
+    MULADD(at[26], at[33]);
+    MULADD(at[27], at[32]);
+    COMBA_STORE(C->dp[27]);
+    /* 28 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[60]);
+    MULADD(at[1], at[59]);
+    MULADD(at[2], at[58]);
+    MULADD(at[3], at[57]);
+    MULADD(at[4], at[56]);
+    MULADD(at[5], at[55]);
+    MULADD(at[6], at[54]);
+    MULADD(at[7], at[53]);
+    MULADD(at[8], at[52]);
+    MULADD(at[9], at[51]);
+    MULADD(at[10], at[50]);
+    MULADD(at[11], at[49]);
+    MULADD(at[12], at[48]);
+    MULADD(at[13], at[47]);
+    MULADD(at[14], at[46]);
+    MULADD(at[15], at[45]);
+    MULADD(at[16], at[44]);
+    MULADD(at[17], at[43]);
+    MULADD(at[18], at[42]);
+    MULADD(at[19], at[41]);
+    MULADD(at[20], at[40]);
+    MULADD(at[21], at[39]);
+    MULADD(at[22], at[38]);
+    MULADD(at[23], at[37]);
+    MULADD(at[24], at[36]);
+    MULADD(at[25], at[35]);
+    MULADD(at[26], at[34]);
+    MULADD(at[27], at[33]);
+    MULADD(at[28], at[32]);
+    COMBA_STORE(C->dp[28]);
+    /* 29 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[61]);
+    MULADD(at[1], at[60]);
+    MULADD(at[2], at[59]);
+    MULADD(at[3], at[58]);
+    MULADD(at[4], at[57]);
+    MULADD(at[5], at[56]);
+    MULADD(at[6], at[55]);
+    MULADD(at[7], at[54]);
+    MULADD(at[8], at[53]);
+    MULADD(at[9], at[52]);
+    MULADD(at[10], at[51]);
+    MULADD(at[11], at[50]);
+    MULADD(at[12], at[49]);
+    MULADD(at[13], at[48]);
+    MULADD(at[14], at[47]);
+    MULADD(at[15], at[46]);
+    MULADD(at[16], at[45]);
+    MULADD(at[17], at[44]);
+    MULADD(at[18], at[43]);
+    MULADD(at[19], at[42]);
+    MULADD(at[20], at[41]);
+    MULADD(at[21], at[40]);
+    MULADD(at[22], at[39]);
+    MULADD(at[23], at[38]);
+    MULADD(at[24], at[37]);
+    MULADD(at[25], at[36]);
+    MULADD(at[26], at[35]);
+    MULADD(at[27], at[34]);
+    MULADD(at[28], at[33]);
+    MULADD(at[29], at[32]);
+    COMBA_STORE(C->dp[29]);
+    /* 30 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[62]);
+    MULADD(at[1], at[61]);
+    MULADD(at[2], at[60]);
+    MULADD(at[3], at[59]);
+    MULADD(at[4], at[58]);
+    MULADD(at[5], at[57]);
+    MULADD(at[6], at[56]);
+    MULADD(at[7], at[55]);
+    MULADD(at[8], at[54]);
+    MULADD(at[9], at[53]);
+    MULADD(at[10], at[52]);
+    MULADD(at[11], at[51]);
+    MULADD(at[12], at[50]);
+    MULADD(at[13], at[49]);
+    MULADD(at[14], at[48]);
+    MULADD(at[15], at[47]);
+    MULADD(at[16], at[46]);
+    MULADD(at[17], at[45]);
+    MULADD(at[18], at[44]);
+    MULADD(at[19], at[43]);
+    MULADD(at[20], at[42]);
+    MULADD(at[21], at[41]);
+    MULADD(at[22], at[40]);
+    MULADD(at[23], at[39]);
+    MULADD(at[24], at[38]);
+    MULADD(at[25], at[37]);
+    MULADD(at[26], at[36]);
+    MULADD(at[27], at[35]);
+    MULADD(at[28], at[34]);
+    MULADD(at[29], at[33]);
+    MULADD(at[30], at[32]);
+    COMBA_STORE(C->dp[30]);
+    /* 31 */
+    COMBA_FORWARD;
+    MULADD(at[0], at[63]);
+    MULADD(at[1], at[62]);
+    MULADD(at[2], at[61]);
+    MULADD(at[3], at[60]);
+    MULADD(at[4], at[59]);
+    MULADD(at[5], at[58]);
+    MULADD(at[6], at[57]);
+    MULADD(at[7], at[56]);
+    MULADD(at[8], at[55]);
+    MULADD(at[9], at[54]);
+    MULADD(at[10], at[53]);
+    MULADD(at[11], at[52]);
+    MULADD(at[12], at[51]);
+    MULADD(at[13], at[50]);
+    MULADD(at[14], at[49]);
+    MULADD(at[15], at[48]);
+    MULADD(at[16], at[47]);
+    MULADD(at[17], at[46]);
+    MULADD(at[18], at[45]);
+    MULADD(at[19], at[44]);
+    MULADD(at[20], at[43]);
+    MULADD(at[21], at[42]);
+    MULADD(at[22], at[41]);
+    MULADD(at[23], at[40]);
+    MULADD(at[24], at[39]);
+    MULADD(at[25], at[38]);
+    MULADD(at[26], at[37]);
+    MULADD(at[27], at[36]);
+    MULADD(at[28], at[35]);
+    MULADD(at[29], at[34]);
+    MULADD(at[30], at[33]);
+    MULADD(at[31], at[32]);
+    COMBA_STORE(C->dp[31]);
+    /* 32 */
+    COMBA_FORWARD;
+    MULADD(at[1], at[63]);
+    MULADD(at[2], at[62]);
+    MULADD(at[3], at[61]);
+    MULADD(at[4], at[60]);
+    MULADD(at[5], at[59]);
+    MULADD(at[6], at[58]);
+    MULADD(at[7], at[57]);
+    MULADD(at[8], at[56]);
+    MULADD(at[9], at[55]);
+    MULADD(at[10], at[54]);
+    MULADD(at[11], at[53]);
+    MULADD(at[12], at[52]);
+    MULADD(at[13], at[51]);
+    MULADD(at[14], at[50]);
+    MULADD(at[15], at[49]);
+    MULADD(at[16], at[48]);
+    MULADD(at[17], at[47]);
+    MULADD(at[18], at[46]);
+    MULADD(at[19], at[45]);
+    MULADD(at[20], at[44]);
+    MULADD(at[21], at[43]);
+    MULADD(at[22], at[42]);
+    MULADD(at[23], at[41]);
+    MULADD(at[24], at[40]);
+    MULADD(at[25], at[39]);
+    MULADD(at[26], at[38]);
+    MULADD(at[27], at[37]);
+    MULADD(at[28], at[36]);
+    MULADD(at[29], at[35]);
+    MULADD(at[30], at[34]);
+    MULADD(at[31], at[33]);
+    COMBA_STORE(C->dp[32]);
+    /* 33 */
+    COMBA_FORWARD;
+    MULADD(at[2], at[63]);
+    MULADD(at[3], at[62]);
+    MULADD(at[4], at[61]);
+    MULADD(at[5], at[60]);
+    MULADD(at[6], at[59]);
+    MULADD(at[7], at[58]);
+    MULADD(at[8], at[57]);
+    MULADD(at[9], at[56]);
+    MULADD(at[10], at[55]);
+    MULADD(at[11], at[54]);
+    MULADD(at[12], at[53]);
+    MULADD(at[13], at[52]);
+    MULADD(at[14], at[51]);
+    MULADD(at[15], at[50]);
+    MULADD(at[16], at[49]);
+    MULADD(at[17], at[48]);
+    MULADD(at[18], at[47]);
+    MULADD(at[19], at[46]);
+    MULADD(at[20], at[45]);
+    MULADD(at[21], at[44]);
+    MULADD(at[22], at[43]);
+    MULADD(at[23], at[42]);
+    MULADD(at[24], at[41]);
+    MULADD(at[25], at[40]);
+    MULADD(at[26], at[39]);
+    MULADD(at[27], at[38]);
+    MULADD(at[28], at[37]);
+    MULADD(at[29], at[36]);
+    MULADD(at[30], at[35]);
+    MULADD(at[31], at[34]);
+    COMBA_STORE(C->dp[33]);
+    /* 34 */
+    COMBA_FORWARD;
+    MULADD(at[3], at[63]);
+    MULADD(at[4], at[62]);
+    MULADD(at[5], at[61]);
+    MULADD(at[6], at[60]);
+    MULADD(at[7], at[59]);
+    MULADD(at[8], at[58]);
+    MULADD(at[9], at[57]);
+    MULADD(at[10], at[56]);
+    MULADD(at[11], at[55]);
+    MULADD(at[12], at[54]);
+    MULADD(at[13], at[53]);
+    MULADD(at[14], at[52]);
+    MULADD(at[15], at[51]);
+    MULADD(at[16], at[50]);
+    MULADD(at[17], at[49]);
+    MULADD(at[18], at[48]);
+    MULADD(at[19], at[47]);
+    MULADD(at[20], at[46]);
+    MULADD(at[21], at[45]);
+    MULADD(at[22], at[44]);
+    MULADD(at[23], at[43]);
+    MULADD(at[24], at[42]);
+    MULADD(at[25], at[41]);
+    MULADD(at[26], at[40]);
+    MULADD(at[27], at[39]);
+    MULADD(at[28], at[38]);
+    MULADD(at[29], at[37]);
+    MULADD(at[30], at[36]);
+    MULADD(at[31], at[35]);
+    COMBA_STORE(C->dp[34]);
+    /* 35 */
+    COMBA_FORWARD;
+    MULADD(at[4], at[63]);
+    MULADD(at[5], at[62]);
+    MULADD(at[6], at[61]);
+    MULADD(at[7], at[60]);
+    MULADD(at[8], at[59]);
+    MULADD(at[9], at[58]);
+    MULADD(at[10], at[57]);
+    MULADD(at[11], at[56]);
+    MULADD(at[12], at[55]);
+    MULADD(at[13], at[54]);
+    MULADD(at[14], at[53]);
+    MULADD(at[15], at[52]);
+    MULADD(at[16], at[51]);
+    MULADD(at[17], at[50]);
+    MULADD(at[18], at[49]);
+    MULADD(at[19], at[48]);
+    MULADD(at[20], at[47]);
+    MULADD(at[21], at[46]);
+    MULADD(at[22], at[45]);
+    MULADD(at[23], at[44]);
+    MULADD(at[24], at[43]);
+    MULADD(at[25], at[42]);
+    MULADD(at[26], at[41]);
+    MULADD(at[27], at[40]);
+    MULADD(at[28], at[39]);
+    MULADD(at[29], at[38]);
+    MULADD(at[30], at[37]);
+    MULADD(at[31], at[36]);
+    COMBA_STORE(C->dp[35]);
+    /* 36 */
+    COMBA_FORWARD;
+    MULADD(at[5], at[63]);
+    MULADD(at[6], at[62]);
+    MULADD(at[7], at[61]);
+    MULADD(at[8], at[60]);
+    MULADD(at[9], at[59]);
+    MULADD(at[10], at[58]);
+    MULADD(at[11], at[57]);
+    MULADD(at[12], at[56]);
+    MULADD(at[13], at[55]);
+    MULADD(at[14], at[54]);
+    MULADD(at[15], at[53]);
+    MULADD(at[16], at[52]);
+    MULADD(at[17], at[51]);
+    MULADD(at[18], at[50]);
+    MULADD(at[19], at[49]);
+    MULADD(at[20], at[48]);
+    MULADD(at[21], at[47]);
+    MULADD(at[22], at[46]);
+    MULADD(at[23], at[45]);
+    MULADD(at[24], at[44]);
+    MULADD(at[25], at[43]);
+    MULADD(at[26], at[42]);
+    MULADD(at[27], at[41]);
+    MULADD(at[28], at[40]);
+    MULADD(at[29], at[39]);
+    MULADD(at[30], at[38]);
+    MULADD(at[31], at[37]);
+    COMBA_STORE(C->dp[36]);
+    /* 37 */
+    COMBA_FORWARD;
+    MULADD(at[6], at[63]);
+    MULADD(at[7], at[62]);
+    MULADD(at[8], at[61]);
+    MULADD(at[9], at[60]);
+    MULADD(at[10], at[59]);
+    MULADD(at[11], at[58]);
+    MULADD(at[12], at[57]);
+    MULADD(at[13], at[56]);
+    MULADD(at[14], at[55]);
+    MULADD(at[15], at[54]);
+    MULADD(at[16], at[53]);
+    MULADD(at[17], at[52]);
+    MULADD(at[18], at[51]);
+    MULADD(at[19], at[50]);
+    MULADD(at[20], at[49]);
+    MULADD(at[21], at[48]);
+    MULADD(at[22], at[47]);
+    MULADD(at[23], at[46]);
+    MULADD(at[24], at[45]);
+    MULADD(at[25], at[44]);
+    MULADD(at[26], at[43]);
+    MULADD(at[27], at[42]);
+    MULADD(at[28], at[41]);
+    MULADD(at[29], at[40]);
+    MULADD(at[30], at[39]);
+    MULADD(at[31], at[38]);
+    COMBA_STORE(C->dp[37]);
+    /* 38 */
+    COMBA_FORWARD;
+    MULADD(at[7], at[63]);
+    MULADD(at[8], at[62]);
+    MULADD(at[9], at[61]);
+    MULADD(at[10], at[60]);
+    MULADD(at[11], at[59]);
+    MULADD(at[12], at[58]);
+    MULADD(at[13], at[57]);
+    MULADD(at[14], at[56]);
+    MULADD(at[15], at[55]);
+    MULADD(at[16], at[54]);
+    MULADD(at[17], at[53]);
+    MULADD(at[18], at[52]);
+    MULADD(at[19], at[51]);
+    MULADD(at[20], at[50]);
+    MULADD(at[21], at[49]);
+    MULADD(at[22], at[48]);
+    MULADD(at[23], at[47]);
+    MULADD(at[24], at[46]);
+    MULADD(at[25], at[45]);
+    MULADD(at[26], at[44]);
+    MULADD(at[27], at[43]);
+    MULADD(at[28], at[42]);
+    MULADD(at[29], at[41]);
+    MULADD(at[30], at[40]);
+    MULADD(at[31], at[39]);
+    COMBA_STORE(C->dp[38]);
+    /* 39 */
+    COMBA_FORWARD;
+    MULADD(at[8], at[63]);
+    MULADD(at[9], at[62]);
+    MULADD(at[10], at[61]);
+    MULADD(at[11], at[60]);
+    MULADD(at[12], at[59]);
+    MULADD(at[13], at[58]);
+    MULADD(at[14], at[57]);
+    MULADD(at[15], at[56]);
+    MULADD(at[16], at[55]);
+    MULADD(at[17], at[54]);
+    MULADD(at[18], at[53]);
+    MULADD(at[19], at[52]);
+    MULADD(at[20], at[51]);
+    MULADD(at[21], at[50]);
+    MULADD(at[22], at[49]);
+    MULADD(at[23], at[48]);
+    MULADD(at[24], at[47]);
+    MULADD(at[25], at[46]);
+    MULADD(at[26], at[45]);
+    MULADD(at[27], at[44]);
+    MULADD(at[28], at[43]);
+    MULADD(at[29], at[42]);
+    MULADD(at[30], at[41]);
+    MULADD(at[31], at[40]);
+    COMBA_STORE(C->dp[39]);
+    /* 40 */
+    COMBA_FORWARD;
+    MULADD(at[9], at[63]);
+    MULADD(at[10], at[62]);
+    MULADD(at[11], at[61]);
+    MULADD(at[12], at[60]);
+    MULADD(at[13], at[59]);
+    MULADD(at[14], at[58]);
+    MULADD(at[15], at[57]);
+    MULADD(at[16], at[56]);
+    MULADD(at[17], at[55]);
+    MULADD(at[18], at[54]);
+    MULADD(at[19], at[53]);
+    MULADD(at[20], at[52]);
+    MULADD(at[21], at[51]);
+    MULADD(at[22], at[50]);
+    MULADD(at[23], at[49]);
+    MULADD(at[24], at[48]);
+    MULADD(at[25], at[47]);
+    MULADD(at[26], at[46]);
+    MULADD(at[27], at[45]);
+    MULADD(at[28], at[44]);
+    MULADD(at[29], at[43]);
+    MULADD(at[30], at[42]);
+    MULADD(at[31], at[41]);
+    COMBA_STORE(C->dp[40]);
+    /* 41 */
+    COMBA_FORWARD;
+    MULADD(at[10], at[63]);
+    MULADD(at[11], at[62]);
+    MULADD(at[12], at[61]);
+    MULADD(at[13], at[60]);
+    MULADD(at[14], at[59]);
+    MULADD(at[15], at[58]);
+    MULADD(at[16], at[57]);
+    MULADD(at[17], at[56]);
+    MULADD(at[18], at[55]);
+    MULADD(at[19], at[54]);
+    MULADD(at[20], at[53]);
+    MULADD(at[21], at[52]);
+    MULADD(at[22], at[51]);
+    MULADD(at[23], at[50]);
+    MULADD(at[24], at[49]);
+    MULADD(at[25], at[48]);
+    MULADD(at[26], at[47]);
+    MULADD(at[27], at[46]);
+    MULADD(at[28], at[45]);
+    MULADD(at[29], at[44]);
+    MULADD(at[30], at[43]);
+    MULADD(at[31], at[42]);
+    COMBA_STORE(C->dp[41]);
+    /* 42 */
+    COMBA_FORWARD;
+    MULADD(at[11], at[63]);
+    MULADD(at[12], at[62]);
+    MULADD(at[13], at[61]);
+    MULADD(at[14], at[60]);
+    MULADD(at[15], at[59]);
+    MULADD(at[16], at[58]);
+    MULADD(at[17], at[57]);
+    MULADD(at[18], at[56]);
+    MULADD(at[19], at[55]);
+    MULADD(at[20], at[54]);
+    MULADD(at[21], at[53]);
+    MULADD(at[22], at[52]);
+    MULADD(at[23], at[51]);
+    MULADD(at[24], at[50]);
+    MULADD(at[25], at[49]);
+    MULADD(at[26], at[48]);
+    MULADD(at[27], at[47]);
+    MULADD(at[28], at[46]);
+    MULADD(at[29], at[45]);
+    MULADD(at[30], at[44]);
+    MULADD(at[31], at[43]);
+    COMBA_STORE(C->dp[42]);
+    /* 43 */
+    COMBA_FORWARD;
+    MULADD(at[12], at[63]);
+    MULADD(at[13], at[62]);
+    MULADD(at[14], at[61]);
+    MULADD(at[15], at[60]);
+    MULADD(at[16], at[59]);
+    MULADD(at[17], at[58]);
+    MULADD(at[18], at[57]);
+    MULADD(at[19], at[56]);
+    MULADD(at[20], at[55]);
+    MULADD(at[21], at[54]);
+    MULADD(at[22], at[53]);
+    MULADD(at[23], at[52]);
+    MULADD(at[24], at[51]);
+    MULADD(at[25], at[50]);
+    MULADD(at[26], at[49]);
+    MULADD(at[27], at[48]);
+    MULADD(at[28], at[47]);
+    MULADD(at[29], at[46]);
+    MULADD(at[30], at[45]);
+    MULADD(at[31], at[44]);
+    COMBA_STORE(C->dp[43]);
+    /* 44 */
+    COMBA_FORWARD;
+    MULADD(at[13], at[63]);
+    MULADD(at[14], at[62]);
+    MULADD(at[15], at[61]);
+    MULADD(at[16], at[60]);
+    MULADD(at[17], at[59]);
+    MULADD(at[18], at[58]);
+    MULADD(at[19], at[57]);
+    MULADD(at[20], at[56]);
+    MULADD(at[21], at[55]);
+    MULADD(at[22], at[54]);
+    MULADD(at[23], at[53]);
+    MULADD(at[24], at[52]);
+    MULADD(at[25], at[51]);
+    MULADD(at[26], at[50]);
+    MULADD(at[27], at[49]);
+    MULADD(at[28], at[48]);
+    MULADD(at[29], at[47]);
+    MULADD(at[30], at[46]);
+    MULADD(at[31], at[45]);
+    COMBA_STORE(C->dp[44]);
+    /* 45 */
+    COMBA_FORWARD;
+    MULADD(at[14], at[63]);
+    MULADD(at[15], at[62]);
+    MULADD(at[16], at[61]);
+    MULADD(at[17], at[60]);
+    MULADD(at[18], at[59]);
+    MULADD(at[19], at[58]);
+    MULADD(at[20], at[57]);
+    MULADD(at[21], at[56]);
+    MULADD(at[22], at[55]);
+    MULADD(at[23], at[54]);
+    MULADD(at[24], at[53]);
+    MULADD(at[25], at[52]);
+    MULADD(at[26], at[51]);
+    MULADD(at[27], at[50]);
+    MULADD(at[28], at[49]);
+    MULADD(at[29], at[48]);
+    MULADD(at[30], at[47]);
+    MULADD(at[31], at[46]);
+    COMBA_STORE(C->dp[45]);
+    /* 46 */
+    COMBA_FORWARD;
+    MULADD(at[15], at[63]);
+    MULADD(at[16], at[62]);
+    MULADD(at[17], at[61]);
+    MULADD(at[18], at[60]);
+    MULADD(at[19], at[59]);
+    MULADD(at[20], at[58]);
+    MULADD(at[21], at[57]);
+    MULADD(at[22], at[56]);
+    MULADD(at[23], at[55]);
+    MULADD(at[24], at[54]);
+    MULADD(at[25], at[53]);
+    MULADD(at[26], at[52]);
+    MULADD(at[27], at[51]);
+    MULADD(at[28], at[50]);
+    MULADD(at[29], at[49]);
+    MULADD(at[30], at[48]);
+    MULADD(at[31], at[47]);
+    COMBA_STORE(C->dp[46]);
+    /* 47 */
+    COMBA_FORWARD;
+    MULADD(at[16], at[63]);
+    MULADD(at[17], at[62]);
+    MULADD(at[18], at[61]);
+    MULADD(at[19], at[60]);
+    MULADD(at[20], at[59]);
+    MULADD(at[21], at[58]);
+    MULADD(at[22], at[57]);
+    MULADD(at[23], at[56]);
+    MULADD(at[24], at[55]);
+    MULADD(at[25], at[54]);
+    MULADD(at[26], at[53]);
+    MULADD(at[27], at[52]);
+    MULADD(at[28], at[51]);
+    MULADD(at[29], at[50]);
+    MULADD(at[30], at[49]);
+    MULADD(at[31], at[48]);
+    COMBA_STORE(C->dp[47]);
+    /* 48 */
+    COMBA_FORWARD;
+    MULADD(at[17], at[63]);
+    MULADD(at[18], at[62]);
+    MULADD(at[19], at[61]);
+    MULADD(at[20], at[60]);
+    MULADD(at[21], at[59]);
+    MULADD(at[22], at[58]);
+    MULADD(at[23], at[57]);
+    MULADD(at[24], at[56]);
+    MULADD(at[25], at[55]);
+    MULADD(at[26], at[54]);
+    MULADD(at[27], at[53]);
+    MULADD(at[28], at[52]);
+    MULADD(at[29], at[51]);
+    MULADD(at[30], at[50]);
+    MULADD(at[31], at[49]);
+    COMBA_STORE(C->dp[48]);
+    /* 49 */
+    COMBA_FORWARD;
+    MULADD(at[18], at[63]);
+    MULADD(at[19], at[62]);
+    MULADD(at[20], at[61]);
+    MULADD(at[21], at[60]);
+    MULADD(at[22], at[59]);
+    MULADD(at[23], at[58]);
+    MULADD(at[24], at[57]);
+    MULADD(at[25], at[56]);
+    MULADD(at[26], at[55]);
+    MULADD(at[27], at[54]);
+    MULADD(at[28], at[53]);
+    MULADD(at[29], at[52]);
+    MULADD(at[30], at[51]);
+    MULADD(at[31], at[50]);
+    COMBA_STORE(C->dp[49]);
+    /* 50 */
+    COMBA_FORWARD;
+    MULADD(at[19], at[63]);
+    MULADD(at[20], at[62]);
+    MULADD(at[21], at[61]);
+    MULADD(at[22], at[60]);
+    MULADD(at[23], at[59]);
+    MULADD(at[24], at[58]);
+    MULADD(at[25], at[57]);
+    MULADD(at[26], at[56]);
+    MULADD(at[27], at[55]);
+    MULADD(at[28], at[54]);
+    MULADD(at[29], at[53]);
+    MULADD(at[30], at[52]);
+    MULADD(at[31], at[51]);
+    COMBA_STORE(C->dp[50]);
+    /* 51 */
+    COMBA_FORWARD;
+    MULADD(at[20], at[63]);
+    MULADD(at[21], at[62]);
+    MULADD(at[22], at[61]);
+    MULADD(at[23], at[60]);
+    MULADD(at[24], at[59]);
+    MULADD(at[25], at[58]);
+    MULADD(at[26], at[57]);
+    MULADD(at[27], at[56]);
+    MULADD(at[28], at[55]);
+    MULADD(at[29], at[54]);
+    MULADD(at[30], at[53]);
+    MULADD(at[31], at[52]);
+    COMBA_STORE(C->dp[51]);
+    /* 52 */
+    COMBA_FORWARD;
+    MULADD(at[21], at[63]);
+    MULADD(at[22], at[62]);
+    MULADD(at[23], at[61]);
+    MULADD(at[24], at[60]);
+    MULADD(at[25], at[59]);
+    MULADD(at[26], at[58]);
+    MULADD(at[27], at[57]);
+    MULADD(at[28], at[56]);
+    MULADD(at[29], at[55]);
+    MULADD(at[30], at[54]);
+    MULADD(at[31], at[53]);
+    COMBA_STORE(C->dp[52]);
+    /* 53 */
+    COMBA_FORWARD;
+    MULADD(at[22], at[63]);
+    MULADD(at[23], at[62]);
+    MULADD(at[24], at[61]);
+    MULADD(at[25], at[60]);
+    MULADD(at[26], at[59]);
+    MULADD(at[27], at[58]);
+    MULADD(at[28], at[57]);
+    MULADD(at[29], at[56]);
+    MULADD(at[30], at[55]);
+    MULADD(at[31], at[54]);
+    COMBA_STORE(C->dp[53]);
+    /* 54 */
+    COMBA_FORWARD;
+    MULADD(at[23], at[63]);
+    MULADD(at[24], at[62]);
+    MULADD(at[25], at[61]);
+    MULADD(at[26], at[60]);
+    MULADD(at[27], at[59]);
+    MULADD(at[28], at[58]);
+    MULADD(at[29], at[57]);
+    MULADD(at[30], at[56]);
+    MULADD(at[31], at[55]);
+    COMBA_STORE(C->dp[54]);
+    /* 55 */
+    COMBA_FORWARD;
+    MULADD(at[24], at[63]);
+    MULADD(at[25], at[62]);
+    MULADD(at[26], at[61]);
+    MULADD(at[27], at[60]);
+    MULADD(at[28], at[59]);
+    MULADD(at[29], at[58]);
+    MULADD(at[30], at[57]);
+    MULADD(at[31], at[56]);
+    COMBA_STORE(C->dp[55]);
+    /* 56 */
+    COMBA_FORWARD;
+    MULADD(at[25], at[63]);
+    MULADD(at[26], at[62]);
+    MULADD(at[27], at[61]);
+    MULADD(at[28], at[60]);
+    MULADD(at[29], at[59]);
+    MULADD(at[30], at[58]);
+    MULADD(at[31], at[57]);
+    COMBA_STORE(C->dp[56]);
+    /* 57 */
+    COMBA_FORWARD;
+    MULADD(at[26], at[63]);
+    MULADD(at[27], at[62]);
+    MULADD(at[28], at[61]);
+    MULADD(at[29], at[60]);
+    MULADD(at[30], at[59]);
+    MULADD(at[31], at[58]);
+    COMBA_STORE(C->dp[57]);
+    /* 58 */
+    COMBA_FORWARD;
+    MULADD(at[27], at[63]);
+    MULADD(at[28], at[62]);
+    MULADD(at[29], at[61]);
+    MULADD(at[30], at[60]);
+    MULADD(at[31], at[59]);
+    COMBA_STORE(C->dp[58]);
+    /* 59 */
+    COMBA_FORWARD;
+    MULADD(at[28], at[63]);
+    MULADD(at[29], at[62]);
+    MULADD(at[30], at[61]);
+    MULADD(at[31], at[60]);
+    COMBA_STORE(C->dp[59]);
+    /* 60 */
+    COMBA_FORWARD;
+    MULADD(at[29], at[63]);
+    MULADD(at[30], at[62]);
+    MULADD(at[31], at[61]);
+    COMBA_STORE(C->dp[60]);
+    /* 61 */
+    COMBA_FORWARD;
+    MULADD(at[30], at[63]);
+    MULADD(at[31], at[62]);
+    COMBA_STORE(C->dp[61]);
+    /* 62 */
+    COMBA_FORWARD;
+    MULADD(at[31], at[63]);
+    COMBA_STORE(C->dp[62]);
+    COMBA_STORE2(C->dp[63]);
+    C->used = 64;
+    C->sign = A->sign ^ B->sign;
+    mp_clamp(C);
+    COMBA_FINI;
+}
+
+void
+s_mp_sqr_comba_4(const mp_int *A, mp_int *B)
+{
+    mp_digit *a, b[8], c0, c1, c2; /* c0..c2 are never named below; presumably the COMBA/SQRADD macros use them as the column accumulator - TODO confirm */
+    /* Comba-style square of the four digits a[0..3] of A: result column k collects every a[i]*a[j] with i + j == k; 8 digits go to B. */
+    a = A->dp; /* a[0..3] are read below; A->used is not checked here */
+    COMBA_START;
+
+    /* clear carries (CLEAR_CARRY is defined elsewhere; presumably zeroes the accumulator - TODO confirm) */
+    CLEAR_CARRY;
+
+    /* output 0: square term a[0]*a[0] */
+    SQRADD(a[0], a[0]);
+    COMBA_STORE(b[0]);
+
+    /* output 1: cross pair (0,1); SQRADD2 presumably adds the product twice to cover both a[i]*a[j] and a[j]*a[i] - TODO confirm */
+    CARRY_FORWARD; /* presumably moves the accumulator down one digit for the next column - TODO confirm */
+    SQRADD2(a[0], a[1]);
+    COMBA_STORE(b[1]);
+
+    /* output 2: cross pair (0,2), then square term a[1]*a[1] */
+    CARRY_FORWARD;
+    SQRADD2(a[0], a[2]);
+    SQRADD(a[1], a[1]);
+    COMBA_STORE(b[2]);
+
+    /* output 3: cross pairs (0,3) and (1,2); no square term in an odd column */
+    CARRY_FORWARD;
+    SQRADD2(a[0], a[3]);
+    SQRADD2(a[1], a[2]);
+    COMBA_STORE(b[3]);
+
+    /* output 4: cross pair (1,3), then square term a[2]*a[2] */
+    CARRY_FORWARD;
+    SQRADD2(a[1], a[3]);
+    SQRADD(a[2], a[2]);
+    COMBA_STORE(b[4]);
+
+    /* output 5: cross pair (2,3) */
+    CARRY_FORWARD;
+    SQRADD2(a[2], a[3]);
+    COMBA_STORE(b[5]);
+
+    /* output 6: square term a[3]*a[3]; b[7] is filled right after it */
+    CARRY_FORWARD;
+    SQRADD(a[3], a[3]);
+    COMBA_STORE(b[6]);
+    COMBA_STORE2(b[7]); /* top digit; presumably the carry left over from column 6 - TODO confirm */
+    COMBA_FINI;
+
+    B->used = 8; /* all 8 digits counted first; mp_clamp(B) is called below */
+    B->sign = ZPOS; /* a square is never negative, so A->sign is not consulted */
+    memcpy(B->dp, b, 8 * sizeof(mp_digit)); /* copied from the local b[] only after every read of a[]; B->dp must already have room for 8 digits (no check here) */
+    mp_clamp(B); /* presumably lowers B->used past leading zero digits - TODO confirm */
+}
+
+void
+s_mp_sqr_comba_8(const mp_int *A, mp_int *B)
+{
+    mp_digit *a, b[16], c0, c1, c2, sc0, sc1, sc2; /* none of c0..c2 / sc0..sc2 is named below; presumably the macros use them as accumulators - TODO confirm */
+    /* Comba-style square of the eight digits a[0..7] of A: result column k collects every a[i]*a[j] with i + j == k; 16 digits go to B. */
+    a = A->dp; /* a[0..7] are read below; A->used is not checked here */
+    COMBA_START;
+
+    /* clear carries (CLEAR_CARRY is defined elsewhere; presumably zeroes the accumulator - TODO confirm) */
+    CLEAR_CARRY;
+
+    /* output 0: square term a[0]*a[0] */
+    SQRADD(a[0], a[0]);
+    COMBA_STORE(b[0]);
+
+    /* output 1: cross pair (0,1); SQRADD2 presumably adds the product twice to cover both a[i]*a[j] and a[j]*a[i] - TODO confirm */
+    CARRY_FORWARD; /* presumably moves the accumulator down one digit for the next column - TODO confirm */
+    SQRADD2(a[0], a[1]);
+    COMBA_STORE(b[1]);
+
+    /* output 2: cross pair (0,2), then square term a[1]*a[1] */
+    CARRY_FORWARD;
+    SQRADD2(a[0], a[2]);
+    SQRADD(a[1], a[1]);
+    COMBA_STORE(b[2]);
+
+    /* output 3: cross pairs (0,3) and (1,2) */
+    CARRY_FORWARD;
+    SQRADD2(a[0], a[3]);
+    SQRADD2(a[1], a[2]);
+    COMBA_STORE(b[3]);
+
+    /* output 4: cross pairs (0,4) and (1,3), then square term a[2]*a[2] */
+    CARRY_FORWARD;
+    SQRADD2(a[0], a[4]);
+    SQRADD2(a[1], a[3]);
+    SQRADD(a[2], a[2]);
+    COMBA_STORE(b[4]);
+
+    /* output 5: first column with three cross pairs; from here such columns use SQRADDSC (first pair), SQRADDAC (further pairs), SQRADDDB (finish) */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[5]); /* presumably starts a side sum in sc0..sc2 - TODO confirm */
+    SQRADDAC(a[1], a[4]); /* presumably accumulates into that side sum - TODO confirm */
+    SQRADDAC(a[2], a[3]);
+    SQRADDDB; /* presumably adds the side sum into the column twice - TODO confirm */
+    COMBA_STORE(b[5]);
+
+    /* output 6: cross pairs (0,6), (1,5), (2,4), then square term a[3]*a[3] */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[6]);
+    SQRADDAC(a[1], a[5]);
+    SQRADDAC(a[2], a[4]);
+    SQRADDDB;
+    SQRADD(a[3], a[3]);
+    COMBA_STORE(b[6]);
+
+    /* output 7: middle column, four cross pairs (0,7), (1,6), (2,5), (3,4) */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[7]);
+    SQRADDAC(a[1], a[6]);
+    SQRADDAC(a[2], a[5]);
+    SQRADDAC(a[3], a[4]);
+    SQRADDDB;
+    COMBA_STORE(b[7]);
+
+    /* output 8: cross pairs (1,7), (2,6), (3,5), then square term a[4]*a[4] */
+    CARRY_FORWARD;
+    SQRADDSC(a[1], a[7]);
+    SQRADDAC(a[2], a[6]);
+    SQRADDAC(a[3], a[5]);
+    SQRADDDB;
+    SQRADD(a[4], a[4]);
+    COMBA_STORE(b[8]);
+
+    /* output 9: cross pairs (2,7), (3,6), (4,5) */
+    CARRY_FORWARD;
+    SQRADDSC(a[2], a[7]);
+    SQRADDAC(a[3], a[6]);
+    SQRADDAC(a[4], a[5]);
+    SQRADDDB;
+    COMBA_STORE(b[9]);
+
+    /* output 10: back to two cross pairs (3,7) and (4,6), so SQRADD2 again; then square term a[5]*a[5] */
+    CARRY_FORWARD;
+    SQRADD2(a[3], a[7]);
+    SQRADD2(a[4], a[6]);
+    SQRADD(a[5], a[5]);
+    COMBA_STORE(b[10]);
+
+    /* output 11: cross pairs (4,7) and (5,6) */
+    CARRY_FORWARD;
+    SQRADD2(a[4], a[7]);
+    SQRADD2(a[5], a[6]);
+    COMBA_STORE(b[11]);
+
+    /* output 12: cross pair (5,7), then square term a[6]*a[6] */
+    CARRY_FORWARD;
+    SQRADD2(a[5], a[7]);
+    SQRADD(a[6], a[6]);
+    COMBA_STORE(b[12]);
+
+    /* output 13: cross pair (6,7) */
+    CARRY_FORWARD;
+    SQRADD2(a[6], a[7]);
+    COMBA_STORE(b[13]);
+
+    /* output 14: square term a[7]*a[7]; b[15] is filled right after it */
+    CARRY_FORWARD;
+    SQRADD(a[7], a[7]);
+    COMBA_STORE(b[14]);
+    COMBA_STORE2(b[15]); /* top digit; presumably the carry left over from column 14 - TODO confirm */
+    COMBA_FINI;
+
+    B->used = 16; /* all 16 digits counted first; mp_clamp(B) is called below */
+    B->sign = ZPOS; /* a square is never negative, so A->sign is not consulted */
+    memcpy(B->dp, b, 16 * sizeof(mp_digit)); /* copied from the local b[] only after every read of a[]; B->dp must already have room for 16 digits (no check here) */
+    mp_clamp(B); /* presumably lowers B->used past leading zero digits - TODO confirm */
+}
+
+void
+s_mp_sqr_comba_16(const mp_int *A, mp_int *B)
+{
+    mp_digit *a, b[32], c0, c1, c2, sc0, sc1, sc2;
+
+    a = A->dp;
+    COMBA_START;
+
+    /* clear carries */
+    CLEAR_CARRY;
+
+    /* output 0 */
+    SQRADD(a[0], a[0]);
+    COMBA_STORE(b[0]);
+
+    /* output 1 */
+    CARRY_FORWARD;
+    SQRADD2(a[0], a[1]);
+    COMBA_STORE(b[1]);
+
+    /* output 2 */
+    CARRY_FORWARD;
+    SQRADD2(a[0], a[2]);
+    SQRADD(a[1], a[1]);
+    COMBA_STORE(b[2]);
+
+    /* output 3 */
+    CARRY_FORWARD;
+    SQRADD2(a[0], a[3]);
+    SQRADD2(a[1], a[2]);
+    COMBA_STORE(b[3]);
+
+    /* output 4 */
+    CARRY_FORWARD;
+    SQRADD2(a[0], a[4]);
+    SQRADD2(a[1], a[3]);
+    SQRADD(a[2], a[2]);
+    COMBA_STORE(b[4]);
+
+    /* output 5 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[5]);
+    SQRADDAC(a[1], a[4]);
+    SQRADDAC(a[2], a[3]);
+    SQRADDDB;
+    COMBA_STORE(b[5]);
+
+    /* output 6 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[6]);
+    SQRADDAC(a[1], a[5]);
+    SQRADDAC(a[2], a[4]);
+    SQRADDDB;
+    SQRADD(a[3], a[3]);
+    COMBA_STORE(b[6]);
+
+    /* output 7 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[7]);
+    SQRADDAC(a[1], a[6]);
+    SQRADDAC(a[2], a[5]);
+    SQRADDAC(a[3], a[4]);
+    SQRADDDB;
+    COMBA_STORE(b[7]);
+
+    /* output 8 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[8]);
+    SQRADDAC(a[1], a[7]);
+    SQRADDAC(a[2], a[6]);
+    SQRADDAC(a[3], a[5]);
+    SQRADDDB;
+    SQRADD(a[4], a[4]);
+    COMBA_STORE(b[8]);
+
+    /* output 9 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[9]);
+    SQRADDAC(a[1], a[8]);
+    SQRADDAC(a[2], a[7]);
+    SQRADDAC(a[3], a[6]);
+    SQRADDAC(a[4], a[5]);
+    SQRADDDB;
+    COMBA_STORE(b[9]);
+
+    /* output 10 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[10]);
+    SQRADDAC(a[1], a[9]);
+    SQRADDAC(a[2], a[8]);
+    SQRADDAC(a[3], a[7]);
+    SQRADDAC(a[4], a[6]);
+    SQRADDDB;
+    SQRADD(a[5], a[5]);
+    COMBA_STORE(b[10]);
+
+    /* output 11 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[11]);
+    SQRADDAC(a[1], a[10]);
+    SQRADDAC(a[2], a[9]);
+    SQRADDAC(a[3], a[8]);
+    SQRADDAC(a[4], a[7]);
+    SQRADDAC(a[5], a[6]);
+    SQRADDDB;
+    COMBA_STORE(b[11]);
+
+    /* output 12 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[12]);
+    SQRADDAC(a[1], a[11]);
+    SQRADDAC(a[2], a[10]);
+    SQRADDAC(a[3], a[9]);
+    SQRADDAC(a[4], a[8]);
+    SQRADDAC(a[5], a[7]);
+    SQRADDDB;
+    SQRADD(a[6], a[6]);
+    COMBA_STORE(b[12]);
+
+    /* output 13 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[13]);
+    SQRADDAC(a[1], a[12]);
+    SQRADDAC(a[2], a[11]);
+    SQRADDAC(a[3], a[10]);
+    SQRADDAC(a[4], a[9]);
+    SQRADDAC(a[5], a[8]);
+    SQRADDAC(a[6], a[7]);
+    SQRADDDB;
+    COMBA_STORE(b[13]);
+
+    /* output 14 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[14]);
+    SQRADDAC(a[1], a[13]);
+    SQRADDAC(a[2], a[12]);
+    SQRADDAC(a[3], a[11]);
+    SQRADDAC(a[4], a[10]);
+    SQRADDAC(a[5], a[9]);
+    SQRADDAC(a[6], a[8]);
+    SQRADDDB;
+    SQRADD(a[7], a[7]);
+    COMBA_STORE(b[14]);
+
+    /* output 15 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[15]);
+    SQRADDAC(a[1], a[14]);
+    SQRADDAC(a[2], a[13]);
+    SQRADDAC(a[3], a[12]);
+    SQRADDAC(a[4], a[11]);
+    SQRADDAC(a[5], a[10]);
+    SQRADDAC(a[6], a[9]);
+    SQRADDAC(a[7], a[8]);
+    SQRADDDB;
+    COMBA_STORE(b[15]);
+
+    /* output 16 */
+    CARRY_FORWARD;
+    SQRADDSC(a[1], a[15]);
+    SQRADDAC(a[2], a[14]);
+    SQRADDAC(a[3], a[13]);
+    SQRADDAC(a[4], a[12]);
+    SQRADDAC(a[5], a[11]);
+    SQRADDAC(a[6], a[10]);
+    SQRADDAC(a[7], a[9]);
+    SQRADDDB;
+    SQRADD(a[8], a[8]);
+    COMBA_STORE(b[16]);
+
+    /* output 17 */
+    CARRY_FORWARD;
+    SQRADDSC(a[2], a[15]);
+    SQRADDAC(a[3], a[14]);
+    SQRADDAC(a[4], a[13]);
+    SQRADDAC(a[5], a[12]);
+    SQRADDAC(a[6], a[11]);
+    SQRADDAC(a[7], a[10]);
+    SQRADDAC(a[8], a[9]);
+    SQRADDDB;
+    COMBA_STORE(b[17]);
+
+    /* output 18 */
+    CARRY_FORWARD;
+    SQRADDSC(a[3], a[15]);
+    SQRADDAC(a[4], a[14]);
+    SQRADDAC(a[5], a[13]);
+    SQRADDAC(a[6], a[12]);
+    SQRADDAC(a[7], a[11]);
+    SQRADDAC(a[8], a[10]);
+    SQRADDDB;
+    SQRADD(a[9], a[9]);
+    COMBA_STORE(b[18]);
+
+    /* output 19 */
+    CARRY_FORWARD;
+    SQRADDSC(a[4], a[15]);
+    SQRADDAC(a[5], a[14]);
+    SQRADDAC(a[6], a[13]);
+    SQRADDAC(a[7], a[12]);
+    SQRADDAC(a[8], a[11]);
+    SQRADDAC(a[9], a[10]);
+    SQRADDDB;
+    COMBA_STORE(b[19]);
+
+    /* output 20 */
+    CARRY_FORWARD;
+    SQRADDSC(a[5], a[15]);
+    SQRADDAC(a[6], a[14]);
+    SQRADDAC(a[7], a[13]);
+    SQRADDAC(a[8], a[12]);
+    SQRADDAC(a[9], a[11]);
+    SQRADDDB;
+    SQRADD(a[10], a[10]);
+    COMBA_STORE(b[20]);
+
+    /* output 21 */
+    CARRY_FORWARD;
+    SQRADDSC(a[6], a[15]);
+    SQRADDAC(a[7], a[14]);
+    SQRADDAC(a[8], a[13]);
+    SQRADDAC(a[9], a[12]);
+    SQRADDAC(a[10], a[11]);
+    SQRADDDB;
+    COMBA_STORE(b[21]);
+
+    /* output 22 */
+    CARRY_FORWARD;
+    SQRADDSC(a[7], a[15]);
+    SQRADDAC(a[8], a[14]);
+    SQRADDAC(a[9], a[13]);
+    SQRADDAC(a[10], a[12]);
+    SQRADDDB;
+    SQRADD(a[11], a[11]);
+    COMBA_STORE(b[22]);
+
+    /* output 23 */
+    CARRY_FORWARD;
+    SQRADDSC(a[8], a[15]);
+    SQRADDAC(a[9], a[14]);
+    SQRADDAC(a[10], a[13]);
+    SQRADDAC(a[11], a[12]);
+    SQRADDDB;
+    COMBA_STORE(b[23]);
+
+    /* output 24 */
+    CARRY_FORWARD;
+    SQRADDSC(a[9], a[15]);
+    SQRADDAC(a[10], a[14]);
+    SQRADDAC(a[11], a[13]);
+    SQRADDDB;
+    SQRADD(a[12], a[12]);
+    COMBA_STORE(b[24]);
+
+    /* output 25 */
+    CARRY_FORWARD;
+    SQRADDSC(a[10], a[15]);
+    SQRADDAC(a[11], a[14]);
+    SQRADDAC(a[12], a[13]);
+    SQRADDDB;
+    COMBA_STORE(b[25]);
+
+    /* output 26 */
+    CARRY_FORWARD;
+    SQRADD2(a[11], a[15]);
+    SQRADD2(a[12], a[14]);
+    SQRADD(a[13], a[13]);
+    COMBA_STORE(b[26]);
+
+    /* output 27 */
+    CARRY_FORWARD;
+    SQRADD2(a[12], a[15]);
+    SQRADD2(a[13], a[14]);
+    COMBA_STORE(b[27]);
+
+    /* output 28 */
+    CARRY_FORWARD;
+    SQRADD2(a[13], a[15]);
+    SQRADD(a[14], a[14]);
+    COMBA_STORE(b[28]);
+
+    /* output 29 */
+    CARRY_FORWARD;
+    SQRADD2(a[14], a[15]);
+    COMBA_STORE(b[29]);
+
+    /* output 30 */
+    CARRY_FORWARD;
+    SQRADD(a[15], a[15]);
+    COMBA_STORE(b[30]);
+    COMBA_STORE2(b[31]);
+    COMBA_FINI;
+
+    B->used = 32;
+    B->sign = ZPOS;
+    memcpy(B->dp, b, 32 * sizeof(mp_digit));
+    mp_clamp(B);
+}
+
+void
+s_mp_sqr_comba_32(const mp_int *A, mp_int *B)
+{
+    mp_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2;
+    /* Square a[0..31] (read from A->dp) into the 64-digit scratch b, one unrolled "output" column per section, then copy b into B. */
+    a = A->dp;
+    COMBA_START;
+    /* A->used is never consulted and B->dp gets no capacity check here: callers must pass a 32-digit A and a B with room for 64 digits. */
+    /* clear carries (CLEAR_CARRY is defined elsewhere; presumably zeroes the c0/c1/c2 column accumulator) */
+    CLEAR_CARRY;
+
+    /* output 0: the only index pair with i + j == 0 is (0, 0) */
+    SQRADD(a[0], a[0]);
+    COMBA_STORE(b[0]);
+
+    /* output 1: one cross pair (0, 1); SQRADD2 presumably adds the product twice, covering (i, j) and (j, i) -- confirm against the macro */
+    CARRY_FORWARD;
+    SQRADD2(a[0], a[1]);
+    COMBA_STORE(b[1]);
+
+    /* output 2: cross pair (0, 2) plus the diagonal a[1] * a[1] */
+    CARRY_FORWARD;
+    SQRADD2(a[0], a[2]);
+    SQRADD(a[1], a[1]);
+    COMBA_STORE(b[2]);
+
+    /* output 3 */
+    CARRY_FORWARD;
+    SQRADD2(a[0], a[3]);
+    SQRADD2(a[1], a[2]);
+    COMBA_STORE(b[3]);
+
+    /* output 4 */
+    CARRY_FORWARD;
+    SQRADD2(a[0], a[4]);
+    SQRADD2(a[1], a[3]);
+    SQRADD(a[2], a[2]);
+    COMBA_STORE(b[4]);
+
+    /* output 5: from three cross pairs upward SQRADDSC / SQRADDAC / SQRADDDB replace SQRADD2; presumably the pairs are summed once in sc0..sc2 and SQRADDDB adds that sum in doubled -- TODO confirm against the macro definitions */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[5]);
+    SQRADDAC(a[1], a[4]);
+    SQRADDAC(a[2], a[3]);
+    SQRADDDB;
+    COMBA_STORE(b[5]);
+
+    /* output 6: even column, so the diagonal a[3] * a[3] is added once, after the cross terms */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[6]);
+    SQRADDAC(a[1], a[5]);
+    SQRADDAC(a[2], a[4]);
+    SQRADDDB;
+    SQRADD(a[3], a[3]);
+    COMBA_STORE(b[6]);
+
+    /* output 7 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[7]);
+    SQRADDAC(a[1], a[6]);
+    SQRADDAC(a[2], a[5]);
+    SQRADDAC(a[3], a[4]);
+    SQRADDDB;
+    COMBA_STORE(b[7]);
+
+    /* output 8 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[8]);
+    SQRADDAC(a[1], a[7]);
+    SQRADDAC(a[2], a[6]);
+    SQRADDAC(a[3], a[5]);
+    SQRADDDB;
+    SQRADD(a[4], a[4]);
+    COMBA_STORE(b[8]);
+
+    /* output 9 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[9]);
+    SQRADDAC(a[1], a[8]);
+    SQRADDAC(a[2], a[7]);
+    SQRADDAC(a[3], a[6]);
+    SQRADDAC(a[4], a[5]);
+    SQRADDDB;
+    COMBA_STORE(b[9]);
+
+    /* output 10 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[10]);
+    SQRADDAC(a[1], a[9]);
+    SQRADDAC(a[2], a[8]);
+    SQRADDAC(a[3], a[7]);
+    SQRADDAC(a[4], a[6]);
+    SQRADDDB;
+    SQRADD(a[5], a[5]);
+    COMBA_STORE(b[10]);
+
+    /* output 11 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[11]);
+    SQRADDAC(a[1], a[10]);
+    SQRADDAC(a[2], a[9]);
+    SQRADDAC(a[3], a[8]);
+    SQRADDAC(a[4], a[7]);
+    SQRADDAC(a[5], a[6]);
+    SQRADDDB;
+    COMBA_STORE(b[11]);
+
+    /* output 12 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[12]);
+    SQRADDAC(a[1], a[11]);
+    SQRADDAC(a[2], a[10]);
+    SQRADDAC(a[3], a[9]);
+    SQRADDAC(a[4], a[8]);
+    SQRADDAC(a[5], a[7]);
+    SQRADDDB;
+    SQRADD(a[6], a[6]);
+    COMBA_STORE(b[12]);
+
+    /* output 13 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[13]);
+    SQRADDAC(a[1], a[12]);
+    SQRADDAC(a[2], a[11]);
+    SQRADDAC(a[3], a[10]);
+    SQRADDAC(a[4], a[9]);
+    SQRADDAC(a[5], a[8]);
+    SQRADDAC(a[6], a[7]);
+    SQRADDDB;
+    COMBA_STORE(b[13]);
+
+    /* output 14 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[14]);
+    SQRADDAC(a[1], a[13]);
+    SQRADDAC(a[2], a[12]);
+    SQRADDAC(a[3], a[11]);
+    SQRADDAC(a[4], a[10]);
+    SQRADDAC(a[5], a[9]);
+    SQRADDAC(a[6], a[8]);
+    SQRADDDB;
+    SQRADD(a[7], a[7]);
+    COMBA_STORE(b[14]);
+
+    /* output 15 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[15]);
+    SQRADDAC(a[1], a[14]);
+    SQRADDAC(a[2], a[13]);
+    SQRADDAC(a[3], a[12]);
+    SQRADDAC(a[4], a[11]);
+    SQRADDAC(a[5], a[10]);
+    SQRADDAC(a[6], a[9]);
+    SQRADDAC(a[7], a[8]);
+    SQRADDDB;
+    COMBA_STORE(b[15]);
+
+    /* output 16 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[16]);
+    SQRADDAC(a[1], a[15]);
+    SQRADDAC(a[2], a[14]);
+    SQRADDAC(a[3], a[13]);
+    SQRADDAC(a[4], a[12]);
+    SQRADDAC(a[5], a[11]);
+    SQRADDAC(a[6], a[10]);
+    SQRADDAC(a[7], a[9]);
+    SQRADDDB;
+    SQRADD(a[8], a[8]);
+    COMBA_STORE(b[16]);
+
+    /* output 17 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[17]);
+    SQRADDAC(a[1], a[16]);
+    SQRADDAC(a[2], a[15]);
+    SQRADDAC(a[3], a[14]);
+    SQRADDAC(a[4], a[13]);
+    SQRADDAC(a[5], a[12]);
+    SQRADDAC(a[6], a[11]);
+    SQRADDAC(a[7], a[10]);
+    SQRADDAC(a[8], a[9]);
+    SQRADDDB;
+    COMBA_STORE(b[17]);
+
+    /* output 18 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[18]);
+    SQRADDAC(a[1], a[17]);
+    SQRADDAC(a[2], a[16]);
+    SQRADDAC(a[3], a[15]);
+    SQRADDAC(a[4], a[14]);
+    SQRADDAC(a[5], a[13]);
+    SQRADDAC(a[6], a[12]);
+    SQRADDAC(a[7], a[11]);
+    SQRADDAC(a[8], a[10]);
+    SQRADDDB;
+    SQRADD(a[9], a[9]);
+    COMBA_STORE(b[18]);
+
+    /* output 19 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[19]);
+    SQRADDAC(a[1], a[18]);
+    SQRADDAC(a[2], a[17]);
+    SQRADDAC(a[3], a[16]);
+    SQRADDAC(a[4], a[15]);
+    SQRADDAC(a[5], a[14]);
+    SQRADDAC(a[6], a[13]);
+    SQRADDAC(a[7], a[12]);
+    SQRADDAC(a[8], a[11]);
+    SQRADDAC(a[9], a[10]);
+    SQRADDDB;
+    COMBA_STORE(b[19]);
+
+    /* output 20 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[20]);
+    SQRADDAC(a[1], a[19]);
+    SQRADDAC(a[2], a[18]);
+    SQRADDAC(a[3], a[17]);
+    SQRADDAC(a[4], a[16]);
+    SQRADDAC(a[5], a[15]);
+    SQRADDAC(a[6], a[14]);
+    SQRADDAC(a[7], a[13]);
+    SQRADDAC(a[8], a[12]);
+    SQRADDAC(a[9], a[11]);
+    SQRADDDB;
+    SQRADD(a[10], a[10]);
+    COMBA_STORE(b[20]);
+
+    /* output 21 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[21]);
+    SQRADDAC(a[1], a[20]);
+    SQRADDAC(a[2], a[19]);
+    SQRADDAC(a[3], a[18]);
+    SQRADDAC(a[4], a[17]);
+    SQRADDAC(a[5], a[16]);
+    SQRADDAC(a[6], a[15]);
+    SQRADDAC(a[7], a[14]);
+    SQRADDAC(a[8], a[13]);
+    SQRADDAC(a[9], a[12]);
+    SQRADDAC(a[10], a[11]);
+    SQRADDDB;
+    COMBA_STORE(b[21]);
+
+    /* output 22 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[22]);
+    SQRADDAC(a[1], a[21]);
+    SQRADDAC(a[2], a[20]);
+    SQRADDAC(a[3], a[19]);
+    SQRADDAC(a[4], a[18]);
+    SQRADDAC(a[5], a[17]);
+    SQRADDAC(a[6], a[16]);
+    SQRADDAC(a[7], a[15]);
+    SQRADDAC(a[8], a[14]);
+    SQRADDAC(a[9], a[13]);
+    SQRADDAC(a[10], a[12]);
+    SQRADDDB;
+    SQRADD(a[11], a[11]);
+    COMBA_STORE(b[22]);
+
+    /* output 23 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[23]);
+    SQRADDAC(a[1], a[22]);
+    SQRADDAC(a[2], a[21]);
+    SQRADDAC(a[3], a[20]);
+    SQRADDAC(a[4], a[19]);
+    SQRADDAC(a[5], a[18]);
+    SQRADDAC(a[6], a[17]);
+    SQRADDAC(a[7], a[16]);
+    SQRADDAC(a[8], a[15]);
+    SQRADDAC(a[9], a[14]);
+    SQRADDAC(a[10], a[13]);
+    SQRADDAC(a[11], a[12]);
+    SQRADDDB;
+    COMBA_STORE(b[23]);
+
+    /* output 24 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[24]);
+    SQRADDAC(a[1], a[23]);
+    SQRADDAC(a[2], a[22]);
+    SQRADDAC(a[3], a[21]);
+    SQRADDAC(a[4], a[20]);
+    SQRADDAC(a[5], a[19]);
+    SQRADDAC(a[6], a[18]);
+    SQRADDAC(a[7], a[17]);
+    SQRADDAC(a[8], a[16]);
+    SQRADDAC(a[9], a[15]);
+    SQRADDAC(a[10], a[14]);
+    SQRADDAC(a[11], a[13]);
+    SQRADDDB;
+    SQRADD(a[12], a[12]);
+    COMBA_STORE(b[24]);
+
+    /* output 25 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[25]);
+    SQRADDAC(a[1], a[24]);
+    SQRADDAC(a[2], a[23]);
+    SQRADDAC(a[3], a[22]);
+    SQRADDAC(a[4], a[21]);
+    SQRADDAC(a[5], a[20]);
+    SQRADDAC(a[6], a[19]);
+    SQRADDAC(a[7], a[18]);
+    SQRADDAC(a[8], a[17]);
+    SQRADDAC(a[9], a[16]);
+    SQRADDAC(a[10], a[15]);
+    SQRADDAC(a[11], a[14]);
+    SQRADDAC(a[12], a[13]);
+    SQRADDDB;
+    COMBA_STORE(b[25]);
+
+    /* output 26 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[26]);
+    SQRADDAC(a[1], a[25]);
+    SQRADDAC(a[2], a[24]);
+    SQRADDAC(a[3], a[23]);
+    SQRADDAC(a[4], a[22]);
+    SQRADDAC(a[5], a[21]);
+    SQRADDAC(a[6], a[20]);
+    SQRADDAC(a[7], a[19]);
+    SQRADDAC(a[8], a[18]);
+    SQRADDAC(a[9], a[17]);
+    SQRADDAC(a[10], a[16]);
+    SQRADDAC(a[11], a[15]);
+    SQRADDAC(a[12], a[14]);
+    SQRADDDB;
+    SQRADD(a[13], a[13]);
+    COMBA_STORE(b[26]);
+
+    /* output 27 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[27]);
+    SQRADDAC(a[1], a[26]);
+    SQRADDAC(a[2], a[25]);
+    SQRADDAC(a[3], a[24]);
+    SQRADDAC(a[4], a[23]);
+    SQRADDAC(a[5], a[22]);
+    SQRADDAC(a[6], a[21]);
+    SQRADDAC(a[7], a[20]);
+    SQRADDAC(a[8], a[19]);
+    SQRADDAC(a[9], a[18]);
+    SQRADDAC(a[10], a[17]);
+    SQRADDAC(a[11], a[16]);
+    SQRADDAC(a[12], a[15]);
+    SQRADDAC(a[13], a[14]);
+    SQRADDDB;
+    COMBA_STORE(b[27]);
+
+    /* output 28 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[28]);
+    SQRADDAC(a[1], a[27]);
+    SQRADDAC(a[2], a[26]);
+    SQRADDAC(a[3], a[25]);
+    SQRADDAC(a[4], a[24]);
+    SQRADDAC(a[5], a[23]);
+    SQRADDAC(a[6], a[22]);
+    SQRADDAC(a[7], a[21]);
+    SQRADDAC(a[8], a[20]);
+    SQRADDAC(a[9], a[19]);
+    SQRADDAC(a[10], a[18]);
+    SQRADDAC(a[11], a[17]);
+    SQRADDAC(a[12], a[16]);
+    SQRADDAC(a[13], a[15]);
+    SQRADDDB;
+    SQRADD(a[14], a[14]);
+    COMBA_STORE(b[28]);
+
+    /* output 29 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[29]);
+    SQRADDAC(a[1], a[28]);
+    SQRADDAC(a[2], a[27]);
+    SQRADDAC(a[3], a[26]);
+    SQRADDAC(a[4], a[25]);
+    SQRADDAC(a[5], a[24]);
+    SQRADDAC(a[6], a[23]);
+    SQRADDAC(a[7], a[22]);
+    SQRADDAC(a[8], a[21]);
+    SQRADDAC(a[9], a[20]);
+    SQRADDAC(a[10], a[19]);
+    SQRADDAC(a[11], a[18]);
+    SQRADDAC(a[12], a[17]);
+    SQRADDAC(a[13], a[16]);
+    SQRADDAC(a[14], a[15]);
+    SQRADDDB;
+    COMBA_STORE(b[29]);
+
+    /* output 30 */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[30]);
+    SQRADDAC(a[1], a[29]);
+    SQRADDAC(a[2], a[28]);
+    SQRADDAC(a[3], a[27]);
+    SQRADDAC(a[4], a[26]);
+    SQRADDAC(a[5], a[25]);
+    SQRADDAC(a[6], a[24]);
+    SQRADDAC(a[7], a[23]);
+    SQRADDAC(a[8], a[22]);
+    SQRADDAC(a[9], a[21]);
+    SQRADDAC(a[10], a[20]);
+    SQRADDAC(a[11], a[19]);
+    SQRADDAC(a[12], a[18]);
+    SQRADDAC(a[13], a[17]);
+    SQRADDAC(a[14], a[16]);
+    SQRADDDB;
+    SQRADD(a[15], a[15]);
+    COMBA_STORE(b[30]);
+
+    /* output 31: widest column, all 16 cross pairs (0, 31) .. (15, 16) */
+    CARRY_FORWARD;
+    SQRADDSC(a[0], a[31]);
+    SQRADDAC(a[1], a[30]);
+    SQRADDAC(a[2], a[29]);
+    SQRADDAC(a[3], a[28]);
+    SQRADDAC(a[4], a[27]);
+    SQRADDAC(a[5], a[26]);
+    SQRADDAC(a[6], a[25]);
+    SQRADDAC(a[7], a[24]);
+    SQRADDAC(a[8], a[23]);
+    SQRADDAC(a[9], a[22]);
+    SQRADDAC(a[10], a[21]);
+    SQRADDAC(a[11], a[20]);
+    SQRADDAC(a[12], a[19]);
+    SQRADDAC(a[13], a[18]);
+    SQRADDAC(a[14], a[17]);
+    SQRADDAC(a[15], a[16]);
+    SQRADDDB;
+    COMBA_STORE(b[31]);
+
+    /* output 32: from here on the smaller index starts at k - 31, since a[31] is the highest digit read */
+    CARRY_FORWARD;
+    SQRADDSC(a[1], a[31]);
+    SQRADDAC(a[2], a[30]);
+    SQRADDAC(a[3], a[29]);
+    SQRADDAC(a[4], a[28]);
+    SQRADDAC(a[5], a[27]);
+    SQRADDAC(a[6], a[26]);
+    SQRADDAC(a[7], a[25]);
+    SQRADDAC(a[8], a[24]);
+    SQRADDAC(a[9], a[23]);
+    SQRADDAC(a[10], a[22]);
+    SQRADDAC(a[11], a[21]);
+    SQRADDAC(a[12], a[20]);
+    SQRADDAC(a[13], a[19]);
+    SQRADDAC(a[14], a[18]);
+    SQRADDAC(a[15], a[17]);
+    SQRADDDB;
+    SQRADD(a[16], a[16]);
+    COMBA_STORE(b[32]);
+
+    /* output 33 */
+    CARRY_FORWARD;
+    SQRADDSC(a[2], a[31]);
+    SQRADDAC(a[3], a[30]);
+    SQRADDAC(a[4], a[29]);
+    SQRADDAC(a[5], a[28]);
+    SQRADDAC(a[6], a[27]);
+    SQRADDAC(a[7], a[26]);
+    SQRADDAC(a[8], a[25]);
+    SQRADDAC(a[9], a[24]);
+    SQRADDAC(a[10], a[23]);
+    SQRADDAC(a[11], a[22]);
+    SQRADDAC(a[12], a[21]);
+    SQRADDAC(a[13], a[20]);
+    SQRADDAC(a[14], a[19]);
+    SQRADDAC(a[15], a[18]);
+    SQRADDAC(a[16], a[17]);
+    SQRADDDB;
+    COMBA_STORE(b[33]);
+
+    /* output 34 */
+    CARRY_FORWARD;
+    SQRADDSC(a[3], a[31]);
+    SQRADDAC(a[4], a[30]);
+    SQRADDAC(a[5], a[29]);
+    SQRADDAC(a[6], a[28]);
+    SQRADDAC(a[7], a[27]);
+    SQRADDAC(a[8], a[26]);
+    SQRADDAC(a[9], a[25]);
+    SQRADDAC(a[10], a[24]);
+    SQRADDAC(a[11], a[23]);
+    SQRADDAC(a[12], a[22]);
+    SQRADDAC(a[13], a[21]);
+    SQRADDAC(a[14], a[20]);
+    SQRADDAC(a[15], a[19]);
+    SQRADDAC(a[16], a[18]);
+    SQRADDDB;
+    SQRADD(a[17], a[17]);
+    COMBA_STORE(b[34]);
+
+    /* output 35 */
+    CARRY_FORWARD;
+    SQRADDSC(a[4], a[31]);
+    SQRADDAC(a[5], a[30]);
+    SQRADDAC(a[6], a[29]);
+    SQRADDAC(a[7], a[28]);
+    SQRADDAC(a[8], a[27]);
+    SQRADDAC(a[9], a[26]);
+    SQRADDAC(a[10], a[25]);
+    SQRADDAC(a[11], a[24]);
+    SQRADDAC(a[12], a[23]);
+    SQRADDAC(a[13], a[22]);
+    SQRADDAC(a[14], a[21]);
+    SQRADDAC(a[15], a[20]);
+    SQRADDAC(a[16], a[19]);
+    SQRADDAC(a[17], a[18]);
+    SQRADDDB;
+    COMBA_STORE(b[35]);
+
+    /* output 36 */
+    CARRY_FORWARD;
+    SQRADDSC(a[5], a[31]);
+    SQRADDAC(a[6], a[30]);
+    SQRADDAC(a[7], a[29]);
+    SQRADDAC(a[8], a[28]);
+    SQRADDAC(a[9], a[27]);
+    SQRADDAC(a[10], a[26]);
+    SQRADDAC(a[11], a[25]);
+    SQRADDAC(a[12], a[24]);
+    SQRADDAC(a[13], a[23]);
+    SQRADDAC(a[14], a[22]);
+    SQRADDAC(a[15], a[21]);
+    SQRADDAC(a[16], a[20]);
+    SQRADDAC(a[17], a[19]);
+    SQRADDDB;
+    SQRADD(a[18], a[18]);
+    COMBA_STORE(b[36]);
+
+    /* output 37 */
+    CARRY_FORWARD;
+    SQRADDSC(a[6], a[31]);
+    SQRADDAC(a[7], a[30]);
+    SQRADDAC(a[8], a[29]);
+    SQRADDAC(a[9], a[28]);
+    SQRADDAC(a[10], a[27]);
+    SQRADDAC(a[11], a[26]);
+    SQRADDAC(a[12], a[25]);
+    SQRADDAC(a[13], a[24]);
+    SQRADDAC(a[14], a[23]);
+    SQRADDAC(a[15], a[22]);
+    SQRADDAC(a[16], a[21]);
+    SQRADDAC(a[17], a[20]);
+    SQRADDAC(a[18], a[19]);
+    SQRADDDB;
+    COMBA_STORE(b[37]);
+
+    /* output 38 */
+    CARRY_FORWARD;
+    SQRADDSC(a[7], a[31]);
+    SQRADDAC(a[8], a[30]);
+    SQRADDAC(a[9], a[29]);
+    SQRADDAC(a[10], a[28]);
+    SQRADDAC(a[11], a[27]);
+    SQRADDAC(a[12], a[26]);
+    SQRADDAC(a[13], a[25]);
+    SQRADDAC(a[14], a[24]);
+    SQRADDAC(a[15], a[23]);
+    SQRADDAC(a[16], a[22]);
+    SQRADDAC(a[17], a[21]);
+    SQRADDAC(a[18], a[20]);
+    SQRADDDB;
+    SQRADD(a[19], a[19]);
+    COMBA_STORE(b[38]);
+
+    /* output 39 */
+    CARRY_FORWARD;
+    SQRADDSC(a[8], a[31]);
+    SQRADDAC(a[9], a[30]);
+    SQRADDAC(a[10], a[29]);
+    SQRADDAC(a[11], a[28]);
+    SQRADDAC(a[12], a[27]);
+    SQRADDAC(a[13], a[26]);
+    SQRADDAC(a[14], a[25]);
+    SQRADDAC(a[15], a[24]);
+    SQRADDAC(a[16], a[23]);
+    SQRADDAC(a[17], a[22]);
+    SQRADDAC(a[18], a[21]);
+    SQRADDAC(a[19], a[20]);
+    SQRADDDB;
+    COMBA_STORE(b[39]);
+
+    /* output 40 */
+    CARRY_FORWARD;
+    SQRADDSC(a[9], a[31]);
+    SQRADDAC(a[10], a[30]);
+    SQRADDAC(a[11], a[29]);
+    SQRADDAC(a[12], a[28]);
+    SQRADDAC(a[13], a[27]);
+    SQRADDAC(a[14], a[26]);
+    SQRADDAC(a[15], a[25]);
+    SQRADDAC(a[16], a[24]);
+    SQRADDAC(a[17], a[23]);
+    SQRADDAC(a[18], a[22]);
+    SQRADDAC(a[19], a[21]);
+    SQRADDDB;
+    SQRADD(a[20], a[20]);
+    COMBA_STORE(b[40]);
+
+    /* output 41 */
+    CARRY_FORWARD;
+    SQRADDSC(a[10], a[31]);
+    SQRADDAC(a[11], a[30]);
+    SQRADDAC(a[12], a[29]);
+    SQRADDAC(a[13], a[28]);
+    SQRADDAC(a[14], a[27]);
+    SQRADDAC(a[15], a[26]);
+    SQRADDAC(a[16], a[25]);
+    SQRADDAC(a[17], a[24]);
+    SQRADDAC(a[18], a[23]);
+    SQRADDAC(a[19], a[22]);
+    SQRADDAC(a[20], a[21]);
+    SQRADDDB;
+    COMBA_STORE(b[41]);
+
+    /* output 42 */
+    CARRY_FORWARD;
+    SQRADDSC(a[11], a[31]);
+    SQRADDAC(a[12], a[30]);
+    SQRADDAC(a[13], a[29]);
+    SQRADDAC(a[14], a[28]);
+    SQRADDAC(a[15], a[27]);
+    SQRADDAC(a[16], a[26]);
+    SQRADDAC(a[17], a[25]);
+    SQRADDAC(a[18], a[24]);
+    SQRADDAC(a[19], a[23]);
+    SQRADDAC(a[20], a[22]);
+    SQRADDDB;
+    SQRADD(a[21], a[21]);
+    COMBA_STORE(b[42]);
+
+    /* output 43 */
+    CARRY_FORWARD;
+    SQRADDSC(a[12], a[31]);
+    SQRADDAC(a[13], a[30]);
+    SQRADDAC(a[14], a[29]);
+    SQRADDAC(a[15], a[28]);
+    SQRADDAC(a[16], a[27]);
+    SQRADDAC(a[17], a[26]);
+    SQRADDAC(a[18], a[25]);
+    SQRADDAC(a[19], a[24]);
+    SQRADDAC(a[20], a[23]);
+    SQRADDAC(a[21], a[22]);
+    SQRADDDB;
+    COMBA_STORE(b[43]);
+
+    /* output 44 */
+    CARRY_FORWARD;
+    SQRADDSC(a[13], a[31]);
+    SQRADDAC(a[14], a[30]);
+    SQRADDAC(a[15], a[29]);
+    SQRADDAC(a[16], a[28]);
+    SQRADDAC(a[17], a[27]);
+    SQRADDAC(a[18], a[26]);
+    SQRADDAC(a[19], a[25]);
+    SQRADDAC(a[20], a[24]);
+    SQRADDAC(a[21], a[23]);
+    SQRADDDB;
+    SQRADD(a[22], a[22]);
+    COMBA_STORE(b[44]);
+
+    /* output 45 */
+    CARRY_FORWARD;
+    SQRADDSC(a[14], a[31]);
+    SQRADDAC(a[15], a[30]);
+    SQRADDAC(a[16], a[29]);
+    SQRADDAC(a[17], a[28]);
+    SQRADDAC(a[18], a[27]);
+    SQRADDAC(a[19], a[26]);
+    SQRADDAC(a[20], a[25]);
+    SQRADDAC(a[21], a[24]);
+    SQRADDAC(a[22], a[23]);
+    SQRADDDB;
+    COMBA_STORE(b[45]);
+
+    /* output 46 */
+    CARRY_FORWARD;
+    SQRADDSC(a[15], a[31]);
+    SQRADDAC(a[16], a[30]);
+    SQRADDAC(a[17], a[29]);
+    SQRADDAC(a[18], a[28]);
+    SQRADDAC(a[19], a[27]);
+    SQRADDAC(a[20], a[26]);
+    SQRADDAC(a[21], a[25]);
+    SQRADDAC(a[22], a[24]);
+    SQRADDDB;
+    SQRADD(a[23], a[23]);
+    COMBA_STORE(b[46]);
+
+    /* output 47 */
+    CARRY_FORWARD;
+    SQRADDSC(a[16], a[31]);
+    SQRADDAC(a[17], a[30]);
+    SQRADDAC(a[18], a[29]);
+    SQRADDAC(a[19], a[28]);
+    SQRADDAC(a[20], a[27]);
+    SQRADDAC(a[21], a[26]);
+    SQRADDAC(a[22], a[25]);
+    SQRADDAC(a[23], a[24]);
+    SQRADDDB;
+    COMBA_STORE(b[47]);
+
+    /* output 48 */
+    CARRY_FORWARD;
+    SQRADDSC(a[17], a[31]);
+    SQRADDAC(a[18], a[30]);
+    SQRADDAC(a[19], a[29]);
+    SQRADDAC(a[20], a[28]);
+    SQRADDAC(a[21], a[27]);
+    SQRADDAC(a[22], a[26]);
+    SQRADDAC(a[23], a[25]);
+    SQRADDDB;
+    SQRADD(a[24], a[24]);
+    COMBA_STORE(b[48]);
+
+    /* output 49 */
+    CARRY_FORWARD;
+    SQRADDSC(a[18], a[31]);
+    SQRADDAC(a[19], a[30]);
+    SQRADDAC(a[20], a[29]);
+    SQRADDAC(a[21], a[28]);
+    SQRADDAC(a[22], a[27]);
+    SQRADDAC(a[23], a[26]);
+    SQRADDAC(a[24], a[25]);
+    SQRADDDB;
+    COMBA_STORE(b[49]);
+
+    /* output 50 */
+    CARRY_FORWARD;
+    SQRADDSC(a[19], a[31]);
+    SQRADDAC(a[20], a[30]);
+    SQRADDAC(a[21], a[29]);
+    SQRADDAC(a[22], a[28]);
+    SQRADDAC(a[23], a[27]);
+    SQRADDAC(a[24], a[26]);
+    SQRADDDB;
+    SQRADD(a[25], a[25]);
+    COMBA_STORE(b[50]);
+
+    /* output 51 */
+    CARRY_FORWARD;
+    SQRADDSC(a[20], a[31]);
+    SQRADDAC(a[21], a[30]);
+    SQRADDAC(a[22], a[29]);
+    SQRADDAC(a[23], a[28]);
+    SQRADDAC(a[24], a[27]);
+    SQRADDAC(a[25], a[26]);
+    SQRADDDB;
+    COMBA_STORE(b[51]);
+
+    /* output 52 */
+    CARRY_FORWARD;
+    SQRADDSC(a[21], a[31]);
+    SQRADDAC(a[22], a[30]);
+    SQRADDAC(a[23], a[29]);
+    SQRADDAC(a[24], a[28]);
+    SQRADDAC(a[25], a[27]);
+    SQRADDDB;
+    SQRADD(a[26], a[26]);
+    COMBA_STORE(b[52]);
+
+    /* output 53 */
+    CARRY_FORWARD;
+    SQRADDSC(a[22], a[31]);
+    SQRADDAC(a[23], a[30]);
+    SQRADDAC(a[24], a[29]);
+    SQRADDAC(a[25], a[28]);
+    SQRADDAC(a[26], a[27]);
+    SQRADDDB;
+    COMBA_STORE(b[53]);
+
+    /* output 54 */
+    CARRY_FORWARD;
+    SQRADDSC(a[23], a[31]);
+    SQRADDAC(a[24], a[30]);
+    SQRADDAC(a[25], a[29]);
+    SQRADDAC(a[26], a[28]);
+    SQRADDDB;
+    SQRADD(a[27], a[27]);
+    COMBA_STORE(b[54]);
+
+    /* output 55 */
+    CARRY_FORWARD;
+    SQRADDSC(a[24], a[31]);
+    SQRADDAC(a[25], a[30]);
+    SQRADDAC(a[26], a[29]);
+    SQRADDAC(a[27], a[28]);
+    SQRADDDB;
+    COMBA_STORE(b[55]);
+
+    /* output 56 */
+    CARRY_FORWARD;
+    SQRADDSC(a[25], a[31]);
+    SQRADDAC(a[26], a[30]);
+    SQRADDAC(a[27], a[29]);
+    SQRADDDB;
+    SQRADD(a[28], a[28]);
+    COMBA_STORE(b[56]);
+
+    /* output 57 */
+    CARRY_FORWARD;
+    SQRADDSC(a[26], a[31]);
+    SQRADDAC(a[27], a[30]);
+    SQRADDAC(a[28], a[29]);
+    SQRADDDB;
+    COMBA_STORE(b[57]);
+
+    /* output 58: down to two cross pairs, so SQRADD2 is used again as in outputs 1-4 */
+    CARRY_FORWARD;
+    SQRADD2(a[27], a[31]);
+    SQRADD2(a[28], a[30]);
+    SQRADD(a[29], a[29]);
+    COMBA_STORE(b[58]);
+
+    /* output 59 */
+    CARRY_FORWARD;
+    SQRADD2(a[28], a[31]);
+    SQRADD2(a[29], a[30]);
+    COMBA_STORE(b[59]);
+
+    /* output 60 */
+    CARRY_FORWARD;
+    SQRADD2(a[29], a[31]);
+    SQRADD(a[30], a[30]);
+    COMBA_STORE(b[60]);
+
+    /* output 61 */
+    CARRY_FORWARD;
+    SQRADD2(a[30], a[31]);
+    COMBA_STORE(b[61]);
+
+    /* output 62: last column, only the diagonal a[31] * a[31]; COMBA_STORE2 then fills b[63], presumably with the remaining carry word -- confirm against the macro */
+    CARRY_FORWARD;
+    SQRADD(a[31], a[31]);
+    COMBA_STORE(b[62]);
+    COMBA_STORE2(b[63]);
+    COMBA_FINI;
+    /* Publish the result: 64 digits, positive sign, scratch copied into B->dp, then mp_clamp(B) (defined elsewhere; presumably trims leading zero digits). */
+    B->used = 64;
+    B->sign = ZPOS;
+    memcpy(B->dp, b, 64 * sizeof(mp_digit));
+    mp_clamp(B);
+}
diff --git a/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm b/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm
new file mode 100644
index 000000000..cb432583f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm
@@ -0,0 +1,13066 @@
+; This Source Code Form is subject to the terms of the Mozilla Public
+; License, v. 2.0. If a copy of the MPL was not distributed with this
+; file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+;/* TomsFastMath, a fast ISO C bignum library.
+; * 
+; * This project is meant to fill in where LibTomMath
+; * falls short.  That is speed ;-)
+; *
+; * This project is public domain and free for all purposes.
+; * 
+; * Tom St Denis, tomstdenis@iahu.ca
+; */
+
+;/*
+; * The source file from which this assembly was derived
+; * comes from TFM v0.03, which has the above license.
+; * This source was derived from mp_comba_amd64.sun.s and converted to
+; * MASM syntax.
+; */
+
+.CODE
+
+externdef memcpy:PROC                   ; memcpy is a procedure defined outside this module
+
+public s_mp_mul_comba_4                 ; PUBLIC makes these seven procedure names visible to other modules
+public s_mp_mul_comba_8
+public s_mp_mul_comba_16
+public s_mp_mul_comba_32
+public s_mp_sqr_comba_8
+public s_mp_sqr_comba_16
+public s_mp_sqr_comba_32
+
+
+; void s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C)
+; Multiplies the four 64-bit digits of A by the four of B column by column, stores all eight product digits through C's digit pointer, then trims C's digit count and sets its sign.
+        ALIGN 16
+s_mp_mul_comba_4 PROC
+; Arguments arrive in rcx, rdx, r8 and are moved into rdi, rsi, rdx; presumably this lets the body converted from the Sun assembler source keep its register assignments.
+        push rdi                                ; rdi and rsi are overwritten below, so keep the caller's values
+        push rsi
+; Struct offsets used below: dword at +0 is the sign, dword at +8 the digit count, qword at +16 the digit pointer (presumably mp_int sign / used / dp).
+        mov rdi, rcx                            ; rdi = A
+        mov rsi, rdx                            ; rsi = B
+        mov rdx, r8                             ; rdx = C (copied to rbx below)
+; Both operands are copied to the stack before any product digit is stored, so C's digit buffer may be the same memory as A's or B's.
+        push r12
+        push rbp
+        push rbx
+        sub rsp, 64                             ; 8 qword slots: a[0..3] at rsp+0..24, b[0..3] at rsp+32..56
+        mov r9, qword ptr [16+rdi]              ; r9 = A's digit pointer
+        mov rbx, rdx                            ; rbx = C for the rest of the routine
+        mov rdx, qword ptr [16+rsi]             ; rdx = B's digit pointer
+        mov rax, qword ptr [r9]
+        mov qword ptr [-64+64+rsp], rax         ; slot rsp+0  = a[0]
+        mov r8, qword ptr [8+r9]
+        mov qword ptr [-56+64+rsp], r8          ; slot rsp+8  = a[1]
+        mov rbp, qword ptr [16+r9]
+        mov qword ptr [-48+64+rsp], rbp         ; slot rsp+16 = a[2]
+        mov r12, qword ptr [24+r9]
+        mov qword ptr [-40+64+rsp], r12         ; slot rsp+24 = a[3]
+        mov rcx, qword ptr [rdx]
+        mov qword ptr [-32+64+rsp], rcx         ; slot rsp+32 = b[0]
+        mov r10, qword ptr [8+rdx]
+        mov qword ptr [-24+64+rsp], r10         ; slot rsp+40 = b[1]
+        mov r11, qword ptr [16+rdx]
+        xor r10d, r10d                          ; r10 = 0, used as the zero source when the accumulator is rotated
+        mov r8, r10                             ; three-word column accumulator starts as r8 (low), r9 (mid), rbp (high) = 0
+        mov r9, r10
+        mov rbp, r10
+        mov qword ptr [-16+64+rsp], r11         ; slot rsp+48 = b[2]
+        mov r11, qword ptr [16+rbx]             ; r11 = C's digit pointer
+        mov rax, qword ptr [24+rdx]
+        mov qword ptr [-8+64+rsp], rax          ; slot rsp+56 = b[3]
+        mov rax, qword ptr [-64+64+rsp]         ; column 0: a[0] * b[0]
+        mul qword ptr [-32+64+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rbp, 0
+        mov qword ptr [r11], r8                 ; product digit 0
+        mov r8, rbp                             ; rotate: old mid (r9) becomes low, old high becomes mid (r8)
+        mov rbp, r10                            ; new high word = 0
+        mov rax, qword ptr [-64+64+rsp]         ; column 1: a[0] * b[1]
+        mul qword ptr [-24+64+rsp]
+        add r9, rax
+        adc r8, rdx
+        adc rbp, 0
+        mov r12, rbp
+        mov rax, qword ptr [-56+64+rsp]         ; column 1: a[1] * b[0]
+        mul qword ptr [-32+64+rsp]
+        add r9, rax
+        adc r8, rdx
+        adc r12, 0
+        mov qword ptr [8+r11], r9               ; product digit 1
+        mov r9, r12
+        mov r12, r10
+        mov rax, qword ptr [-64+64+rsp]         ; column 2: a[0] * b[2]
+        mul qword ptr [-16+64+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc r12, 0
+        mov rcx, r12
+        mov rax, qword ptr [-56+64+rsp]         ; column 2: a[1] * b[1]
+        mul qword ptr [-24+64+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [-48+64+rsp]         ; column 2: a[2] * b[0]
+        mul qword ptr [-32+64+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [16+r11], r8              ; product digit 2
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-64+64+rsp]         ; column 3: a[0] * b[3]
+        mul qword ptr [-8+64+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-56+64+rsp]         ; column 3: a[1] * b[2]
+        mul qword ptr [-16+64+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-48+64+rsp]         ; column 3: a[2] * b[1]
+        mul qword ptr [-24+64+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rbp, r9
+        mov r12, r8
+        mov rax, qword ptr [-40+64+rsp]         ; column 3: a[3] * b[0]
+        mul qword ptr [-32+64+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [24+r11], rcx             ; product digit 3
+        mov r9, r12
+        mov r8, rbp
+        mov rcx, r10
+        mov rax, qword ptr [-56+64+rsp]         ; column 4: a[1] * b[3]
+        mul qword ptr [-8+64+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-48+64+rsp]         ; column 4: a[2] * b[2]
+        mul qword ptr [-16+64+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [-40+64+rsp]         ; column 4: a[3] * b[1]
+        mul qword ptr [-24+64+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [32+r11], r8              ; product digit 4
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-48+64+rsp]         ; column 5: a[2] * b[3]
+        mul qword ptr [-8+64+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov r12, r8
+        mov rbp, r9
+        mov rax, qword ptr [-40+64+rsp]         ; column 5: a[3] * b[2]
+        mul qword ptr [-16+64+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [40+r11], rcx             ; product digit 5
+        mov r8, rbp
+        mov rcx, r12
+        mov rax, qword ptr [-40+64+rsp]         ; column 6: a[3] * b[3]
+        mul qword ptr [-8+64+rsp]
+        add r8, rax
+        adc rcx, rdx
+        adc r10, 0
+        mov qword ptr [48+r11], r8              ; product digit 6
+        mov esi, dword ptr [rsi]                ; esi = B's sign word ...
+        xor esi, dword ptr [rdi]                ; ... XOR A's sign word; stored into C below if the product is non-zero
+        test rcx, rcx                           ; is the top product digit zero? (the two mov below leave the flags intact)
+        mov qword ptr [56+r11], rcx             ; product digit 7
+        mov dword ptr [8+rbx], 8                ; digit count starts at 8
+        jne L9                                  ; top digit non-zero: nothing to trim
+        ALIGN 16
+L18:                                            ; trim loop: entered while digit [count-1] is known to be zero
+        mov edx, dword ptr [8+rbx]
+        lea edi, dword ptr [-1+rdx]
+        test edi, edi
+        mov dword ptr [8+rbx], edi              ; count = count - 1
+        je L9                                   ; count reached 0: the whole product is zero
+        lea r10d, dword ptr [-2+rdx]            ; index of the new top digit (old count - 2)
+        cmp qword ptr [r11+r10*8], 0            ; test all 64 bits of the digit; a dword compare would drop a digit whose low half is zero
+        je L18
+L9:
+        mov edx, dword ptr [8+rbx]
+        xor r11d, r11d
+        test edx, edx
+        cmovne r11d, esi                        ; sign = A.sign XOR B.sign when count != 0, otherwise 0
+        mov dword ptr [rbx], r11d
+        add rsp, 64                             ; release the operand slots
+        pop rbx
+        pop rbp
+        pop r12
+
+        pop rsi
+        pop rdi
+
+        ret
+
+s_mp_mul_comba_4 ENDP
+
+
+; void s_mp_mul_comba_8(const mp_int *A, const mp_int *B, mp_int *C)
+
+        ALIGN 16
+s_mp_mul_comba_8 PROC
+
+        push rdi
+        push rsi
+
+        mov rdi, rcx
+        mov rsi, rdx
+        mov rdx, r8
+
+        push r12
+        push rbp
+        push rbx
+        mov rbx, rdx
+        sub rsp, 8+128
+        mov rdx, qword ptr [16+rdi]
+        mov r8, qword ptr [rdx]
+        mov qword ptr [-120+128+rsp], r8
+        mov rbp, qword ptr [8+rdx]
+        mov qword ptr [-112+128+rsp], rbp
+        mov r9, qword ptr [16+rdx]
+        mov qword ptr [-104+128+rsp], r9
+        mov r12, qword ptr [24+rdx]
+        mov qword ptr [-96+128+rsp], r12
+        mov rcx, qword ptr [32+rdx]
+        mov qword ptr [-88+128+rsp], rcx
+        mov r10, qword ptr [40+rdx]
+        mov qword ptr [-80+128+rsp], r10
+        mov r11, qword ptr [48+rdx]
+        mov qword ptr [-72+128+rsp], r11
+        mov rax, qword ptr [56+rdx]
+        mov rdx, qword ptr [16+rsi]
+        mov qword ptr [-64+128+rsp], rax
+        mov r8, qword ptr [rdx]
+        mov qword ptr [-56+128+rsp], r8
+        mov rbp, qword ptr [8+rdx]
+        mov qword ptr [-48+128+rsp], rbp
+        mov r9, qword ptr [16+rdx]
+        mov qword ptr [-40+128+rsp], r9
+        mov r12, qword ptr [24+rdx]
+        mov qword ptr [-32+128+rsp], r12
+        mov rcx, qword ptr [32+rdx]
+        mov qword ptr [-24+128+rsp], rcx
+        mov r10, qword ptr [40+rdx]
+        mov qword ptr [-16+128+rsp], r10
+        mov r11, qword ptr [48+rdx]
+        xor r10d, r10d
+        mov r8, r10
+        mov r9, r10
+        mov rbp, r10
+        mov qword ptr [-8+128+rsp], r11
+        mov r11, qword ptr [16+rbx]
+        mov rax, qword ptr [56+rdx]
+        mov qword ptr [128+rsp], rax
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [-56+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rbp, 0
+        mov qword ptr [r11], r8
+        mov r8, rbp
+        mov rbp, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [-48+128+rsp]
+        add r9, rax
+        adc r8, rdx
+        adc rbp, 0
+        mov r12, rbp
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [-56+128+rsp]
+        add r9, rax
+        adc r8, rdx
+        adc r12, 0
+        mov qword ptr [8+r11], r9
+        mov r9, r12
+        mov r12, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [-40+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc r12, 0
+        mov rcx, r12
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [-48+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [-56+128+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [16+r11], r8
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [-32+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [-40+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [-48+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rbp, r9
+        mov r12, r8
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [-56+128+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [24+r11], rcx
+        mov r9, r12
+        mov r8, rbp
+        mov rcx, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [-24+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [-32+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [-40+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [-48+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [-56+128+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [32+r11], r8
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [-16+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [-24+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [-32+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [-40+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [-48+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rbp, r9
+        mov r12, r8
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [-56+128+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [40+r11], rcx
+        mov r9, r12
+        mov r8, rbp
+        mov rcx, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [-8+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [-16+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [-24+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [-32+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [-40+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [-48+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [-56+128+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [48+r11], r8
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [-8+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [-16+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [-24+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [-32+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [-40+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [-48+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rbp, r9
+        mov r12, r8
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [-56+128+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [56+r11], rcx
+        mov r9, r12
+        mov r8, rbp
+        mov rcx, r10
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [-8+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [-16+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [-24+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [-32+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [-40+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [-48+128+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [64+r11], r8
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [-8+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [-16+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [-24+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [-32+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rbp, r9
+        mov r12, r8
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [-40+128+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [72+r11], rcx
+        mov r9, r12
+        mov r8, rbp
+        mov rcx, r10
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [-8+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [-16+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [-24+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [-32+128+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [80+r11], r8
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [-8+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [-16+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rbp, r9
+        mov r12, r8
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [-24+128+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [88+r11], rcx
+        mov r9, r12
+        mov r8, rbp
+        mov rcx, r10
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [-8+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [-16+128+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [96+r11], r8
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov r12, r8
+        mov rbp, r9
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [-8+128+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [104+r11], rcx
+        mov r8, rbp
+        mov rcx, r12
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [128+rsp]
+        add r8, rax
+        adc rcx, rdx
+        adc r10, 0
+        mov qword ptr [112+r11], r8
+        mov esi, dword ptr [rsi]
+        xor esi, dword ptr [rdi]
+        test rcx, rcx
+        mov qword ptr [120+r11], rcx
+        mov dword ptr [8+rbx], 16
+        jne L35
+        ALIGN 16
+L43:
+        mov edx, dword ptr [8+rbx]
+        lea edi, dword ptr [-1+rdx]
+        test edi, edi
+        mov dword ptr [8+rbx], edi
+        je L35
+        lea eax, dword ptr [-2+rdx]
+        cmp dword ptr [r11+rax*8], 0
+        je L43
+L35:
+        mov r11d, dword ptr [8+rbx]
+        xor edx, edx
+        test r11d, r11d
+        cmovne edx, esi
+        mov dword ptr [rbx], edx
+        add rsp, 8+128
+        pop rbx
+        pop rbp
+        pop r12
+
+        pop rsi
+        pop rdi
+
+        ret
+
+s_mp_mul_comba_8 ENDP
+
+
+; void s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C);
+
+        ALIGN 16
+s_mp_mul_comba_16 PROC
+
+        push rdi
+        push rsi
+
+        mov rdi, rcx
+        mov rsi, rdx
+        mov rdx, r8
+
+        push r12
+        push rbp
+        push rbx
+        mov rbx, rdx
+        sub rsp, 136+128
+        mov rax, qword ptr [16+rdi]
+        mov r8, qword ptr [rax]
+        mov qword ptr [-120+128+rsp], r8
+        mov rbp, qword ptr [8+rax]
+        mov qword ptr [-112+128+rsp], rbp
+        mov r9, qword ptr [16+rax]
+        mov qword ptr [-104+128+rsp], r9
+        mov r12, qword ptr [24+rax]
+        mov qword ptr [-96+128+rsp], r12
+        mov rcx, qword ptr [32+rax]
+        mov qword ptr [-88+128+rsp], rcx
+        mov r10, qword ptr [40+rax]
+        mov qword ptr [-80+128+rsp], r10
+        mov rdx, qword ptr [48+rax]
+        mov qword ptr [-72+128+rsp], rdx
+        mov r11, qword ptr [56+rax]
+        mov qword ptr [-64+128+rsp], r11
+        mov r8, qword ptr [64+rax]
+        mov qword ptr [-56+128+rsp], r8
+        mov rbp, qword ptr [72+rax]
+        mov qword ptr [-48+128+rsp], rbp
+        mov r9, qword ptr [80+rax]
+        mov qword ptr [-40+128+rsp], r9
+        mov r12, qword ptr [88+rax]
+        mov qword ptr [-32+128+rsp], r12
+        mov rcx, qword ptr [96+rax]
+        mov qword ptr [-24+128+rsp], rcx
+        mov r10, qword ptr [104+rax]
+        mov qword ptr [-16+128+rsp], r10
+        mov rdx, qword ptr [112+rax]
+        mov qword ptr [-8+128+rsp], rdx
+        mov r11, qword ptr [120+rax]
+        mov qword ptr [128+rsp], r11
+        mov r11, qword ptr [16+rsi]
+        mov r8, qword ptr [r11]
+        mov qword ptr [8+128+rsp], r8
+        mov rbp, qword ptr [8+r11]
+        mov qword ptr [16+128+rsp], rbp
+        mov r9, qword ptr [16+r11]
+        mov qword ptr [24+128+rsp], r9
+        mov r12, qword ptr [24+r11]
+        mov qword ptr [32+128+rsp], r12
+        mov rcx, qword ptr [32+r11]
+        mov qword ptr [40+128+rsp], rcx
+        mov r10, qword ptr [40+r11]
+        mov qword ptr [48+128+rsp], r10
+        mov rdx, qword ptr [48+r11]
+        mov qword ptr [56+128+rsp], rdx
+        mov rax, qword ptr [56+r11]
+        mov qword ptr [64+128+rsp], rax
+        mov r8, qword ptr [64+r11]
+        mov qword ptr [72+128+rsp], r8
+        mov rbp, qword ptr [72+r11]
+        mov qword ptr [80+128+rsp], rbp
+        mov r9, qword ptr [80+r11]
+        mov qword ptr [88+128+rsp], r9
+        mov r12, qword ptr [88+r11]
+        mov qword ptr [96+128+rsp], r12
+        mov rcx, qword ptr [96+r11]
+        mov qword ptr [104+128+rsp], rcx
+        mov r10, qword ptr [104+r11]
+        mov qword ptr [112+128+rsp], r10
+        mov rdx, qword ptr [112+r11]
+        xor r10d, r10d
+        mov r8, r10
+        mov r9, r10
+        mov rbp, r10
+        mov qword ptr [120+128+rsp], rdx
+        mov rax, qword ptr [120+r11]
+        mov qword ptr [128+128+rsp], rax
+        mov r11, qword ptr [16+rbx]
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [8+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rbp, 0
+        mov qword ptr [r11], r8
+        mov r8, rbp
+        mov rbp, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [16+128+rsp]
+        add r9, rax
+        adc r8, rdx
+        adc rbp, 0
+        mov r12, rbp
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [8+128+rsp]
+        add r9, rax
+        adc r8, rdx
+        adc r12, 0
+        mov qword ptr [8+r11], r9
+        mov r9, r12
+        mov r12, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [24+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc r12, 0
+        mov rcx, r12
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [16+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [8+128+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [16+r11], r8
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [32+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [24+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [16+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rbp, r9
+        mov r12, r8
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [8+128+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [24+r11], rcx
+        mov r9, r12
+        mov r8, rbp
+        mov rcx, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [40+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [32+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [24+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [16+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [8+128+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [32+r11], r8
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [48+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [40+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [32+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [24+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [16+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rbp, r9
+        mov r12, r8
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [8+128+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [40+r11], rcx
+        mov r9, r12
+        mov r8, rbp
+        mov rcx, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [56+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [48+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [40+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [32+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [24+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [16+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [8+128+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [48+r11], r8
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [64+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [56+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [48+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [40+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [32+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [24+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [16+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rbp, r9
+        mov r12, r8
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [8+128+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [56+r11], rcx
+        mov r9, r12
+        mov r8, rbp
+        mov rcx, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [72+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [64+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [56+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [48+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [40+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [32+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [24+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [16+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [-56+128+rsp]
+        mul qword ptr [8+128+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [64+r11], r8
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [80+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [72+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [64+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [56+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [48+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [40+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [32+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [24+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-56+128+rsp]
+        mul qword ptr [16+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rbp, r9
+        mov r12, r8
+        mov rax, qword ptr [-48+128+rsp]
+        mul qword ptr [8+128+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [72+r11], rcx
+        mov r9, r12
+        mov r8, rbp
+        mov rcx, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [88+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [80+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [72+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [64+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [56+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [48+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [40+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [32+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-56+128+rsp]
+        mul qword ptr [24+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-48+128+rsp]
+        mul qword ptr [16+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [-40+128+rsp]
+        mul qword ptr [8+128+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [80+r11], r8
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [96+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [88+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [80+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [72+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [64+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [56+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [48+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [40+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-56+128+rsp]
+        mul qword ptr [32+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-48+128+rsp]
+        mul qword ptr [24+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-40+128+rsp]
+        mul qword ptr [16+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rbp, r9
+        mov r12, r8
+        mov rax, qword ptr [-32+128+rsp]
+        mul qword ptr [8+128+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [88+r11], rcx
+        mov r9, r12
+        mov r8, rbp
+        mov rcx, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [104+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [96+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [88+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [80+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [72+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [64+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [56+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [48+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-56+128+rsp]
+        mul qword ptr [40+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-48+128+rsp]
+        mul qword ptr [32+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-40+128+rsp]
+        mul qword ptr [24+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-32+128+rsp]
+        mul qword ptr [16+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [-24+128+rsp]
+        mul qword ptr [8+128+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [96+r11], r8
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [112+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [104+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [96+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [88+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [80+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [72+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [64+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [56+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-56+128+rsp]
+        mul qword ptr [48+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-48+128+rsp]
+        mul qword ptr [40+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-40+128+rsp]
+        mul qword ptr [32+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-32+128+rsp]
+        mul qword ptr [24+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-24+128+rsp]
+        mul qword ptr [16+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rbp, r9
+        mov r12, r8
+        mov rax, qword ptr [-16+128+rsp]
+        mul qword ptr [8+128+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [104+r11], rcx
+        mov r9, r12
+        mov r8, rbp
+        mov rcx, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [120+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [112+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [104+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [96+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [88+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [80+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [72+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [64+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-56+128+rsp]
+        mul qword ptr [56+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-48+128+rsp]
+        mul qword ptr [48+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-40+128+rsp]
+        mul qword ptr [40+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-32+128+rsp]
+        mul qword ptr [32+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-24+128+rsp]
+        mul qword ptr [24+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-16+128+rsp]
+        mul qword ptr [16+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [-8+128+rsp]
+        mul qword ptr [8+128+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [112+r11], r8
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-120+128+rsp]
+        mul qword ptr [128+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [120+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [112+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [104+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [96+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [88+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [80+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [72+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-56+128+rsp]
+        mul qword ptr [64+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-48+128+rsp]
+        mul qword ptr [56+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-40+128+rsp]
+        mul qword ptr [48+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-32+128+rsp]
+        mul qword ptr [40+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-24+128+rsp]
+        mul qword ptr [32+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-16+128+rsp]
+        mul qword ptr [24+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-8+128+rsp]
+        mul qword ptr [16+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rbp, r9
+        mov r12, r8
+        mov rax, qword ptr [128+rsp]
+        mul qword ptr [8+128+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [120+r11], rcx
+        mov r9, r12
+        mov r8, rbp
+        mov rcx, r10
+        mov rax, qword ptr [-112+128+rsp]
+        mul qword ptr [128+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [120+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [112+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [104+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [96+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [88+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [80+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-56+128+rsp]
+        mul qword ptr [72+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-48+128+rsp]
+        mul qword ptr [64+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-40+128+rsp]
+        mul qword ptr [56+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-32+128+rsp]
+        mul qword ptr [48+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-24+128+rsp]
+        mul qword ptr [40+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-16+128+rsp]
+        mul qword ptr [32+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-8+128+rsp]
+        mul qword ptr [24+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [128+rsp]
+        mul qword ptr [16+128+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [128+r11], r8
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-104+128+rsp]
+        mul qword ptr [128+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [120+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [112+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [104+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [96+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [88+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-56+128+rsp]
+        mul qword ptr [80+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-48+128+rsp]
+        mul qword ptr [72+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-40+128+rsp]
+        mul qword ptr [64+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-32+128+rsp]
+        mul qword ptr [56+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-24+128+rsp]
+        mul qword ptr [48+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-16+128+rsp]
+        mul qword ptr [40+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-8+128+rsp]
+        mul qword ptr [32+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rbp, r9
+        mov r12, r8
+        mov rax, qword ptr [128+rsp]
+        mul qword ptr [24+128+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [136+r11], rcx
+        mov r9, r12
+        mov r8, rbp
+        mov rcx, r10
+        mov rax, qword ptr [-96+128+rsp]
+        mul qword ptr [128+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [120+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [112+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [104+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [96+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-56+128+rsp]
+        mul qword ptr [88+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-48+128+rsp]
+        mul qword ptr [80+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-40+128+rsp]
+        mul qword ptr [72+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-32+128+rsp]
+        mul qword ptr [64+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-24+128+rsp]
+        mul qword ptr [56+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-16+128+rsp]
+        mul qword ptr [48+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-8+128+rsp]
+        mul qword ptr [40+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [128+rsp]
+        mul qword ptr [32+128+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [144+r11], r8
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-88+128+rsp]
+        mul qword ptr [128+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [120+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [112+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [104+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-56+128+rsp]
+        mul qword ptr [96+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-48+128+rsp]
+        mul qword ptr [88+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-40+128+rsp]
+        mul qword ptr [80+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-32+128+rsp]
+        mul qword ptr [72+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-24+128+rsp]
+        mul qword ptr [64+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-16+128+rsp]
+        mul qword ptr [56+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-8+128+rsp]
+        mul qword ptr [48+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rbp, r9
+        mov r12, r8
+        mov rax, qword ptr [128+rsp]
+        mul qword ptr [40+128+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [152+r11], rcx
+        mov r9, r12
+        mov r8, rbp
+        mov rcx, r10
+        mov rax, qword ptr [-80+128+rsp]
+        mul qword ptr [128+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [120+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [112+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-56+128+rsp]
+        mul qword ptr [104+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-48+128+rsp]
+        mul qword ptr [96+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-40+128+rsp]
+        mul qword ptr [88+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-32+128+rsp]
+        mul qword ptr [80+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-24+128+rsp]
+        mul qword ptr [72+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-16+128+rsp]
+        mul qword ptr [64+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-8+128+rsp]
+        mul qword ptr [56+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [128+rsp]
+        mul qword ptr [48+128+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [160+r11], r8
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-72+128+rsp]
+        mul qword ptr [128+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [120+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-56+128+rsp]
+        mul qword ptr [112+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-48+128+rsp]
+        mul qword ptr [104+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-40+128+rsp]
+        mul qword ptr [96+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-32+128+rsp]
+        mul qword ptr [88+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-24+128+rsp]
+        mul qword ptr [80+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-16+128+rsp]
+        mul qword ptr [72+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-8+128+rsp]
+        mul qword ptr [64+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rbp, r9
+        mov r12, r8
+        mov rax, qword ptr [128+rsp]
+        mul qword ptr [56+128+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [168+r11], rcx
+        mov r9, r12
+        mov r8, rbp
+        mov rcx, r10
+        mov rax, qword ptr [-64+128+rsp]
+        mul qword ptr [128+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-56+128+rsp]
+        mul qword ptr [120+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-48+128+rsp]
+        mul qword ptr [112+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-40+128+rsp]
+        mul qword ptr [104+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-32+128+rsp]
+        mul qword ptr [96+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-24+128+rsp]
+        mul qword ptr [88+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-16+128+rsp]
+        mul qword ptr [80+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-8+128+rsp]
+        mul qword ptr [72+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [128+rsp]
+        mul qword ptr [64+128+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [176+r11], r8
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-56+128+rsp]
+        mul qword ptr [128+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-48+128+rsp]
+        mul qword ptr [120+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-40+128+rsp]
+        mul qword ptr [112+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-32+128+rsp]
+        mul qword ptr [104+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-24+128+rsp]
+        mul qword ptr [96+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-16+128+rsp]
+        mul qword ptr [88+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-8+128+rsp]
+        mul qword ptr [80+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rbp, r9
+        mov r12, r8
+        mov rax, qword ptr [128+rsp]
+        mul qword ptr [72+128+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [184+r11], rcx
+        mov r9, r12
+        mov r8, rbp
+        mov rcx, r10
+        mov rax, qword ptr [-48+128+rsp]
+        mul qword ptr [128+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-40+128+rsp]
+        mul qword ptr [120+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-32+128+rsp]
+        mul qword ptr [112+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-24+128+rsp]
+        mul qword ptr [104+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-16+128+rsp]
+        mul qword ptr [96+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-8+128+rsp]
+        mul qword ptr [88+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [128+rsp]
+        mul qword ptr [80+128+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [192+r11], r8
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-40+128+rsp]
+        mul qword ptr [128+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-32+128+rsp]
+        mul qword ptr [120+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-24+128+rsp]
+        mul qword ptr [112+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-16+128+rsp]
+        mul qword ptr [104+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-8+128+rsp]
+        mul qword ptr [96+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rbp, r9
+        mov r12, r8
+        mov rax, qword ptr [128+rsp]
+        mul qword ptr [88+128+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [200+r11], rcx
+        mov r9, r12
+        mov r8, rbp
+        mov rcx, r10
+        mov rax, qword ptr [-32+128+rsp]
+        mul qword ptr [128+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-24+128+rsp]
+        mul qword ptr [120+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-16+128+rsp]
+        mul qword ptr [112+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-8+128+rsp]
+        mul qword ptr [104+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [128+rsp]
+        mul qword ptr [96+128+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [208+r11], r8
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-24+128+rsp]
+        mul qword ptr [128+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-16+128+rsp]
+        mul qword ptr [120+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rax, qword ptr [-8+128+rsp]
+        mul qword ptr [112+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rbp, r9
+        mov r12, r8
+        mov rax, qword ptr [128+rsp]
+        mul qword ptr [104+128+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [216+r11], rcx
+        mov r9, r12
+        mov r8, rbp
+        mov rcx, r10
+        mov rax, qword ptr [-16+128+rsp]
+        mul qword ptr [128+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-8+128+rsp]
+        mul qword ptr [120+128+rsp]
+        add r8, rax
+        adc r9, rdx
+        adc rcx, 0
+        mov rbp, r9
+        mov r12, rcx
+        mov rax, qword ptr [128+rsp]
+        mul qword ptr [112+128+rsp]
+        add r8, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [224+r11], r8
+        mov r9, r12
+        mov rcx, rbp
+        mov r8, r10
+        mov rax, qword ptr [-8+128+rsp]
+        mul qword ptr [128+128+rsp]
+        add rcx, rax
+        adc r9, rdx
+        adc r8, 0
+        mov r12, r8
+        mov rbp, r9
+        mov rax, qword ptr [128+rsp]
+        mul qword ptr [120+128+rsp]
+        add rcx, rax
+        adc rbp, rdx
+        adc r12, 0
+        mov qword ptr [232+r11], rcx
+        mov r8, rbp
+        mov rcx, r12
+        mov rax, qword ptr [128+rsp]
+        mul qword ptr [128+128+rsp]
+        add r8, rax
+        adc rcx, rdx
+        adc r10, 0
+        mov qword ptr [240+r11], r8
+        mov esi, dword ptr [rsi]
+        xor esi, dword ptr [rdi]
+        test rcx, rcx
+        mov qword ptr [248+r11], rcx
+        mov dword ptr [8+rbx], 32
+        jne L76
+        ALIGN 16
+L84:
+        mov edx, dword ptr [8+rbx]
+        lea edi, dword ptr [-1+rdx]
+        test edi, edi
+        mov dword ptr [8+rbx], edi
+        je L76
+        lea eax, dword ptr [-2+rdx]
+        cmp dword ptr [r11+rax*8], 0
+        je L84
+L76:
+        mov edx, dword ptr [8+rbx]
+        xor r11d, r11d
+        test edx, edx
+        cmovne r11d, esi
+        mov dword ptr [rbx], r11d
+        add rsp, 136+128
+        pop rbx
+        pop rbp
+        pop r12
+
+        pop rsi
+        pop rdi
+
+        ret
+
+s_mp_mul_comba_16 ENDP
+
+; void s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C)
+
+
+        ALIGN 16
+s_mp_mul_comba_32 PROC ; a "FRAME" function
+
+        push rdi
+        push rsi
+
+        mov rdi, rcx
+        mov rsi, rdx
+        mov rdx, r8
+
+        push rbp
+        mov rbp, rsp
+        push r13
+        mov r13, rdx
+;        mov edx, 256
+        mov r8d, 256
+        push r12
+        mov r12, rsi
+        push rbx
+        mov rbx, rdi
+        sub rsp, 520+32			; +32 for "home" storage
+;        mov rsi, qword ptr [16+rdi]
+;        lea rdi, qword ptr [-544+rbp]
+        mov rdx, qword ptr [16+rdi]
+        lea rcx, qword ptr [-544+rbp]
+        call memcpy
+;        mov rsi, qword ptr [16+r12]
+;        lea rdi, qword ptr [-288+rbp]
+;        mov edx, 256
+        mov rdx, qword ptr [16+r12]
+        lea rcx, qword ptr [-288+rbp]
+        mov r8d, 256
+        call memcpy
+        mov r9, qword ptr [16+r13]
+        xor r8d, r8d
+        mov rsi, r8
+        mov rdi, r8
+        mov r10, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-288+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov qword ptr [r9], rsi
+        mov rsi, r10
+        mov r10, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-280+rbp]
+        add rdi, rax
+        adc rsi, rdx
+        adc r10, 0
+        mov r11, r10
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-288+rbp]
+        add rdi, rax
+        adc rsi, rdx
+        adc r11, 0
+        mov qword ptr [8+r9], rdi
+        mov rdi, r11
+        mov r11, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-272+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc r11, 0
+        mov rcx, r11
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-280+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-288+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [16+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-264+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-272+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-280+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-288+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [24+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-256+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-264+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-272+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-280+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-288+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [32+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-248+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-256+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-264+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-272+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-280+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-288+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [40+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-240+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-248+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-256+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-264+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-272+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-280+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-288+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [48+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-232+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-240+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-248+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-256+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-264+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-272+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-280+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-288+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [56+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-224+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-232+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-240+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-248+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-256+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-264+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-272+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-280+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-288+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [64+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-216+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-224+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-232+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-240+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-248+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-256+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-264+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-272+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-280+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-288+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [72+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-208+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-216+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-224+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-232+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-240+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-248+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-256+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-264+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-272+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-280+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-288+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [80+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-200+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-208+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-216+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-224+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-232+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-240+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-248+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-256+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-264+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-272+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-280+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-288+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [88+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-192+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-200+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-208+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-216+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-224+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-232+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-240+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-248+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-256+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-264+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-272+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-280+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-288+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [96+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-184+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-192+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-200+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-208+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-216+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-224+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-232+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-240+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-248+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-256+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-264+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-272+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-280+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-288+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [104+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-176+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-184+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-192+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-200+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-208+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-216+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-224+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-232+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-240+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-248+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-256+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-264+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-272+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-280+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-288+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [112+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-168+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-176+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-184+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-192+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-200+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-208+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-216+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-224+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-232+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-240+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-248+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-256+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-264+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-272+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-280+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-288+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [120+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-160+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-168+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-176+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-184+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-192+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-200+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-208+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-216+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-224+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-232+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-240+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-248+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-256+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-264+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-272+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-280+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-288+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [128+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-152+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-160+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-168+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-176+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-184+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-192+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-200+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-208+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-216+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-224+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-232+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-240+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-248+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-256+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-264+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-272+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-280+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-288+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [136+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-144+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-152+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-160+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-168+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-176+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-184+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-192+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-200+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-208+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-216+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-224+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-232+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-240+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-248+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-256+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-264+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-272+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-280+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-288+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [144+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-136+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-144+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-152+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-160+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-168+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-176+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-184+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-192+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-200+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-208+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-216+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-224+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-232+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-240+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-248+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-256+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-264+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-272+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-280+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-288+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [152+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-128+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-136+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-144+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-152+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-160+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-168+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-176+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-184+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-192+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-200+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-208+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-216+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-224+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-232+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-240+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-248+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-256+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-264+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-272+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-280+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-288+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [160+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-120+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-128+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-136+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-144+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-152+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-160+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-168+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-176+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-184+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-192+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-200+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-208+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-216+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-224+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-232+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-240+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-248+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-256+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-264+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-272+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-280+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-288+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [168+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-112+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-120+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-128+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-136+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-144+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-152+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-160+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-168+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-176+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-184+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-192+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-200+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-208+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-216+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-224+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-232+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-240+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-248+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-256+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-264+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-272+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-280+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-288+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [176+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-104+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-112+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-120+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-128+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-136+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-144+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-152+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-160+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-168+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-176+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-184+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-192+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-200+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-208+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-216+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-224+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-232+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-240+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-248+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-256+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-264+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-272+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-280+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-288+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [184+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-96+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-104+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-112+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-120+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-128+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-136+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-144+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-152+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-160+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-168+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-176+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-184+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-192+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-200+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-208+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-216+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-224+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-232+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-240+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-248+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-256+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-264+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-272+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-280+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-288+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [192+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-88+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-96+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-104+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-112+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-120+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-128+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-136+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-144+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-152+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-160+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-168+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-176+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-184+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-192+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-200+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-208+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-216+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-224+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-232+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-240+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-248+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-256+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-264+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-272+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-280+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-288+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [200+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-80+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-88+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-96+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-104+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-112+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-120+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-128+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-136+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-144+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-152+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-160+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-168+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-176+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-184+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-192+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-200+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-208+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-216+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-224+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-232+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-240+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-248+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-256+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-264+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-272+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-280+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-288+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [208+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-72+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-80+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-88+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-96+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-104+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-112+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-120+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-128+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-136+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-144+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-152+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-160+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-168+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-176+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-184+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-192+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-200+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-208+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-216+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-224+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-232+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-240+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-248+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-256+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-264+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-272+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-280+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-288+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [216+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-64+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-72+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-80+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-88+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-96+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-104+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-112+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-120+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-128+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-136+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-144+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-152+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-160+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-168+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-176+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-184+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-192+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-200+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-208+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-216+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-224+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-232+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-240+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-248+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-256+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-264+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-272+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-280+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-288+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [224+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-56+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-64+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-72+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-80+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-88+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-96+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-104+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-112+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-120+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-128+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-136+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-144+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-152+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-160+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-168+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-176+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-184+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-192+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-200+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-208+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-216+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-224+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-232+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-240+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-248+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-256+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-264+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-272+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-280+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-288+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [232+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-48+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-56+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-64+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-72+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-80+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-88+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-96+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-104+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-112+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-120+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-128+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-136+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-144+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-152+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-160+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-168+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-176+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-184+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-192+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-200+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-208+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-216+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-224+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-232+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-240+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-248+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-256+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-264+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-272+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-280+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-288+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [240+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-544+rbp]
+        mul qword ptr [-40+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-48+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-56+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-64+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-72+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-80+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-88+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-96+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-104+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-112+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-120+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-128+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-136+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-144+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-152+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-160+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-168+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-176+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-184+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-192+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-200+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-208+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-216+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-224+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-232+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-240+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-248+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-256+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-264+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-272+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-280+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-288+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [248+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-536+rbp]
+        mul qword ptr [-40+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-48+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-56+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-64+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-72+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-80+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-88+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-96+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-104+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-112+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-120+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-128+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-136+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-144+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-152+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-160+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-168+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-176+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-184+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-192+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-200+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-208+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-216+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-224+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-232+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-240+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-248+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-256+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-264+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-272+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-280+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [256+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-528+rbp]
+        mul qword ptr [-40+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-48+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-56+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-64+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-72+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-80+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-88+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-96+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-104+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-112+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-120+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-128+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-136+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-144+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-152+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-160+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-168+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-176+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-184+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-192+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-200+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-208+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-216+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-224+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-232+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-240+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-248+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-256+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-264+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-272+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [264+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-520+rbp]
+        mul qword ptr [-40+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-48+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-56+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-64+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-72+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-80+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-88+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-96+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-104+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-112+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-120+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-128+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-136+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-144+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-152+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-160+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-168+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-176+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-184+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-192+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-200+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-208+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-216+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-224+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-232+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-240+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-248+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-256+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-264+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [272+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-512+rbp]
+        mul qword ptr [-40+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-48+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-56+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-64+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-72+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-80+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-88+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-96+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-104+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-112+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-120+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-128+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-136+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-144+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-152+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-160+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-168+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-176+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-184+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-192+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-200+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-208+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-216+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-224+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-232+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-240+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-248+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-256+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [280+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-504+rbp]
+        mul qword ptr [-40+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-48+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-56+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-64+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-72+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-80+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-88+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-96+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-104+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-112+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-120+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-128+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-136+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-144+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-152+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-160+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-168+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-176+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-184+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-192+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-200+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-208+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-216+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-224+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-232+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-240+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-248+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [288+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-496+rbp]
+        mul qword ptr [-40+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-48+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-56+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-64+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-72+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-80+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-88+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-96+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-104+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-112+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-120+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-128+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-136+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-144+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-152+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-160+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-168+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-176+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-184+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-192+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-200+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-208+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-216+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-224+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-232+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-240+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [296+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-488+rbp]
+        mul qword ptr [-40+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-48+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-56+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-64+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-72+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-80+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-88+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-96+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-104+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-112+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-120+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-128+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-136+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-144+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-152+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-160+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-168+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-176+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-184+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-192+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-200+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-208+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-216+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-224+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-232+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [304+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-480+rbp]
+        mul qword ptr [-40+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-48+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-56+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-64+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-72+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-80+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-88+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-96+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-104+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-112+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-120+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-128+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-136+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-144+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-152+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-160+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-168+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-176+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-184+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-192+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-200+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-208+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-216+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-224+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [312+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-472+rbp]
+        mul qword ptr [-40+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-48+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-56+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-64+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-72+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-80+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-88+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-96+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-104+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-112+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-120+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-128+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-136+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-144+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-152+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-160+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-168+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-176+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-184+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-192+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-200+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-208+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-216+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [320+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-464+rbp]
+        mul qword ptr [-40+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-48+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-56+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-64+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-72+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-80+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-88+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-96+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-104+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-112+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-120+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-128+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-136+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-144+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-152+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-160+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-168+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-176+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-184+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-192+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-200+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-208+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [328+r9], rcx
+        mov rdi, r11
+        mov r11, r10
+        mov r10, r8
+        mov rax, qword ptr [-456+rbp]
+        mul qword ptr [-40+rbp]
+        add r11, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-48+rbp]
+        add r11, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-56+rbp]
+        add r11, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-64+rbp]
+        add r11, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-72+rbp]
+        add r11, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-80+rbp]
+        add r11, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-88+rbp]
+        add r11, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-96+rbp]
+        add r11, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-104+rbp]
+        add r11, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-112+rbp]
+        add r11, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-120+rbp]
+        add r11, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-128+rbp]
+        add r11, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-136+rbp]
+        add r11, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-144+rbp]
+        add r11, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-152+rbp]
+        add r11, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-160+rbp]
+        add r11, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-168+rbp]
+        add r11, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-176+rbp]
+        add r11, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-184+rbp]
+        add r11, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-192+rbp]
+        add r11, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-200+rbp]
+        add r11, rax
+        adc rdi, rdx
+        adc r10, 0
+        mov qword ptr [336+r9], r11
+        mov rsi, r10
+        mov r10, r8
+        mov rax, qword ptr [-448+rbp]
+        mul qword ptr [-40+rbp]
+        add rdi, rax
+        adc rsi, rdx
+        adc r10, 0
+        mov rcx, r10
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-48+rbp]
+        add rdi, rax
+        adc rsi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-56+rbp]
+        add rdi, rax
+        adc rsi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-64+rbp]
+        add rdi, rax
+        adc rsi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-72+rbp]
+        add rdi, rax
+        adc rsi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-80+rbp]
+        add rdi, rax
+        adc rsi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-88+rbp]
+        add rdi, rax
+        adc rsi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-96+rbp]
+        add rdi, rax
+        adc rsi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-104+rbp]
+        add rdi, rax
+        adc rsi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-112+rbp]
+        add rdi, rax
+        adc rsi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-120+rbp]
+        add rdi, rax
+        adc rsi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-128+rbp]
+        add rdi, rax
+        adc rsi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-136+rbp]
+        add rdi, rax
+        adc rsi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-144+rbp]
+        add rdi, rax
+        adc rsi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-152+rbp]
+        add rdi, rax
+        adc rsi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-160+rbp]
+        add rdi, rax
+        adc rsi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-168+rbp]
+        add rdi, rax
+        adc rsi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-176+rbp]
+        add rdi, rax
+        adc rsi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-184+rbp]
+        add rdi, rax
+        adc rsi, rdx
+        adc rcx, 0
+        mov r11, rsi
+        mov r10, rcx
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-192+rbp]
+        add rdi, rax
+        adc r11, rdx
+        adc r10, 0
+        mov qword ptr [344+r9], rdi
+        mov rcx, r11
+        mov rdi, r10
+        mov r11, r8
+        mov rax, qword ptr [-440+rbp]
+        mul qword ptr [-40+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc r11, 0
+        mov rsi, r11
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-48+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-56+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-64+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-72+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-80+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-88+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-96+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-104+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-112+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-120+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-128+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-136+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-144+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-152+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-160+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-168+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-176+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-184+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [352+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-432+rbp]
+        mul qword ptr [-40+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-48+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-56+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-64+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-72+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-80+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-88+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-96+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-104+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-112+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-120+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-128+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-136+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-144+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-152+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-160+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-168+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-176+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [360+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-424+rbp]
+        mul qword ptr [-40+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-48+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-56+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-64+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-72+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-80+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-88+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-96+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-104+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-112+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-120+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-128+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-136+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-144+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-152+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-160+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-168+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [368+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-416+rbp]
+        mul qword ptr [-40+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-48+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-56+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-64+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-72+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-80+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-88+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-96+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-104+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-112+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-120+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-128+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-136+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-144+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-152+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-160+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [376+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-408+rbp]
+        mul qword ptr [-40+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-48+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-56+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-64+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-72+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-80+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-88+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-96+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-104+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-112+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-120+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-128+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-136+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-144+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-152+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [384+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-400+rbp]
+        mul qword ptr [-40+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-48+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-56+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-64+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-72+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-80+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-88+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-96+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-104+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-112+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-120+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-128+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-136+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-144+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [392+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-392+rbp]
+        mul qword ptr [-40+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-48+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-56+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-64+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-72+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-80+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-88+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-96+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-104+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-112+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-120+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-128+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-136+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [400+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-384+rbp]
+        mul qword ptr [-40+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-48+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-56+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-64+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-72+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-80+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-88+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-96+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-104+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-112+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-120+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-128+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [408+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-376+rbp]
+        mul qword ptr [-40+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-48+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-56+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-64+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-72+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-80+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-88+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-96+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-104+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-112+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-120+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [416+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-368+rbp]
+        mul qword ptr [-40+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-48+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-56+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-64+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-72+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-80+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-88+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-96+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-104+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-112+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [424+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-360+rbp]
+        mul qword ptr [-40+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-48+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-56+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-64+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-72+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-80+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-88+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-96+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-104+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [432+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-352+rbp]
+        mul qword ptr [-40+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-48+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-56+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-64+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-72+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-80+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-88+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-96+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [440+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-344+rbp]
+        mul qword ptr [-40+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-48+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-56+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-64+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-72+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-80+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-88+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [448+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-336+rbp]
+        mul qword ptr [-40+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-48+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-56+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-64+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-72+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-80+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [456+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-328+rbp]
+        mul qword ptr [-40+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-48+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-56+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-64+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-72+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [464+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-320+rbp]
+        mul qword ptr [-40+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-48+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-56+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r10, rdi
+        mov r11, rcx
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-64+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [472+r9], rsi
+        mov rdi, r11
+        mov rcx, r10
+        mov rsi, r8
+        mov rax, qword ptr [-312+rbp]
+        mul qword ptr [-40+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-48+rbp]
+        add rcx, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r10, rdi
+        mov r11, rsi
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-56+rbp]
+        add rcx, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [480+r9], rcx
+        mov rdi, r11
+        mov rsi, r10
+        mov rcx, r8
+        mov rax, qword ptr [-304+rbp]
+        mul qword ptr [-40+rbp]
+        add rsi, rax
+        adc rdi, rdx
+        adc rcx, 0
+        mov r11, rcx
+        mov r10, rdi
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-48+rbp]
+        add rsi, rax
+        adc r10, rdx
+        adc r11, 0
+        mov qword ptr [488+r9], rsi
+        mov rcx, r10
+        mov rsi, r11
+        mov rax, qword ptr [-296+rbp]
+        mul qword ptr [-40+rbp]
+        add rcx, rax
+        adc rsi, rdx
+        adc r8, 0
+        mov qword ptr [496+r9], rcx
+        mov ecx, dword ptr [r12]
+        xor ecx, dword ptr [rbx]
+        test rsi, rsi
+        mov qword ptr [504+r9], rsi
+        mov dword ptr [8+r13], 64
+        jne L149
+        ALIGN 16
+L157:
+        mov edx, dword ptr [8+r13]
+        lea ebx, dword ptr [-1+rdx]
+        test ebx, ebx
+        mov dword ptr [8+r13], ebx
+        je L149
+        lea r12d, dword ptr [-2+rdx]
+        cmp dword ptr [r9+r12*8], 0
+        je L157
+L149:
+        mov r9d, dword ptr [8+r13]
+        xor edx, edx
+        test r9d, r9d
+        cmovne edx, ecx
+        mov dword ptr [r13], edx
+        add rsp, 520+32			; +32 for "home" storage
+        pop rbx
+        pop r12
+        pop r13
+        pop rbp
+        pop rsi
+        pop rdi
+
+        ret
+
+s_mp_mul_comba_32 ENDP
+
+
+; void s_mp_sqr_comba_4(const mp_int *A, mp_int *B);
+; Computes B = A * A for a 4-digit A (64-bit digits): writes 8 digits, then trims leading zero digits.
+        ALIGN 16
+s_mp_sqr_comba_4 PROC
+        ; In: rcx = A, rdx = B.  rdi, rsi, rbp and rbx are saved here and restored before ret.
+        push rdi
+        push rsi
+
+        mov rdi, rcx                    ; rdi = A
+        mov rsi, rdx                    ; rsi = B
+
+        push rbp
+        push rbx
+        sub rsp, 80                     ; scratch: result digits 0..7 live at [8+rsp]..[64+rsp]
+        mov r11, rsi                    ; r11 = B from here on
+        xor esi, esi                    ; rsi = 0, used to clear accumulator registers
+        mov r10, rsi
+        mov rbp, rsi
+        mov r8, rsi
+        mov rbx, rsi
+        mov rcx, qword ptr [16+rdi]     ; rcx = A's digit array (a0 at [rcx], a1 at [8+rcx], ...)
+        mov rdi, rsi
+        mov rax, qword ptr [rcx]        ; column 0: a0*a0
+        mul rax
+        add r10, rax
+        adc rbx, rdx
+        adc rdi, 0
+        mov qword ptr [-72+80+rsp], r10 ; result digit 0 (r10 keeps it until the copy-out)
+        mov rax, qword ptr [rcx]        ; column 1: a0*a1, accumulated twice
+        mul qword ptr [8+rcx]
+        add rbx, rax
+        adc rdi, rdx
+        adc rbp, 0
+        add rbx, rax
+        adc rdi, rdx
+        adc rbp, 0
+        mov qword ptr [-64+80+rsp], rbx ; result digit 1
+        mov rax, qword ptr [rcx]        ; column 2: a0*a2 twice, then a1*a1 once
+        mul qword ptr [16+rcx]
+        add rdi, rax
+        adc rbp, rdx
+        adc r8, 0
+        add rdi, rax
+        adc rbp, rdx
+        adc r8, 0
+        mov rbx, rbp
+        mov rbp, r8
+        mov rax, qword ptr [8+rcx]
+        mul rax
+        add rdi, rax
+        adc rbx, rdx
+        adc rbp, 0
+        mov qword ptr [-56+80+rsp], rdi ; result digit 2
+        mov r9, rbp
+        mov r8, rbx
+        mov rdi, rsi
+        mov rax, qword ptr [rcx]        ; column 3: a0*a3 twice, a1*a2 twice
+        mul qword ptr [24+rcx]
+        add r8, rax
+        adc r9, rdx
+        adc rdi, 0
+        add r8, rax
+        adc r9, rdx
+        adc rdi, 0
+        mov rbx, r9
+        mov rbp, rdi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [16+rcx]
+        add r8, rax
+        adc rbx, rdx
+        adc rbp, 0
+        add r8, rax
+        adc rbx, rdx
+        adc rbp, 0
+        mov qword ptr [-48+80+rsp], r8  ; result digit 3
+        mov r9, rbp
+        mov rdi, rbx
+        mov r8, rsi
+        mov dword ptr [8+r11], 8        ; B's digit count = 8 (trimmed after the copy-out)
+        mov dword ptr [r11], 0          ; clear dword at offset 0 of B (presumably the sign field)
+        mov rax, qword ptr [8+rcx]      ; column 4: a1*a3 twice, then a2*a2 once
+        mul qword ptr [24+rcx]
+        add rdi, rax
+        adc r9, rdx
+        adc r8, 0
+        add rdi, rax
+        adc r9, rdx
+        adc r8, 0
+        mov rbx, r9
+        mov rbp, r8
+        mov rax, qword ptr [16+rcx]
+        mul rax
+        add rdi, rax
+        adc rbx, rdx
+        adc rbp, 0
+        mov rax, rbp
+        mov qword ptr [-40+80+rsp], rdi ; result digit 4
+        mov rbp, rbx
+        mov rdi, rax
+        mov rbx, rsi
+        mov rax, qword ptr [16+rcx]     ; column 5: a2*a3, accumulated twice
+        mul qword ptr [24+rcx]
+        add rbp, rax
+        adc rdi, rdx
+        adc rbx, 0
+        add rbp, rax
+        adc rdi, rdx
+        adc rbx, 0
+        mov qword ptr [-32+80+rsp], rbp ; result digit 5
+        mov r9, rbx
+        mov rax, qword ptr [24+rcx]     ; column 6: a3*a3; what carries out of it is digit 7
+        mul rax
+        add rdi, rax
+        adc r9, rdx
+        adc rsi, 0
+        mov rdx, qword ptr [16+r11]     ; rdx = B's digit array; A's digits are not read past this point
+        mov qword ptr [-24+80+rsp], rdi ; result digit 6
+        mov qword ptr [-16+80+rsp], r9  ; result digit 7
+        mov qword ptr [rdx], r10        ; copy the staged digits 0..7 out to B
+        mov r8, qword ptr [-64+80+rsp]
+        mov qword ptr [8+rdx], r8
+        mov rbp, qword ptr [-56+80+rsp]
+        mov qword ptr [16+rdx], rbp
+        mov rdi, qword ptr [-48+80+rsp]
+        mov qword ptr [24+rdx], rdi
+        mov rsi, qword ptr [-40+80+rsp]
+        mov qword ptr [32+rdx], rsi
+        mov rbx, qword ptr [-32+80+rsp]
+        mov qword ptr [40+rdx], rbx
+        mov rcx, qword ptr [-24+80+rsp]
+        mov qword ptr [48+rdx], rcx
+        mov rax, qword ptr [-16+80+rsp]
+        mov qword ptr [56+rdx], rax
+        mov edx, dword ptr [8+r11]      ; trim: edx = current digit count
+        test edx, edx
+        je L168
+        lea ecx, dword ptr [-1+rdx]     ; ecx = index of the top digit
+        mov rsi, qword ptr [16+r11]
+        mov r10d, ecx
+        cmp qword ptr [rsi+r10*8], 0    ; test the whole 64-bit digit; a 32-bit compare misses digits with only high bits set
+        jne L166
+        mov edx, ecx
+        ALIGN 16
+L167:
+        test edx, edx
+        mov ecx, edx                    ; ecx = candidate digit count (mov leaves the flags from test intact)
+        je L171
+        dec edx
+        mov eax, edx
+        cmp qword ptr [rsi+rax*8], 0    ; full 64-bit compare, same reason as above
+        je L167
+        mov dword ptr [8+r11], ecx      ; store the trimmed digit count
+        mov edx, ecx
+L166:
+        test edx, edx
+        je L168
+        mov eax, dword ptr [r11]        ; non-zero count: keep the dword at offset 0 as it is
+        jmp L169
+
+L171:
+        mov dword ptr [8+r11], edx      ; every digit was zero: digit count becomes 0
+L168:
+        xor eax, eax                    ; zero count: dword at offset 0 is forced to 0
+L169:
+        add rsp, 80
+        pop rbx
+        pop rbp
+        mov dword ptr [r11], eax
+
+        pop rsi
+        pop rdi
+
+        ret
+
+s_mp_sqr_comba_4 ENDP
+
+
+; void s_mp_sqr_comba_8(const mp_int *A, mp_int *B);
+; Computes B = A * A for an 8-digit A (64-bit digits): writes 16 digits, then trims leading zero digits.
+        ALIGN 16
+s_mp_sqr_comba_8 PROC
+        ; In: rcx = A, rdx = B.  rdi, rsi, rbp, rbx, r12, r13 and r14 are saved here and restored before ret.
+        push rdi
+        push rsi
+
+        mov rdi, rcx                    ; rdi = A
+        mov rsi, rdx                    ; rsi = B
+        mov rdx, r8                     ; no effect on the result: rdx is overwritten by the first mul
+        mov rcx, r9                     ; no effect on the result: rcx is reloaded with A's digit array below
+
+        push r14
+        xor r9d, r9d                    ; r9 = 0, used to clear accumulator registers
+        mov r14, r9
+        mov r10, r9
+        push r13
+        mov r13, r9
+        push r12
+        mov r12, r9
+        push rbp
+        mov rbp, rsi                    ; rbp = B from here on
+        mov rsi, r9
+        push rbx
+        mov rbx, r9
+        sub rsp, 8+128                  ; scratch: result digits 0..15 live at [8+rsp]..[128+rsp]
+        mov rcx, qword ptr [16+rdi]     ; rcx = A's digit array (a0 at [rcx], a1 at [8+rcx], ...)
+        mov rax, qword ptr [rcx]        ; column 0: a0*a0
+        mul rax
+        add r14, rax
+        adc rbx, rdx
+        adc r12, 0
+        mov qword ptr [-120+128+rsp], r14       ; result digit 0 (r14 keeps it until the copy-out)
+        mov rax, qword ptr [rcx]        ; column 1: a0*a1, accumulated twice
+        mul qword ptr [8+rcx]
+        add rbx, rax
+        adc r12, rdx
+        adc r10, 0
+        add rbx, rax
+        adc r12, rdx
+        adc r10, 0
+        mov qword ptr [-112+128+rsp], rbx       ; result digit 1
+        mov rax, qword ptr [rcx]        ; column 2: a0*a2 twice, then a1*a1 once
+        mul qword ptr [16+rcx]
+        add r12, rax
+        adc r10, rdx
+        adc r13, 0
+        add r12, rax
+        adc r10, rdx
+        adc r13, 0
+        mov rbx, r10
+        mov r10, r13
+        mov r13, r9
+        mov rax, qword ptr [8+rcx]
+        mul rax
+        add r12, rax
+        adc rbx, rdx
+        adc r10, 0
+        mov qword ptr [-104+128+rsp], r12       ; result digit 2
+        mov rdi, r10
+        mov r11, rbx
+        mov rax, qword ptr [rcx]        ; column 3: a0*a3 twice, a1*a2 twice
+        mul qword ptr [24+rcx]
+        add r11, rax
+        adc rdi, rdx
+        adc rsi, 0
+        add r11, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rbx, rdi
+        mov r10, rsi
+        mov rdi, r9
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [16+rcx]
+        add r11, rax
+        adc rbx, rdx
+        adc r10, 0
+        add r11, rax
+        adc rbx, rdx
+        adc r10, 0
+        mov rsi, r9
+        mov qword ptr [-96+128+rsp], r11        ; result digit 3
+        mov r8, r10
+        mov r12, rbx
+        mov r11, r9
+        mov rax, qword ptr [rcx]        ; column 4: a0*a4 twice, a1*a3 twice, then a2*a2 once
+        mul qword ptr [32+rcx]
+        add r12, rax
+        adc r8, rdx
+        adc r13, 0
+        add r12, rax
+        adc r8, rdx
+        adc r13, 0
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [24+rcx]
+        add r12, rax
+        adc r8, rdx
+        adc r13, 0
+        add r12, rax
+        adc r8, rdx
+        adc r13, 0
+        mov rbx, r8
+        mov r10, r13
+        mov r8, r9
+        mov rax, qword ptr [16+rcx]
+        mul rax
+        add r12, rax
+        adc rbx, rdx
+        adc r10, 0
+        mov qword ptr [-88+128+rsp], r12        ; result digit 4
+        mov rax, qword ptr [rcx]        ; column 5: sum a0*a5 + a1*a4 + a2*a3 in rsi:rdi:r8 ...
+        mul qword ptr [40+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [32+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [24+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        add rbx, r8                     ; ... then add that sum to the running total twice
+        adc r10, rdi
+        adc r11, rsi
+        add rbx, r8
+        adc r10, rdi
+        adc r11, rsi
+        mov qword ptr [-80+128+rsp], rbx        ; result digit 5
+        mov rax, qword ptr [rcx]        ; column 6: (a0*a6 + a1*a5 + a2*a4) twice, then a3*a3 once
+        mul qword ptr [48+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [40+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [32+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, r9
+        mov r13, rdi
+        mov r12, rsi
+        add r10, r8
+        adc r11, r13
+        adc rax, r12
+        add r10, r8
+        adc r11, r13
+        adc rax, r12
+        mov rdx, rax
+        mov rbx, r11
+        mov rdi, r13
+        mov r11, rdx
+        mov rsi, r12
+        mov rax, qword ptr [24+rcx]
+        mul rax
+        add r10, rax
+        adc rbx, rdx
+        adc r11, 0
+        mov qword ptr [-72+128+rsp], r10        ; result digit 6
+        mov r10, r11
+        mov rax, qword ptr [rcx]        ; column 7: (a0*a7 + a1*a6 + a2*a5 + a3*a4) twice
+        mul qword ptr [56+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [48+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [40+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [32+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, r9
+        add rbx, r8
+        adc r10, rdi
+        adc rax, rsi
+        add rbx, r8
+        adc r10, rdi
+        adc rax, rsi
+        mov qword ptr [-64+128+rsp], rbx        ; result digit 7
+        mov r11, rax
+        mov rbx, r9
+        mov rax, qword ptr [8+rcx]      ; column 8: (a1*a7 + a2*a6 + a3*a5) twice, then a4*a4 once
+        mul qword ptr [56+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [48+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [40+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r13, rdi
+        mov r12, rsi
+        add r10, r8
+        adc r11, r13
+        adc rbx, r12
+        add r10, r8
+        adc r11, r13
+        adc rbx, r12
+        mov rsi, rbx
+        mov rdi, r13
+        mov rbx, r11
+        mov r13, r12
+        mov r11, rsi
+        mov rax, qword ptr [32+rcx]
+        mul rax
+        add r10, rax
+        adc rbx, rdx
+        adc r11, 0
+        mov qword ptr [-56+128+rsp], r10        ; result digit 8
+        mov r10, r9
+        mov rax, qword ptr [16+rcx]     ; column 9: (a2*a7 + a3*a6 + a4*a5) twice
+        mul qword ptr [56+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor r13, r13
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [48+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc r13, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [40+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc r13, 0
+        mov r12, rdi
+        mov rax, r13
+        add rbx, r8
+        adc r11, r12
+        adc r10, rax
+        add rbx, r8
+        adc r11, r12
+        adc r10, rax
+        mov qword ptr [-48+128+rsp], rbx        ; result digit 9
+        mov r12, r11
+        mov rsi, r10
+        mov rbx, r9
+        mov r11, r9
+        mov rax, qword ptr [24+rcx]     ; column 10: a3*a7 twice, a4*a6 twice, then a5*a5 once
+        mul qword ptr [56+rcx]
+        add r12, rax
+        adc rsi, rdx
+        adc rbx, 0
+        add r12, rax
+        adc rsi, rdx
+        adc rbx, 0
+        mov r13, rbx
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [48+rcx]
+        add r12, rax
+        adc rsi, rdx
+        adc r13, 0
+        add r12, rax
+        adc rsi, rdx
+        adc r13, 0
+        mov r10, rsi
+        mov rbx, r13
+        mov r13, r9
+        mov rax, qword ptr [40+rcx]
+        mul rax
+        add r12, rax
+        adc r10, rdx
+        adc rbx, 0
+        mov qword ptr [-40+128+rsp], r12        ; result digit 10
+        mov r8, rbx
+        mov rdi, r10
+        mov rax, qword ptr [32+rcx]     ; column 11: a4*a7 twice, a5*a6 twice
+        mul qword ptr [56+rcx]
+        add rdi, rax
+        adc r8, rdx
+        adc r11, 0
+        add rdi, rax
+        adc r8, rdx
+        adc r11, 0
+        mov r10, r8
+        mov rbx, r11
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [48+rcx]
+        add rdi, rax
+        adc r10, rdx
+        adc rbx, 0
+        add rdi, rax
+        adc r10, rdx
+        adc rbx, 0
+        mov qword ptr [-32+128+rsp], rdi        ; result digit 11
+        mov rsi, rbx
+        mov r12, r10
+        mov rax, qword ptr [40+rcx]     ; column 12: a5*a7 twice, then a6*a6 once
+        mul qword ptr [56+rcx]
+        add r12, rax
+        adc rsi, rdx
+        adc r13, 0
+        add r12, rax
+        adc rsi, rdx
+        adc r13, 0
+        mov r10, rsi
+        mov rbx, r13
+        mov rax, qword ptr [48+rcx]
+        mul rax
+        add r12, rax
+        adc r10, rdx
+        adc rbx, 0
+        mov qword ptr [-24+128+rsp], r12        ; result digit 12
+        mov rdi, r10
+        mov rsi, rbx
+        mov r10, r9
+        mov dword ptr [8+rbp], 16       ; B's digit count = 16 (trimmed after the copy-out)
+        mov dword ptr [rbp], 0          ; clear dword at offset 0 of B (presumably the sign field)
+        mov rax, qword ptr [48+rcx]     ; column 13: a6*a7, accumulated twice
+        mul qword ptr [56+rcx]
+        add rdi, rax
+        adc rsi, rdx
+        adc r10, 0
+        add rdi, rax
+        adc rsi, rdx
+        adc r10, 0
+        mov qword ptr [-16+128+rsp], rdi        ; result digit 13
+        mov r8, r10
+        mov rax, qword ptr [56+rcx]     ; column 14: a7*a7; what carries out of it is digit 15
+        mul rax
+        add rsi, rax
+        adc r8, rdx
+        adc r9, 0
+        mov rax, qword ptr [16+rbp]     ; rax = B's digit array; A's digits are not read past this point
+        mov qword ptr [-8+128+rsp], rsi ; result digit 14
+        mov qword ptr [128+rsp], r8     ; result digit 15
+        mov qword ptr [rax], r14        ; copy the staged digits 0..15 out to B
+        mov rbx, qword ptr [-112+128+rsp]
+        mov qword ptr [8+rax], rbx
+        mov rcx, qword ptr [-104+128+rsp]
+        mov qword ptr [16+rax], rcx
+        mov rdx, qword ptr [-96+128+rsp]
+        mov qword ptr [24+rax], rdx
+        mov r14, qword ptr [-88+128+rsp]
+        mov qword ptr [32+rax], r14
+        mov r13, qword ptr [-80+128+rsp]
+        mov qword ptr [40+rax], r13
+        mov r12, qword ptr [-72+128+rsp]
+        mov qword ptr [48+rax], r12
+        mov r11, qword ptr [-64+128+rsp]
+        mov qword ptr [56+rax], r11
+        mov r10, qword ptr [-56+128+rsp]
+        mov qword ptr [64+rax], r10
+        mov r9, qword ptr [-48+128+rsp]
+        mov qword ptr [72+rax], r9
+        mov r8, qword ptr [-40+128+rsp]
+        mov qword ptr [80+rax], r8
+        mov rdi, qword ptr [-32+128+rsp]
+        mov qword ptr [88+rax], rdi
+        mov rsi, qword ptr [-24+128+rsp]
+        mov qword ptr [96+rax], rsi
+        mov rbx, qword ptr [-16+128+rsp]
+        mov qword ptr [104+rax], rbx
+        mov rcx, qword ptr [-8+128+rsp]
+        mov qword ptr [112+rax], rcx
+        mov rdx, qword ptr [128+rsp]
+        mov qword ptr [120+rax], rdx
+        mov edx, dword ptr [8+rbp]      ; trim: edx = current digit count
+        test edx, edx
+        je L192
+        lea ecx, dword ptr [-1+rdx]     ; ecx = index of the top digit
+        mov rsi, qword ptr [16+rbp]
+        mov r14d, ecx
+        cmp qword ptr [rsi+r14*8], 0    ; test the whole 64-bit digit; a 32-bit compare misses digits with only high bits set
+        jne L190
+        mov edx, ecx
+        ALIGN 16
+L191:
+        test edx, edx
+        mov ecx, edx                    ; ecx = candidate digit count (mov leaves the flags from test intact)
+        je L195
+        dec edx
+        mov r9d, edx
+        cmp qword ptr [rsi+r9*8], 0     ; full 64-bit compare, same reason as above
+        je L191
+        mov dword ptr [8+rbp], ecx      ; store the trimmed digit count
+        mov edx, ecx
+L190:
+        test edx, edx
+        je L192
+        mov eax, dword ptr [rbp]        ; non-zero count: keep the dword at offset 0 as it is
+        jmp L193
+
+L195:
+        mov dword ptr [8+rbp], edx      ; every digit was zero: digit count becomes 0
+L192:
+        xor eax, eax                    ; zero count: dword at offset 0 is forced to 0
+L193:
+        mov dword ptr [rbp], eax
+        add rsp, 8+128
+        pop rbx
+        pop rbp
+        pop r12
+        pop r13
+        pop r14
+
+        pop rsi
+        pop rdi
+
+        ret
+
+s_mp_sqr_comba_8 ENDP
+
+
+; void s_mp_sqr_comba_16(const mp_int *A, mp_int *B)
+
+        ALIGN 16
+s_mp_sqr_comba_16 PROC ; A "FRAME" function
+
+        push rdi
+        push rsi
+
+        mov rdi, rcx
+        mov rsi, rdx
+
+        push rbp
+        xor r9d, r9d
+        mov r8, r9
+        mov r11, r9
+        mov rbp, rsp
+        push r14
+        mov r14, rsi
+        mov rsi, r9
+        push r13
+        mov r13, r9
+        push r12
+        mov r12, r9
+        push rbx
+        mov rbx, r9
+        sub rsp, 256+32			; +32 for "home" storage
+        mov rcx, qword ptr [16+rdi]
+        mov rax, qword ptr [rcx]
+        mul rax
+        add r8, rax
+        adc rbx, rdx
+        adc rsi, 0
+        mov qword ptr [-288+rbp], r8
+        mov rax, qword ptr [rcx]
+        mul qword ptr [8+rcx]
+        add rbx, rax
+        adc rsi, rdx
+        adc r12, 0
+        add rbx, rax
+        adc rsi, rdx
+        adc r12, 0
+        mov qword ptr [-280+rbp], rbx
+        mov rax, qword ptr [rcx]
+        mul qword ptr [16+rcx]
+        add rsi, rax
+        adc r12, rdx
+        adc r13, 0
+        add rsi, rax
+        adc r12, rdx
+        adc r13, 0
+        mov rbx, r12
+        mov r10, r13
+        mov rax, qword ptr [8+rcx]
+        mul rax
+        add rsi, rax
+        adc rbx, rdx
+        adc r10, 0
+        mov qword ptr [-272+rbp], rsi
+        mov rdi, r10
+        mov rsi, r9
+        mov r10, rbx
+        mov rax, qword ptr [rcx]
+        mul qword ptr [24+rcx]
+        add r10, rax
+        adc rdi, rdx
+        adc r11, 0
+        add r10, rax
+        adc rdi, rdx
+        adc r11, 0
+        mov r12, rdi
+        mov rbx, r11
+        mov rdi, r9
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [16+rcx]
+        add r10, rax
+        adc r12, rdx
+        adc rbx, 0
+        add r10, rax
+        adc r12, rdx
+        adc rbx, 0
+        mov r11, r9
+        mov qword ptr [-264+rbp], r10
+        mov r8, rbx
+        mov r13, r12
+        mov r12, r9
+        mov rax, qword ptr [rcx]
+        mul qword ptr [32+rcx]
+        add r13, rax
+        adc r8, rdx
+        adc r12, 0
+        add r13, rax
+        adc r8, rdx
+        adc r12, 0
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [24+rcx]
+        add r13, rax
+        adc r8, rdx
+        adc r12, 0
+        add r13, rax
+        adc r8, rdx
+        adc r12, 0
+        mov rbx, r8
+        mov r10, r12
+        mov r8, r9
+        mov rax, qword ptr [16+rcx]
+        mul rax
+        add r13, rax
+        adc rbx, rdx
+        adc r10, 0
+        mov qword ptr [-256+rbp], r13
+        mov rax, qword ptr [rcx]
+        mul qword ptr [40+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [32+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [24+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        add rbx, r8
+        adc r10, rdi
+        adc r11, rsi
+        add rbx, r8
+        adc r10, rdi
+        adc r11, rsi
+        mov qword ptr [-248+rbp], rbx
+        mov rax, qword ptr [rcx]
+        mul qword ptr [48+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [40+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [32+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, r9
+        mov r13, rdi
+        mov r12, rsi
+        add r10, r8
+        adc r11, r13
+        adc rax, r12
+        add r10, r8
+        adc r11, r13
+        adc rax, r12
+        mov rdx, rax
+        mov rbx, r11
+        mov rdi, r13
+        mov r11, rdx
+        mov rsi, r12
+        mov rax, qword ptr [24+rcx]
+        mul rax
+        add r10, rax
+        adc rbx, rdx
+        adc r11, 0
+        mov qword ptr [-240+rbp], r10
+        mov r10, r11
+        mov rax, qword ptr [rcx]
+        mul qword ptr [56+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [48+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [40+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [32+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rdx, r9
+        add rbx, r8
+        adc r10, rdi
+        adc rdx, rsi
+        add rbx, r8
+        adc r10, rdi
+        adc rdx, rsi
+        mov r11, rdx
+        mov qword ptr [-232+rbp], rbx
+        mov rbx, r9
+        mov rax, qword ptr [rcx]
+        mul qword ptr [64+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [56+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [48+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [40+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r13, rdi
+        mov r12, rsi
+        add r10, r8
+        adc r11, r13
+        adc rbx, r12
+        add r10, r8
+        adc r11, r13
+        adc rbx, r12
+        mov rax, qword ptr [32+rcx]
+        mul rax
+        add r10, rax
+        adc r11, rdx
+        adc rbx, 0
+        mov rdi, r13
+        mov qword ptr [-224+rbp], r10
+        mov rsi, r12
+        mov r10, rbx
+        mov r12, r9
+        mov rax, qword ptr [rcx]
+        mul qword ptr [72+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [64+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [56+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [48+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [40+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        add r11, r8
+        adc r10, rdi
+        adc r12, rsi
+        add r11, r8
+        adc r10, rdi
+        adc r12, rsi
+        mov qword ptr [-216+rbp], r11
+        mov rbx, r12
+        mov rax, qword ptr [rcx]
+        mul qword ptr [80+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [72+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [64+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [56+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [48+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, r9
+        mov r13, rdi
+        mov r12, rsi
+        add r10, r8
+        adc rbx, r13
+        adc rax, r12
+        add r10, r8
+        adc rbx, r13
+        adc rax, r12
+        mov rdx, rax
+        mov r11, rbx
+        mov rdi, r13
+        mov rbx, rdx
+        mov rsi, r12
+        mov rax, qword ptr [40+rcx]
+        mul rax
+        add r10, rax
+        adc r11, rdx
+        adc rbx, 0
+        mov qword ptr [-208+rbp], r10
+        mov r10, rbx
+        mov rax, qword ptr [rcx]
+        mul qword ptr [88+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [80+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [72+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [64+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [56+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [48+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rdx, r9
+        add r11, r8
+        adc r10, rdi
+        adc rdx, rsi
+        add r11, r8
+        adc r10, rdi
+        adc rdx, rsi
+        mov r13, rdx
+        mov qword ptr [-200+rbp], r11
+        mov r12, r13
+        mov rax, qword ptr [rcx]
+        mul qword ptr [96+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [88+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [80+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [72+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [64+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [56+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, r9
+        mov rdx, rdi
+        mov r11, rsi
+        add r10, r8
+        adc r12, rdx
+        adc rax, r11
+        add r10, r8
+        adc r12, rdx
+        adc rax, r11
+        mov rbx, rdx
+        mov r13, rax
+        mov rsi, r11
+        mov rax, qword ptr [48+rcx]
+        mul rax
+        add r10, rax
+        adc r12, rdx
+        adc r13, 0
+        mov rdi, rbx
+        mov qword ptr [-192+rbp], r10
+        mov r10, r13
+        mov rax, qword ptr [rcx]
+        mul qword ptr [104+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov r13, r9
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [96+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [88+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [80+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [72+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [64+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [56+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        add r12, r8
+        adc r10, rdi
+        adc r13, rsi
+        add r12, r8
+        adc r10, rdi
+        adc r13, rsi
+        mov qword ptr [-184+rbp], r12
+        mov r12, r13
+        mov rax, qword ptr [rcx]
+        mul qword ptr [112+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [104+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [96+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [88+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [80+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [72+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [64+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, r9
+        mov rbx, rdi
+        mov rdx, rsi
+        add r10, r8
+        adc r12, rbx
+        adc rax, rdx
+        add r10, r8
+        adc r12, rbx
+        adc rax, rdx
+        mov r11, rdx
+        mov r13, rax
+        mov rdi, rbx
+        mov rax, qword ptr [56+rcx]
+        mul rax
+        add r10, rax
+        adc r12, rdx
+        adc r13, 0
+        mov rsi, r11
+        mov qword ptr [-176+rbp], r10
+        mov r10, r13
+        mov rax, qword ptr [rcx]
+        mul qword ptr [120+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov r13, r9
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [112+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [104+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [96+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [88+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [80+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [72+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [64+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        add r12, r8
+        adc r10, rdi
+        adc r13, rsi
+        add r12, r8
+        adc r10, rdi
+        adc r13, rsi
+        mov qword ptr [-168+rbp], r12
+        mov r12, r13
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [120+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [112+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [104+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [96+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [88+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [80+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [72+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, r9
+        mov rbx, rdi
+        mov rdx, rsi
+        add r10, r8
+        adc r12, rbx
+        adc rax, rdx
+        add r10, r8
+        adc r12, rbx
+        adc rax, rdx
+        mov r11, rdx
+        mov r13, rax
+        mov rdi, rbx
+        mov rax, qword ptr [64+rcx]
+        mul rax
+        add r10, rax
+        adc r12, rdx
+        adc r13, 0
+        mov rsi, r11
+        mov qword ptr [-160+rbp], r10
+        mov r11, r9
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [120+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov r10, r13
+        mov rbx, r9
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [112+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [104+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [96+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [88+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [80+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [72+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        add r12, r8
+        adc r10, rdi
+        adc r11, rsi
+        add r12, r8
+        adc r10, rdi
+        adc r11, rsi
+        mov qword ptr [-152+rbp], r12
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [120+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [112+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [104+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [96+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [88+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [80+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r13, rdi
+        mov r12, rsi
+        add r10, r8
+        adc r11, r13
+        adc rbx, r12
+        add r10, r8
+        adc r11, r13
+        adc rbx, r12
+        mov rdx, rbx
+        mov rdi, r13
+        mov rbx, r11
+        mov rsi, r12
+        mov r11, rdx
+        mov r12, r9
+        mov rax, qword ptr [72+rcx]
+        mul rax
+        add r10, rax
+        adc rbx, rdx
+        adc r11, 0
+        mov qword ptr [-144+rbp], r10
+        mov r10, r11
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [120+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [112+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [104+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [96+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [88+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [80+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        add rbx, r8
+        adc r10, rdi
+        adc r12, rsi
+        add rbx, r8
+        adc r10, rdi
+        adc r12, rsi
+        mov qword ptr [-136+rbp], rbx
+        mov r11, r12
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [120+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [112+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [104+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [96+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [88+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, r9
+        mov r13, rdi
+        mov r12, rsi
+        add r10, r8
+        adc r11, r13
+        adc rax, r12
+        add r10, r8
+        adc r11, r13
+        adc rax, r12
+        mov rdx, rax
+        mov rbx, r11
+        mov rdi, r13
+        mov r11, rdx
+        mov rsi, r12
+        mov rax, qword ptr [80+rcx]
+        mul rax
+        add r10, rax
+        adc rbx, rdx
+        adc r11, 0
+        mov qword ptr [-128+rbp], r10
+        mov r10, r11
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [120+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [112+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [104+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [96+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [88+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rdx, r9
+        add rbx, r8
+        adc r10, rdi
+        adc rdx, rsi
+        add rbx, r8
+        adc r10, rdi
+        adc rdx, rsi
+        mov qword ptr [-120+rbp], rbx
+        mov r11, rdx
+        mov rbx, r9
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [120+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [112+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [104+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [96+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r13, rdi
+        mov r12, rsi
+        add r10, r8
+        adc r11, r13
+        adc rbx, r12
+        add r10, r8
+        adc r11, r13
+        adc rbx, r12
+        mov rdx, rbx
+        mov rdi, r13
+        mov rbx, r11
+        mov rsi, r12
+        mov r11, rdx
+        mov r12, r9
+        mov rax, qword ptr [88+rcx]
+        mul rax
+        add r10, rax
+        adc rbx, rdx
+        adc r11, 0
+        mov qword ptr [-112+rbp], r10
+        mov r10, r11
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [120+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [112+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [104+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [96+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        add rbx, r8
+        adc r10, rdi
+        adc r12, rsi
+        add rbx, r8
+        adc r10, rdi
+        adc r12, rsi
+        mov qword ptr [-104+rbp], rbx
+        mov r11, r12
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [120+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [112+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [104+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, r9
+        mov r13, rdi
+        mov r12, rsi
+        add r10, r8
+        adc r11, r13
+        adc rax, r12
+        add r10, r8
+        adc r11, r13
+        adc rax, r12
+        mov rdx, rax
+        mov rbx, r11
+        mov rdi, r13
+        mov r11, rdx
+        mov rsi, r12
+        mov rax, qword ptr [96+rcx]
+        mul rax
+        add r10, rax
+        adc rbx, rdx
+        adc r11, 0
+        mov qword ptr [-96+rbp], r10
+        mov r10, r9
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [120+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [112+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [104+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r12, rdi
+        mov rax, rsi
+        mov rsi, r9
+        add rbx, r8
+        adc r11, r12
+        adc r10, rax
+        add rbx, r8
+        adc r11, r12
+        adc r10, rax
+        mov r12, r9
+        mov qword ptr [-88+rbp], rbx
+        mov r13, r11
+        mov r11, r10
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [120+rcx]
+        add r13, rax
+        adc r11, rdx
+        adc r12, 0
+        add r13, rax
+        adc r11, rdx
+        adc r12, 0
+        mov rdi, r12
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [112+rcx]
+        add r13, rax
+        adc r11, rdx
+        adc rdi, 0
+        add r13, rax
+        adc r11, rdx
+        adc rdi, 0
+        mov rbx, r11
+        mov r10, rdi
+        mov r11, r9
+        mov rax, qword ptr [104+rcx]
+        mul rax
+        add r13, rax
+        adc rbx, rdx
+        adc r10, 0
+        mov qword ptr [-80+rbp], r13
+        mov r8, r10
+        mov r10, rbx
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [120+rcx]
+        add r10, rax
+        adc r8, rdx
+        adc rsi, 0
+        add r10, rax
+        adc r8, rdx
+        adc rsi, 0
+        mov r12, r8
+        mov rbx, rsi
+        mov rax, qword ptr [104+rcx]
+        mul qword ptr [112+rcx]
+        add r10, rax
+        adc r12, rdx
+        adc rbx, 0
+        add r10, rax
+        adc r12, rdx
+        adc rbx, 0
+        mov qword ptr [-72+rbp], r10
+        mov r13, rbx
+        mov rbx, r12
+        mov rax, qword ptr [104+rcx]
+        mul qword ptr [120+rcx]
+        add rbx, rax
+        adc r13, rdx
+        adc r11, 0
+        add rbx, rax
+        adc r13, rdx
+        adc r11, 0
+        mov r12, r11
+        mov r10, r13
+        mov rax, qword ptr [112+rcx]
+        mul rax
+        add rbx, rax
+        adc r10, rdx
+        adc r12, 0
+        mov qword ptr [-64+rbp], rbx
+        mov rdi, r10
+        mov rbx, r9
+        mov rsi, r12
+        mov rax, qword ptr [112+rcx]
+        mul qword ptr [120+rcx]
+        add rdi, rax
+        adc rsi, rdx
+        adc rbx, 0
+        add rdi, rax
+        adc rsi, rdx
+        adc rbx, 0
+        mov qword ptr [-56+rbp], rdi
+        mov r8, rbx
+        mov rax, qword ptr [120+rcx]
+        mul rax
+        add rsi, rax
+        adc r8, rdx
+        adc r9, 0
+        mov qword ptr [-48+rbp], rsi
+        mov qword ptr [-40+rbp], r8
+        mov dword ptr [8+r14], 32
+        mov dword ptr [r14], 0
+;        mov rdi, qword ptr [16+r14]
+;        lea rsi, qword ptr [-288+rbp]
+;        mov edx, 256
+        mov rcx, qword ptr [16+r14]
+        lea rdx, qword ptr [-288+rbp]
+        mov r8d, 256
+        call memcpy
+        mov edx, dword ptr [8+r14]
+        test edx, edx
+        je L232
+        lea ecx, dword ptr [-1+rdx]
+        mov rsi, qword ptr [16+r14]
+        mov r9d, ecx
+        cmp dword ptr [rsi+r9*8], 0
+        jne L230
+        mov edx, ecx
+        ALIGN 16
+L231:
+        test edx, edx
+        mov ecx, edx
+        je L235
+        dec edx
+        mov eax, edx
+        cmp dword ptr [rsi+rax*8], 0
+        je L231
+        mov dword ptr [8+r14], ecx
+        mov edx, ecx
+L230:
+        test edx, edx
+        je L232
+        mov eax, dword ptr [r14]
+        jmp L233
+
+L235:
+        mov dword ptr [8+r14], edx
+L232:
+        xor eax, eax
+L233:
+        mov dword ptr [r14], eax
+        add rsp, 256+32			; +32 for "home" storage
+        pop rbx
+        pop r12
+        pop r13
+        pop r14
+        pop rbp
+        pop rsi
+        pop rdi
+
+        ret
+
+s_mp_sqr_comba_16 ENDP
+
+
+; void s_mp_sqr_comba_32(const mp_int *A, mp_int *B);
+
+        ALIGN 16
+s_mp_sqr_comba_32 PROC ; A "FRAME" function
+
+        push rdi
+        push rsi
+
+        mov rdi, rcx
+        mov rsi, rdx
+
+        push rbp
+        xor r10d, r10d
+        mov r8, r10
+        mov r11, r10
+        mov rbp, rsp
+        push r14
+        mov r14, rsi
+        mov rsi, r10
+        push r13
+        mov r13, r10
+        push r12
+        mov r12, r10
+        push rbx
+        mov rbx, r10
+        sub rsp, 512+32			; +32 for "home" storage
+        mov rcx, qword ptr [16+rdi]
+        mov rax, qword ptr [rcx]
+        mul rax
+        add r8, rax
+        adc rbx, rdx
+        adc rsi, 0
+        mov qword ptr [-544+rbp], r8
+        mov rax, qword ptr [rcx]
+        mul qword ptr [8+rcx]
+        add rbx, rax
+        adc rsi, rdx
+        adc r12, 0
+        add rbx, rax
+        adc rsi, rdx
+        adc r12, 0
+        mov qword ptr [-536+rbp], rbx
+        mov rax, qword ptr [rcx]
+        mul qword ptr [16+rcx]
+        add rsi, rax
+        adc r12, rdx
+        adc r13, 0
+        add rsi, rax
+        adc r12, rdx
+        adc r13, 0
+        mov rbx, r12
+        mov r9, r13
+        mov rax, qword ptr [8+rcx]
+        mul rax
+        add rsi, rax
+        adc rbx, rdx
+        adc r9, 0
+        mov qword ptr [-528+rbp], rsi
+        mov rdi, r9
+        mov rsi, r10
+        mov r9, rbx
+        mov rax, qword ptr [rcx]
+        mul qword ptr [24+rcx]
+        add r9, rax
+        adc rdi, rdx
+        adc r11, 0
+        add r9, rax
+        adc rdi, rdx
+        adc r11, 0
+        mov r12, rdi
+        mov r13, r11
+        mov rdi, r10
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [16+rcx]
+        add r9, rax
+        adc r12, rdx
+        adc r13, 0
+        add r9, rax
+        adc r12, rdx
+        adc r13, 0
+        mov r11, r10
+        mov qword ptr [-520+rbp], r9
+        mov r8, r13
+        mov r13, r12
+        mov r12, r10
+        mov rax, qword ptr [rcx]
+        mul qword ptr [32+rcx]
+        add r13, rax
+        adc r8, rdx
+        adc r12, 0
+        add r13, rax
+        adc r8, rdx
+        adc r12, 0
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [24+rcx]
+        add r13, rax
+        adc r8, rdx
+        adc r12, 0
+        add r13, rax
+        adc r8, rdx
+        adc r12, 0
+        mov rbx, r8
+        mov r9, r12
+        mov r8, r10
+        mov rax, qword ptr [16+rcx]
+        mul rax
+        add r13, rax
+        adc rbx, rdx
+        adc r9, 0
+        mov qword ptr [-512+rbp], r13
+        mov rax, qword ptr [rcx]
+        mul qword ptr [40+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [32+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [24+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        add rbx, r8
+        adc r9, rdi
+        adc r11, rsi
+        add rbx, r8
+        adc r9, rdi
+        adc r11, rsi
+        mov qword ptr [-504+rbp], rbx
+        mov rax, qword ptr [rcx]
+        mul qword ptr [48+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [40+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [32+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, r10
+        mov r13, rdi
+        mov r12, rsi
+        add r9, r8
+        adc r11, r13
+        adc rax, r12
+        add r9, r8
+        adc r11, r13
+        adc rax, r12
+        mov rdx, rax
+        mov rbx, r11
+        mov rdi, r13
+        mov r11, rdx
+        mov rsi, r12
+        mov rax, qword ptr [24+rcx]
+        mul rax
+        add r9, rax
+        adc rbx, rdx
+        adc r11, 0
+        mov qword ptr [-496+rbp], r9
+        mov r9, r11
+        mov rax, qword ptr [rcx]
+        mul qword ptr [56+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [48+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [40+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [32+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rdx, r10
+        add rbx, r8
+        adc r9, rdi
+        adc rdx, rsi
+        add rbx, r8
+        adc r9, rdi
+        adc rdx, rsi
+        mov r11, rdx
+        mov qword ptr [-488+rbp], rbx
+        mov rbx, r10
+        mov rax, qword ptr [rcx]
+        mul qword ptr [64+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [56+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [48+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [40+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r13, rdi
+        mov r12, rsi
+        add r9, r8
+        adc r11, r13
+        adc rbx, r12
+        add r9, r8
+        adc r11, r13
+        adc rbx, r12
+        mov rax, qword ptr [32+rcx]
+        mul rax
+        add r9, rax
+        adc r11, rdx
+        adc rbx, 0
+        mov rdi, r13
+        mov qword ptr [-480+rbp], r9
+        mov rsi, r12
+        mov r9, rbx
+        mov r12, r10
+        mov rax, qword ptr [rcx]
+        mul qword ptr [72+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [64+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [56+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [48+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [40+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        add r11, r8
+        adc r9, rdi
+        adc r12, rsi
+        add r11, r8
+        adc r9, rdi
+        adc r12, rsi
+        mov qword ptr [-472+rbp], r11
+        mov rbx, r12
+        mov rax, qword ptr [rcx]
+        mul qword ptr [80+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [72+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [64+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [56+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [48+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, r10
+        mov r13, rdi
+        mov r12, rsi
+        add r9, r8
+        adc rbx, r13
+        adc rax, r12
+        add r9, r8
+        adc rbx, r13
+        adc rax, r12
+        mov rdx, rax
+        mov r11, rbx
+        mov rdi, r13
+        mov rbx, rdx
+        mov rsi, r12
+        mov rax, qword ptr [40+rcx]
+        mul rax
+        add r9, rax
+        adc r11, rdx
+        adc rbx, 0
+        mov qword ptr [-464+rbp], r9
+        mov r9, rbx
+        mov rax, qword ptr [rcx]
+        mul qword ptr [88+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [80+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [72+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [64+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [56+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [48+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rdx, r10
+        add r11, r8
+        adc r9, rdi
+        adc rdx, rsi
+        add r11, r8
+        adc r9, rdi
+        adc rdx, rsi
+        mov r13, rdx
+        mov qword ptr [-456+rbp], r11
+        mov r12, r13
+        mov r13, r10
+        mov rax, qword ptr [rcx]
+        mul qword ptr [96+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [88+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [80+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [72+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [64+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [56+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, rdi
+        mov r11, rsi
+        add r9, r8
+        adc r12, rax
+        adc r13, r11
+        add r9, r8
+        adc r12, rax
+        adc r13, r11
+        mov rbx, rax
+        mov rsi, r11
+        mov rax, qword ptr [48+rcx]
+        mul rax
+        add r9, rax
+        adc r12, rdx
+        adc r13, 0
+        mov rdi, rbx
+        mov qword ptr [-448+rbp], r9
+        mov r9, r13
+        mov rax, qword ptr [rcx]
+        mul qword ptr [104+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov r13, r10
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [96+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [88+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [80+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [72+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [64+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [56+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        add r12, r8
+        adc r9, rdi
+        adc r13, rsi
+        add r12, r8
+        adc r9, rdi
+        adc r13, rsi
+        mov qword ptr [-440+rbp], r12
+        mov r12, r10
+        mov rax, qword ptr [rcx]
+        mul qword ptr [112+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [104+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [96+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [88+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [80+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [72+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [64+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rdx, r13
+        mov rbx, rdi
+        mov r13, rsi
+        add r9, r8
+        adc rdx, rbx
+        adc r12, r13
+        add r9, r8
+        adc rdx, rbx
+        adc r12, r13
+        mov rax, r12
+        mov r11, r13
+        mov r12, rdx
+        mov r13, rax
+        mov rdi, rbx
+        mov rsi, r11
+        mov rax, qword ptr [56+rcx]
+        mul rax
+        add r9, rax
+        adc r12, rdx
+        adc r13, 0
+        mov qword ptr [-432+rbp], r9
+        mov r9, r13
+        mov r13, r10
+        mov rax, qword ptr [rcx]
+        mul qword ptr [120+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [112+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [104+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [96+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [88+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [80+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [72+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [64+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, r8
+        mov rdx, rdi
+        mov rbx, rsi
+        add r12, rax
+        adc r9, rdx
+        adc r13, rbx
+        add r12, rax
+        adc r9, rdx
+        adc r13, rbx
+        mov qword ptr [-424+rbp], r12
+        mov r8, rdx
+        mov rsi, rax
+        mov rdi, rbx
+        mov r12, r13
+        mov r13, r10
+        mov rax, qword ptr [rcx]
+        mul qword ptr [128+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [120+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [112+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [104+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [96+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [88+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [80+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [72+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, rsi
+        mov rbx, r8
+        mov rdx, rdi
+        add r9, rax
+        adc r12, rbx
+        adc r13, rdx
+        add r9, rax
+        adc r12, rbx
+        adc r13, rdx
+        mov r11, rdx
+        mov r8, rax
+        mov rdi, rbx
+        mov rax, qword ptr [64+rcx]
+        mul rax
+        add r9, rax
+        adc r12, rdx
+        adc r13, 0
+        mov rsi, r11
+        mov qword ptr [-416+rbp], r9
+        mov r9, r13
+        mov rax, qword ptr [rcx]
+        mul qword ptr [136+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov r13, r10
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [128+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [120+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [112+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [104+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [96+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [88+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [80+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [72+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rbx, r8
+        mov rax, rdi
+        mov rdx, rsi
+        add r12, rbx
+        adc r9, rax
+        adc r13, rdx
+        add r12, rbx
+        adc r9, rax
+        adc r13, rdx
+        mov qword ptr [-408+rbp], r12
+        mov rdi, rdx
+        mov r8, rax
+        mov rsi, rbx
+        mov r12, r13
+        mov r13, r10
+        mov rax, qword ptr [rcx]
+        mul qword ptr [144+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [136+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [128+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [120+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [112+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [104+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [96+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [88+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [80+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, rsi
+        mov rbx, r8
+        mov rdx, rdi
+        add r9, rax
+        adc r12, rbx
+        adc r13, rdx
+        add r9, rax
+        adc r12, rbx
+        adc r13, rdx
+        mov r11, rdx
+        mov r8, rax
+        mov rdi, rbx
+        mov rax, qword ptr [72+rcx]
+        mul rax
+        add r9, rax
+        adc r12, rdx
+        adc r13, 0
+        mov rsi, r11
+        mov qword ptr [-400+rbp], r9
+        mov r9, r13
+        mov rax, qword ptr [rcx]
+        mul qword ptr [152+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov r13, r10
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [144+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [136+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [128+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [120+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [112+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [104+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [96+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [88+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [80+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rbx, r8
+        mov rax, rdi
+        mov rdx, rsi
+        add r12, rbx
+        adc r9, rax
+        adc r13, rdx
+        add r12, rbx
+        adc r9, rax
+        adc r13, rdx
+        mov qword ptr [-392+rbp], r12
+        mov rdi, rdx
+        mov r8, rax
+        mov rsi, rbx
+        mov r12, r13
+        mov r13, r10
+        mov rax, qword ptr [rcx]
+        mul qword ptr [160+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [152+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [144+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [136+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [128+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [120+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [112+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [104+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [96+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [88+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, rsi
+        mov rbx, r8
+        mov rdx, rdi
+        add r9, rax
+        adc r12, rbx
+        adc r13, rdx
+        add r9, rax
+        adc r12, rbx
+        adc r13, rdx
+        mov r11, rdx
+        mov r8, rax
+        mov rdi, rbx
+        mov rax, qword ptr [80+rcx]
+        mul rax
+        add r9, rax
+        adc r12, rdx
+        adc r13, 0
+        mov rsi, r11
+        mov qword ptr [-384+rbp], r9
+        mov r9, r13
+        mov rax, qword ptr [rcx]
+        mul qword ptr [168+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov r13, r10
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [160+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [152+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [144+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [136+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [128+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [120+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [112+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [104+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [96+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [88+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rbx, r8
+        mov rax, rdi
+        mov rdx, rsi
+        add r12, rbx
+        adc r9, rax
+        adc r13, rdx
+        add r12, rbx
+        adc r9, rax
+        adc r13, rdx
+        mov qword ptr [-376+rbp], r12
+        mov rdi, rdx
+        mov r8, rax
+        mov rsi, rbx
+        mov r12, r13
+        mov r13, r10
+        mov rax, qword ptr [rcx]
+        mul qword ptr [176+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [168+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [160+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [152+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [144+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [136+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [128+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [120+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [112+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [104+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [96+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, rsi
+        mov rbx, r8
+        mov rdx, rdi
+        add r9, rax
+        adc r12, rbx
+        adc r13, rdx
+        add r9, rax
+        adc r12, rbx
+        adc r13, rdx
+        mov r11, rdx
+        mov r8, rax
+        mov rdi, rbx
+        mov rax, qword ptr [88+rcx]
+        mul rax
+        add r9, rax
+        adc r12, rdx
+        adc r13, 0
+        mov rsi, r11
+        mov qword ptr [-368+rbp], r9
+        mov r9, r13
+        mov rax, qword ptr [rcx]
+        mul qword ptr [184+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov r13, r10
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [176+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [168+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [160+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [152+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [144+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [136+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [128+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [120+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [112+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [104+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [96+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rbx, r8
+        mov rax, rdi
+        mov rdx, rsi
+        add r12, rbx
+        adc r9, rax
+        adc r13, rdx
+        add r12, rbx
+        adc r9, rax
+        adc r13, rdx
+        mov rdi, rdx
+        mov qword ptr [-360+rbp], r12
+        mov r8, rax
+        mov rsi, rbx
+        mov r12, r13
+        mov r13, r10
+        mov rax, qword ptr [rcx]
+        mul qword ptr [192+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [184+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [176+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [168+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [160+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [152+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [144+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [136+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [128+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [120+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [112+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [104+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rbx, r8
+        mov rax, rdi
+        add r9, rsi
+        adc r12, rbx
+        adc r13, rax
+        add r9, rsi
+        adc r12, rbx
+        adc r13, rax
+        mov r11, rax
+        mov r8, rbx
+        mov rax, qword ptr [96+rcx]
+        mul rax
+        add r9, rax
+        adc r12, rdx
+        adc r13, 0
+        mov rdi, r11
+        mov qword ptr [-352+rbp], r9
+        mov r9, r13
+        mov rax, qword ptr [rcx]
+        mul qword ptr [200+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov r13, r10
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [192+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [184+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [176+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [168+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [160+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [152+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [144+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [136+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [128+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [120+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [112+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [104+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        add r12, rsi
+        adc r9, r8
+        adc r13, rdi
+        add r12, rsi
+        adc r9, r8
+        adc r13, rdi
+        mov qword ptr [-344+rbp], r12
+        mov r12, r10
+        mov rax, qword ptr [rcx]
+        mul qword ptr [208+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [200+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [192+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [184+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [176+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [168+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [160+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [152+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [144+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [136+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [128+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [120+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [112+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rdx, r13
+        mov rbx, r8
+        mov r13, rdi
+        add r9, rsi
+        adc rdx, rbx
+        adc r12, r13
+        add r9, rsi
+        adc rdx, rbx
+        adc r12, r13
+        mov rax, r12
+        mov r11, r13
+        mov r12, rdx
+        mov r13, rax
+        mov r8, rbx
+        mov rdi, r11
+        mov rax, qword ptr [104+rcx]
+        mul rax
+        add r9, rax
+        adc r12, rdx
+        adc r13, 0
+        mov qword ptr [-336+rbp], r9
+        mov r9, r13
+        mov r13, r10
+        mov rax, qword ptr [rcx]
+        mul qword ptr [216+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [208+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [200+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [192+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [184+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [176+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [168+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [160+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [152+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [144+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [136+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [128+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [120+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [104+rcx]
+        mul qword ptr [112+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        add r12, rsi
+        adc r9, r8
+        adc r13, rdi
+        add r12, rsi
+        adc r9, r8
+        adc r13, rdi
+        mov qword ptr [-328+rbp], r12
+        mov rax, qword ptr [rcx]
+        mul qword ptr [224+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [216+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [208+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [200+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [192+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [184+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [176+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [168+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [160+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [152+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [144+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [136+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [128+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [104+rcx]
+        mul qword ptr [120+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, r13
+        mov rdx, r10
+        mov rbx, r8
+        mov r12, rdi
+        add r9, rsi
+        adc rax, rbx
+        adc rdx, r12
+        add r9, rsi
+        adc rax, rbx
+        adc rdx, r12
+        mov rdi, rdx
+        mov r11, r12
+        mov r8, rbx
+        mov r12, rax
+        mov r13, rdi
+        mov rdi, r11
+        mov rax, qword ptr [112+rcx]
+        mul rax
+        add r9, rax
+        adc r12, rdx
+        adc r13, 0
+        mov qword ptr [-320+rbp], r9
+        mov rbx, r13
+        mov r9, r10
+        mov rax, qword ptr [rcx]
+        mul qword ptr [232+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [224+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [216+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [208+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [200+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [192+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [184+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [176+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [168+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [160+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [152+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [144+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [136+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [104+rcx]
+        mul qword ptr [128+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [112+rcx]
+        mul qword ptr [120+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        add r12, rsi
+        adc rbx, r8
+        adc r9, rdi
+        add r12, rsi
+        adc rbx, r8
+        adc r9, rdi
+        mov qword ptr [-312+rbp], r12
+        mov r13, r9
+        mov rax, qword ptr [rcx]
+        mul qword ptr [240+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [232+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [224+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [216+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [208+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [200+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [192+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [184+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [176+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [168+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [160+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [152+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [144+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [104+rcx]
+        mul qword ptr [136+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [112+rcx]
+        mul qword ptr [128+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, r10
+        mov r11, r8
+        mov rdx, rdi
+        add rbx, rsi
+        adc r13, r11
+        adc rax, rdx
+        add rbx, rsi
+        adc r13, r11
+        adc rax, rdx
+        mov r9, rdx
+        mov rdx, rax
+        mov r12, r13
+        mov r8, r11
+        mov r13, rdx
+        mov rdi, r9
+        mov rax, qword ptr [120+rcx]
+        mul rax
+        add rbx, rax
+        adc r12, rdx
+        adc r13, 0
+        mov qword ptr [-304+rbp], rbx
+        mov rbx, r13
+        mov r13, r10
+        mov rax, qword ptr [rcx]
+        mul qword ptr [248+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [240+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [232+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [224+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [216+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [208+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [200+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [192+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [184+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [176+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [168+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [160+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [152+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [104+rcx]
+        mul qword ptr [144+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [112+rcx]
+        mul qword ptr [136+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [120+rcx]
+        mul qword ptr [128+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        add r12, rsi
+        adc rbx, r8
+        adc r13, rdi
+        add r12, rsi
+        adc rbx, r8
+        adc r13, rdi
+        mov qword ptr [-296+rbp], r12
+        mov r12, r13
+        mov r13, r10
+        mov rax, qword ptr [8+rcx]
+        mul qword ptr [248+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [240+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [232+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [224+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [216+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [208+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [200+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [192+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [184+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [176+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [168+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [160+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [104+rcx]
+        mul qword ptr [152+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [112+rcx]
+        mul qword ptr [144+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [120+rcx]
+        mul qword ptr [136+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov r11, r8
+        mov rax, rdi
+        add rbx, rsi
+        adc r12, r11
+        adc r13, rax
+        add rbx, rsi
+        adc r12, r11
+        adc r13, rax
+        mov r9, rax
+        mov r8, r11
+        mov rax, qword ptr [128+rcx]
+        mul rax
+        add rbx, rax
+        adc r12, rdx
+        adc r13, 0
+        mov rdi, r9
+        mov qword ptr [-288+rbp], rbx
+        mov r9, r13
+        mov rax, qword ptr [16+rcx]
+        mul qword ptr [248+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov r13, r10
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [240+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [232+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [224+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [216+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [208+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [200+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [192+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [184+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [176+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [168+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [104+rcx]
+        mul qword ptr [160+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [112+rcx]
+        mul qword ptr [152+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [120+rcx]
+        mul qword ptr [144+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [128+rcx]
+        mul qword ptr [136+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        add r12, rsi
+        adc r9, r8
+        adc r13, rdi
+        add r12, rsi
+        adc r9, r8
+        adc r13, rdi
+        mov qword ptr [-280+rbp], r12
+        mov r12, r10
+        mov rax, qword ptr [24+rcx]
+        mul qword ptr [248+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [240+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [232+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [224+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [216+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [208+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [200+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [192+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [184+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [176+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [104+rcx]
+        mul qword ptr [168+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [112+rcx]
+        mul qword ptr [160+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [120+rcx]
+        mul qword ptr [152+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [128+rcx]
+        mul qword ptr [144+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rdx, r13
+        mov rbx, r8
+        mov r13, rdi
+        add r9, rsi
+        adc rdx, rbx
+        adc r12, r13
+        add r9, rsi
+        adc rdx, rbx
+        adc r12, r13
+        mov rax, r12
+        mov r11, r13
+        mov r12, rdx
+        mov r13, rax
+        mov r8, rbx
+        mov rdi, r11
+        mov rax, qword ptr [136+rcx]
+        mul rax
+        add r9, rax
+        adc r12, rdx
+        adc r13, 0
+        mov qword ptr [-272+rbp], r9
+        mov r9, r13
+        mov r13, r10
+        mov rax, qword ptr [32+rcx]
+        mul qword ptr [248+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [240+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [232+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [224+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [216+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [208+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [200+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [192+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [184+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [104+rcx]
+        mul qword ptr [176+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [112+rcx]
+        mul qword ptr [168+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [120+rcx]
+        mul qword ptr [160+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [128+rcx]
+        mul qword ptr [152+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [136+rcx]
+        mul qword ptr [144+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        add r12, rsi
+        adc r9, r8
+        adc r13, rdi
+        add r12, rsi
+        adc r9, r8
+        adc r13, rdi
+        mov qword ptr [-264+rbp], r12
+        mov rax, qword ptr [40+rcx]
+        mul qword ptr [248+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [240+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [232+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [224+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [216+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [208+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [200+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [192+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [104+rcx]
+        mul qword ptr [184+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [112+rcx]
+        mul qword ptr [176+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [120+rcx]
+        mul qword ptr [168+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [128+rcx]
+        mul qword ptr [160+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [136+rcx]
+        mul qword ptr [152+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, r13
+        mov rdx, r10
+        mov rbx, r8
+        mov r12, rdi
+        add r9, rsi
+        adc rax, rbx
+        adc rdx, r12
+        add r9, rsi
+        adc rax, rbx
+        adc rdx, r12
+        mov rdi, rdx
+        mov r11, r12
+        mov r8, rbx
+        mov r12, rax
+        mov r13, rdi
+        mov rdi, r11
+        mov rax, qword ptr [144+rcx]
+        mul rax
+        add r9, rax
+        adc r12, rdx
+        adc r13, 0
+        mov r11, r10
+        mov qword ptr [-256+rbp], r9
+        mov r9, r13
+        mov rax, qword ptr [48+rcx]
+        mul qword ptr [248+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [240+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [232+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [224+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [216+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [208+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [200+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [104+rcx]
+        mul qword ptr [192+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [112+rcx]
+        mul qword ptr [184+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [120+rcx]
+        mul qword ptr [176+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [128+rcx]
+        mul qword ptr [168+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [136+rcx]
+        mul qword ptr [160+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [144+rcx]
+        mul qword ptr [152+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        add r12, rsi
+        adc r9, r8
+        adc r11, rdi
+        add r12, rsi
+        adc r9, r8
+        adc r11, rdi
+        mov qword ptr [-248+rbp], r12
+        mov r13, r11
+        mov rax, qword ptr [56+rcx]
+        mul qword ptr [248+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [240+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [232+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [224+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [216+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [208+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [104+rcx]
+        mul qword ptr [200+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [112+rcx]
+        mul qword ptr [192+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [120+rcx]
+        mul qword ptr [184+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [128+rcx]
+        mul qword ptr [176+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [136+rcx]
+        mul qword ptr [168+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [144+rcx]
+        mul qword ptr [160+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, r10
+        mov rdx, rsi
+        mov rbx, r8
+        mov r12, rdi
+        add r9, rdx
+        adc r13, rbx
+        adc rax, r12
+        add r9, rdx
+        adc r13, rbx
+        adc rax, r12
+        mov r11, r12
+        mov r8, rdx
+        mov rdx, rax
+        mov r12, r13
+        mov rdi, rbx
+        mov r13, rdx
+        mov rsi, r11
+        mov rax, qword ptr [152+rcx]
+        mul rax
+        add r9, rax
+        adc r12, rdx
+        adc r13, 0
+        mov qword ptr [-240+rbp], r9
+        mov r9, r13
+        mov r13, r10
+        mov rax, qword ptr [64+rcx]
+        mul qword ptr [248+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [240+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [232+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [224+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [216+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [104+rcx]
+        mul qword ptr [208+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [112+rcx]
+        mul qword ptr [200+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [120+rcx]
+        mul qword ptr [192+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [128+rcx]
+        mul qword ptr [184+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [136+rcx]
+        mul qword ptr [176+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [144+rcx]
+        mul qword ptr [168+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [152+rcx]
+        mul qword ptr [160+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, r8
+        mov rdx, rdi
+        mov rbx, rsi
+        add r12, rax
+        adc r9, rdx
+        adc r13, rbx
+        add r12, rax
+        adc r9, rdx
+        adc r13, rbx
+        mov qword ptr [-232+rbp], r12
+        mov r8, rdx
+        mov rsi, rax
+        mov rdi, rbx
+        mov r12, r13
+        mov r13, r10
+        mov rax, qword ptr [72+rcx]
+        mul qword ptr [248+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [240+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [232+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [224+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [104+rcx]
+        mul qword ptr [216+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [112+rcx]
+        mul qword ptr [208+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [120+rcx]
+        mul qword ptr [200+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [128+rcx]
+        mul qword ptr [192+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [136+rcx]
+        mul qword ptr [184+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [144+rcx]
+        mul qword ptr [176+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [152+rcx]
+        mul qword ptr [168+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, rsi
+        mov rbx, r8
+        mov rdx, rdi
+        add r9, rax
+        adc r12, rbx
+        adc r13, rdx
+        add r9, rax
+        adc r12, rbx
+        adc r13, rdx
+        mov r11, rdx
+        mov r8, rax
+        mov rdi, rbx
+        mov rax, qword ptr [160+rcx]
+        mul rax
+        add r9, rax
+        adc r12, rdx
+        adc r13, 0
+        mov rsi, r11
+        mov qword ptr [-224+rbp], r9
+        mov r9, r13
+        mov rax, qword ptr [80+rcx]
+        mul qword ptr [248+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov r13, r10
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [240+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [232+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [104+rcx]
+        mul qword ptr [224+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [112+rcx]
+        mul qword ptr [216+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [120+rcx]
+        mul qword ptr [208+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [128+rcx]
+        mul qword ptr [200+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [136+rcx]
+        mul qword ptr [192+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [144+rcx]
+        mul qword ptr [184+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [152+rcx]
+        mul qword ptr [176+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [160+rcx]
+        mul qword ptr [168+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rbx, r8
+        mov rax, rdi
+        mov rdx, rsi
+        add r12, rbx
+        adc r9, rax
+        adc r13, rdx
+        add r12, rbx
+        adc r9, rax
+        adc r13, rdx
+        mov qword ptr [-216+rbp], r12
+        mov rdi, rdx
+        mov r8, rax
+        mov rsi, rbx
+        mov r12, r13
+        mov r13, r10
+        mov rax, qword ptr [88+rcx]
+        mul qword ptr [248+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [240+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [104+rcx]
+        mul qword ptr [232+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [112+rcx]
+        mul qword ptr [224+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [120+rcx]
+        mul qword ptr [216+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [128+rcx]
+        mul qword ptr [208+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [136+rcx]
+        mul qword ptr [200+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [144+rcx]
+        mul qword ptr [192+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [152+rcx]
+        mul qword ptr [184+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [160+rcx]
+        mul qword ptr [176+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, rsi
+        mov rbx, r8
+        mov rdx, rdi
+        add r9, rax
+        adc r12, rbx
+        adc r13, rdx
+        add r9, rax
+        adc r12, rbx
+        adc r13, rdx
+        mov r11, rdx
+        mov r8, rax
+        mov rdi, rbx
+        mov rax, qword ptr [168+rcx]
+        mul rax
+        add r9, rax
+        adc r12, rdx
+        adc r13, 0
+        mov rsi, r11
+        mov qword ptr [-208+rbp], r9
+        mov r9, r13
+        mov rax, qword ptr [96+rcx]
+        mul qword ptr [248+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov r13, r10
+        mov rax, qword ptr [104+rcx]
+        mul qword ptr [240+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [112+rcx]
+        mul qword ptr [232+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [120+rcx]
+        mul qword ptr [224+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [128+rcx]
+        mul qword ptr [216+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [136+rcx]
+        mul qword ptr [208+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [144+rcx]
+        mul qword ptr [200+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [152+rcx]
+        mul qword ptr [192+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [160+rcx]
+        mul qword ptr [184+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [168+rcx]
+        mul qword ptr [176+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rbx, r8
+        mov rax, rdi
+        mov rdx, rsi
+        add r12, rbx
+        adc r9, rax
+        adc r13, rdx
+        add r12, rbx
+        adc r9, rax
+        adc r13, rdx
+        mov qword ptr [-200+rbp], r12
+        mov rdi, rdx
+        mov r8, rax
+        mov rsi, rbx
+        mov r12, r13
+        mov r13, r10
+        mov rax, qword ptr [104+rcx]
+        mul qword ptr [248+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov rax, qword ptr [112+rcx]
+        mul qword ptr [240+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [120+rcx]
+        mul qword ptr [232+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [128+rcx]
+        mul qword ptr [224+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [136+rcx]
+        mul qword ptr [216+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [144+rcx]
+        mul qword ptr [208+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [152+rcx]
+        mul qword ptr [200+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [160+rcx]
+        mul qword ptr [192+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [168+rcx]
+        mul qword ptr [184+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, rsi
+        mov rbx, r8
+        mov rdx, rdi
+        add r9, rax
+        adc r12, rbx
+        adc r13, rdx
+        add r9, rax
+        adc r12, rbx
+        adc r13, rdx
+        mov r11, rdx
+        mov r8, rax
+        mov rdi, rbx
+        mov rax, qword ptr [176+rcx]
+        mul rax
+        add r9, rax
+        adc r12, rdx
+        adc r13, 0
+        mov rsi, r11
+        mov qword ptr [-192+rbp], r9
+        mov r9, r13
+        mov rax, qword ptr [112+rcx]
+        mul qword ptr [248+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov r13, r10
+        mov rax, qword ptr [120+rcx]
+        mul qword ptr [240+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [128+rcx]
+        mul qword ptr [232+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [136+rcx]
+        mul qword ptr [224+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [144+rcx]
+        mul qword ptr [216+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [152+rcx]
+        mul qword ptr [208+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [160+rcx]
+        mul qword ptr [200+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [168+rcx]
+        mul qword ptr [192+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [176+rcx]
+        mul qword ptr [184+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rbx, r8
+        mov rax, rdi
+        mov rdx, rsi
+        add r12, rbx
+        adc r9, rax
+        adc r13, rdx
+        add r12, rbx
+        adc r9, rax
+        adc r13, rdx
+        mov qword ptr [-184+rbp], r12
+        mov rdi, rdx
+        mov r8, rax
+        mov rsi, rbx
+        mov r12, r13
+        mov r13, r10
+        mov rax, qword ptr [120+rcx]
+        mul qword ptr [248+rcx]
+        mov rsi, rax
+        mov r8, rdx
+        xor rdi, rdi
+        mov rax, qword ptr [128+rcx]
+        mul qword ptr [240+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [136+rcx]
+        mul qword ptr [232+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [144+rcx]
+        mul qword ptr [224+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [152+rcx]
+        mul qword ptr [216+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [160+rcx]
+        mul qword ptr [208+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [168+rcx]
+        mul qword ptr [200+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, qword ptr [176+rcx]
+        mul qword ptr [192+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc rdi, 0
+        mov rax, rsi
+        mov rbx, r8
+        mov rdx, rdi
+        add r9, rax
+        adc r12, rbx
+        adc r13, rdx
+        add r9, rax
+        adc r12, rbx
+        adc r13, rdx
+        mov r11, rdx
+        mov r8, rax
+        mov rdi, rbx
+        mov rax, qword ptr [184+rcx]
+        mul rax
+        add r9, rax
+        adc r12, rdx
+        adc r13, 0
+        mov rsi, r11
+        mov qword ptr [-176+rbp], r9
+        mov r9, r13
+        mov rax, qword ptr [128+rcx]
+        mul qword ptr [248+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov r13, r10
+        mov rax, qword ptr [136+rcx]
+        mul qword ptr [240+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [144+rcx]
+        mul qword ptr [232+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [152+rcx]
+        mul qword ptr [224+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [160+rcx]
+        mul qword ptr [216+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [168+rcx]
+        mul qword ptr [208+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [176+rcx]
+        mul qword ptr [200+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [184+rcx]
+        mul qword ptr [192+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        add r12, r8
+        adc r9, rdi
+        adc r13, rsi
+        add r12, r8
+        adc r9, rdi
+        adc r13, rsi
+        mov qword ptr [-168+rbp], r12
+        mov r12, r13
+        mov r13, r10
+        mov rax, qword ptr [136+rcx]
+        mul qword ptr [248+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [144+rcx]
+        mul qword ptr [240+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [152+rcx]
+        mul qword ptr [232+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [160+rcx]
+        mul qword ptr [224+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [168+rcx]
+        mul qword ptr [216+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [176+rcx]
+        mul qword ptr [208+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [184+rcx]
+        mul qword ptr [200+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rbx, rdi
+        mov rax, rsi
+        add r9, r8
+        adc r12, rbx
+        adc r13, rax
+        add r9, r8
+        adc r12, rbx
+        adc r13, rax
+        mov r11, rax
+        mov rdi, rbx
+        mov rbx, r10
+        mov rax, qword ptr [192+rcx]
+        mul rax
+        add r9, rax
+        adc r12, rdx
+        adc r13, 0
+        mov rsi, r11
+        mov qword ptr [-160+rbp], r9
+        mov r9, r13
+        mov rax, qword ptr [144+rcx]
+        mul qword ptr [248+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [152+rcx]
+        mul qword ptr [240+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [160+rcx]
+        mul qword ptr [232+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [168+rcx]
+        mul qword ptr [224+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [176+rcx]
+        mul qword ptr [216+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [184+rcx]
+        mul qword ptr [208+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [192+rcx]
+        mul qword ptr [200+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        add r12, r8
+        adc r9, rdi
+        adc rbx, rsi
+        add r12, r8
+        adc r9, rdi
+        adc rbx, rsi
+        mov qword ptr [-152+rbp], r12
+        mov rax, qword ptr [152+rcx]
+        mul qword ptr [248+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [160+rcx]
+        mul qword ptr [240+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [168+rcx]
+        mul qword ptr [232+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [176+rcx]
+        mul qword ptr [224+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [184+rcx]
+        mul qword ptr [216+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [192+rcx]
+        mul qword ptr [208+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rdx, r10
+        mov r13, rdi
+        mov r12, rsi
+        add r9, r8
+        adc rbx, r13
+        adc rdx, r12
+        add r9, r8
+        adc rbx, r13
+        adc rdx, r12
+        mov rax, rdx
+        mov rdi, r13
+        mov rsi, r12
+        mov r11, rax
+        mov r12, r10
+        mov rax, qword ptr [200+rcx]
+        mul rax
+        add r9, rax
+        adc rbx, rdx
+        adc r11, 0
+        mov qword ptr [-144+rbp], r9
+        mov r9, r11
+        mov rax, qword ptr [160+rcx]
+        mul qword ptr [248+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [168+rcx]
+        mul qword ptr [240+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [176+rcx]
+        mul qword ptr [232+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [184+rcx]
+        mul qword ptr [224+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [192+rcx]
+        mul qword ptr [216+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [200+rcx]
+        mul qword ptr [208+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        add rbx, r8
+        adc r9, rdi
+        adc r12, rsi
+        add rbx, r8
+        adc r9, rdi
+        adc r12, rsi
+        mov qword ptr [-136+rbp], rbx
+        mov r11, r12
+        mov rax, qword ptr [168+rcx]
+        mul qword ptr [248+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [176+rcx]
+        mul qword ptr [240+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [184+rcx]
+        mul qword ptr [232+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [192+rcx]
+        mul qword ptr [224+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [200+rcx]
+        mul qword ptr [216+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, r10
+        mov r13, rdi
+        mov r12, rsi
+        add r9, r8
+        adc r11, r13
+        adc rax, r12
+        add r9, r8
+        adc r11, r13
+        adc rax, r12
+        mov rdx, rax
+        mov rbx, r11
+        mov rdi, r13
+        mov r11, rdx
+        mov rsi, r12
+        mov rax, qword ptr [208+rcx]
+        mul rax
+        add r9, rax
+        adc rbx, rdx
+        adc r11, 0
+        mov qword ptr [-128+rbp], r9
+        mov r9, r11
+        mov rax, qword ptr [176+rcx]
+        mul qword ptr [248+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [184+rcx]
+        mul qword ptr [240+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [192+rcx]
+        mul qword ptr [232+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [200+rcx]
+        mul qword ptr [224+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [208+rcx]
+        mul qword ptr [216+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rdx, r10
+        add rbx, r8
+        adc r9, rdi
+        adc rdx, rsi
+        add rbx, r8
+        adc r9, rdi
+        adc rdx, rsi
+        mov qword ptr [-120+rbp], rbx
+        mov r11, rdx
+        mov rbx, r10
+        mov rax, qword ptr [184+rcx]
+        mul qword ptr [248+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [192+rcx]
+        mul qword ptr [240+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [200+rcx]
+        mul qword ptr [232+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [208+rcx]
+        mul qword ptr [224+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r13, rdi
+        mov r12, rsi
+        add r9, r8
+        adc r11, r13
+        adc rbx, r12
+        add r9, r8
+        adc r11, r13
+        adc rbx, r12
+        mov rdx, rbx
+        mov rdi, r13
+        mov rbx, r11
+        mov rsi, r12
+        mov r11, rdx
+        mov r12, r10
+        mov rax, qword ptr [216+rcx]
+        mul rax
+        add r9, rax
+        adc rbx, rdx
+        adc r11, 0
+        mov qword ptr [-112+rbp], r9
+        mov r9, r11
+        mov rax, qword ptr [192+rcx]
+        mul qword ptr [248+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [200+rcx]
+        mul qword ptr [240+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [208+rcx]
+        mul qword ptr [232+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [216+rcx]
+        mul qword ptr [224+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        add rbx, r8
+        adc r9, rdi
+        adc r12, rsi
+        add rbx, r8
+        adc r9, rdi
+        adc r12, rsi
+        mov qword ptr [-104+rbp], rbx
+        mov r11, r12
+        mov rax, qword ptr [200+rcx]
+        mul qword ptr [248+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [208+rcx]
+        mul qword ptr [240+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [216+rcx]
+        mul qword ptr [232+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, r10
+        mov r13, rdi
+        mov r12, rsi
+        add r9, r8
+        adc r11, r13
+        adc rax, r12
+        add r9, r8
+        adc r11, r13
+        adc rax, r12
+        mov rdx, rax
+        mov rbx, r11
+        mov rdi, r13
+        mov r11, rdx
+        mov rsi, r12
+        mov r12, r10
+        mov rax, qword ptr [224+rcx]
+        mul rax
+        add r9, rax
+        adc rbx, rdx
+        adc r11, 0
+        mov qword ptr [-96+rbp], r9
+        mov r9, r10
+        mov rax, qword ptr [208+rcx]
+        mul qword ptr [248+rcx]
+        mov r8, rax
+        mov rdi, rdx
+        xor rsi, rsi
+        mov rax, qword ptr [216+rcx]
+        mul qword ptr [240+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov rax, qword ptr [224+rcx]
+        mul qword ptr [232+rcx]
+        add r8, rax
+        adc rdi, rdx
+        adc rsi, 0
+        mov r13, rdi
+        mov rax, rsi
+        add rbx, r8
+        adc r11, r13
+        adc r9, rax
+        add rbx, r8
+        adc r11, r13
+        adc r9, rax
+        mov qword ptr [-88+rbp], rbx
+        mov rsi, r11
+        mov r8, r9
+        mov rax, qword ptr [216+rcx]
+        mul qword ptr [248+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc r12, 0
+        add rsi, rax
+        adc r8, rdx
+        adc r12, 0
+        mov r11, r12
+        mov rax, qword ptr [224+rcx]
+        mul qword ptr [240+rcx]
+        add rsi, rax
+        adc r8, rdx
+        adc r11, 0
+        add rsi, rax
+        adc r8, rdx
+        adc r11, 0
+        mov r13, r8
+        mov rbx, r11
+        mov rax, qword ptr [232+rcx]
+        mul rax
+        add rsi, rax
+        adc r13, rdx
+        adc rbx, 0
+        mov qword ptr [-80+rbp], rsi
+        mov r12, rbx
+        mov rdi, r13
+        mov r13, r10
+        mov rax, qword ptr [224+rcx]
+        mul qword ptr [248+rcx]
+        add rdi, rax
+        adc r12, rdx
+        adc r13, 0
+        add rdi, rax
+        adc r12, rdx
+        adc r13, 0
+        mov r9, r12
+        mov r12, r13
+        mov rax, qword ptr [232+rcx]
+        mul qword ptr [240+rcx]
+        add rdi, rax
+        adc r9, rdx
+        adc r12, 0
+        add rdi, rax
+        adc r9, rdx
+        adc r12, 0
+        mov qword ptr [-72+rbp], rdi
+        mov r11, r9
+        mov rbx, r12
+        mov r9, r10
+        mov rax, qword ptr [232+rcx]
+        mul qword ptr [248+rcx]
+        add r11, rax
+        adc rbx, rdx
+        adc r9, 0
+        add r11, rax
+        adc rbx, rdx
+        adc r9, 0
+        mov r13, rbx
+        mov rbx, r9
+        mov r9, r10
+        mov rax, qword ptr [240+rcx]
+        mul rax
+        add r11, rax
+        adc r13, rdx
+        adc rbx, 0
+        mov qword ptr [-64+rbp], r11
+        mov rdi, r13
+        mov rsi, rbx
+        mov rax, qword ptr [240+rcx]
+        mul qword ptr [248+rcx]
+        add rdi, rax
+        adc rsi, rdx
+        adc r9, 0
+        add rdi, rax
+        adc rsi, rdx
+        adc r9, 0
+        mov qword ptr [-56+rbp], rdi
+        mov r8, r9
+        mov rax, qword ptr [248+rcx]
+        mul rax
+        add rsi, rax
+        adc r8, rdx
+        adc r10, 0
+        mov qword ptr [-48+rbp], rsi
+        mov qword ptr [-40+rbp], r8
+        mov dword ptr [8+r14], 64
+        mov dword ptr [r14], 0
+;        mov rdi, qword ptr [16+r14]
+;        lea rsi, qword ptr [-544+rbp]
+;        mov edx, 512
+        mov rcx, qword ptr [16+r14]
+        lea rdx, qword ptr [-544+rbp]
+        mov r8d, 512
+        call memcpy
+        mov edx, dword ptr [8+r14]
+        test edx, edx
+        je L304
+        lea ecx, dword ptr [-1+rdx]
+        mov rsi, qword ptr [16+r14]
+        mov r10d, ecx
+        cmp dword ptr [rsi+r10*8], 0
+        jne L302
+        mov edx, ecx
+        ALIGN 16
+L303:
+        test edx, edx
+        mov ecx, edx
+        je L307
+        dec edx
+        mov eax, edx
+        cmp dword ptr [rsi+rax*8], 0
+        je L303
+        mov dword ptr [8+r14], ecx
+        mov edx, ecx
+L302:
+        test edx, edx
+        je L304
+        mov eax, dword ptr [r14]
+        jmp L305
+
+L307:
+        mov dword ptr [8+r14], edx
+L304:
+        xor eax, eax
+L305:
+        mov dword ptr [r14], eax
+        add rsp, 512+32			; +32 for "home" storage
+        pop rbx
+        pop r12
+        pop r13
+        pop r14
+        pop rbp
+
+        pop rsi
+        pop rdi
+
+        ret
+
+s_mp_sqr_comba_32 ENDP
+
+END
diff --git a/security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s b/security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s
new file mode 100644
index 000000000..a5181df33
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s
@@ -0,0 +1,16097 @@
+//* TomsFastMath, a fast ISO C bignum library.
+/ * 
+/ * This project is meant to fill in where LibTomMath
+/ * falls short.  That is speed ;-)
+/ *
+/ * This project is public domain and free for all purposes.
+/ * 
+/ * Tom St Denis, tomstdenis@iahu.ca
+/ */
+
+//*
+/ * The source file from which this assembly was derived
+/ * comes from TFM v0.03, which has the above license.
+/ * This source was compiled with an unnamed compiler at
+/ * the highest optimization level.  Afterwards, the
+/ * trailing .section was removed because it causes errors
+/ * in the Studio 10 compiler on AMD 64.
+/ */
+
+       	.file	"mp_comba.c"
+	.text
+	.align 16
+.globl s_mp_mul_comba_4	/ Multiplies the four 64-bit digits found via 16(%rdi) by the four found via 16(%rsi), one result column at a time,
+	.type	s_mp_mul_comba_4, @function	/ stores the eight product digits through the pointer at 16(%rdx), then sets the 32-bit fields at 8(%rdx) and (%rdx).
+s_mp_mul_comba_4:	/ NOTE(review): struct offsets 0, 8 and 16 are presumably mp_int sign, used and dp -- confirm against mpi.h.
+.LFB2:	/ Below, a0..a3 name the digits read via %rdi and b0..b3 those read via %rsi (index 0 = lowest address).
+	pushq	%r12	/ %r12, %rbp, %rbx are used as scratch and restored before ret
+.LCFI0:
+	pushq	%rbp
+.LCFI1:
+	pushq	%rbx
+.LCFI2:
+	movq	16(%rdi), %r9	/ %r9 = digit pointer of the first operand
+	movq	%rdx, %rbx	/ keep the result struct pointer in %rbx: mulq overwrites %rdx
+	movq	16(%rsi), %rdx	/ %rdx = digit pointer of the second operand
+	movq	(%r9), %rax	/ operands are spilled below %rsp without adjusting it; the routine makes no calls (relies on the ABI red zone)
+	movq	%rax, -64(%rsp)	/ a0 -> -64(%rsp)
+	movq	8(%r9), %r8
+	movq	%r8, -56(%rsp)	/ a1 -> -56(%rsp)
+	movq	16(%r9), %rbp
+	movq	%rbp, -48(%rsp)	/ a2 -> -48(%rsp)
+	movq	24(%r9), %r12
+	movq	%r12, -40(%rsp)	/ a3 -> -40(%rsp)
+	movq	(%rdx), %rcx
+	movq	%rcx, -32(%rsp)	/ b0 -> -32(%rsp)
+	movq	8(%rdx), %r10
+	movq	%r10, -24(%rsp)	/ b1 -> -24(%rsp)
+	movq	16(%rdx), %r11
+	xorl	%r10d, %r10d	/ %r10 = 0: constant zero used to reset the accumulator words
+	movq	%r10, %r8	/ three-word column accumulator starts as (%r8 low, %r9 mid, %rbp high) = 0
+	movq	%r10, %r9
+	movq	%r10, %rbp
+	movq	%r11, -16(%rsp)	/ b2 -> -16(%rsp)
+	movq	16(%rbx), %r11	/ %r11 = result digit pointer; all input digits are already copied, so stores through it cannot disturb them
+	movq	24(%rdx), %rax
+	movq	%rax, -8(%rsp)	/ b3 -> -8(%rsp); the block below accumulates column 0: a0*b0
+/APP
+	movq  -64(%rsp),%rax     
+	mulq  -32(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rbp        
+	
+/NO_APP
+	movq	%r8, (%r11)	/ result digit 0 = low accumulator word
+	movq	%rbp, %r8	/ rotate accumulator: old mid/high words become the new low/mid words
+	movq	%r10, %rbp	/ new high word = 0; column 1: a0*b1 + a1*b0
+/APP
+	movq  -64(%rsp),%rax     
+	mulq  -24(%rsp)           
+	addq  %rax,%r9     
+	adcq  %rdx,%r8     
+	adcq  $0,%rbp        
+	
+/NO_APP
+	movq	%rbp, %r12	/ high word continues in %r12
+/APP
+	movq  -56(%rsp),%rax     
+	mulq  -32(%rsp)           
+	addq  %rax,%r9     
+	adcq  %rdx,%r8     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r9, 8(%r11)	/ result digit 1
+	movq	%r12, %r9
+	movq	%r10, %r12	/ column 2: a0*b2 + a1*b1 + a2*b0
+/APP
+	movq  -64(%rsp),%rax     
+	mulq  -16(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r12, %rcx	/ high word continues in %rcx
+/APP
+	movq  -56(%rsp),%rax     
+	mulq  -24(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp	/ mid word continues in %rbp, high word in %r12
+	movq	%rcx, %r12
+/APP
+	movq  -48(%rsp),%rax     
+	mulq  -32(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 16(%r11)	/ result digit 2
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8	/ column 3: a0*b3 + a1*b2 + a2*b1 + a3*b0, accumulator = (%rcx low, %r9 mid, %r8 high)
+/APP
+	movq  -64(%rsp),%rax     
+	mulq  -8(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -56(%rsp),%rax     
+	mulq  -16(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -48(%rsp),%rax     
+	mulq  -24(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r9, %rbp	/ mid word continues in %rbp, high word in %r12
+	movq	%r8, %r12
+/APP
+	movq  -40(%rsp),%rax     
+	mulq  -32(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 24(%r11)	/ result digit 3
+	movq	%r12, %r9
+	movq	%rbp, %r8
+	movq	%r10, %rcx	/ column 4: a1*b3 + a2*b2 + a3*b1, accumulator = (%r8 low, %r9 mid, %rcx high)
+/APP
+	movq  -56(%rsp),%rax     
+	mulq  -8(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -48(%rsp),%rax     
+	mulq  -16(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp	/ mid word continues in %rbp, high word in %r12
+	movq	%rcx, %r12
+/APP
+	movq  -40(%rsp),%rax     
+	mulq  -24(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 32(%r11)	/ result digit 4
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8	/ column 5: a2*b3 + a3*b2, accumulator = (%rcx low, %r9 mid, %r8 high)
+/APP
+	movq  -48(%rsp),%rax     
+	mulq  -8(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r8, %r12	/ high word continues in %r12, mid word in %rbp
+	movq	%r9, %rbp
+/APP
+	movq  -40(%rsp),%rax     
+	mulq  -16(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 40(%r11)	/ result digit 5
+	movq	%rbp, %r8
+	movq	%r12, %rcx	/ column 6: a3*b3 into (%r8 low, %rcx mid); the carry added to %r10 is never read afterwards
+/APP
+	movq  -40(%rsp),%rax     
+	mulq  -8(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rcx     
+	adcq  $0,%r10        
+	
+/NO_APP
+	movq	%r8, 48(%r11)	/ result digit 6
+	movl	(%rsi), %esi	/ %esi = 32-bit field at offset 0 of the second operand ...
+	xorl	(%rdi), %esi	/ ... XORed with the same field of the first operand
+	testq	%rcx, %rcx	/ is the top result digit zero? (the two stores below do not change the flags)
+	movq	%rcx, 56(%r11)	/ result digit 7
+	movl	$8, 8(%rbx)	/ result digit count = 8
+	jne	.L9	/ top digit non-zero: nothing to trim
+	.align 16
+.L18:	/ trim loop: drop zero digits from the top of the result
+	movl	8(%rbx), %edx
+	leal	-1(%rdx), %edi
+	testl	%edi, %edi
+	movl	%edi, 8(%rbx)	/ count = count - 1
+	je	.L9	/ count reached 0: stop
+	leal	-2(%rdx), %r10d	/ index of the new top digit (new count - 1)
+	cmpq	$0, (%r11,%r10,8)
+	je	.L18	/ new top digit is zero too: keep trimming
+.L9:
+	movl	8(%rbx), %edx
+	xorl	%r11d, %r11d
+	testl	%edx, %edx
+	cmovne	%esi, %r11d	/ offset-0 field of the result = XOR computed above if count != 0, else 0
+	movl	%r11d, (%rbx)
+	popq	%rbx	/ restore saved registers in reverse push order
+	popq	%rbp
+	popq	%r12
+	ret
+.LFE2:
+	.size	s_mp_mul_comba_4, .-s_mp_mul_comba_4
+	.align 16
+.globl s_mp_mul_comba_8
+	.type	s_mp_mul_comba_8, @function
+s_mp_mul_comba_8:
+.LFB3:
+	pushq	%r12
+.LCFI3:
+	pushq	%rbp
+.LCFI4:
+	pushq	%rbx
+.LCFI5:
+	movq	%rdx, %rbx
+	subq	$8, %rsp
+.LCFI6:
+	movq	16(%rdi), %rdx
+	movq	(%rdx), %r8
+	movq	%r8, -120(%rsp)
+	movq	8(%rdx), %rbp
+	movq	%rbp, -112(%rsp)
+	movq	16(%rdx), %r9
+	movq	%r9, -104(%rsp)
+	movq	24(%rdx), %r12
+	movq	%r12, -96(%rsp)
+	movq	32(%rdx), %rcx
+	movq	%rcx, -88(%rsp)
+	movq	40(%rdx), %r10
+	movq	%r10, -80(%rsp)
+	movq	48(%rdx), %r11
+	movq	%r11, -72(%rsp)
+	movq	56(%rdx), %rax
+	movq	16(%rsi), %rdx
+	movq	%rax, -64(%rsp)
+	movq	(%rdx), %r8
+	movq	%r8, -56(%rsp)
+	movq	8(%rdx), %rbp
+	movq	%rbp, -48(%rsp)
+	movq	16(%rdx), %r9
+	movq	%r9, -40(%rsp)
+	movq	24(%rdx), %r12
+	movq	%r12, -32(%rsp)
+	movq	32(%rdx), %rcx
+	movq	%rcx, -24(%rsp)
+	movq	40(%rdx), %r10
+	movq	%r10, -16(%rsp)
+	movq	48(%rdx), %r11
+	xorl	%r10d, %r10d
+	movq	%r10, %r8
+	movq	%r10, %r9
+	movq	%r10, %rbp
+	movq	%r11, -8(%rsp)
+	movq	16(%rbx), %r11
+	movq	56(%rdx), %rax
+	movq	%rax, (%rsp)
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  -56(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rbp        
+	
+/NO_APP
+	movq	%r8, (%r11)
+	movq	%rbp, %r8
+	movq	%r10, %rbp
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  -48(%rsp)           
+	addq  %rax,%r9     
+	adcq  %rdx,%r8     
+	adcq  $0,%rbp        
+	
+/NO_APP
+	movq	%rbp, %r12
+/APP
+	movq  -112(%rsp),%rax     
+	mulq  -56(%rsp)           
+	addq  %rax,%r9     
+	adcq  %rdx,%r8     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r9, 8(%r11)
+	movq	%r12, %r9
+	movq	%r10, %r12
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  -40(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r12, %rcx
+/APP
+	movq  -112(%rsp),%rax     
+	mulq  -48(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%rcx, %r12
+/APP
+	movq  -104(%rsp),%rax     
+	mulq  -56(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 16(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  -32(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -112(%rsp),%rax     
+	mulq  -40(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -104(%rsp),%rax     
+	mulq  -48(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%r8, %r12
+/APP
+	movq  -96(%rsp),%rax     
+	mulq  -56(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 24(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %r8
+	movq	%r10, %rcx
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  -24(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -112(%rsp),%rax     
+	mulq  -32(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -104(%rsp),%rax     
+	mulq  -40(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -96(%rsp),%rax     
+	mulq  -48(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%rcx, %r12
+/APP
+	movq  -88(%rsp),%rax     
+	mulq  -56(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 32(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  -16(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -112(%rsp),%rax     
+	mulq  -24(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -104(%rsp),%rax     
+	mulq  -32(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -96(%rsp),%rax     
+	mulq  -40(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -88(%rsp),%rax     
+	mulq  -48(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%r8, %r12
+/APP
+	movq  -80(%rsp),%rax     
+	mulq  -56(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 40(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %r8
+	movq	%r10, %rcx
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  -8(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -112(%rsp),%rax     
+	mulq  -16(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -104(%rsp),%rax     
+	mulq  -24(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -96(%rsp),%rax     
+	mulq  -32(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -88(%rsp),%rax     
+	mulq  -40(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -80(%rsp),%rax     
+	mulq  -48(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%rcx, %r12
+/APP
+	movq  -72(%rsp),%rax     
+	mulq  -56(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 48(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  (%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -112(%rsp),%rax     
+	mulq  -8(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -104(%rsp),%rax     
+	mulq  -16(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -96(%rsp),%rax     
+	mulq  -24(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -88(%rsp),%rax     
+	mulq  -32(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -80(%rsp),%rax     
+	mulq  -40(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -72(%rsp),%rax     
+	mulq  -48(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%r8, %r12
+/APP
+	movq  -64(%rsp),%rax     
+	mulq  -56(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 56(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %r8
+	movq	%r10, %rcx
+/APP
+	movq  -112(%rsp),%rax     
+	mulq  (%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -104(%rsp),%rax     
+	mulq  -8(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -96(%rsp),%rax     
+	mulq  -16(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -88(%rsp),%rax     
+	mulq  -24(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -80(%rsp),%rax     
+	mulq  -32(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -72(%rsp),%rax     
+	mulq  -40(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%rcx, %r12
+/APP
+	movq  -64(%rsp),%rax     
+	mulq  -48(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 64(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8
+/APP
+	movq  -104(%rsp),%rax     
+	mulq  (%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -96(%rsp),%rax     
+	mulq  -8(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -88(%rsp),%rax     
+	mulq  -16(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -80(%rsp),%rax     
+	mulq  -24(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -72(%rsp),%rax     
+	mulq  -32(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%r8, %r12
+/APP
+	movq  -64(%rsp),%rax     
+	mulq  -40(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 72(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %r8
+	movq	%r10, %rcx
+/APP
+	movq  -96(%rsp),%rax     
+	mulq  (%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -88(%rsp),%rax     
+	mulq  -8(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -80(%rsp),%rax     
+	mulq  -16(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -72(%rsp),%rax     
+	mulq  -24(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%rcx, %r12
+/APP
+	movq  -64(%rsp),%rax     
+	mulq  -32(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 80(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8
+/APP
+	movq  -88(%rsp),%rax     
+	mulq  (%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -80(%rsp),%rax     
+	mulq  -8(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -72(%rsp),%rax     
+	mulq  -16(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%r8, %r12
+/APP
+	movq  -64(%rsp),%rax     
+	mulq  -24(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 88(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %r8
+	movq	%r10, %rcx
+/APP
+	movq  -80(%rsp),%rax     
+	mulq  (%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -72(%rsp),%rax     
+	mulq  -8(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%rcx, %r12
+/APP
+	movq  -64(%rsp),%rax     
+	mulq  -16(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 96(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8
+/APP
+	movq  -72(%rsp),%rax     
+	mulq  (%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r8, %r12
+	movq	%r9, %rbp
+/APP
+	movq  -64(%rsp),%rax     
+	mulq  -8(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 104(%r11)
+	movq	%rbp, %r8
+	movq	%r12, %rcx
+/APP
+	movq  -64(%rsp),%rax     
+	mulq  (%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rcx     
+	adcq  $0,%r10        
+	
+/NO_APP
+	movq	%r8, 112(%r11)
+	movl	(%rsi), %esi
+	xorl	(%rdi), %esi
+	testq	%rcx, %rcx
+	movq	%rcx, 120(%r11)
+	movl	$16, 8(%rbx)
+	jne	.L35
+	.align 16
+.L43:
+	movl	8(%rbx), %edx
+	leal	-1(%rdx), %edi
+	testl	%edi, %edi
+	movl	%edi, 8(%rbx)
+	je	.L35
+	leal	-2(%rdx), %eax
+	cmpq	$0, (%r11,%rax,8)
+	je	.L43
+.L35:
+	movl	8(%rbx), %r11d
+	xorl	%edx, %edx
+	testl	%r11d, %r11d
+	cmovne	%esi, %edx
+	movl	%edx, (%rbx)
+	addq	$8, %rsp
+	popq	%rbx
+	popq	%rbp
+	popq	%r12
+	ret
+.LFE3:
+	.size	s_mp_mul_comba_8, .-s_mp_mul_comba_8
+	.align 16
+.globl s_mp_mul_comba_16
+	.type	s_mp_mul_comba_16, @function
+s_mp_mul_comba_16:
+.LFB4:
+	pushq	%r12
+.LCFI7:
+	pushq	%rbp
+.LCFI8:
+	pushq	%rbx
+.LCFI9:
+	movq	%rdx, %rbx
+	subq	$136, %rsp
+.LCFI10:
+	movq	16(%rdi), %rax
+	movq	(%rax), %r8
+	movq	%r8, -120(%rsp)
+	movq	8(%rax), %rbp
+	movq	%rbp, -112(%rsp)
+	movq	16(%rax), %r9
+	movq	%r9, -104(%rsp)
+	movq	24(%rax), %r12
+	movq	%r12, -96(%rsp)
+	movq	32(%rax), %rcx
+	movq	%rcx, -88(%rsp)
+	movq	40(%rax), %r10
+	movq	%r10, -80(%rsp)
+	movq	48(%rax), %rdx
+	movq	%rdx, -72(%rsp)
+	movq	56(%rax), %r11
+	movq	%r11, -64(%rsp)
+	movq	64(%rax), %r8
+	movq	%r8, -56(%rsp)
+	movq	72(%rax), %rbp
+	movq	%rbp, -48(%rsp)
+	movq	80(%rax), %r9
+	movq	%r9, -40(%rsp)
+	movq	88(%rax), %r12
+	movq	%r12, -32(%rsp)
+	movq	96(%rax), %rcx
+	movq	%rcx, -24(%rsp)
+	movq	104(%rax), %r10
+	movq	%r10, -16(%rsp)
+	movq	112(%rax), %rdx
+	movq	%rdx, -8(%rsp)
+	movq	120(%rax), %r11
+	movq	%r11, (%rsp)
+	movq	16(%rsi), %r11
+	movq	(%r11), %r8
+	movq	%r8, 8(%rsp)
+	movq	8(%r11), %rbp
+	movq	%rbp, 16(%rsp)
+	movq	16(%r11), %r9
+	movq	%r9, 24(%rsp)
+	movq	24(%r11), %r12
+	movq	%r12, 32(%rsp)
+	movq	32(%r11), %rcx
+	movq	%rcx, 40(%rsp)
+	movq	40(%r11), %r10
+	movq	%r10, 48(%rsp)
+	movq	48(%r11), %rdx
+	movq	%rdx, 56(%rsp)
+	movq	56(%r11), %rax
+	movq	%rax, 64(%rsp)
+	movq	64(%r11), %r8
+	movq	%r8, 72(%rsp)
+	movq	72(%r11), %rbp
+	movq	%rbp, 80(%rsp)
+	movq	80(%r11), %r9
+	movq	%r9, 88(%rsp)
+	movq	88(%r11), %r12
+	movq	%r12, 96(%rsp)
+	movq	96(%r11), %rcx
+	movq	%rcx, 104(%rsp)
+	movq	104(%r11), %r10
+	movq	%r10, 112(%rsp)
+	movq	112(%r11), %rdx
+	xorl	%r10d, %r10d
+	movq	%r10, %r8
+	movq	%r10, %r9
+	movq	%r10, %rbp
+	movq	%rdx, 120(%rsp)
+	movq	120(%r11), %rax
+	movq	%rax, 128(%rsp)
+	movq	16(%rbx), %r11
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  8(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rbp        
+	
+/NO_APP
+	movq	%r8, (%r11)
+	movq	%rbp, %r8
+	movq	%r10, %rbp
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  16(%rsp)           
+	addq  %rax,%r9     
+	adcq  %rdx,%r8     
+	adcq  $0,%rbp        
+	
+/NO_APP
+	movq	%rbp, %r12
+/APP
+	movq  -112(%rsp),%rax     
+	mulq  8(%rsp)           
+	addq  %rax,%r9     
+	adcq  %rdx,%r8     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r9, 8(%r11)
+	movq	%r12, %r9
+	movq	%r10, %r12
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  24(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r12, %rcx
+/APP
+	movq  -112(%rsp),%rax     
+	mulq  16(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%rcx, %r12
+/APP
+	movq  -104(%rsp),%rax     
+	mulq  8(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 16(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  32(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -112(%rsp),%rax     
+	mulq  24(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -104(%rsp),%rax     
+	mulq  16(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%r8, %r12
+/APP
+	movq  -96(%rsp),%rax     
+	mulq  8(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 24(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %r8
+	movq	%r10, %rcx
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  40(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -112(%rsp),%rax     
+	mulq  32(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -104(%rsp),%rax     
+	mulq  24(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -96(%rsp),%rax     
+	mulq  16(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%rcx, %r12
+/APP
+	movq  -88(%rsp),%rax     
+	mulq  8(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 32(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  48(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -112(%rsp),%rax     
+	mulq  40(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -104(%rsp),%rax     
+	mulq  32(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -96(%rsp),%rax     
+	mulq  24(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -88(%rsp),%rax     
+	mulq  16(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%r8, %r12
+/APP
+	movq  -80(%rsp),%rax     
+	mulq  8(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 40(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %r8
+	movq	%r10, %rcx
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  56(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -112(%rsp),%rax     
+	mulq  48(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -104(%rsp),%rax     
+	mulq  40(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -96(%rsp),%rax     
+	mulq  32(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -88(%rsp),%rax     
+	mulq  24(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -80(%rsp),%rax     
+	mulq  16(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%rcx, %r12
+/APP
+	movq  -72(%rsp),%rax     
+	mulq  8(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 48(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  64(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -112(%rsp),%rax     
+	mulq  56(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -104(%rsp),%rax     
+	mulq  48(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -96(%rsp),%rax     
+	mulq  40(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -88(%rsp),%rax     
+	mulq  32(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -80(%rsp),%rax     
+	mulq  24(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -72(%rsp),%rax     
+	mulq  16(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%r8, %r12
+/APP
+	movq  -64(%rsp),%rax     
+	mulq  8(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 56(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %r8
+	movq	%r10, %rcx
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  72(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -112(%rsp),%rax     
+	mulq  64(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -104(%rsp),%rax     
+	mulq  56(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -96(%rsp),%rax     
+	mulq  48(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -88(%rsp),%rax     
+	mulq  40(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -80(%rsp),%rax     
+	mulq  32(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -72(%rsp),%rax     
+	mulq  24(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -64(%rsp),%rax     
+	mulq  16(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%rcx, %r12
+/APP
+	movq  -56(%rsp),%rax     
+	mulq  8(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 64(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  80(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -112(%rsp),%rax     
+	mulq  72(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -104(%rsp),%rax     
+	mulq  64(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -96(%rsp),%rax     
+	mulq  56(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -88(%rsp),%rax     
+	mulq  48(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -80(%rsp),%rax     
+	mulq  40(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -72(%rsp),%rax     
+	mulq  32(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -64(%rsp),%rax     
+	mulq  24(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -56(%rsp),%rax     
+	mulq  16(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%r8, %r12
+/APP
+	movq  -48(%rsp),%rax     
+	mulq  8(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 72(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %r8
+	movq	%r10, %rcx
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  88(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -112(%rsp),%rax     
+	mulq  80(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -104(%rsp),%rax     
+	mulq  72(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -96(%rsp),%rax     
+	mulq  64(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -88(%rsp),%rax     
+	mulq  56(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -80(%rsp),%rax     
+	mulq  48(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -72(%rsp),%rax     
+	mulq  40(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -64(%rsp),%rax     
+	mulq  32(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -56(%rsp),%rax     
+	mulq  24(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -48(%rsp),%rax     
+	mulq  16(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%rcx, %r12
+/APP
+	movq  -40(%rsp),%rax     
+	mulq  8(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 80(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  96(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -112(%rsp),%rax     
+	mulq  88(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -104(%rsp),%rax     
+	mulq  80(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -96(%rsp),%rax     
+	mulq  72(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -88(%rsp),%rax     
+	mulq  64(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -80(%rsp),%rax     
+	mulq  56(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -72(%rsp),%rax     
+	mulq  48(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -64(%rsp),%rax     
+	mulq  40(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -56(%rsp),%rax     
+	mulq  32(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -48(%rsp),%rax     
+	mulq  24(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -40(%rsp),%rax     
+	mulq  16(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%r8, %r12
+/APP
+	movq  -32(%rsp),%rax     
+	mulq  8(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 88(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %r8
+	movq	%r10, %rcx
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  104(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -112(%rsp),%rax     
+	mulq  96(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -104(%rsp),%rax     
+	mulq  88(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -96(%rsp),%rax     
+	mulq  80(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -88(%rsp),%rax     
+	mulq  72(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -80(%rsp),%rax     
+	mulq  64(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -72(%rsp),%rax     
+	mulq  56(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -64(%rsp),%rax     
+	mulq  48(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -56(%rsp),%rax     
+	mulq  40(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -48(%rsp),%rax     
+	mulq  32(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -40(%rsp),%rax     
+	mulq  24(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -32(%rsp),%rax     
+	mulq  16(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%rcx, %r12
+/APP
+	movq  -24(%rsp),%rax     
+	mulq  8(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 96(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  112(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -112(%rsp),%rax     
+	mulq  104(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -104(%rsp),%rax     
+	mulq  96(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -96(%rsp),%rax     
+	mulq  88(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -88(%rsp),%rax     
+	mulq  80(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -80(%rsp),%rax     
+	mulq  72(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -72(%rsp),%rax     
+	mulq  64(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -64(%rsp),%rax     
+	mulq  56(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -56(%rsp),%rax     
+	mulq  48(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -48(%rsp),%rax     
+	mulq  40(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -40(%rsp),%rax     
+	mulq  32(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -32(%rsp),%rax     
+	mulq  24(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -24(%rsp),%rax     
+	mulq  16(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%r8, %r12
+/APP
+	movq  -16(%rsp),%rax     
+	mulq  8(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 104(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %r8
+	movq	%r10, %rcx
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  120(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -112(%rsp),%rax     
+	mulq  112(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -104(%rsp),%rax     
+	mulq  104(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -96(%rsp),%rax     
+	mulq  96(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -88(%rsp),%rax     
+	mulq  88(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -80(%rsp),%rax     
+	mulq  80(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -72(%rsp),%rax     
+	mulq  72(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -64(%rsp),%rax     
+	mulq  64(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -56(%rsp),%rax     
+	mulq  56(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -48(%rsp),%rax     
+	mulq  48(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -40(%rsp),%rax     
+	mulq  40(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -32(%rsp),%rax     
+	mulq  32(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -24(%rsp),%rax     
+	mulq  24(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -16(%rsp),%rax     
+	mulq  16(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%rcx, %r12
+/APP
+	movq  -8(%rsp),%rax     
+	mulq  8(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 112(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8
+/APP
+	movq  -120(%rsp),%rax     
+	mulq  128(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -112(%rsp),%rax     
+	mulq  120(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -104(%rsp),%rax     
+	mulq  112(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -96(%rsp),%rax     
+	mulq  104(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -88(%rsp),%rax     
+	mulq  96(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -80(%rsp),%rax     
+	mulq  88(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -72(%rsp),%rax     
+	mulq  80(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -64(%rsp),%rax     
+	mulq  72(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -56(%rsp),%rax     
+	mulq  64(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -48(%rsp),%rax     
+	mulq  56(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -40(%rsp),%rax     
+	mulq  48(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -32(%rsp),%rax     
+	mulq  40(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -24(%rsp),%rax     
+	mulq  32(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -16(%rsp),%rax     
+	mulq  24(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -8(%rsp),%rax     
+	mulq  16(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%r8, %r12
+/APP
+	movq  (%rsp),%rax     
+	mulq  8(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 120(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %r8
+	movq	%r10, %rcx
+/APP
+	movq  -112(%rsp),%rax     
+	mulq  128(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -104(%rsp),%rax     
+	mulq  120(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -96(%rsp),%rax     
+	mulq  112(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -88(%rsp),%rax     
+	mulq  104(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -80(%rsp),%rax     
+	mulq  96(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -72(%rsp),%rax     
+	mulq  88(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -64(%rsp),%rax     
+	mulq  80(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -56(%rsp),%rax     
+	mulq  72(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -48(%rsp),%rax     
+	mulq  64(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -40(%rsp),%rax     
+	mulq  56(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -32(%rsp),%rax     
+	mulq  48(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -24(%rsp),%rax     
+	mulq  40(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -16(%rsp),%rax     
+	mulq  32(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -8(%rsp),%rax     
+	mulq  24(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%rcx, %r12
+/APP
+	movq  (%rsp),%rax     
+	mulq  16(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 128(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8
+/APP
+	movq  -104(%rsp),%rax     
+	mulq  128(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -96(%rsp),%rax     
+	mulq  120(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -88(%rsp),%rax     
+	mulq  112(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -80(%rsp),%rax     
+	mulq  104(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -72(%rsp),%rax     
+	mulq  96(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -64(%rsp),%rax     
+	mulq  88(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -56(%rsp),%rax     
+	mulq  80(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -48(%rsp),%rax     
+	mulq  72(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -40(%rsp),%rax     
+	mulq  64(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -32(%rsp),%rax     
+	mulq  56(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -24(%rsp),%rax     
+	mulq  48(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -16(%rsp),%rax     
+	mulq  40(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -8(%rsp),%rax     
+	mulq  32(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%r8, %r12
+/APP
+	movq  (%rsp),%rax     
+	mulq  24(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 136(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %r8
+	movq	%r10, %rcx
+/APP
+	movq  -96(%rsp),%rax     
+	mulq  128(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -88(%rsp),%rax     
+	mulq  120(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -80(%rsp),%rax     
+	mulq  112(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -72(%rsp),%rax     
+	mulq  104(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -64(%rsp),%rax     
+	mulq  96(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -56(%rsp),%rax     
+	mulq  88(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -48(%rsp),%rax     
+	mulq  80(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -40(%rsp),%rax     
+	mulq  72(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -32(%rsp),%rax     
+	mulq  64(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -24(%rsp),%rax     
+	mulq  56(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -16(%rsp),%rax     
+	mulq  48(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -8(%rsp),%rax     
+	mulq  40(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%rcx, %r12
+/APP
+	movq  (%rsp),%rax     
+	mulq  32(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 144(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8
+/APP
+	movq  -88(%rsp),%rax     
+	mulq  128(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -80(%rsp),%rax     
+	mulq  120(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -72(%rsp),%rax     
+	mulq  112(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -64(%rsp),%rax     
+	mulq  104(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -56(%rsp),%rax     
+	mulq  96(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -48(%rsp),%rax     
+	mulq  88(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -40(%rsp),%rax     
+	mulq  80(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -32(%rsp),%rax     
+	mulq  72(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -24(%rsp),%rax     
+	mulq  64(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -16(%rsp),%rax     
+	mulq  56(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -8(%rsp),%rax     
+	mulq  48(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%r8, %r12
+/APP
+	movq  (%rsp),%rax     
+	mulq  40(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 152(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %r8
+	movq	%r10, %rcx
+/APP
+	movq  -80(%rsp),%rax     
+	mulq  128(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -72(%rsp),%rax     
+	mulq  120(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -64(%rsp),%rax     
+	mulq  112(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -56(%rsp),%rax     
+	mulq  104(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -48(%rsp),%rax     
+	mulq  96(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -40(%rsp),%rax     
+	mulq  88(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -32(%rsp),%rax     
+	mulq  80(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -24(%rsp),%rax     
+	mulq  72(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -16(%rsp),%rax     
+	mulq  64(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -8(%rsp),%rax     
+	mulq  56(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%rcx, %r12
+/APP
+	movq  (%rsp),%rax     
+	mulq  48(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 160(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8
+/APP
+	movq  -72(%rsp),%rax     
+	mulq  128(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -64(%rsp),%rax     
+	mulq  120(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -56(%rsp),%rax     
+	mulq  112(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -48(%rsp),%rax     
+	mulq  104(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -40(%rsp),%rax     
+	mulq  96(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -32(%rsp),%rax     
+	mulq  88(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -24(%rsp),%rax     
+	mulq  80(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -16(%rsp),%rax     
+	mulq  72(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -8(%rsp),%rax     
+	mulq  64(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%r8, %r12
+/APP
+	movq  (%rsp),%rax     
+	mulq  56(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 168(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %r8
+	movq	%r10, %rcx
+/APP
+	movq  -64(%rsp),%rax     
+	mulq  128(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -56(%rsp),%rax     
+	mulq  120(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -48(%rsp),%rax     
+	mulq  112(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -40(%rsp),%rax     
+	mulq  104(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -32(%rsp),%rax     
+	mulq  96(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -24(%rsp),%rax     
+	mulq  88(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -16(%rsp),%rax     
+	mulq  80(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -8(%rsp),%rax     
+	mulq  72(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%rcx, %r12
+/APP
+	movq  (%rsp),%rax     
+	mulq  64(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 176(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8
+/APP
+	movq  -56(%rsp),%rax     
+	mulq  128(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -48(%rsp),%rax     
+	mulq  120(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -40(%rsp),%rax     
+	mulq  112(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -32(%rsp),%rax     
+	mulq  104(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -24(%rsp),%rax     
+	mulq  96(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -16(%rsp),%rax     
+	mulq  88(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -8(%rsp),%rax     
+	mulq  80(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%r8, %r12
+/APP
+	movq  (%rsp),%rax     
+	mulq  72(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 184(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %r8
+	movq	%r10, %rcx
+/APP
+	movq  -48(%rsp),%rax     
+	mulq  128(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -40(%rsp),%rax     
+	mulq  120(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -32(%rsp),%rax     
+	mulq  112(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -24(%rsp),%rax     
+	mulq  104(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -16(%rsp),%rax     
+	mulq  96(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -8(%rsp),%rax     
+	mulq  88(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%rcx, %r12
+/APP
+	movq  (%rsp),%rax     
+	mulq  80(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 192(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8
+/APP
+	movq  -40(%rsp),%rax     
+	mulq  128(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -32(%rsp),%rax     
+	mulq  120(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -24(%rsp),%rax     
+	mulq  112(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -16(%rsp),%rax     
+	mulq  104(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -8(%rsp),%rax     
+	mulq  96(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%r8, %r12
+/APP
+	movq  (%rsp),%rax     
+	mulq  88(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 200(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %r8
+	movq	%r10, %rcx
+/APP
+	movq  -32(%rsp),%rax     
+	mulq  128(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -24(%rsp),%rax     
+	mulq  120(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -16(%rsp),%rax     
+	mulq  112(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -8(%rsp),%rax     
+	mulq  104(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%rcx, %r12
+/APP
+	movq  (%rsp),%rax     
+	mulq  96(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 208(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8
+/APP
+	movq  -24(%rsp),%rax     
+	mulq  128(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -16(%rsp),%rax     
+	mulq  120(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+	movq  -8(%rsp),%rax     
+	mulq  112(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%r8, %r12
+/APP
+	movq  (%rsp),%rax     
+	mulq  104(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 216(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %r8
+	movq	%r10, %rcx
+/APP
+	movq  -16(%rsp),%rax     
+	mulq  128(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+	movq  -8(%rsp),%rax     
+	mulq  120(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%r9, %rbp
+	movq	%rcx, %r12
+/APP
+	movq  (%rsp),%rax     
+	mulq  112(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, 224(%r11)
+	movq	%r12, %r9
+	movq	%rbp, %rcx
+	movq	%r10, %r8
+/APP
+	movq  -8(%rsp),%rax     
+	mulq  128(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r8, %r12
+	movq	%r9, %rbp
+/APP
+	movq  (%rsp),%rax     
+	mulq  120(%rsp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rcx, 232(%r11)
+	movq	%rbp, %r8
+	movq	%r12, %rcx
+/APP
+	movq  (%rsp),%rax     
+	mulq  128(%rsp)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rcx     
+	adcq  $0,%r10        
+	
+/NO_APP
+	movq	%r8, 240(%r11)
+	movl	(%rsi), %esi
+	xorl	(%rdi), %esi
+	testq	%rcx, %rcx
+	movq	%rcx, 248(%r11)
+	movl	$32, 8(%rbx)
+	jne	.L76
+	.align 16
+.L84:
+	movl	8(%rbx), %edx
+	leal	-1(%rdx), %edi
+	testl	%edi, %edi
+	movl	%edi, 8(%rbx)
+	je	.L76
+	leal	-2(%rdx), %eax
+	cmpq	$0, (%r11,%rax,8)
+	je	.L84
+.L76:
+	movl	8(%rbx), %edx
+	xorl	%r11d, %r11d
+	testl	%edx, %edx
+	cmovne	%esi, %r11d
+	movl	%r11d, (%rbx)
+	addq	$136, %rsp
+	popq	%rbx
+	popq	%rbp
+	popq	%r12
+	ret
+.LFE4:
+	.size	s_mp_mul_comba_16, .-s_mp_mul_comba_16
+	.align 16
+.globl s_mp_mul_comba_32
+	.type	s_mp_mul_comba_32, @function
+s_mp_mul_comba_32:
+.LFB5:
+	pushq	%rbp
+.LCFI11:
+	movq	%rsp, %rbp
+.LCFI12:
+	pushq	%r13
+.LCFI13:
+	movq	%rdx, %r13
+	movl	$256, %edx
+	pushq	%r12
+.LCFI14:
+	movq	%rsi, %r12
+	pushq	%rbx
+.LCFI15:
+	movq	%rdi, %rbx
+	subq	$520, %rsp
+.LCFI16:
+	movq	16(%rdi), %rsi
+	leaq	-544(%rbp), %rdi
+	call	memcpy@PLT
+	movq	16(%r12), %rsi
+	leaq	-288(%rbp), %rdi
+	movl	$256, %edx
+	call	memcpy@PLT
+	movq	16(%r13), %r9
+	xorl	%r8d, %r8d
+	movq	%r8, %rsi
+	movq	%r8, %rdi
+	movq	%r8, %r10
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+/NO_APP
+	movq	%rsi, (%r9)
+	movq	%r10, %rsi
+	movq	%r8, %r10
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%r10        
+	
+/NO_APP
+	movq	%r10, %r11
+/APP
+	movq  -536(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rdi, 8(%r9)
+	movq	%r11, %rdi
+	movq	%r8, %r11
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%r11, %rcx
+/APP
+	movq  -536(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -528(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 16(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -520(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 24(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -512(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 32(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -504(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 40(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -496(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 48(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -488(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 56(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -480(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 64(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -472(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 72(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -464(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 80(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -456(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 88(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -448(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 96(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -440(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 104(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -432(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 112(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -424(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 120(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -416(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 128(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -408(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 136(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -400(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 144(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -392(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 152(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -384(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 160(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -376(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 168(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -368(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 176(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -360(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 184(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -352(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 192(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -344(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 200(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -336(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 208(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -328(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 216(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -320(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 224(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -312(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 232(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -304(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 240(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -544(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -536(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -288(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 248(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -536(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -528(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -280(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 256(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -528(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -520(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -272(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 264(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -520(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -512(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -264(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 272(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -512(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -504(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -256(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 280(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -504(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -496(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -248(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 288(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -496(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -488(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -240(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 296(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -488(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -480(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -232(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 304(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -480(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -472(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -224(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 312(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -472(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -464(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -216(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 320(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -464(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -456(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -208(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 328(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %r11
+	movq	%r8, %r10
+/APP
+	movq  -456(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+	movq  -448(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+	movq  -440(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+	movq  -296(%rbp),%rax     
+	mulq  -200(%rbp)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r10        
+	
+/NO_APP
+	movq	%r11, 336(%r9)
+	movq	%r10, %rsi
+	movq	%r8, %r10
+/APP
+	movq  -448(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%r10        
+	
+/NO_APP
+	movq	%r10, %rcx
+/APP
+	movq  -440(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rcx        
+	
+	movq  -432(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rcx        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rcx        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rcx        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rcx        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rcx        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rcx        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rcx        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rcx        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rcx        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rcx        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rcx        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rcx        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rcx        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rcx        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rcx        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rcx        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rsi, %r11
+	movq	%rcx, %r10
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -192(%rbp)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%r11     
+	adcq  $0,%r10        
+	
+/NO_APP
+	movq	%rdi, 344(%r9)
+	movq	%r11, %rcx
+	movq	%r10, %rdi
+	movq	%r8, %r11
+/APP
+	movq  -440(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%r11, %rsi
+/APP
+	movq  -432(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -184(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 352(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -432(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -424(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -176(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 360(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -424(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -416(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -168(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 368(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -416(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -408(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -160(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 376(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -408(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -400(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -152(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 384(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -400(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -392(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -144(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 392(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -392(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -384(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -136(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 400(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -384(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -376(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -128(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 408(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -376(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -368(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -120(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 416(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -368(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -360(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -112(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 424(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -360(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -352(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -104(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 432(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -352(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -344(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -96(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 440(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -344(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -336(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -88(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 448(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -336(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -328(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -80(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 456(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -328(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -320(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -72(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 464(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -320(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -312(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rcx, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -64(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 472(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rcx
+	movq	%r8, %rsi
+/APP
+	movq  -312(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  -304(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r10
+	movq	%rsi, %r11
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -56(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rcx, 480(%r9)
+	movq	%r11, %rdi
+	movq	%r10, %rsi
+	movq	%r8, %rcx
+/APP
+	movq  -304(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rcx        
+	
+/NO_APP
+	movq	%rcx, %r11
+	movq	%rdi, %r10
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -48(%rbp)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r10     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rsi, 488(%r9)
+	movq	%r10, %rcx
+	movq	%r11, %rsi
+/APP
+	movq  -296(%rbp),%rax     
+	mulq  -40(%rbp)           
+	addq  %rax,%rcx     
+	adcq  %rdx,%rsi     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%rcx, 496(%r9)
+	movl	(%r12), %ecx
+	xorl	(%rbx), %ecx
+	testq	%rsi, %rsi
+	movq	%rsi, 504(%r9)
+	movl	$64, 8(%r13)
+	jne	.L149
+	.align 16
+.L157:
+	movl	8(%r13), %edx
+	leal	-1(%rdx), %ebx
+	testl	%ebx, %ebx
+	movl	%ebx, 8(%r13)
+	je	.L149
+	leal	-2(%rdx), %r12d
+	cmpq	$0, (%r9,%r12,8)
+	je	.L157
+.L149:
+	movl	8(%r13), %r9d
+	xorl	%edx, %edx
+	testl	%r9d, %r9d
+	cmovne	%ecx, %edx
+	movl	%edx, (%r13)
+	addq	$520, %rsp
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	leave
+	ret
+.LFE5:
+	.size	s_mp_mul_comba_32, .-s_mp_mul_comba_32
+	.align 16
+/ s_mp_sqr_comba_4: square a 4-digit operand (4 x 64-bit words) into an
+/ 8-digit result.
+/
+/ In:   %rdi = source operand structure, %rsi = destination structure.
+/       The code reads a digit-array pointer at offset 16 of each structure
+/       and writes 32-bit fields at offsets 0 and 8 of the destination
+/       (presumably the mp_int sign / used / dp members -- confirm in mpi.h).
+/ Out:  destination digits [0..7] hold the square; the field at +8 holds the
+/       digit count after high-order zero digits are stripped; the field at
+/       +0 ends up 0 on every path.
+/
+/ Method: the result is produced one column at a time.  Column k sums every
+/ product a[i]*a[j] with i+j == k into a three-register accumulator
+/ (low, middle, carry).  Products with i != j are added twice, the diagonal
+/ products a[i]*a[i] once.  The compiler-generated /NO_APP sections rotate
+/ the accumulator registers between columns.
+/
+/ Finished columns are staged at -72(%rsp)..-16(%rsp), i.e. below the stack
+/ pointer without adjusting it (the System V AMD64 red zone); this works
+/ because the function makes no calls.
+.globl s_mp_sqr_comba_4
+	.type	s_mp_sqr_comba_4, @function
+s_mp_sqr_comba_4:
+.LFB6:
+	pushq	%rbp
+.LCFI17:
+/ Keep the destination pointer in %r11.  %rsi is then zeroed and used as the
+/ source of zero for clearing accumulator registers.
+	movq	%rsi, %r11
+	xorl	%esi, %esi
+	movq	%rsi, %r10
+	movq	%rsi, %rbp
+	movq	%rsi, %r8
+	pushq	%rbx
+.LCFI18:
+	movq	%rsi, %rbx
+/ %rcx = source digit array a[].
+	movq	16(%rdi), %rcx
+	movq	%rsi, %rdi
+/ Column 0: a[0]*a[0] accumulated into (%r10, %rbx, %rdi).
+/APP
+	movq  (%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r10     
+	adcq  %rdx,%rbx     
+	adcq  $0,%rdi        
+	
+/NO_APP
+/ Stage digit 0 (it is also still live in %r10 for the copy-out below).
+	movq	%r10, -72(%rsp)
+/ Column 1: a[0]*a[1] added twice into (%rbx, %rdi, %rbp).
+/APP
+	movq  (%rcx),%rax     
+	mulq  8(%rcx)           
+	addq  %rax,%rbx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rbp        
+	addq  %rax,%rbx     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rbp        
+	
+/NO_APP
+	movq	%rbx, -64(%rsp)
+/ Column 2: a[0]*a[2] added twice, then a[1]*a[1] once.
+/APP
+	movq  (%rcx),%rax     
+	mulq  16(%rcx)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r8        
+	addq  %rax,%rdi     
+	adcq  %rdx,%rbp     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%rbp, %rbx
+	movq	%r8, %rbp
+/APP
+	movq  8(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%rdi     
+	adcq  %rdx,%rbx     
+	adcq  $0,%rbp        
+	
+/NO_APP
+	movq	%rdi, -56(%rsp)
+	movq	%rbp, %r9
+	movq	%rbx, %r8
+	movq	%rsi, %rdi
+/ Column 3: a[0]*a[3] added twice, then a[1]*a[2] added twice.
+/APP
+	movq  (%rcx),%rax     
+	mulq  24(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rdi        
+	addq  %rax,%r8     
+	adcq  %rdx,%r9     
+	adcq  $0,%rdi        
+	
+/NO_APP
+	movq	%r9, %rbx
+	movq	%rdi, %rbp
+/APP
+	movq  8(%rcx),%rax     
+	mulq  16(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rbx     
+	adcq  $0,%rbp        
+	addq  %rax,%r8     
+	adcq  %rdx,%rbx     
+	adcq  $0,%rbp        
+	
+/NO_APP
+	movq	%r8, -48(%rsp)
+	movq	%rbp, %r9
+	movq	%rbx, %rdi
+	movq	%rsi, %r8
+/ Set the destination's count field (+8) to 8 and its field at +0 to 0.
+/ The count is reduced after the copy-out if the top digits are zero.
+	movl	$8, 8(%r11)
+	movl	$0, (%r11)
+/ Column 4: a[1]*a[3] added twice, then a[2]*a[2] once.
+/APP
+	movq  8(%rcx),%rax     
+	mulq  24(%rcx)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	addq  %rax,%rdi     
+	adcq  %rdx,%r9     
+	adcq  $0,%r8        
+	
+/NO_APP
+	movq	%r9, %rbx
+	movq	%r8, %rbp
+/APP
+	movq  16(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%rdi     
+	adcq  %rdx,%rbx     
+	adcq  $0,%rbp        
+	
+/NO_APP
+	movq	%rbp, %rax
+	movq	%rdi, -40(%rsp)
+	movq	%rbx, %rbp
+	movq	%rax, %rdi
+	movq	%rsi, %rbx
+/ Column 5: a[2]*a[3] added twice.
+/APP
+	movq  16(%rcx),%rax     
+	mulq  24(%rcx)           
+	addq  %rax,%rbp     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rbx        
+	addq  %rax,%rbp     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rbx        
+	
+/NO_APP
+	movq	%rbp, -32(%rsp)
+	movq	%rbx, %r9
+/ Column 6: a[3]*a[3].  The low word becomes digit 6 and the running high
+/ word in %r9 becomes digit 7.  The carry added into %rsi is never stored
+/ (%rsi is overwritten during the copy-out).
+/APP
+	movq  24(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%rdi     
+	adcq  %rdx,%r9     
+	adcq  $0,%rsi        
+	
+/NO_APP
+/ Copy the eight result digits into the destination digit array
+/ (%rdx = pointer loaded from destination offset 16).
+	movq	16(%r11), %rdx
+	movq	%rdi, -24(%rsp)
+	movq	%r9, -16(%rsp)
+	movq	%r10, (%rdx)
+	movq	-64(%rsp), %r8
+	movq	%r8, 8(%rdx)
+	movq	-56(%rsp), %rbp
+	movq	%rbp, 16(%rdx)
+	movq	-48(%rsp), %rdi
+	movq	%rdi, 24(%rdx)
+	movq	-40(%rsp), %rsi
+	movq	%rsi, 32(%rdx)
+	movq	-32(%rsp), %rbx
+	movq	%rbx, 40(%rdx)
+	movq	-24(%rsp), %rcx
+	movq	%rcx, 48(%rdx)
+	movq	-16(%rsp), %rax
+	movq	%rax, 56(%rdx)
+/ Strip high-order zero digits: lower the count while digit[count-1] == 0.
+	movl	8(%r11), %edx
+	testl	%edx, %edx
+	je	.L168
+	leal	-1(%rdx), %ecx
+	movq	16(%r11), %rsi
+	mov	%ecx, %r10d
+/ If the top digit is non-zero, nothing needs trimming.
+	cmpq	$0, (%rsi,%r10,8)
+	jne	.L166
+	movl	%ecx, %edx
+	.align 16
+/ Loop: %edx is the candidate count; stop at the first non-zero digit below
+/ it, or when the candidate reaches zero.
+.L167:
+	testl	%edx, %edx
+	movl	%edx, %ecx
+	je	.L171
+	decl	%edx
+	mov	%edx, %eax
+	cmpq	$0, (%rsi,%rax,8)
+	je	.L167
+	movl	%ecx, 8(%r11)
+	movl	%ecx, %edx
+/ Non-zero result: restore callee-saved registers and return.  The load and
+/ store of the field at +0 write back the value already there.
+.L166:
+	testl	%edx, %edx
+	je	.L168
+	popq	%rbx
+	popq	%rbp
+	movl	(%r11), %eax
+	movl	%eax, (%r11)
+	ret
+/ Every digit was zero: record a count of 0, then fall into the zero exit.
+.L171:
+	movl	%edx, 8(%r11)
+	.align 16
+/ Zero result: store 0 in the field at +0 and return.
+.L168:
+	popq	%rbx
+	popq	%rbp
+	xorl	%eax, %eax
+	movl	%eax, (%r11)
+	ret
+.LFE6:
+	.size	s_mp_sqr_comba_4, .-s_mp_sqr_comba_4
+	.align 16
+.globl s_mp_sqr_comba_8
+	.type	s_mp_sqr_comba_8, @function
+s_mp_sqr_comba_8:
+.LFB7:
+	pushq	%r14
+.LCFI19:
+	xorl	%r9d, %r9d
+	movq	%r9, %r14
+	movq	%r9, %r10
+	pushq	%r13
+.LCFI20:
+	movq	%r9, %r13
+	pushq	%r12
+.LCFI21:
+	movq	%r9, %r12
+	pushq	%rbp
+.LCFI22:
+	movq	%rsi, %rbp
+	movq	%r9, %rsi
+	pushq	%rbx
+.LCFI23:
+	movq	%r9, %rbx
+	subq	$8, %rsp
+.LCFI24:
+	movq	16(%rdi), %rcx
+/APP
+	movq  (%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r14     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r14, -120(%rsp)
+/APP
+	movq  (%rcx),%rax     
+	mulq  8(%rcx)           
+	addq  %rax,%rbx     
+	adcq  %rdx,%r12     
+	adcq  $0,%r10        
+	addq  %rax,%rbx     
+	adcq  %rdx,%r12     
+	adcq  $0,%r10        
+	
+/NO_APP
+	movq	%rbx, -112(%rsp)
+/APP
+	movq  (%rcx),%rax     
+	mulq  16(%rcx)           
+	addq  %rax,%r12     
+	adcq  %rdx,%r10     
+	adcq  $0,%r13        
+	addq  %rax,%r12     
+	adcq  %rdx,%r10     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r10, %rbx
+	movq	%r13, %r10
+	movq	%r9, %r13
+/APP
+	movq  8(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r12     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r10        
+	
+/NO_APP
+	movq	%r12, -104(%rsp)
+	movq	%r10, %rdi
+	movq	%rbx, %r11
+/APP
+	movq  (%rcx),%rax     
+	mulq  24(%rcx)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	addq  %rax,%r11     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %rbx
+	movq	%rsi, %r10
+	movq	%r9, %rdi
+/APP
+	movq  8(%rcx),%rax     
+	mulq  16(%rcx)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r10        
+	addq  %rax,%r11     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r10        
+	
+/NO_APP
+	movq	%r9, %rsi
+	movq	%r11, -96(%rsp)
+	movq	%r10, %r8
+	movq	%rbx, %r12
+	movq	%r9, %r11
+/APP
+	movq  (%rcx),%rax     
+	mulq  32(%rcx)           
+	addq  %rax,%r12     
+	adcq  %rdx,%r8     
+	adcq  $0,%r13        
+	addq  %rax,%r12     
+	adcq  %rdx,%r8     
+	adcq  $0,%r13        
+	
+	movq  8(%rcx),%rax     
+	mulq  24(%rcx)           
+	addq  %rax,%r12     
+	adcq  %rdx,%r8     
+	adcq  $0,%r13        
+	addq  %rax,%r12     
+	adcq  %rdx,%r8     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r8, %rbx
+	movq	%r13, %r10
+	movq	%r9, %r8
+/APP
+	movq  16(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r12     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r10        
+	
+/NO_APP
+	movq	%r12, -88(%rsp)
+/APP
+	movq  (%rcx),%rax     
+	mulq  40(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  32(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  24(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	addq %r8,%rbx         
+	adcq %rdi,%r10         
+	adcq %rsi,%r11         
+	addq %r8,%rbx         
+	adcq %rdi,%r10         
+	adcq %rsi,%r11         
+	
+/NO_APP
+	movq	%rbx, -80(%rsp)
+/APP
+	movq  (%rcx),%rax     
+	mulq  48(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  40(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  32(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r9, %rax
+	movq	%rdi, %r13
+	movq	%rsi, %r12
+/APP
+	addq %r8,%r10         
+	adcq %r13,%r11         
+	adcq %r12,%rax         
+	addq %r8,%r10         
+	adcq %r13,%r11         
+	adcq %r12,%rax         
+	
+/NO_APP
+	movq	%rax, %rdx
+	movq	%r11, %rbx
+	movq	%r13, %rdi
+	movq	%rdx, %r11
+	movq	%r12, %rsi
+/APP
+	movq  24(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r10     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%r10, -72(%rsp)
+	movq	%r11, %r10
+/APP
+	movq  (%rcx),%rax     
+	mulq  56(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  48(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  40(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  32(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r9, %rax
+/APP
+	addq %r8,%rbx         
+	adcq %rdi,%r10         
+	adcq %rsi,%rax         
+	addq %r8,%rbx         
+	adcq %rdi,%r10         
+	adcq %rsi,%rax         
+	
+/NO_APP
+	movq	%rbx, -64(%rsp)
+	movq	%rax, %r11
+	movq	%r9, %rbx
+/APP
+	movq  8(%rcx),%rax     
+	mulq  56(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  48(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  40(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r13
+	movq	%rsi, %r12
+/APP
+	addq %r8,%r10         
+	adcq %r13,%r11         
+	adcq %r12,%rbx         
+	addq %r8,%r10         
+	adcq %r13,%r11         
+	adcq %r12,%rbx         
+	
+/NO_APP
+	movq	%rbx, %rsi
+	movq	%r13, %rdi
+	movq	%r11, %rbx
+	movq	%r12, %r13
+	movq	%rsi, %r11
+/APP
+	movq  32(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r10     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%r10, -56(%rsp)
+	movq	%r9, %r10
+/APP
+	movq  16(%rcx),%rax     
+	mulq  56(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %r13,%r13        
+	
+	movq  24(%rcx),%rax     
+	mulq  48(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r13        
+	
+	movq  32(%rcx),%rax     
+	mulq  40(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%rdi, %r12
+	movq	%r13, %rax
+/APP
+	addq %r8,%rbx         
+	adcq %r12,%r11         
+	adcq %rax,%r10         
+	addq %r8,%rbx         
+	adcq %r12,%r11         
+	adcq %rax,%r10         
+	
+/NO_APP
+	movq	%rbx, -48(%rsp)
+	movq	%r11, %r12
+	movq	%r10, %rsi
+	movq	%r9, %rbx
+	movq	%r9, %r11
+/APP
+	movq  24(%rcx),%rax     
+	mulq  56(%rcx)           
+	addq  %rax,%r12     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rbx        
+	addq  %rax,%r12     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rbx        
+	
+/NO_APP
+	movq	%rbx, %r13
+/APP
+	movq  32(%rcx),%rax     
+	mulq  48(%rcx)           
+	addq  %rax,%r12     
+	adcq  %rdx,%rsi     
+	adcq  $0,%r13        
+	addq  %rax,%r12     
+	adcq  %rdx,%rsi     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%rsi, %r10
+	movq	%r13, %rbx
+	movq	%r9, %r13
+/APP
+	movq  40(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r12     
+	adcq  %rdx,%r10     
+	adcq  $0,%rbx        
+	
+/NO_APP
+	movq	%r12, -40(%rsp)
+	movq	%rbx, %r8
+	movq	%r10, %rdi
+/APP
+	movq  32(%rcx),%rax     
+	mulq  56(%rcx)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%r8     
+	adcq  $0,%r11        
+	addq  %rax,%rdi     
+	adcq  %rdx,%r8     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%r8, %r10
+	movq	%r11, %rbx
+/APP
+	movq  40(%rcx),%rax     
+	mulq  48(%rcx)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%r10     
+	adcq  $0,%rbx        
+	addq  %rax,%rdi     
+	adcq  %rdx,%r10     
+	adcq  $0,%rbx        
+	
+/NO_APP
+	movq	%rdi, -32(%rsp)
+	movq	%rbx, %rsi
+	movq	%r10, %r12
+/APP
+	movq  40(%rcx),%rax     
+	mulq  56(%rcx)           
+	addq  %rax,%r12     
+	adcq  %rdx,%rsi     
+	adcq  $0,%r13        
+	addq  %rax,%r12     
+	adcq  %rdx,%rsi     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%rsi, %r10
+	movq	%r13, %rbx
+/APP
+	movq  48(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r12     
+	adcq  %rdx,%r10     
+	adcq  $0,%rbx        
+	
+/NO_APP
+	movq	%r12, -24(%rsp)
+	movq	%r10, %rdi
+	movq	%rbx, %rsi
+	movq	%r9, %r10
+	movl	$16, 8(%rbp)
+	movl	$0, (%rbp)
+/APP
+	movq  48(%rcx),%rax     
+	mulq  56(%rcx)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%r10        
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%r10        
+	
+/NO_APP
+	movq	%rdi, -16(%rsp)
+	movq	%r10, %r8
+/APP
+	movq  56(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%r9        
+	
+/NO_APP
+	movq	16(%rbp), %rax
+	movq	%rsi, -8(%rsp)
+	movq	%r8, (%rsp)
+	movq	%r14, (%rax)
+	movq	-112(%rsp), %rbx
+	movq	%rbx, 8(%rax)
+	movq	-104(%rsp), %rcx
+	movq	%rcx, 16(%rax)
+	movq	-96(%rsp), %rdx
+	movq	%rdx, 24(%rax)
+	movq	-88(%rsp), %r14
+	movq	%r14, 32(%rax)
+	movq	-80(%rsp), %r13
+	movq	%r13, 40(%rax)
+	movq	-72(%rsp), %r12
+	movq	%r12, 48(%rax)
+	movq	-64(%rsp), %r11
+	movq	%r11, 56(%rax)
+	movq	-56(%rsp), %r10
+	movq	%r10, 64(%rax)
+	movq	-48(%rsp), %r9
+	movq	%r9, 72(%rax)
+	movq	-40(%rsp), %r8
+	movq	%r8, 80(%rax)
+	movq	-32(%rsp), %rdi
+	movq	%rdi, 88(%rax)
+	movq	-24(%rsp), %rsi
+	movq	%rsi, 96(%rax)
+	movq	-16(%rsp), %rbx
+	movq	%rbx, 104(%rax)
+	movq	-8(%rsp), %rcx
+	movq	%rcx, 112(%rax)
+	movq	(%rsp), %rdx
+	movq	%rdx, 120(%rax)
+	movl	8(%rbp), %edx
+	testl	%edx, %edx
+	je	.L192
+	leal	-1(%rdx), %ecx
+	movq	16(%rbp), %rsi
+	mov	%ecx, %r14d
+	cmpq	$0, (%rsi,%r14,8)
+	jne	.L190
+	movl	%ecx, %edx
+	.align 16
+.L191:
+	testl	%edx, %edx
+	movl	%edx, %ecx
+	je	.L195
+	decl	%edx
+	mov	%edx, %r9d
+	cmpq	$0, (%rsi,%r9,8)
+	je	.L191
+	movl	%ecx, 8(%rbp)
+	movl	%ecx, %edx
+.L190:
+	testl	%edx, %edx
+	je	.L192
+	movl	(%rbp), %eax
+	movl	%eax, (%rbp)
+	addq	$8, %rsp
+	popq	%rbx
+	popq	%rbp
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	ret
+.L195:
+	movl	%edx, 8(%rbp)
+	.align 16
+.L192:
+	xorl	%eax, %eax
+	movl	%eax, (%rbp)
+	addq	$8, %rsp
+	popq	%rbx
+	popq	%rbp
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	ret
+.LFE7:
+	.size	s_mp_sqr_comba_8, .-s_mp_sqr_comba_8
+	.align 16
+.globl s_mp_sqr_comba_16	/* 16-word squaring: every product a[i]*a[j] of the input words is summed by column i+j into 32 result words */
+	.type	s_mp_sqr_comba_16, @function
+s_mp_sqr_comba_16:	/* in: %rdi = source descriptor, %rsi = destination descriptor (presumably mp_int pointers -- confirm against the C prototype) */
+.LFB8:
+	pushq	%rbp
+.LCFI25:
+	xorl	%r9d, %r9d	/* %r9 = 0; copied below to seed every fresh carry accumulator */
+	movq	%r9, %r8
+	movq	%r9, %r11
+	movq	%rsp, %rbp
+.LCFI26:
+	pushq	%r14
+.LCFI27:
+	movq	%rsi, %r14	/* destination descriptor kept in %r14 for the stores, memcpy and trimming at the end */
+	movq	%r9, %rsi
+	pushq	%r13
+.LCFI28:
+	movq	%r9, %r13
+	pushq	%r12
+.LCFI29:
+	movq	%r9, %r12
+	pushq	%rbx
+.LCFI30:
+	movq	%r9, %rbx
+	subq	$256, %rsp	/* scratch for the 32 result words: -288(%rbp) .. -40(%rbp) */
+.LCFI31:
+	movq	16(%rdi), %rcx	/* %rcx = input word array read from offset 16 of the source; a[i] is 8*i(%rcx) */
+/APP
+	movq  (%rcx),%rax     
+	mulq  %rax        /* %rdx:%rax = a[0] * a[0] */
+	addq  %rax,%r8     /* add the 128-bit product into a three-register column accumulator */
+	adcq  %rdx,%rbx     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r8, -288(%rbp)	/* result word 0: products with i+j == 0 */
+/APP
+	movq  (%rcx),%rax     
+	mulq  8(%rcx)           /* %rdx:%rax = a[0] * a[1], a cross product (i != j) */
+	addq  %rax,%rbx     
+	adcq  %rdx,%rsi     
+	adcq  $0,%r12        
+	addq  %rax,%rbx     /* same add repeated: a cross product is counted twice in a square */
+	adcq  %rdx,%rsi     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rbx, -280(%rbp)	/* result word 1: products with i+j == 1 */
+/APP
+	movq  (%rcx),%rax     
+	mulq  16(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	addq  %rax,%rsi     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r12, %rbx
+	movq	%r13, %r10
+/APP
+	movq  8(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%rsi     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r10        
+	
+/NO_APP
+	movq	%rsi, -272(%rbp)	/* result word 2: products with i+j == 2 */
+	movq	%r10, %rdi
+	movq	%r9, %rsi
+	movq	%rbx, %r10
+/APP
+	movq  (%rcx),%rax     
+	mulq  24(%rcx)           
+	addq  %rax,%r10     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r11        
+	addq  %rax,%r10     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rdi, %r12
+	movq	%r11, %rbx
+	movq	%r9, %rdi
+/APP
+	movq  8(%rcx),%rax     
+	mulq  16(%rcx)           
+	addq  %rax,%r10     
+	adcq  %rdx,%r12     
+	adcq  $0,%rbx        
+	addq  %rax,%r10     
+	adcq  %rdx,%r12     
+	adcq  $0,%rbx        
+	
+/NO_APP
+	movq	%r9, %r11
+	movq	%r10, -264(%rbp)	/* result word 3: products with i+j == 3 */
+	movq	%rbx, %r8
+	movq	%r12, %r13
+	movq	%r9, %r12
+/APP
+	movq  (%rcx),%rax     
+	mulq  32(%rcx)           
+	addq  %rax,%r13     
+	adcq  %rdx,%r8     
+	adcq  $0,%r12        
+	addq  %rax,%r13     
+	adcq  %rdx,%r8     
+	adcq  $0,%r12        
+	
+	movq  8(%rcx),%rax     
+	mulq  24(%rcx)           
+	addq  %rax,%r13     
+	adcq  %rdx,%r8     
+	adcq  $0,%r12        
+	addq  %rax,%r13     
+	adcq  %rdx,%r8     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, %rbx
+	movq	%r12, %r10
+	movq	%r9, %r8
+/APP
+	movq  16(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r13     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r10        
+	
+/NO_APP
+	movq	%r13, -256(%rbp)	/* result word 4: products with i+j == 4 */
+/APP
+	movq  (%rcx),%rax     
+	mulq  40(%rcx)           
+	movq  %rax,%r8     /* columns with three or more cross products first sum them in a separate three-word total */
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  32(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  24(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	addq %r8,%rbx         /* ...then add that total into the column accumulator, twice (doubling) */
+	adcq %rdi,%r10         
+	adcq %rsi,%r11         
+	addq %r8,%rbx         
+	adcq %rdi,%r10         
+	adcq %rsi,%r11         
+	
+/NO_APP
+	movq	%rbx, -248(%rbp)	/* result word 5: products with i+j == 5 */
+/APP
+	movq  (%rcx),%rax     
+	mulq  48(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  40(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  32(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r9, %rax
+	movq	%rdi, %r13
+	movq	%rsi, %r12
+/APP
+	addq %r8,%r10         
+	adcq %r13,%r11         
+	adcq %r12,%rax         
+	addq %r8,%r10         
+	adcq %r13,%r11         
+	adcq %r12,%rax         
+	
+/NO_APP
+	movq	%rax, %rdx
+	movq	%r11, %rbx
+	movq	%r13, %rdi
+	movq	%rdx, %r11
+	movq	%r12, %rsi
+/APP
+	movq  24(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r10     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%r10, -240(%rbp)	/* result word 6: products with i+j == 6 */
+	movq	%r11, %r10
+/APP
+	movq  (%rcx),%rax     
+	mulq  56(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  48(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  40(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  32(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r9, %rdx
+/APP
+	addq %r8,%rbx         
+	adcq %rdi,%r10         
+	adcq %rsi,%rdx         
+	addq %r8,%rbx         
+	adcq %rdi,%r10         
+	adcq %rsi,%rdx         
+	
+/NO_APP
+	movq	%rdx, %r11
+	movq	%rbx, -232(%rbp)	/* result word 7: products with i+j == 7 */
+	movq	%r9, %rbx
+/APP
+	movq  (%rcx),%rax     
+	mulq  64(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  56(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  48(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  40(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r13
+	movq	%rsi, %r12
+/APP
+	addq %r8,%r10         
+	adcq %r13,%r11         
+	adcq %r12,%rbx         
+	addq %r8,%r10         
+	adcq %r13,%r11         
+	adcq %r12,%rbx         
+	
+	movq  32(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r10     
+	adcq  %rdx,%r11     
+	adcq  $0,%rbx        
+	
+/NO_APP
+	movq	%r13, %rdi
+	movq	%r10, -224(%rbp)	/* result word 8: products with i+j == 8 */
+	movq	%r12, %rsi
+	movq	%rbx, %r10
+	movq	%r9, %r12
+/APP
+	movq  (%rcx),%rax     
+	mulq  72(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  64(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  56(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  48(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  32(%rcx),%rax     
+	mulq  40(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	addq %r8,%r11         
+	adcq %rdi,%r10         
+	adcq %rsi,%r12         
+	addq %r8,%r11         
+	adcq %rdi,%r10         
+	adcq %rsi,%r12         
+	
+/NO_APP
+	movq	%r11, -216(%rbp)	/* result word 9: products with i+j == 9 */
+	movq	%r12, %rbx
+/APP
+	movq  (%rcx),%rax     
+	mulq  80(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  72(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  64(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  56(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  32(%rcx),%rax     
+	mulq  48(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r9, %rax
+	movq	%rdi, %r13
+	movq	%rsi, %r12
+/APP
+	addq %r8,%r10         
+	adcq %r13,%rbx         
+	adcq %r12,%rax         
+	addq %r8,%r10         
+	adcq %r13,%rbx         
+	adcq %r12,%rax         
+	
+/NO_APP
+	movq	%rax, %rdx
+	movq	%rbx, %r11
+	movq	%r13, %rdi
+	movq	%rdx, %rbx
+	movq	%r12, %rsi
+/APP
+	movq  40(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r10     
+	adcq  %rdx,%r11     
+	adcq  $0,%rbx        
+	
+/NO_APP
+	movq	%r10, -208(%rbp)	/* result word 10: products with i+j == 10 */
+	movq	%rbx, %r10
+/APP
+	movq  (%rcx),%rax     
+	mulq  88(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  80(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  72(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  64(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  32(%rcx),%rax     
+	mulq  56(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  40(%rcx),%rax     
+	mulq  48(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r9, %rdx
+/APP
+	addq %r8,%r11         
+	adcq %rdi,%r10         
+	adcq %rsi,%rdx         
+	addq %r8,%r11         
+	adcq %rdi,%r10         
+	adcq %rsi,%rdx         
+	
+/NO_APP
+	movq	%rdx, %r13
+	movq	%r11, -200(%rbp)	/* result word 11: products with i+j == 11 */
+	movq	%r13, %r12
+/APP
+	movq  (%rcx),%rax     
+	mulq  96(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  88(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  80(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  72(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  32(%rcx),%rax     
+	mulq  64(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  40(%rcx),%rax     
+	mulq  56(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r9, %rax
+	movq	%rdi, %rdx
+	movq	%rsi, %r11
+/APP
+	addq %r8,%r10         
+	adcq %rdx,%r12         
+	adcq %r11,%rax         
+	addq %r8,%r10         
+	adcq %rdx,%r12         
+	adcq %r11,%rax         
+	
+/NO_APP
+	movq	%rdx, %rbx
+	movq	%rax, %r13
+	movq	%r11, %rsi
+/APP
+	movq  48(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r10     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%rbx, %rdi
+	movq	%r10, -192(%rbp)	/* result word 12: products with i+j == 12 */
+	movq	%r13, %r10
+/APP
+	movq  (%rcx),%rax     
+	mulq  104(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+/NO_APP
+	movq	%r9, %r13
+/APP
+	movq  8(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  88(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  80(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  32(%rcx),%rax     
+	mulq  72(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  40(%rcx),%rax     
+	mulq  64(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  48(%rcx),%rax     
+	mulq  56(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	addq %r8,%r12         
+	adcq %rdi,%r10         
+	adcq %rsi,%r13         
+	addq %r8,%r12         
+	adcq %rdi,%r10         
+	adcq %rsi,%r13         
+	
+/NO_APP
+	movq	%r12, -184(%rbp)	/* result word 13: products with i+j == 13 */
+	movq	%r13, %r12
+/APP
+	movq  (%rcx),%rax     
+	mulq  112(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  88(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  32(%rcx),%rax     
+	mulq  80(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  40(%rcx),%rax     
+	mulq  72(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  48(%rcx),%rax     
+	mulq  64(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r9, %rax
+	movq	%rdi, %rbx
+	movq	%rsi, %rdx
+/APP
+	addq %r8,%r10         
+	adcq %rbx,%r12         
+	adcq %rdx,%rax         
+	addq %r8,%r10         
+	adcq %rbx,%r12         
+	adcq %rdx,%rax         
+	
+/NO_APP
+	movq	%rdx, %r11
+	movq	%rax, %r13
+	movq	%rbx, %rdi
+/APP
+	movq  56(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r10     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r11, %rsi
+	movq	%r10, -176(%rbp)	/* result word 14: products with i+j == 14 */
+	movq	%r13, %r10
+/APP
+	movq  (%rcx),%rax     
+	mulq  120(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+/NO_APP
+	movq	%r9, %r13
+/APP
+	movq  8(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  32(%rcx),%rax     
+	mulq  88(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  40(%rcx),%rax     
+	mulq  80(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  48(%rcx),%rax     
+	mulq  72(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  56(%rcx),%rax     
+	mulq  64(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	addq %r8,%r12         
+	adcq %rdi,%r10         
+	adcq %rsi,%r13         
+	addq %r8,%r12         
+	adcq %rdi,%r10         
+	adcq %rsi,%r13         
+	
+/NO_APP
+	movq	%r12, -168(%rbp)	/* result word 15: products with i+j == 15 */
+	movq	%r13, %r12
+/APP
+	movq  8(%rcx),%rax     
+	mulq  120(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  32(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  40(%rcx),%rax     
+	mulq  88(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  48(%rcx),%rax     
+	mulq  80(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  56(%rcx),%rax     
+	mulq  72(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r9, %rax
+	movq	%rdi, %rbx
+	movq	%rsi, %rdx
+/APP
+	addq %r8,%r10         
+	adcq %rbx,%r12         
+	adcq %rdx,%rax         
+	addq %r8,%r10         
+	adcq %rbx,%r12         
+	adcq %rdx,%rax         
+	
+/NO_APP
+	movq	%rdx, %r11
+	movq	%rax, %r13
+	movq	%rbx, %rdi
+/APP
+	movq  64(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r10     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r11, %rsi
+	movq	%r10, -160(%rbp)	/* result word 16: products with i+j == 16 */
+	movq	%r9, %r11
+/APP
+	movq  16(%rcx),%rax     
+	mulq  120(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+/NO_APP
+	movq	%r13, %r10
+	movq	%r9, %rbx
+/APP
+	movq  24(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  32(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  40(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  48(%rcx),%rax     
+	mulq  88(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  56(%rcx),%rax     
+	mulq  80(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  64(%rcx),%rax     
+	mulq  72(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	addq %r8,%r12         
+	adcq %rdi,%r10         
+	adcq %rsi,%r11         
+	addq %r8,%r12         
+	adcq %rdi,%r10         
+	adcq %rsi,%r11         
+	
+/NO_APP
+	movq	%r12, -152(%rbp)	/* result word 17: products with i+j == 17 */
+/APP
+	movq  24(%rcx),%rax     
+	mulq  120(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  32(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  40(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  48(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  56(%rcx),%rax     
+	mulq  88(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  64(%rcx),%rax     
+	mulq  80(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r13
+	movq	%rsi, %r12
+/APP
+	addq %r8,%r10         
+	adcq %r13,%r11         
+	adcq %r12,%rbx         
+	addq %r8,%r10         
+	adcq %r13,%r11         
+	adcq %r12,%rbx         
+	
+/NO_APP
+	movq	%rbx, %rdx
+	movq	%r13, %rdi
+	movq	%r11, %rbx
+	movq	%r12, %rsi
+	movq	%rdx, %r11
+	movq	%r9, %r12
+/APP
+	movq  72(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r10     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%r10, -144(%rbp)	/* result word 18: products with i+j == 18 */
+	movq	%r11, %r10
+/APP
+	movq  32(%rcx),%rax     
+	mulq  120(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  40(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  48(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  56(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  64(%rcx),%rax     
+	mulq  88(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  72(%rcx),%rax     
+	mulq  80(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	addq %r8,%rbx         
+	adcq %rdi,%r10         
+	adcq %rsi,%r12         
+	addq %r8,%rbx         
+	adcq %rdi,%r10         
+	adcq %rsi,%r12         
+	
+/NO_APP
+	movq	%rbx, -136(%rbp)	/* result word 19: products with i+j == 19 */
+	movq	%r12, %r11
+/APP
+	movq  40(%rcx),%rax     
+	mulq  120(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  48(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  56(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  64(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  72(%rcx),%rax     
+	mulq  88(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r9, %rax
+	movq	%rdi, %r13
+	movq	%rsi, %r12
+/APP
+	addq %r8,%r10         
+	adcq %r13,%r11         
+	adcq %r12,%rax         
+	addq %r8,%r10         
+	adcq %r13,%r11         
+	adcq %r12,%rax         
+	
+/NO_APP
+	movq	%rax, %rdx
+	movq	%r11, %rbx
+	movq	%r13, %rdi
+	movq	%rdx, %r11
+	movq	%r12, %rsi
+/APP
+	movq  80(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r10     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%r10, -128(%rbp)	/* result word 20: products with i+j == 20 */
+	movq	%r11, %r10
+/APP
+	movq  48(%rcx),%rax     
+	mulq  120(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  56(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  64(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  72(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  80(%rcx),%rax     
+	mulq  88(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r9, %rdx
+/APP
+	addq %r8,%rbx         
+	adcq %rdi,%r10         
+	adcq %rsi,%rdx         
+	addq %r8,%rbx         
+	adcq %rdi,%r10         
+	adcq %rsi,%rdx         
+	
+/NO_APP
+	movq	%rbx, -120(%rbp)	/* result word 21: products with i+j == 21 */
+	movq	%rdx, %r11
+	movq	%r9, %rbx
+/APP
+	movq  56(%rcx),%rax     
+	mulq  120(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  64(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  72(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  80(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r13
+	movq	%rsi, %r12
+/APP
+	addq %r8,%r10         
+	adcq %r13,%r11         
+	adcq %r12,%rbx         
+	addq %r8,%r10         
+	adcq %r13,%r11         
+	adcq %r12,%rbx         
+	
+/NO_APP
+	movq	%rbx, %rdx
+	movq	%r13, %rdi
+	movq	%r11, %rbx
+	movq	%r12, %rsi
+	movq	%rdx, %r11
+	movq	%r9, %r12
+/APP
+	movq  88(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r10     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%r10, -112(%rbp)	/* result word 22: products with i+j == 22 */
+	movq	%r11, %r10
+/APP
+	movq  64(%rcx),%rax     
+	mulq  120(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  72(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  80(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  88(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	addq %r8,%rbx         
+	adcq %rdi,%r10         
+	adcq %rsi,%r12         
+	addq %r8,%rbx         
+	adcq %rdi,%r10         
+	adcq %rsi,%r12         
+	
+/NO_APP
+	movq	%rbx, -104(%rbp)	/* result word 23: products with i+j == 23 */
+	movq	%r12, %r11
+/APP
+	movq  72(%rcx),%rax     
+	mulq  120(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  80(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  88(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r9, %rax
+	movq	%rdi, %r13
+	movq	%rsi, %r12
+/APP
+	addq %r8,%r10         
+	adcq %r13,%r11         
+	adcq %r12,%rax         
+	addq %r8,%r10         
+	adcq %r13,%r11         
+	adcq %r12,%rax         
+	
+/NO_APP
+	movq	%rax, %rdx
+	movq	%r11, %rbx
+	movq	%r13, %rdi
+	movq	%rdx, %r11
+	movq	%r12, %rsi
+/APP
+	movq  96(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r10     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%r10, -96(%rbp)	/* result word 24: products with i+j == 24 */
+	movq	%r9, %r10
+/APP
+	movq  80(%rcx),%rax     
+	mulq  120(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  88(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  96(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r12
+	movq	%rsi, %rax
+	movq	%r9, %rsi
+/APP
+	addq %r8,%rbx         
+	adcq %r12,%r11         
+	adcq %rax,%r10         
+	addq %r8,%rbx         
+	adcq %r12,%r11         
+	adcq %rax,%r10         
+	
+/NO_APP
+	movq	%r9, %r12
+	movq	%rbx, -88(%rbp)	/* result word 25: products with i+j == 25 */
+	movq	%r11, %r13
+	movq	%r10, %r11
+/APP
+	movq  88(%rcx),%rax     
+	mulq  120(%rcx)           
+	addq  %rax,%r13     
+	adcq  %rdx,%r11     
+	adcq  $0,%r12        
+	addq  %rax,%r13     
+	adcq  %rdx,%r11     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r12, %rdi
+/APP
+	movq  96(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%r13     
+	adcq  %rdx,%r11     
+	adcq  $0,%rdi        
+	addq  %rax,%r13     
+	adcq  %rdx,%r11     
+	adcq  $0,%rdi        
+	
+/NO_APP
+	movq	%r11, %rbx
+	movq	%rdi, %r10
+	movq	%r9, %r11
+/APP
+	movq  104(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r13     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r10        
+	
+/NO_APP
+	movq	%r13, -80(%rbp)	/* result word 26: products with i+j == 26 */
+	movq	%r10, %r8
+	movq	%rbx, %r10
+/APP
+	movq  96(%rcx),%rax     
+	mulq  120(%rcx)           
+	addq  %rax,%r10     
+	adcq  %rdx,%r8     
+	adcq  $0,%rsi        
+	addq  %rax,%r10     
+	adcq  %rdx,%r8     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r8, %r12
+	movq	%rsi, %rbx
+/APP
+	movq  104(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%r10     
+	adcq  %rdx,%r12     
+	adcq  $0,%rbx        
+	addq  %rax,%r10     
+	adcq  %rdx,%r12     
+	adcq  $0,%rbx        
+	
+/NO_APP
+	movq	%r10, -72(%rbp)	/* result word 27: products with i+j == 27 */
+	movq	%rbx, %r13
+	movq	%r12, %rbx
+/APP
+	movq  104(%rcx),%rax     
+	mulq  120(%rcx)           
+	addq  %rax,%rbx     
+	adcq  %rdx,%r13     
+	adcq  $0,%r11        
+	addq  %rax,%rbx     
+	adcq  %rdx,%r13     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%r11, %r12
+	movq	%r13, %r10
+/APP
+	movq  112(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%rbx     
+	adcq  %rdx,%r10     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rbx, -64(%rbp)	/* result word 28: products with i+j == 28 */
+	movq	%r10, %rdi
+	movq	%r9, %rbx
+	movq	%r12, %rsi
+/APP
+	movq  112(%rcx),%rax     
+	mulq  120(%rcx)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rbx        
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%rbx        
+	
+/NO_APP
+	movq	%rdi, -56(%rbp)	/* result word 29: products with i+j == 29 */
+	movq	%rbx, %r8
+/APP
+	movq  120(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%r9        /* carry out of the top word goes to %r9, which is never stored */
+	
+/NO_APP
+	movq	%rsi, -48(%rbp)	/* result word 30: a[15] * a[15] plus carries */
+	movq	16(%r14), %rdi	/* memcpy destination: word pointer at offset 16 of the destination descriptor */
+	leaq	-288(%rbp), %rsi	/* memcpy source: the scratch buffer, starting at result word 0 */
+	movl	$256, %edx	/* 256 bytes = 32 words of 8 bytes */
+	movq	%r8, -40(%rbp)	/* result word 31: the remaining high word */
+	movl	$32, 8(%r14)	/* 32-bit field at offset 8 = 32 (presumably the used-word count -- confirm against the struct) */
+	movl	$0, (%r14)	/* 32-bit field at offset 0 = 0 (presumably the sign -- confirm against the struct) */
+	call	memcpy@PLT
+	movl	8(%r14), %edx	/* trimming: lower the count at offset 8 past any zero words at the top */
+	testl	%edx, %edx
+	je	.L232
+	leal	-1(%rdx), %ecx
+	movq	16(%r14), %rsi
+	mov	%ecx, %r9d
+	cmpq	$0, (%rsi,%r9,8)	/* is the highest counted word nonzero? then nothing to trim */
+	jne	.L230
+	movl	%ecx, %edx
+	.align 16
+.L231:	/* scan downward until a nonzero word is found or the count reaches 0 */
+	testl	%edx, %edx
+	movl	%edx, %ecx
+	je	.L235
+	decl	%edx
+	mov	%edx, %eax
+	cmpq	$0, (%rsi,%rax,8)
+	je	.L231
+	movl	%ecx, 8(%r14)	/* store the trimmed count (index of the nonzero word + 1) */
+	movl	%ecx, %edx
+.L230:
+	testl	%edx, %edx
+	je	.L232
+	movl	(%r14), %eax	/* reloads and rewrites the field at offset 0 with its own value (no net change) */
+	movl	%eax, (%r14)
+	addq	$256, %rsp	/* release the scratch buffer, then restore the saved registers */
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	leave
+	ret
+.L235:	/* every word was zero: the count becomes 0 */
+	movl	%edx, 8(%r14)
+	.align 16
+.L232:	/* count is 0: force the field at offset 0 to 0 before returning */
+	xorl	%eax, %eax
+	movl	%eax, (%r14)
+	addq	$256, %rsp
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	leave
+	ret
+.LFE8:
+	.size	s_mp_sqr_comba_16, .-s_mp_sqr_comba_16
+	.align 16
+.globl s_mp_sqr_comba_32
+	.type	s_mp_sqr_comba_32, @function
+s_mp_sqr_comba_32:
+.LFB9:
+	pushq	%rbp
+.LCFI32:
+	xorl	%r10d, %r10d
+	movq	%r10, %r8
+	movq	%r10, %r11
+	movq	%rsp, %rbp
+.LCFI33:
+	pushq	%r14
+.LCFI34:
+	movq	%rsi, %r14
+	movq	%r10, %rsi
+	pushq	%r13
+.LCFI35:
+	movq	%r10, %r13
+	pushq	%r12
+.LCFI36:
+	movq	%r10, %r12
+	pushq	%rbx
+.LCFI37:
+	movq	%r10, %rbx
+	subq	$512, %rsp
+.LCFI38:
+	movq	16(%rdi), %rcx
+/APP
+	movq  (%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r8     
+	adcq  %rdx,%rbx     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r8, -544(%rbp)
+/APP
+	movq  (%rcx),%rax     
+	mulq  8(%rcx)           
+	addq  %rax,%rbx     
+	adcq  %rdx,%rsi     
+	adcq  $0,%r12        
+	addq  %rax,%rbx     
+	adcq  %rdx,%rsi     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rbx, -536(%rbp)
+/APP
+	movq  (%rcx),%rax     
+	mulq  16(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	addq  %rax,%rsi     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r12, %rbx
+	movq	%r13, %r9
+/APP
+	movq  8(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%rsi     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r9        
+	
+/NO_APP
+	movq	%rsi, -528(%rbp)
+	movq	%r9, %rdi
+	movq	%r10, %rsi
+	movq	%rbx, %r9
+/APP
+	movq  (%rcx),%rax     
+	mulq  24(%rcx)           
+	addq  %rax,%r9     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r11        
+	addq  %rax,%r9     
+	adcq  %rdx,%rdi     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%rdi, %r12
+	movq	%r11, %r13
+	movq	%r10, %rdi
+/APP
+	movq  8(%rcx),%rax     
+	mulq  16(%rcx)           
+	addq  %rax,%r9     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	addq  %rax,%r9     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r10, %r11
+	movq	%r9, -520(%rbp)
+	movq	%r13, %r8
+	movq	%r12, %r13
+	movq	%r10, %r12
+/APP
+	movq  (%rcx),%rax     
+	mulq  32(%rcx)           
+	addq  %rax,%r13     
+	adcq  %rdx,%r8     
+	adcq  $0,%r12        
+	addq  %rax,%r13     
+	adcq  %rdx,%r8     
+	adcq  $0,%r12        
+	
+	movq  8(%rcx),%rax     
+	mulq  24(%rcx)           
+	addq  %rax,%r13     
+	adcq  %rdx,%r8     
+	adcq  $0,%r12        
+	addq  %rax,%r13     
+	adcq  %rdx,%r8     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r8, %rbx
+	movq	%r12, %r9
+	movq	%r10, %r8
+/APP
+	movq  16(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r13     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r9        
+	
+/NO_APP
+	movq	%r13, -512(%rbp)
+/APP
+	movq  (%rcx),%rax     
+	mulq  40(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  32(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  24(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	addq %r8,%rbx         
+	adcq %rdi,%r9         
+	adcq %rsi,%r11         
+	addq %r8,%rbx         
+	adcq %rdi,%r9         
+	adcq %rsi,%r11         
+	
+/NO_APP
+	movq	%rbx, -504(%rbp)
+/APP
+	movq  (%rcx),%rax     
+	mulq  48(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  40(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  32(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r10, %rax
+	movq	%rdi, %r13
+	movq	%rsi, %r12
+/APP
+	addq %r8,%r9         
+	adcq %r13,%r11         
+	adcq %r12,%rax         
+	addq %r8,%r9         
+	adcq %r13,%r11         
+	adcq %r12,%rax         
+	
+/NO_APP
+	movq	%rax, %rdx
+	movq	%r11, %rbx
+	movq	%r13, %rdi
+	movq	%rdx, %r11
+	movq	%r12, %rsi
+/APP
+	movq  24(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%r9, -496(%rbp)
+	movq	%r11, %r9
+/APP
+	movq  (%rcx),%rax     
+	mulq  56(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  48(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  40(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  32(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r10, %rdx
+/APP
+	addq %r8,%rbx         
+	adcq %rdi,%r9         
+	adcq %rsi,%rdx         
+	addq %r8,%rbx         
+	adcq %rdi,%r9         
+	adcq %rsi,%rdx         
+	
+/NO_APP
+	movq	%rdx, %r11
+	movq	%rbx, -488(%rbp)
+	movq	%r10, %rbx
+/APP
+	movq  (%rcx),%rax     
+	mulq  64(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  56(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  48(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  40(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r13
+	movq	%rsi, %r12
+/APP
+	addq %r8,%r9         
+	adcq %r13,%r11         
+	adcq %r12,%rbx         
+	addq %r8,%r9         
+	adcq %r13,%r11         
+	adcq %r12,%rbx         
+	
+	movq  32(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%r11     
+	adcq  $0,%rbx        
+	
+/NO_APP
+	movq	%r13, %rdi
+	movq	%r9, -480(%rbp)
+	movq	%r12, %rsi
+	movq	%rbx, %r9
+	movq	%r10, %r12
+/APP
+	movq  (%rcx),%rax     
+	mulq  72(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  64(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  56(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  48(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  32(%rcx),%rax     
+	mulq  40(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	addq %r8,%r11         
+	adcq %rdi,%r9         
+	adcq %rsi,%r12         
+	addq %r8,%r11         
+	adcq %rdi,%r9         
+	adcq %rsi,%r12         
+	
+/NO_APP
+	movq	%r11, -472(%rbp)
+	movq	%r12, %rbx
+/APP
+	movq  (%rcx),%rax     
+	mulq  80(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  72(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  64(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  56(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  32(%rcx),%rax     
+	mulq  48(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r10, %rax
+	movq	%rdi, %r13
+	movq	%rsi, %r12
+/APP
+	addq %r8,%r9         
+	adcq %r13,%rbx         
+	adcq %r12,%rax         
+	addq %r8,%r9         
+	adcq %r13,%rbx         
+	adcq %r12,%rax         
+	
+/NO_APP
+	movq	%rax, %rdx
+	movq	%rbx, %r11
+	movq	%r13, %rdi
+	movq	%rdx, %rbx
+	movq	%r12, %rsi
+/APP
+	movq  40(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%r11     
+	adcq  $0,%rbx        
+	
+/NO_APP
+	movq	%r9, -464(%rbp)
+	movq	%rbx, %r9
+/APP
+	movq  (%rcx),%rax     
+	mulq  88(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  80(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  72(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  64(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  32(%rcx),%rax     
+	mulq  56(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  40(%rcx),%rax     
+	mulq  48(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r10, %rdx
+/APP
+	addq %r8,%r11         
+	adcq %rdi,%r9         
+	adcq %rsi,%rdx         
+	addq %r8,%r11         
+	adcq %rdi,%r9         
+	adcq %rsi,%rdx         
+	
+/NO_APP
+	movq	%rdx, %r13
+	movq	%r11, -456(%rbp)
+	movq	%r13, %r12
+	movq	%r10, %r13
+/APP
+	movq  (%rcx),%rax     
+	mulq  96(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  88(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  80(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  72(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  32(%rcx),%rax     
+	mulq  64(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  40(%rcx),%rax     
+	mulq  56(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %rax
+	movq	%rsi, %r11
+/APP
+	addq %r8,%r9         
+	adcq %rax,%r12         
+	adcq %r11,%r13         
+	addq %r8,%r9         
+	adcq %rax,%r12         
+	adcq %r11,%r13         
+	
+/NO_APP
+	movq	%rax, %rbx
+	movq	%r11, %rsi
+/APP
+	movq  48(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%rbx, %rdi
+	movq	%r9, -448(%rbp)
+	movq	%r13, %r9
+/APP
+	movq  (%rcx),%rax     
+	mulq  104(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+/NO_APP
+	movq	%r10, %r13
+/APP
+	movq  8(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  88(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  80(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  32(%rcx),%rax     
+	mulq  72(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  40(%rcx),%rax     
+	mulq  64(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  48(%rcx),%rax     
+	mulq  56(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	addq %r8,%r12         
+	adcq %rdi,%r9         
+	adcq %rsi,%r13         
+	addq %r8,%r12         
+	adcq %rdi,%r9         
+	adcq %rsi,%r13         
+	
+/NO_APP
+	movq	%r12, -440(%rbp)
+	movq	%r10, %r12
+/APP
+	movq  (%rcx),%rax     
+	mulq  112(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  88(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  32(%rcx),%rax     
+	mulq  80(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  40(%rcx),%rax     
+	mulq  72(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  48(%rcx),%rax     
+	mulq  64(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r13, %rdx
+	movq	%rdi, %rbx
+	movq	%rsi, %r13
+/APP
+	addq %r8,%r9         
+	adcq %rbx,%rdx         
+	adcq %r13,%r12         
+	addq %r8,%r9         
+	adcq %rbx,%rdx         
+	adcq %r13,%r12         
+	
+/NO_APP
+	movq	%r12, %rax
+	movq	%r13, %r11
+	movq	%rdx, %r12
+	movq	%rax, %r13
+	movq	%rbx, %rdi
+	movq	%r11, %rsi
+/APP
+	movq  56(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r9, -432(%rbp)
+	movq	%r13, %r9
+	movq	%r10, %r13
+/APP
+	movq  (%rcx),%rax     
+	mulq  120(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  8(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  32(%rcx),%rax     
+	mulq  88(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  40(%rcx),%rax     
+	mulq  80(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  48(%rcx),%rax     
+	mulq  72(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  56(%rcx),%rax     
+	mulq  64(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r8, %rax
+	movq	%rdi, %rdx
+	movq	%rsi, %rbx
+/APP
+	addq %rax,%r12         
+	adcq %rdx,%r9         
+	adcq %rbx,%r13         
+	addq %rax,%r12         
+	adcq %rdx,%r9         
+	adcq %rbx,%r13         
+	
+/NO_APP
+	movq	%r12, -424(%rbp)
+	movq	%rdx, %r8
+	movq	%rax, %rsi
+	movq	%rbx, %rdi
+	movq	%r13, %r12
+	movq	%r10, %r13
+/APP
+	movq  (%rcx),%rax     
+	mulq  128(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+	movq  8(%rcx),%rax     
+	mulq  120(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  16(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  24(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  32(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  40(%rcx),%rax     
+	mulq  88(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  48(%rcx),%rax     
+	mulq  80(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  56(%rcx),%rax     
+	mulq  72(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+/NO_APP
+	movq	%rsi, %rax
+	movq	%r8, %rbx
+	movq	%rdi, %rdx
+/APP
+	addq %rax,%r9         
+	adcq %rbx,%r12         
+	adcq %rdx,%r13         
+	addq %rax,%r9         
+	adcq %rbx,%r12         
+	adcq %rdx,%r13         
+	
+/NO_APP
+	movq	%rdx, %r11
+	movq	%rax, %r8
+	movq	%rbx, %rdi
+/APP
+	movq  64(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r11, %rsi
+	movq	%r9, -416(%rbp)
+	movq	%r13, %r9
+/APP
+	movq  (%rcx),%rax     
+	mulq  136(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+/NO_APP
+	movq	%r10, %r13
+/APP
+	movq  8(%rcx),%rax     
+	mulq  128(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  120(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  32(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  40(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  48(%rcx),%rax     
+	mulq  88(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  56(%rcx),%rax     
+	mulq  80(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  64(%rcx),%rax     
+	mulq  72(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r8, %rbx
+	movq	%rdi, %rax
+	movq	%rsi, %rdx
+/APP
+	addq %rbx,%r12         
+	adcq %rax,%r9         
+	adcq %rdx,%r13         
+	addq %rbx,%r12         
+	adcq %rax,%r9         
+	adcq %rdx,%r13         
+	
+/NO_APP
+	movq	%r12, -408(%rbp)
+	movq	%rdx, %rdi
+	movq	%rax, %r8
+	movq	%rbx, %rsi
+	movq	%r13, %r12
+	movq	%r10, %r13
+/APP
+	movq  (%rcx),%rax     
+	mulq  144(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+	movq  8(%rcx),%rax     
+	mulq  136(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  16(%rcx),%rax     
+	mulq  128(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  24(%rcx),%rax     
+	mulq  120(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  32(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  40(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  48(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  56(%rcx),%rax     
+	mulq  88(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  64(%rcx),%rax     
+	mulq  80(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+/NO_APP
+	movq	%rsi, %rax
+	movq	%r8, %rbx
+	movq	%rdi, %rdx
+/APP
+	addq %rax,%r9         
+	adcq %rbx,%r12         
+	adcq %rdx,%r13         
+	addq %rax,%r9         
+	adcq %rbx,%r12         
+	adcq %rdx,%r13         
+	
+/NO_APP
+	movq	%rdx, %r11
+	movq	%rax, %r8
+	movq	%rbx, %rdi
+/APP
+	movq  72(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r11, %rsi
+	movq	%r9, -400(%rbp)
+	movq	%r13, %r9
+/APP
+	movq  (%rcx),%rax     
+	mulq  152(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+/NO_APP
+	movq	%r10, %r13
+/APP
+	movq  8(%rcx),%rax     
+	mulq  144(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  136(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  128(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  32(%rcx),%rax     
+	mulq  120(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  40(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  48(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  56(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  64(%rcx),%rax     
+	mulq  88(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  72(%rcx),%rax     
+	mulq  80(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r8, %rbx
+	movq	%rdi, %rax
+	movq	%rsi, %rdx
+/APP
+	addq %rbx,%r12         
+	adcq %rax,%r9         
+	adcq %rdx,%r13         
+	addq %rbx,%r12         
+	adcq %rax,%r9         
+	adcq %rdx,%r13         
+	
+/NO_APP
+	movq	%r12, -392(%rbp)
+	movq	%rdx, %rdi
+	movq	%rax, %r8
+	movq	%rbx, %rsi
+	movq	%r13, %r12
+	movq	%r10, %r13
+/APP
+	movq  (%rcx),%rax     
+	mulq  160(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+	movq  8(%rcx),%rax     
+	mulq  152(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  16(%rcx),%rax     
+	mulq  144(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  24(%rcx),%rax     
+	mulq  136(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  32(%rcx),%rax     
+	mulq  128(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  40(%rcx),%rax     
+	mulq  120(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  48(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  56(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  64(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  72(%rcx),%rax     
+	mulq  88(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+/NO_APP
+	movq	%rsi, %rax
+	movq	%r8, %rbx
+	movq	%rdi, %rdx
+/APP
+	addq %rax,%r9         
+	adcq %rbx,%r12         
+	adcq %rdx,%r13         
+	addq %rax,%r9         
+	adcq %rbx,%r12         
+	adcq %rdx,%r13         
+	
+/NO_APP
+	movq	%rdx, %r11
+	movq	%rax, %r8
+	movq	%rbx, %rdi
+/APP
+	movq  80(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r11, %rsi
+	movq	%r9, -384(%rbp)
+	movq	%r13, %r9
+/APP
+	movq  (%rcx),%rax     
+	mulq  168(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+/NO_APP
+	movq	%r10, %r13
+/APP
+	movq  8(%rcx),%rax     
+	mulq  160(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  152(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  144(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  32(%rcx),%rax     
+	mulq  136(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  40(%rcx),%rax     
+	mulq  128(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  48(%rcx),%rax     
+	mulq  120(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  56(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  64(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  72(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  80(%rcx),%rax     
+	mulq  88(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r8, %rbx
+	movq	%rdi, %rax
+	movq	%rsi, %rdx
+/APP
+	addq %rbx,%r12         
+	adcq %rax,%r9         
+	adcq %rdx,%r13         
+	addq %rbx,%r12         
+	adcq %rax,%r9         
+	adcq %rdx,%r13         
+	
+/NO_APP
+	movq	%r12, -376(%rbp)
+	movq	%rdx, %rdi
+	movq	%rax, %r8
+	movq	%rbx, %rsi
+	movq	%r13, %r12
+	movq	%r10, %r13
+/APP
+	movq  (%rcx),%rax     
+	mulq  176(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+	movq  8(%rcx),%rax     
+	mulq  168(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  16(%rcx),%rax     
+	mulq  160(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  24(%rcx),%rax     
+	mulq  152(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  32(%rcx),%rax     
+	mulq  144(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  40(%rcx),%rax     
+	mulq  136(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  48(%rcx),%rax     
+	mulq  128(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  56(%rcx),%rax     
+	mulq  120(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  64(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  72(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  80(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+/NO_APP
+	movq	%rsi, %rax
+	movq	%r8, %rbx
+	movq	%rdi, %rdx
+/APP
+	addq %rax,%r9         
+	adcq %rbx,%r12         
+	adcq %rdx,%r13         
+	addq %rax,%r9         
+	adcq %rbx,%r12         
+	adcq %rdx,%r13         
+	
+/NO_APP
+	movq	%rdx, %r11
+	movq	%rax, %r8
+	movq	%rbx, %rdi
+/APP
+	movq  88(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r11, %rsi
+	movq	%r9, -368(%rbp)
+	movq	%r13, %r9
+/APP
+	movq  (%rcx),%rax     
+	mulq  184(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+/NO_APP
+	movq	%r10, %r13
+/APP
+	movq  8(%rcx),%rax     
+	mulq  176(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  16(%rcx),%rax     
+	mulq  168(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  24(%rcx),%rax     
+	mulq  160(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  32(%rcx),%rax     
+	mulq  152(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  40(%rcx),%rax     
+	mulq  144(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  48(%rcx),%rax     
+	mulq  136(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  56(%rcx),%rax     
+	mulq  128(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  64(%rcx),%rax     
+	mulq  120(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  72(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  80(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  88(%rcx),%rax     
+	mulq  96(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r8, %rbx
+	movq	%rdi, %rax
+	movq	%rsi, %rdx
+/APP
+	addq %rbx,%r12         
+	adcq %rax,%r9         
+	adcq %rdx,%r13         
+	addq %rbx,%r12         
+	adcq %rax,%r9         
+	adcq %rdx,%r13         
+	
+/NO_APP
+	movq	%rdx, %rdi
+	movq	%r12, -360(%rbp)
+	movq	%rax, %r8
+	movq	%rbx, %rsi
+	movq	%r13, %r12
+	movq	%r10, %r13
+/APP
+	movq  (%rcx),%rax     
+	mulq  192(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+	movq  8(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  16(%rcx),%rax     
+	mulq  176(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  24(%rcx),%rax     
+	mulq  168(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  32(%rcx),%rax     
+	mulq  160(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  40(%rcx),%rax     
+	mulq  152(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  48(%rcx),%rax     
+	mulq  144(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  56(%rcx),%rax     
+	mulq  136(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  64(%rcx),%rax     
+	mulq  128(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  72(%rcx),%rax     
+	mulq  120(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  80(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  88(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+/NO_APP
+	movq	%r8, %rbx
+	movq	%rdi, %rax
+/APP
+	addq %rsi,%r9         
+	adcq %rbx,%r12         
+	adcq %rax,%r13         
+	addq %rsi,%r9         
+	adcq %rbx,%r12         
+	adcq %rax,%r13         
+	
+/NO_APP
+	movq	%rax, %r11
+	movq	%rbx, %r8
+/APP
+	movq  96(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r11, %rdi
+	movq	%r9, -352(%rbp)
+	movq	%r13, %r9
+/APP
+	movq  (%rcx),%rax     
+	mulq  200(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+/NO_APP
+	movq	%r10, %r13
+/APP
+	movq  8(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  16(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  24(%rcx),%rax     
+	mulq  176(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  32(%rcx),%rax     
+	mulq  168(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  40(%rcx),%rax     
+	mulq  160(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  48(%rcx),%rax     
+	mulq  152(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  56(%rcx),%rax     
+	mulq  144(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  64(%rcx),%rax     
+	mulq  136(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  72(%rcx),%rax     
+	mulq  128(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  80(%rcx),%rax     
+	mulq  120(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  88(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  96(%rcx),%rax     
+	mulq  104(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	addq %rsi,%r12         
+	adcq %r8,%r9         
+	adcq %rdi,%r13         
+	addq %rsi,%r12         
+	adcq %r8,%r9         
+	adcq %rdi,%r13         
+	
+/NO_APP
+	movq	%r12, -344(%rbp)
+	movq	%r10, %r12
+/APP
+	movq  (%rcx),%rax     
+	mulq  208(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+	movq  8(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  16(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  24(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  32(%rcx),%rax     
+	mulq  176(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  40(%rcx),%rax     
+	mulq  168(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  48(%rcx),%rax     
+	mulq  160(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  56(%rcx),%rax     
+	mulq  152(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  64(%rcx),%rax     
+	mulq  144(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  72(%rcx),%rax     
+	mulq  136(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  80(%rcx),%rax     
+	mulq  128(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  88(%rcx),%rax     
+	mulq  120(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  96(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+/NO_APP
+	movq	%r13, %rdx
+	movq	%r8, %rbx
+	movq	%rdi, %r13
+/APP
+	addq %rsi,%r9         
+	adcq %rbx,%rdx         
+	adcq %r13,%r12         
+	addq %rsi,%r9         
+	adcq %rbx,%rdx         
+	adcq %r13,%r12         
+	
+/NO_APP
+	movq	%r12, %rax
+	movq	%r13, %r11
+	movq	%rdx, %r12
+	movq	%rax, %r13
+	movq	%rbx, %r8
+	movq	%r11, %rdi
+/APP
+	movq  104(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r9, -336(%rbp)
+	movq	%r13, %r9
+	movq	%r10, %r13
+/APP
+	movq  (%rcx),%rax     
+	mulq  216(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+	movq  8(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  16(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  24(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  32(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  40(%rcx),%rax     
+	mulq  176(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  48(%rcx),%rax     
+	mulq  168(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  56(%rcx),%rax     
+	mulq  160(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  64(%rcx),%rax     
+	mulq  152(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  72(%rcx),%rax     
+	mulq  144(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  80(%rcx),%rax     
+	mulq  136(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  88(%rcx),%rax     
+	mulq  128(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  96(%rcx),%rax     
+	mulq  120(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  104(%rcx),%rax     
+	mulq  112(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	addq %rsi,%r12         
+	adcq %r8,%r9         
+	adcq %rdi,%r13         
+	addq %rsi,%r12         
+	adcq %r8,%r9         
+	adcq %rdi,%r13         
+	
+/NO_APP
+	movq	%r12, -328(%rbp)
+/APP
+	movq  (%rcx),%rax     
+	mulq  224(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+	movq  8(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  16(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  24(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  32(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  40(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  48(%rcx),%rax     
+	mulq  176(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  56(%rcx),%rax     
+	mulq  168(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  64(%rcx),%rax     
+	mulq  160(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  72(%rcx),%rax     
+	mulq  152(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  80(%rcx),%rax     
+	mulq  144(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  88(%rcx),%rax     
+	mulq  136(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  96(%rcx),%rax     
+	mulq  128(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  104(%rcx),%rax     
+	mulq  120(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+/NO_APP
+	movq	%r13, %rax
+	movq	%r10, %rdx
+	movq	%r8, %rbx
+	movq	%rdi, %r12
+/APP
+	addq %rsi,%r9         
+	adcq %rbx,%rax         
+	adcq %r12,%rdx         
+	addq %rsi,%r9         
+	adcq %rbx,%rax         
+	adcq %r12,%rdx         
+	
+/NO_APP
+	movq	%rdx, %rdi
+	movq	%r12, %r11
+	movq	%rbx, %r8
+	movq	%rax, %r12
+	movq	%rdi, %r13
+	movq	%r11, %rdi
+/APP
+	movq  112(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r9, -320(%rbp)
+	movq	%r13, %rbx
+	movq	%r10, %r9
+/APP
+	movq  (%rcx),%rax     
+	mulq  232(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+	movq  8(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  16(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  24(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  32(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  40(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  48(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  56(%rcx),%rax     
+	mulq  176(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  64(%rcx),%rax     
+	mulq  168(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  72(%rcx),%rax     
+	mulq  160(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  80(%rcx),%rax     
+	mulq  152(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  88(%rcx),%rax     
+	mulq  144(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  96(%rcx),%rax     
+	mulq  136(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  104(%rcx),%rax     
+	mulq  128(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  112(%rcx),%rax     
+	mulq  120(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	addq %rsi,%r12         
+	adcq %r8,%rbx         
+	adcq %rdi,%r9         
+	addq %rsi,%r12         
+	adcq %r8,%rbx         
+	adcq %rdi,%r9         
+	
+/NO_APP
+	movq	%r12, -312(%rbp)
+	movq	%r9, %r13
+/APP
+	movq  (%rcx),%rax     
+	mulq  240(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+	movq  8(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  16(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  24(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  32(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  40(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  48(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  56(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  64(%rcx),%rax     
+	mulq  176(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  72(%rcx),%rax     
+	mulq  168(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  80(%rcx),%rax     
+	mulq  160(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  88(%rcx),%rax     
+	mulq  152(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  96(%rcx),%rax     
+	mulq  144(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  104(%rcx),%rax     
+	mulq  136(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  112(%rcx),%rax     
+	mulq  128(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+/NO_APP
+	movq	%r10, %rax
+	movq	%r8, %r11
+	movq	%rdi, %rdx
+/APP
+	addq %rsi,%rbx         
+	adcq %r11,%r13         
+	adcq %rdx,%rax         
+	addq %rsi,%rbx         
+	adcq %r11,%r13         
+	adcq %rdx,%rax         
+	
+/NO_APP
+	movq	%rdx, %r9
+	movq	%rax, %rdx
+	movq	%r13, %r12
+	movq	%r11, %r8
+	movq	%rdx, %r13
+	movq	%r9, %rdi
+/APP
+	movq  120(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%rbx     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%rbx, -304(%rbp)
+	movq	%r13, %rbx
+	movq	%r10, %r13
+/APP
+	movq  (%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+	movq  8(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  16(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  24(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  32(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  40(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  48(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  56(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  64(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  72(%rcx),%rax     
+	mulq  176(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  80(%rcx),%rax     
+	mulq  168(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  88(%rcx),%rax     
+	mulq  160(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  96(%rcx),%rax     
+	mulq  152(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  104(%rcx),%rax     
+	mulq  144(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  112(%rcx),%rax     
+	mulq  136(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  120(%rcx),%rax     
+	mulq  128(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	addq %rsi,%r12         
+	adcq %r8,%rbx         
+	adcq %rdi,%r13         
+	addq %rsi,%r12         
+	adcq %r8,%rbx         
+	adcq %rdi,%r13         
+	
+/NO_APP
+	movq	%r12, -296(%rbp)
+	movq	%r13, %r12
+	movq	%r10, %r13
+/APP
+	movq  8(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+	movq  16(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  24(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  32(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  40(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  48(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  56(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  64(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  72(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  80(%rcx),%rax     
+	mulq  176(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  88(%rcx),%rax     
+	mulq  168(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  96(%rcx),%rax     
+	mulq  160(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  104(%rcx),%rax     
+	mulq  152(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  112(%rcx),%rax     
+	mulq  144(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  120(%rcx),%rax     
+	mulq  136(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+/NO_APP
+	movq	%r8, %r11
+	movq	%rdi, %rax
+/APP
+	addq %rsi,%rbx         
+	adcq %r11,%r12         
+	adcq %rax,%r13         
+	addq %rsi,%rbx         
+	adcq %r11,%r12         
+	adcq %rax,%r13         
+	
+/NO_APP
+	movq	%rax, %r9
+	movq	%r11, %r8
+/APP
+	movq  128(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%rbx     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r9, %rdi
+	movq	%rbx, -288(%rbp)
+	movq	%r13, %r9
+/APP
+	movq  16(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+/NO_APP
+	movq	%r10, %r13
+/APP
+	movq  24(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  32(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  40(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  48(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  56(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  64(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  72(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  80(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  88(%rcx),%rax     
+	mulq  176(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  96(%rcx),%rax     
+	mulq  168(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  104(%rcx),%rax     
+	mulq  160(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  112(%rcx),%rax     
+	mulq  152(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  120(%rcx),%rax     
+	mulq  144(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  128(%rcx),%rax     
+	mulq  136(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	addq %rsi,%r12         
+	adcq %r8,%r9         
+	adcq %rdi,%r13         
+	addq %rsi,%r12         
+	adcq %r8,%r9         
+	adcq %rdi,%r13         
+	
+/NO_APP
+	movq	%r12, -280(%rbp)
+	movq	%r10, %r12
+/APP
+	movq  24(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+	movq  32(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  40(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  48(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  56(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  64(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  72(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  80(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  88(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  96(%rcx),%rax     
+	mulq  176(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  104(%rcx),%rax     
+	mulq  168(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  112(%rcx),%rax     
+	mulq  160(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  120(%rcx),%rax     
+	mulq  152(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  128(%rcx),%rax     
+	mulq  144(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+/NO_APP
+	movq	%r13, %rdx
+	movq	%r8, %rbx
+	movq	%rdi, %r13
+/APP
+	addq %rsi,%r9         
+	adcq %rbx,%rdx         
+	adcq %r13,%r12         
+	addq %rsi,%r9         
+	adcq %rbx,%rdx         
+	adcq %r13,%r12         
+	
+/NO_APP
+	movq	%r12, %rax
+	movq	%r13, %r11
+	movq	%rdx, %r12
+	movq	%rax, %r13
+	movq	%rbx, %r8
+	movq	%r11, %rdi
+/APP
+	movq  136(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r9, -272(%rbp)
+	movq	%r13, %r9
+	movq	%r10, %r13
+/APP
+	movq  32(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+	movq  40(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  48(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  56(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  64(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  72(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  80(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  88(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  96(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  104(%rcx),%rax     
+	mulq  176(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  112(%rcx),%rax     
+	mulq  168(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  120(%rcx),%rax     
+	mulq  160(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  128(%rcx),%rax     
+	mulq  152(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  136(%rcx),%rax     
+	mulq  144(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	addq %rsi,%r12         
+	adcq %r8,%r9         
+	adcq %rdi,%r13         
+	addq %rsi,%r12         
+	adcq %r8,%r9         
+	adcq %rdi,%r13         
+	
+/NO_APP
+	movq	%r12, -264(%rbp)
+/APP
+	movq  40(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+	movq  48(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  56(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  64(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  72(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  80(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  88(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  96(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  104(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  112(%rcx),%rax     
+	mulq  176(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  120(%rcx),%rax     
+	mulq  168(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  128(%rcx),%rax     
+	mulq  160(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  136(%rcx),%rax     
+	mulq  152(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+/NO_APP
+	movq	%r13, %rax
+	movq	%r10, %rdx
+	movq	%r8, %rbx
+	movq	%rdi, %r12
+/APP
+	addq %rsi,%r9         
+	adcq %rbx,%rax         
+	adcq %r12,%rdx         
+	addq %rsi,%r9         
+	adcq %rbx,%rax         
+	adcq %r12,%rdx         
+	
+/NO_APP
+	movq	%rdx, %rdi
+	movq	%r12, %r11
+	movq	%rbx, %r8
+	movq	%rax, %r12
+	movq	%rdi, %r13
+	movq	%r11, %rdi
+/APP
+	movq  144(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r10, %r11
+	movq	%r9, -256(%rbp)
+	movq	%r13, %r9
+/APP
+	movq  48(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+	movq  56(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  64(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  72(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  80(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  88(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  96(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  104(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  112(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  120(%rcx),%rax     
+	mulq  176(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  128(%rcx),%rax     
+	mulq  168(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  136(%rcx),%rax     
+	mulq  160(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  144(%rcx),%rax     
+	mulq  152(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	addq %rsi,%r12         
+	adcq %r8,%r9         
+	adcq %rdi,%r11         
+	addq %rsi,%r12         
+	adcq %r8,%r9         
+	adcq %rdi,%r11         
+	
+/NO_APP
+	movq	%r12, -248(%rbp)
+	movq	%r11, %r13
+/APP
+	movq  56(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+	movq  64(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  72(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  80(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  88(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  96(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  104(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  112(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  120(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  128(%rcx),%rax     
+	mulq  176(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  136(%rcx),%rax     
+	mulq  168(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  144(%rcx),%rax     
+	mulq  160(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+/NO_APP
+	movq	%r10, %rax
+	movq	%rsi, %rdx
+	movq	%r8, %rbx
+	movq	%rdi, %r12
+/APP
+	addq %rdx,%r9         
+	adcq %rbx,%r13         
+	adcq %r12,%rax         
+	addq %rdx,%r9         
+	adcq %rbx,%r13         
+	adcq %r12,%rax         
+	
+/NO_APP
+	movq	%r12, %r11
+	movq	%rdx, %r8
+	movq	%rax, %rdx
+	movq	%r13, %r12
+	movq	%rbx, %rdi
+	movq	%rdx, %r13
+	movq	%r11, %rsi
+/APP
+	movq  152(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r9, -240(%rbp)
+	movq	%r13, %r9
+	movq	%r10, %r13
+/APP
+	movq  64(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  72(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  80(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  88(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  96(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  104(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  112(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  120(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  128(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  136(%rcx),%rax     
+	mulq  176(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  144(%rcx),%rax     
+	mulq  168(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  152(%rcx),%rax     
+	mulq  160(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r8, %rax
+	movq	%rdi, %rdx
+	movq	%rsi, %rbx
+/APP
+	addq %rax,%r12         
+	adcq %rdx,%r9         
+	adcq %rbx,%r13         
+	addq %rax,%r12         
+	adcq %rdx,%r9         
+	adcq %rbx,%r13         
+	
+/NO_APP
+	movq	%r12, -232(%rbp)
+	movq	%rdx, %r8
+	movq	%rax, %rsi
+	movq	%rbx, %rdi
+	movq	%r13, %r12
+	movq	%r10, %r13
+/APP
+	movq  72(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+	movq  80(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  88(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  96(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  104(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  112(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  120(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  128(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  136(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  144(%rcx),%rax     
+	mulq  176(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  152(%rcx),%rax     
+	mulq  168(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+/NO_APP
+	movq	%rsi, %rax
+	movq	%r8, %rbx
+	movq	%rdi, %rdx
+/APP
+	addq %rax,%r9         
+	adcq %rbx,%r12         
+	adcq %rdx,%r13         
+	addq %rax,%r9         
+	adcq %rbx,%r12         
+	adcq %rdx,%r13         
+	
+/NO_APP
+	movq	%rdx, %r11
+	movq	%rax, %r8
+	movq	%rbx, %rdi
+/APP
+	movq  160(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r11, %rsi
+	movq	%r9, -224(%rbp)
+	movq	%r13, %r9
+/APP
+	movq  80(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+/NO_APP
+	movq	%r10, %r13
+/APP
+	movq  88(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  96(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  104(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  112(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  120(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  128(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  136(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  144(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  152(%rcx),%rax     
+	mulq  176(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  160(%rcx),%rax     
+	mulq  168(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r8, %rbx
+	movq	%rdi, %rax
+	movq	%rsi, %rdx
+/APP
+	addq %rbx,%r12         
+	adcq %rax,%r9         
+	adcq %rdx,%r13         
+	addq %rbx,%r12         
+	adcq %rax,%r9         
+	adcq %rdx,%r13         
+	
+/NO_APP
+	movq	%r12, -216(%rbp)
+	movq	%rdx, %rdi
+	movq	%rax, %r8
+	movq	%rbx, %rsi
+	movq	%r13, %r12
+	movq	%r10, %r13
+/APP
+	movq  88(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+	movq  96(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  104(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  112(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  120(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  128(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  136(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  144(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  152(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  160(%rcx),%rax     
+	mulq  176(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+/NO_APP
+	movq	%rsi, %rax
+	movq	%r8, %rbx
+	movq	%rdi, %rdx
+/APP
+	addq %rax,%r9         
+	adcq %rbx,%r12         
+	adcq %rdx,%r13         
+	addq %rax,%r9         
+	adcq %rbx,%r12         
+	adcq %rdx,%r13         
+	
+/NO_APP
+	movq	%rdx, %r11
+	movq	%rax, %r8
+	movq	%rbx, %rdi
+/APP
+	movq  168(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r11, %rsi
+	movq	%r9, -208(%rbp)
+	movq	%r13, %r9
+/APP
+	movq  96(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+/NO_APP
+	movq	%r10, %r13
+/APP
+	movq  104(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  112(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  120(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  128(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  136(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  144(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  152(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  160(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  168(%rcx),%rax     
+	mulq  176(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r8, %rbx
+	movq	%rdi, %rax
+	movq	%rsi, %rdx
+/APP
+	addq %rbx,%r12         
+	adcq %rax,%r9         
+	adcq %rdx,%r13         
+	addq %rbx,%r12         
+	adcq %rax,%r9         
+	adcq %rdx,%r13         
+	
+/NO_APP
+	movq	%r12, -200(%rbp)
+	movq	%rdx, %rdi
+	movq	%rax, %r8
+	movq	%rbx, %rsi
+	movq	%r13, %r12
+	movq	%r10, %r13
+/APP
+	movq  104(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+	movq  112(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  120(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  128(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  136(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  144(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  152(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  160(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  168(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+/NO_APP
+	movq	%rsi, %rax
+	movq	%r8, %rbx
+	movq	%rdi, %rdx
+/APP
+	addq %rax,%r9         
+	adcq %rbx,%r12         
+	adcq %rdx,%r13         
+	addq %rax,%r9         
+	adcq %rbx,%r12         
+	adcq %rdx,%r13         
+	
+/NO_APP
+	movq	%rdx, %r11
+	movq	%rax, %r8
+	movq	%rbx, %rdi
+/APP
+	movq  176(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r11, %rsi
+	movq	%r9, -192(%rbp)
+	movq	%r13, %r9
+/APP
+	movq  112(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+/NO_APP
+	movq	%r10, %r13
+/APP
+	movq  120(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  128(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  136(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  144(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  152(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  160(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  168(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  176(%rcx),%rax     
+	mulq  184(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r8, %rbx
+	movq	%rdi, %rax
+	movq	%rsi, %rdx
+/APP
+	addq %rbx,%r12         
+	adcq %rax,%r9         
+	adcq %rdx,%r13         
+	addq %rbx,%r12         
+	adcq %rax,%r9         
+	adcq %rdx,%r13         
+	
+/NO_APP
+	movq	%r12, -184(%rbp)
+	movq	%rdx, %rdi
+	movq	%rax, %r8
+	movq	%rbx, %rsi
+	movq	%r13, %r12
+	movq	%r10, %r13
+/APP
+	movq  120(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%rsi     
+	movq  %rdx,%r8     
+	xorq  %rdi,%rdi        
+	
+	movq  128(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  136(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  144(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  152(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  160(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  168(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+	movq  176(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%rdi        
+	
+/NO_APP
+	movq	%rsi, %rax
+	movq	%r8, %rbx
+	movq	%rdi, %rdx
+/APP
+	addq %rax,%r9         
+	adcq %rbx,%r12         
+	adcq %rdx,%r13         
+	addq %rax,%r9         
+	adcq %rbx,%r12         
+	adcq %rdx,%r13         
+	
+/NO_APP
+	movq	%rdx, %r11
+	movq	%rax, %r8
+	movq	%rbx, %rdi
+/APP
+	movq  184(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r11, %rsi
+	movq	%r9, -176(%rbp)
+	movq	%r13, %r9
+/APP
+	movq  128(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+/NO_APP
+	movq	%r10, %r13
+/APP
+	movq  136(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  144(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  152(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  160(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  168(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  176(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  184(%rcx),%rax     
+	mulq  192(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	addq %r8,%r12         
+	adcq %rdi,%r9         
+	adcq %rsi,%r13         
+	addq %r8,%r12         
+	adcq %rdi,%r9         
+	adcq %rsi,%r13         
+	
+/NO_APP
+	movq	%r12, -168(%rbp)
+	movq	%r13, %r12
+	movq	%r10, %r13
+/APP
+	movq  136(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  144(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  152(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  160(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  168(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  176(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  184(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %rbx
+	movq	%rsi, %rax
+/APP
+	addq %r8,%r9         
+	adcq %rbx,%r12         
+	adcq %rax,%r13         
+	addq %r8,%r9         
+	adcq %rbx,%r12         
+	adcq %rax,%r13         
+	
+/NO_APP
+	movq	%rax, %r11
+	movq	%rbx, %rdi
+	movq	%r10, %rbx
+/APP
+	movq  192(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r11, %rsi
+	movq	%r9, -160(%rbp)
+	movq	%r13, %r9
+/APP
+	movq  144(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  152(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  160(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  168(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  176(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  184(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  192(%rcx),%rax     
+	mulq  200(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	addq %r8,%r12         
+	adcq %rdi,%r9         
+	adcq %rsi,%rbx         
+	addq %r8,%r12         
+	adcq %rdi,%r9         
+	adcq %rsi,%rbx         
+	
+/NO_APP
+	movq	%r12, -152(%rbp)
+/APP
+	movq  152(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  160(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  168(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  176(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  184(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  192(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r10, %rdx
+	movq	%rdi, %r13
+	movq	%rsi, %r12
+/APP
+	addq %r8,%r9         
+	adcq %r13,%rbx         
+	adcq %r12,%rdx         
+	addq %r8,%r9         
+	adcq %r13,%rbx         
+	adcq %r12,%rdx         
+	
+/NO_APP
+	movq	%rdx, %rax
+	movq	%r13, %rdi
+	movq	%r12, %rsi
+	movq	%rax, %r11
+	movq	%r10, %r12
+/APP
+	movq  200(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%r9, -144(%rbp)
+	movq	%r11, %r9
+/APP
+	movq  160(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  168(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  176(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  184(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  192(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  200(%rcx),%rax     
+	mulq  208(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	addq %r8,%rbx         
+	adcq %rdi,%r9         
+	adcq %rsi,%r12         
+	addq %r8,%rbx         
+	adcq %rdi,%r9         
+	adcq %rsi,%r12         
+	
+/NO_APP
+	movq	%rbx, -136(%rbp)
+	movq	%r12, %r11
+/APP
+	movq  168(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  176(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  184(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  192(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  200(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r10, %rax
+	movq	%rdi, %r13
+	movq	%rsi, %r12
+/APP
+	addq %r8,%r9         
+	adcq %r13,%r11         
+	adcq %r12,%rax         
+	addq %r8,%r9         
+	adcq %r13,%r11         
+	adcq %r12,%rax         
+	
+/NO_APP
+	movq	%rax, %rdx
+	movq	%r11, %rbx
+	movq	%r13, %rdi
+	movq	%rdx, %r11
+	movq	%r12, %rsi
+/APP
+	movq  208(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%r9, -128(%rbp)
+	movq	%r11, %r9
+/APP
+	movq  176(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  184(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  192(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  200(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  208(%rcx),%rax     
+	mulq  216(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r10, %rdx
+/APP
+	addq %r8,%rbx         
+	adcq %rdi,%r9         
+	adcq %rsi,%rdx         
+	addq %r8,%rbx         
+	adcq %rdi,%r9         
+	adcq %rsi,%rdx         
+	
+/NO_APP
+	movq	%rbx, -120(%rbp)
+	movq	%rdx, %r11
+	movq	%r10, %rbx
+/APP
+	movq  184(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  192(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  200(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  208(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r13
+	movq	%rsi, %r12
+/APP
+	addq %r8,%r9         
+	adcq %r13,%r11         
+	adcq %r12,%rbx         
+	addq %r8,%r9         
+	adcq %r13,%r11         
+	adcq %r12,%rbx         
+	
+/NO_APP
+	movq	%rbx, %rdx
+	movq	%r13, %rdi
+	movq	%r11, %rbx
+	movq	%r12, %rsi
+	movq	%rdx, %r11
+	movq	%r10, %r12
+/APP
+	movq  216(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%r9, -112(%rbp)
+	movq	%r11, %r9
+/APP
+	movq  192(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  200(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  208(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  216(%rcx),%rax     
+	mulq  224(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	addq %r8,%rbx         
+	adcq %rdi,%r9         
+	adcq %rsi,%r12         
+	addq %r8,%rbx         
+	adcq %rdi,%r9         
+	adcq %rsi,%r12         
+	
+/NO_APP
+	movq	%rbx, -104(%rbp)
+	movq	%r12, %r11
+/APP
+	movq  200(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  208(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  216(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%r10, %rax
+	movq	%rdi, %r13
+	movq	%rsi, %r12
+/APP
+	addq %r8,%r9         
+	adcq %r13,%r11         
+	adcq %r12,%rax         
+	addq %r8,%r9         
+	adcq %r13,%r11         
+	adcq %r12,%rax         
+	
+/NO_APP
+	movq	%rax, %rdx
+	movq	%r11, %rbx
+	movq	%r13, %rdi
+	movq	%rdx, %r11
+	movq	%r12, %rsi
+	movq	%r10, %r12
+/APP
+	movq  224(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r9     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%r9, -96(%rbp)
+	movq	%r10, %r9
+/APP
+	movq  208(%rcx),%rax     
+	mulq  248(%rcx)           
+	movq  %rax,%r8     
+	movq  %rdx,%rdi     
+	xorq  %rsi,%rsi        
+	
+	movq  216(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+	movq  224(%rcx),%rax     
+	mulq  232(%rcx)           
+	addq  %rax,%r8     
+	adcq  %rdx,%rdi     
+	adcq  $0,%rsi        
+	
+/NO_APP
+	movq	%rdi, %r13
+	movq	%rsi, %rax
+/APP
+	addq %r8,%rbx         
+	adcq %r13,%r11         
+	adcq %rax,%r9         
+	addq %r8,%rbx         
+	adcq %r13,%r11         
+	adcq %rax,%r9         
+	
+/NO_APP
+	movq	%rbx, -88(%rbp)
+	movq	%r11, %rsi
+	movq	%r9, %r8
+/APP
+	movq  216(%rcx),%rax     
+	mulq  248(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%r12        
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%r12, %r11
+/APP
+	movq  224(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%r11        
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%r11        
+	
+/NO_APP
+	movq	%r8, %r13
+	movq	%r11, %rbx
+/APP
+	movq  232(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%rsi     
+	adcq  %rdx,%r13     
+	adcq  $0,%rbx        
+	
+/NO_APP
+	movq	%rsi, -80(%rbp)
+	movq	%rbx, %r12
+	movq	%r13, %rdi
+	movq	%r10, %r13
+/APP
+	movq  224(%rcx),%rax     
+	mulq  248(%rcx)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	addq  %rax,%rdi     
+	adcq  %rdx,%r12     
+	adcq  $0,%r13        
+	
+/NO_APP
+	movq	%r12, %r9
+	movq	%r13, %r12
+/APP
+	movq  232(%rcx),%rax     
+	mulq  240(%rcx)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%r9     
+	adcq  $0,%r12        
+	addq  %rax,%rdi     
+	adcq  %rdx,%r9     
+	adcq  $0,%r12        
+	
+/NO_APP
+	movq	%rdi, -72(%rbp)
+	movq	%r9, %r11
+	movq	%r12, %rbx
+	movq	%r10, %r9
+/APP
+	movq  232(%rcx),%rax     
+	mulq  248(%rcx)           
+	addq  %rax,%r11     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r9        
+	addq  %rax,%r11     
+	adcq  %rdx,%rbx     
+	adcq  $0,%r9        
+	
+/NO_APP
+	movq	%rbx, %r13
+	movq	%r9, %rbx
+	movq	%r10, %r9
+/APP
+	movq  240(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%r11     
+	adcq  %rdx,%r13     
+	adcq  $0,%rbx        
+	
+/NO_APP
+	movq	%r11, -64(%rbp)
+	movq	%r13, %rdi
+	movq	%rbx, %rsi
+/APP
+	movq  240(%rcx),%rax     
+	mulq  248(%rcx)           
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%r9        
+	addq  %rax,%rdi     
+	adcq  %rdx,%rsi     
+	adcq  $0,%r9        
+	
+/NO_APP
+	movq	%rdi, -56(%rbp)
+	movq	%r9, %r8
+/APP
+	movq  248(%rcx),%rax     
+	mulq  %rax        
+	addq  %rax,%rsi     
+	adcq  %rdx,%r8     
+	adcq  $0,%r10        
+	
+/NO_APP
+	movq	%rsi, -48(%rbp)
+	movq	16(%r14), %rdi
+	leaq	-544(%rbp), %rsi
+	movl	$512, %edx
+	movq	%r8, -40(%rbp)
+	movl	$64, 8(%r14)
+	movl	$0, (%r14)
+	call	memcpy@PLT
+	movl	8(%r14), %edx
+	testl	%edx, %edx
+	je	.L304
+	leal	-1(%rdx), %ecx
+	movq	16(%r14), %rsi
+	mov	%ecx, %r10d
+	cmpq	$0, (%rsi,%r10,8)
+	jne	.L302
+	movl	%ecx, %edx
+	.align 16
+.L303:
+	testl	%edx, %edx
+	movl	%edx, %ecx
+	je	.L307
+	decl	%edx
+	mov	%edx, %eax
+	cmpq	$0, (%rsi,%rax,8)
+	je	.L303
+	movl	%ecx, 8(%r14)
+	movl	%ecx, %edx
+.L302:
+	testl	%edx, %edx
+	je	.L304
+	movl	(%r14), %eax
+	movl	%eax, (%r14)
+	addq	$512, %rsp
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	leave
+	ret
+.L307:
+	movl	%edx, 8(%r14)
+	.align 16
+.L304:
+	xorl	%eax, %eax
+	movl	%eax, (%r14)
+	addq	$512, %rsp
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	leave
+	ret
+.LFE9:
+	.size	s_mp_sqr_comba_32, .-s_mp_sqr_comba_32
diff --git a/security/nss/lib/freebl/mpi/mp_gf2m-priv.h b/security/nss/lib/freebl/mpi/mp_gf2m-priv.h
new file mode 100644
index 000000000..5be4da4bf
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_gf2m-priv.h
@@ -0,0 +1,73 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _MP_GF2M_PRIV_H_
+#define _MP_GF2M_PRIV_H_
+
+#include "mpi-priv.h"
+
+extern const mp_digit mp_gf2m_sqr_tb[16]; /* 16-entry lookup table indexed by the gf2m_SQR0/gf2m_SQR1 macros below */
+
+#if defined(MP_USE_UINT_DIGIT) /* MP_USE_UINT_DIGIT is not defined in this header */
+#define MP_DIGIT_BITS 32
+/* log2(MP_DIGIT_BITS) and MP_DIGIT_BITS - 1: enable fast divide and mod operations on MP_DIGIT_BITS */
+#define MP_DIGIT_BITS_LOG_2 5
+#define MP_DIGIT_BITS_MASK 0x1f
+#else
+#define MP_DIGIT_BITS 64
+/* log2(MP_DIGIT_BITS) and MP_DIGIT_BITS - 1: enable fast divide and mod operations on MP_DIGIT_BITS */
+#define MP_DIGIT_BITS_LOG_2 6
+#define MP_DIGIT_BITS_MASK 0x3f
+#endif
+
+/* Platform-specific macros for fast binary polynomial squaring: each 4-bit nibble of w indexes mp_gf2m_sqr_tb. */
+#if MP_DIGIT_BITS == 32 /* SQR1 reads bits 31..16 of w, SQR0 reads bits 15..0; w is evaluated several times */
+#define gf2m_SQR1(w)                                                                 \
+    (mp_gf2m_sqr_tb[(w) >> 28 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 24 & 0xF] << 16 | \
+     mp_gf2m_sqr_tb[(w) >> 20 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) >> 16 & 0xF])
+#define gf2m_SQR0(w)                                                                \
+    (mp_gf2m_sqr_tb[(w) >> 12 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 8 & 0xF] << 16 | \
+     mp_gf2m_sqr_tb[(w) >> 4 & 0xF] << 8 | mp_gf2m_sqr_tb[(w)&0xF])
+#else /* SQR1 reads bits 63..32 of w, SQR0 reads bits 31..0; w is evaluated several times */
+#define gf2m_SQR1(w)                                                                 \
+    (mp_gf2m_sqr_tb[(w) >> 60 & 0xF] << 56 | mp_gf2m_sqr_tb[(w) >> 56 & 0xF] << 48 | \
+     mp_gf2m_sqr_tb[(w) >> 52 & 0xF] << 40 | mp_gf2m_sqr_tb[(w) >> 48 & 0xF] << 32 | \
+     mp_gf2m_sqr_tb[(w) >> 44 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 40 & 0xF] << 16 | \
+     mp_gf2m_sqr_tb[(w) >> 36 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) >> 32 & 0xF])
+#define gf2m_SQR0(w)                                                                 \
+    (mp_gf2m_sqr_tb[(w) >> 28 & 0xF] << 56 | mp_gf2m_sqr_tb[(w) >> 24 & 0xF] << 48 | \
+     mp_gf2m_sqr_tb[(w) >> 20 & 0xF] << 40 | mp_gf2m_sqr_tb[(w) >> 16 & 0xF] << 32 | \
+     mp_gf2m_sqr_tb[(w) >> 12 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 8 & 0xF] << 16 |  \
+     mp_gf2m_sqr_tb[(w) >> 4 & 0xF] << 8 | mp_gf2m_sqr_tb[(w)&0xF])
+#endif /* each expansion is fully parenthesized so it binds as one operand inside a larger expression */
+
+/* Xor-multiply the two single-digit binary polynomials a and b.
+ * The product has degree < 2 * MP_DIGIT_BITS - 1, so it fits in two mp_digits.
+ * The high digit is stored through rh and the low digit through rl.
+ */
+void s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b);
+
+/* Compute xor-multiply of two binary polynomials  (a1, a0) x (b1, b0);
+ * the result is a binary polynomial stored in the 4 mp_digits r[4].
+ * The caller MUST ensure that r has room for those 4 mp_digits.
+ */
+void s_bmul_2x2(mp_digit *r, const mp_digit a1, const mp_digit a0, const mp_digit b1,
+                const mp_digit b0);
+
+/* Compute xor-multiply of two binary polynomials  (a2, a1, a0) x (b2, b1, b0);
+ * the result is a binary polynomial stored in the 6 mp_digits r[6].
+ * The caller MUST ensure that r has room for those 6 mp_digits.
+ */
+void s_bmul_3x3(mp_digit *r, const mp_digit a2, const mp_digit a1, const mp_digit a0,
+                const mp_digit b2, const mp_digit b1, const mp_digit b0);
+
+/* Compute xor-multiply of two binary polynomials  (a3, a2, a1, a0) x (b3, b2, b1, b0);
+ * the result is a binary polynomial stored in the 8 mp_digits r[8].
+ * The caller MUST ensure that r has room for those 8 mp_digits.
+ */
+void s_bmul_4x4(mp_digit *r, const mp_digit a3, const mp_digit a2, const mp_digit a1,
+                const mp_digit a0, const mp_digit b3, const mp_digit b2, const mp_digit b1,
+                const mp_digit b0);
+
+#endif /* _MP_GF2M_PRIV_H_ */
diff --git a/security/nss/lib/freebl/mpi/mp_gf2m.c b/security/nss/lib/freebl/mpi/mp_gf2m.c
new file mode 100644
index 000000000..5a096adde
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_gf2m.c
@@ -0,0 +1,678 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mp_gf2m.h"
+#include "mp_gf2m-priv.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+
+const mp_digit mp_gf2m_sqr_tb[16] = /* entry i: the 4 bits of i moved to even bit positions */
+    {
+      0, 1, 4, 5, 16, 17, 20, 21,    /* i = 0..7 */
+      64, 65, 68, 69, 80, 81, 84, 85 /* i = 8..15 */
+    };
+
+/* Multiply two binary polynomials mp_digits a, b.
+ * Result is a polynomial with degree < 2 * MP_DIGIT_BITS - 1.
+ * Output in two mp_digits rh, rl.
+ */
+#if MP_DIGIT_BITS == 32
+void
+s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b)
+{
+    register mp_digit h, l, s; /* h:l = high:low result digits, s = current table entry */
+    mp_digit tab[8], top2b = a >> 30; /* top2b: the two high bits of a, folded in at the end */
+    register mp_digit a1, a2, a4;
+
+    a1 = a & (0x3FFFFFFF); /* a without its top two bits, so a1 << 2 loses nothing */
+    a2 = a1 << 1;
+    a4 = a2 << 1;
+
+    tab[0] = 0; /* tab[i] = a1 times the 3-bit polynomial i, as an XOR of shifts */
+    tab[1] = a1;
+    tab[2] = a2;
+    tab[3] = a1 ^ a2;
+    tab[4] = a4;
+    tab[5] = a1 ^ a4;
+    tab[6] = a2 ^ a4;
+    tab[7] = a1 ^ a2 ^ a4;
+
+    s = tab[b & 0x7]; /* walk b in 3-bit windows, lowest window first */
+    l = s;
+    s = tab[b >> 3 & 0x7];
+    l ^= s << 3; /* part of the shifted entry that stays in the low digit */
+    h = s >> 29; /* bits pushed out of the low digit go to the high digit */
+    s = tab[b >> 6 & 0x7];
+    l ^= s << 6;
+    h ^= s >> 26;
+    s = tab[b >> 9 & 0x7];
+    l ^= s << 9;
+    h ^= s >> 23;
+    s = tab[b >> 12 & 0x7];
+    l ^= s << 12;
+    h ^= s >> 20;
+    s = tab[b >> 15 & 0x7];
+    l ^= s << 15;
+    h ^= s >> 17;
+    s = tab[b >> 18 & 0x7];
+    l ^= s << 18;
+    h ^= s >> 14;
+    s = tab[b >> 21 & 0x7];
+    l ^= s << 21;
+    h ^= s >> 11;
+    s = tab[b >> 24 & 0x7];
+    l ^= s << 24;
+    h ^= s >> 8;
+    s = tab[b >> 27 & 0x7];
+    l ^= s << 27;
+    h ^= s >> 5;
+    s = tab[b >> 30]; /* last window: only two bits of b remain, no mask needed */
+    l ^= s << 30;
+    h ^= s >> 2;
+
+    /* compensate for the top two bits of a */
+
+    if (top2b & 01) { /* bit 30 of a is set: XOR in b shifted left by 30 */
+        l ^= b << 30;
+        h ^= b >> 2;
+    }
+    if (top2b & 02) { /* bit 31 of a is set: XOR in b shifted left by 31 */
+        l ^= b << 31;
+        h ^= b >> 1;
+    }
+
+    *rh = h;
+    *rl = l;
+}
+#else
+void
+s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b)
+{
+    register mp_digit h, l, s; /* h:l = high:low result digits, s = current table entry */
+    mp_digit tab[16], top3b = a >> 61; /* top3b: the three high bits of a, folded in at the end */
+    register mp_digit a1, a2, a4, a8;
+
+    a1 = a & (0x1FFFFFFFFFFFFFFFULL); /* a without its top three bits, so a1 << 3 loses nothing */
+    a2 = a1 << 1;
+    a4 = a2 << 1;
+    a8 = a4 << 1;
+    tab[0] = 0; /* tab[i] = a1 times the 4-bit polynomial i, as an XOR of shifts */
+    tab[1] = a1;
+    tab[2] = a2;
+    tab[3] = a1 ^ a2;
+    tab[4] = a4;
+    tab[5] = a1 ^ a4;
+    tab[6] = a2 ^ a4;
+    tab[7] = a1 ^ a2 ^ a4;
+    tab[8] = a8;
+    tab[9] = a1 ^ a8;
+    tab[10] = a2 ^ a8;
+    tab[11] = a1 ^ a2 ^ a8;
+    tab[12] = a4 ^ a8;
+    tab[13] = a1 ^ a4 ^ a8;
+    tab[14] = a2 ^ a4 ^ a8;
+    tab[15] = a1 ^ a2 ^ a4 ^ a8;
+
+    s = tab[b & 0xF]; /* walk b in 4-bit windows, lowest window first */
+    l = s;
+    s = tab[b >> 4 & 0xF];
+    l ^= s << 4; /* part of the shifted entry that stays in the low digit */
+    h = s >> 60; /* bits pushed out of the low digit go to the high digit */
+    s = tab[b >> 8 & 0xF];
+    l ^= s << 8;
+    h ^= s >> 56;
+    s = tab[b >> 12 & 0xF];
+    l ^= s << 12;
+    h ^= s >> 52;
+    s = tab[b >> 16 & 0xF];
+    l ^= s << 16;
+    h ^= s >> 48;
+    s = tab[b >> 20 & 0xF];
+    l ^= s << 20;
+    h ^= s >> 44;
+    s = tab[b >> 24 & 0xF];
+    l ^= s << 24;
+    h ^= s >> 40;
+    s = tab[b >> 28 & 0xF];
+    l ^= s << 28;
+    h ^= s >> 36;
+    s = tab[b >> 32 & 0xF];
+    l ^= s << 32;
+    h ^= s >> 32;
+    s = tab[b >> 36 & 0xF];
+    l ^= s << 36;
+    h ^= s >> 28;
+    s = tab[b >> 40 & 0xF];
+    l ^= s << 40;
+    h ^= s >> 24;
+    s = tab[b >> 44 & 0xF];
+    l ^= s << 44;
+    h ^= s >> 20;
+    s = tab[b >> 48 & 0xF];
+    l ^= s << 48;
+    h ^= s >> 16;
+    s = tab[b >> 52 & 0xF];
+    l ^= s << 52;
+    h ^= s >> 12;
+    s = tab[b >> 56 & 0xF];
+    l ^= s << 56;
+    h ^= s >> 8;
+    s = tab[b >> 60]; /* top window: the shift leaves exactly four bits, no mask needed */
+    l ^= s << 60;
+    h ^= s >> 4;
+
+    /* compensate for the top three bits of a */
+
+    if (top3b & 01) { /* bit 61 of a is set: XOR in b shifted left by 61 */
+        l ^= b << 61;
+        h ^= b >> 3;
+    }
+    if (top3b & 02) { /* bit 62 of a is set: XOR in b shifted left by 62 */
+        l ^= b << 62;
+        h ^= b >> 2;
+    }
+    if (top3b & 04) { /* bit 63 of a is set: XOR in b shifted left by 63 */
+        l ^= b << 63;
+        h ^= b >> 1;
+    }
+
+    *rh = h;
+    *rl = l;
+}
+#endif
+
+/* Compute xor-multiply of two binary polynomials  (a1, a0) x (b1, b0)
+ * using three 1x1 products instead of four (Karatsuba).  The result is a
+ * binary polynomial in 4 mp_digits r[0..3], least significant digit first.
+ * The caller MUST ensure that r has the right amount of space allocated. */
+void
+s_bmul_2x2(mp_digit *r, const mp_digit a1, const mp_digit a0, const mp_digit b1,
+           const mp_digit b0)
+{
+    mp_digit m1, m0; /* high and low digit of the middle product */
+    /* r[3] = h1, r[2] = h0; r[1] = l1; r[0] = l0 */
+    s_bmul_1x1(r + 3, r + 2, a1, b1); /* h1:h0 = a1 * b1 */
+    s_bmul_1x1(r + 1, r, a0, b0); /* l1:l0 = a0 * b0 */
+    s_bmul_1x1(&m1, &m0, a0 ^ a1, b0 ^ b1); /* m1:m0 = (a0 ^ a1) * (b0 ^ b1) */
+    /* Middle term is m ^ h ^ l; it is XORed in one digit up, into r[1] and r[2] */
+    r[2] ^= m1 ^ r[1] ^ r[3];            /* h0 ^= m1 ^ l1 ^ h1; */
+    r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0; /* l1 ^= l0 ^ h0 ^ m0; r[2] is already updated, so h1 and m1 cancel */
+}
+
+/* Compute xor-multiply of two binary polynomials  (a2, a1, a0) x (b2, b1, b0)
+ * result is a binary polynomial in 6 mp_digits r[0..5], least significant first.
+ * The caller MUST ensure that r has the right amount of space allocated.
+ */
+void
+s_bmul_3x3(mp_digit *r, const mp_digit a2, const mp_digit a1, const mp_digit a0,
+           const mp_digit b2, const mp_digit b1, const mp_digit b0)
+{
+    mp_digit zm[4]; /* scratch for the middle (cross) product */
+
+    s_bmul_1x1(r + 5, r + 4, a2, b2);         /* fill top 2 words */
+    s_bmul_2x2(zm, a1, a2 ^ a0, b1, b2 ^ b0); /* fill middle 4 words */
+    s_bmul_2x2(r, a1, a0, b1, b0);            /* fill bottom 4 words */
+
+    zm[3] ^= r[3]; /* remove the bottom product from the middle product ... */
+    zm[2] ^= r[2];
+    zm[1] ^= r[1] ^ r[5]; /* ... and the top product, which has only two words */
+    zm[0] ^= r[0] ^ r[4];
+
+    r[5] ^= zm[3]; /* XOR the corrected middle product in at an offset of two words */
+    r[4] ^= zm[2];
+    r[3] ^= zm[1];
+    r[2] ^= zm[0];
+}
+
+/* Compute xor-multiply of two binary polynomials  (a3, a2, a1, a0) x (b3, b2, b1, b0)
+ * result is a binary polynomial in 8 mp_digits r[0..7], least significant first.
+ * The caller MUST ensure that r has the right amount of space allocated.
+ */
+void
+s_bmul_4x4(mp_digit *r, const mp_digit a3, const mp_digit a2, const mp_digit a1,
+           const mp_digit a0, const mp_digit b3, const mp_digit b2, const mp_digit b1,
+           const mp_digit b0)
+{
+    mp_digit zm[4]; /* scratch for the middle (cross) product */
+
+    s_bmul_2x2(r + 4, a3, a2, b3, b2);                  /* fill top 4 words */
+    s_bmul_2x2(zm, a3 ^ a1, a2 ^ a0, b3 ^ b1, b2 ^ b0); /* fill middle 4 words */
+    s_bmul_2x2(r, a1, a0, b1, b0);                      /* fill bottom 4 words */
+
+    zm[3] ^= r[3] ^ r[7]; /* middle product minus the bottom and top products */
+    zm[2] ^= r[2] ^ r[6];
+    zm[1] ^= r[1] ^ r[5];
+    zm[0] ^= r[0] ^ r[4];
+
+    r[5] ^= zm[3]; /* XOR the corrected middle product in at an offset of two words */
+    r[4] ^= zm[2];
+    r[3] ^= zm[1];
+    r[2] ^= zm[0];
+}
+
+/* Add two binary polynomials: c = a + b over GF(2), i.e. the digit-wise XOR
+ * of a and b.  c may be a or b, and a and b may be the same object.
+ * Returns MP_OKAY, or the error reported by s_mp_pad when c cannot be sized.
+ */
+mp_err
+mp_badd(const mp_int *a, const mp_int *b, mp_int *c)
+{
+    const mp_int *lng, *sht;
+    mp_digit *pa, *pb, *pc;
+    mp_size ix;
+    mp_size used_pa, used_pb;
+    mp_err res = MP_OKAY;
+
+    /* lng/sht: the operand with more digits and the one with fewer */
+    if (MP_USED(a) >= MP_USED(b)) {
+        lng = a;
+        sht = b;
+    } else {
+        lng = b;
+        sht = a;
+    }
+    /* Record both lengths first: padding c changes MP_USED when c aliases sht */
+    used_pa = MP_USED(lng);
+    used_pb = MP_USED(sht);
+
+    /* Make sure c has enough precision for the output value */
+    MP_CHECKOK(s_mp_pad(c, used_pa));
+
+    /* Fetch digit pointers only now: padding may have moved c's (aliased) buffer */
+    pa = MP_DIGITS(lng);
+    pb = MP_DIGITS(sht);
+    pc = MP_DIGITS(c);
+    for (ix = 0; ix < used_pb; ix++) {
+        (*pc++) = (*pa++) ^ (*pb++);
+    }
+
+    /* Finish the rest of digits until we're actually done */
+    for (; ix < used_pa; ++ix) {
+        *pc++ = *pa++;
+    }
+
+    MP_USED(c) = used_pa;
+    MP_SIGN(c) = ZPOS;
+    s_mp_clamp(c);
+
+CLEANUP:
+    return res;
+}
+
+#define s_mp_div2(a) MP_CHECKOK(mpl_rsh((a), (a), 1));
+
+/* d[0..a_len] = a[0..a_len-1] * b: xor-multiply a polynomial by a single digit */
+static void
+s_bmul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *d)
+{
+    mp_digit hi, lo, spill = 0; /* spill: high half of the previous 1x1 product */
+    mp_size ix;
+    for (ix = 0; ix < a_len; ix++) {
+        s_bmul_1x1(&hi, &lo, a[ix], b);
+        d[ix] = lo ^ spill;
+        spill = hi;
+    }
+    d[a_len] = spill; /* one digit more than a_len is written */
+}
+
+/* d[0..a_len] ^= a[0..a_len-1] * b: xor-multiply by a single digit and accumulate */
+static void
+s_bmul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *d)
+{
+    mp_digit hi, lo, spill = 0; /* spill: high half of the previous 1x1 product */
+    mp_size ix;
+    for (ix = 0; ix < a_len; ix++) {
+        s_bmul_1x1(&hi, &lo, a[ix], b);
+        d[ix] ^= lo ^ spill;
+        spill = hi;
+    }
+    d[a_len] ^= spill; /* one digit more than a_len is touched */
+}
+
+/* Compute the binary polynomial (xor) product c = a * b.
+ * All parameters may be identical; an input that aliases c is copied first.
+ */
+mp_err
+mp_bmul(const mp_int *a, const mp_int *b, mp_int *c)
+{
+    mp_digit *pb, b_i;
+    mp_int tmp;
+    mp_size ib, a_used, b_used;
+    mp_err res = MP_OKAY;
+
+    MP_DIGITS(&tmp) = 0; /* tmp holds no digits yet; it is cleared at CLEANUP either way */
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (a == c) { /* c is overwritten below, so work from a copy of the aliased input */
+        MP_CHECKOK(mp_init_copy(&tmp, a));
+        if (a == b)
+            b = &tmp;
+        a = &tmp;
+    } else if (b == c) {
+        MP_CHECKOK(mp_init_copy(&tmp, b));
+        b = &tmp;
+    }
+
+    if (MP_USED(a) < MP_USED(b)) {
+        const mp_int *xch = b; /* switch a and b if b longer */
+        b = a;
+        a = xch;
+    }
+
+    MP_USED(c) = 1; /* reset c to the single digit 0 before sizing it for the product */
+    MP_DIGIT(c, 0) = 0;
+    MP_CHECKOK(s_mp_pad(c, USED(a) + USED(b)));
+
+    pb = MP_DIGITS(b);
+    s_bmul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c)); /* first row: c = a * b[0] */
+
+    /* Outer loop:  Digits of b */
+    a_used = MP_USED(a);
+    b_used = MP_USED(b);
+    MP_USED(c) = a_used + b_used;
+    for (ib = 1; ib < b_used; ib++) {
+        b_i = *pb++;
+
+        /* Inner product:  Digits of a */
+        if (b_i)
+            s_bmul_d_add(MP_DIGITS(a), a_used, b_i, MP_DIGITS(c) + ib); /* c += (a * b[ib]) shifted by ib digits */
+        else
+            MP_DIGIT(c, ib + a_used) = b_i; /* b_i is 0 here: zero this row's top digit */
+    }
+
+    s_mp_clamp(c);
+
+    SIGN(c) = ZPOS;
+
+CLEANUP:
+    mp_clear(&tmp);
+    return res;
+}
+
+/* Compute modular reduction of a and store result in r.
+ * r could be a.
+ * For modular arithmetic, the irreducible polynomial f(t) is represented
+ * as an array of unsigned int exponents p[], where f(t) is of the form:
+ *     f(t) = t^p[0] + t^p[1] + ... + t^p[k]
+ * where m = p[0] > p[1] > ... > p[k] = 0; the loops stop at the first 0 entry.
+ */
+mp_err
+mp_bmod(const mp_int *a, const unsigned int p[], mp_int *r)
+{
+    int j, k;
+    int n, dN, d0, d1;
+    mp_digit zz, *z, tmp;
+    mp_size used;
+    mp_err res = MP_OKAY;
+
+    /* The algorithm does the reduction in place in r,
+     * if a != r, copy a into r first so reduction can be done in r
+     */
+    if (a != r) {
+        MP_CHECKOK(mp_copy(a, r));
+    }
+    z = MP_DIGITS(r);
+
+    /* start reduction */
+    /*dN = p[0] / MP_DIGIT_BITS; */
+    dN = p[0] >> MP_DIGIT_BITS_LOG_2; /* index of the digit that holds bit p[0] */
+    used = MP_USED(r);
+
+    for (j = used - 1; j > dN;) { /* clear whole digits above dN, top down */
+
+        zz = z[j];
+        if (zz == 0) {
+            j--;
+            continue;
+        }
+        z[j] = 0; /* j is not decremented: lower terms may XOR bits back into z[j] */
+
+        for (k = 1; p[k] > 0; k++) {
+            /* reducing component t^p[k] */
+            n = p[0] - p[k]; /* zz is XORed back in n bits lower */
+            /*d0 = n % MP_DIGIT_BITS;   */
+            d0 = n & MP_DIGIT_BITS_MASK;
+            d1 = MP_DIGIT_BITS - d0;
+            /*n /= MP_DIGIT_BITS; */
+            n >>= MP_DIGIT_BITS_LOG_2;
+            z[j - n] ^= (zz >> d0);
+            if (d0) /* the shift straddles two digits only when d0 != 0 */
+                z[j - n - 1] ^= (zz << d1);
+        }
+
+        /* reducing component t^0 */
+        n = dN;
+        /*d0 = p[0] % MP_DIGIT_BITS;*/
+        d0 = p[0] & MP_DIGIT_BITS_MASK;
+        d1 = MP_DIGIT_BITS - d0;
+        z[j - n] ^= (zz >> d0);
+        if (d0)
+            z[j - n - 1] ^= (zz << d1);
+    }
+
+    /* final round of reduction */
+    while (j == dN) { /* only bits p[0] and above, inside digit dN, remain */
+
+        /* d0 = p[0] % MP_DIGIT_BITS; */
+        d0 = p[0] & MP_DIGIT_BITS_MASK;
+        zz = z[dN] >> d0; /* the part of digit dN at or above bit p[0] */
+        if (zz == 0)
+            break;
+        d1 = MP_DIGIT_BITS - d0;
+
+        /* clear up the top d1 bits */
+        if (d0) {
+            z[dN] = (z[dN] << d1) >> d1;
+        } else {
+            z[dN] = 0;
+        }
+        *z ^= zz; /* reduction t^0 component */
+
+        for (k = 1; p[k] > 0; k++) {
+            /* reducing component t^p[k]*/
+            /* n = p[k] / MP_DIGIT_BITS; */
+            n = p[k] >> MP_DIGIT_BITS_LOG_2;
+            /* d0 = p[k] % MP_DIGIT_BITS; */
+            d0 = p[k] & MP_DIGIT_BITS_MASK;
+            d1 = MP_DIGIT_BITS - d0;
+            z[n] ^= (zz << d0);
+            tmp = zz >> d1;
+            if (d0 && tmp) /* spill into the next digit only if bits were shifted out */
+                z[n + 1] ^= tmp;
+        }
+    }
+
+    s_mp_clamp(r);
+CLEANUP:
+    return res;
+}
+
+/* r = a * b reduced modulo the polynomial described by p.  r could be a or b;
+ * when a and b are the same object the squaring routine does all the work.
+ */
+mp_err
+mp_bmulmod(const mp_int *a, const mp_int *b, const unsigned int p[], mp_int *r)
+{
+    mp_err status = (a == b) ? mp_bsqrmod(a, p, r) : mp_bmul(a, b, r);
+
+    /* the squaring path has already reduced; only a plain product still needs it */
+    if (a != b && status == MP_OKAY) {
+        status = mp_bmod(r, p, r);
+    }
+    return status;
+}
+
+/* Compute binary polynomial squaring r = a*a mod p .
+ * Parameter r and a can be identical (a is then copied before r is rewritten).
+ */
+
+mp_err
+mp_bsqrmod(const mp_int *a, const unsigned int p[], mp_int *r)
+{
+    mp_digit *pa, *pr, a_i;
+    mp_int tmp;
+    mp_size ia, a_used;
+    mp_err res; /* assigned by the first MP_CHECKOK, before CLEANUP can be reached */
+
+    ARGCHK(a != NULL && r != NULL, MP_BADARG);
+    MP_DIGITS(&tmp) = 0; /* tmp holds no digits yet; it is cleared at CLEANUP either way */
+
+    if (a == r) {
+        MP_CHECKOK(mp_init_copy(&tmp, a));
+        a = &tmp;
+    }
+
+    MP_USED(r) = 1; /* reset r to the single digit 0 before sizing it for the square */
+    MP_DIGIT(r, 0) = 0;
+    MP_CHECKOK(s_mp_pad(r, 2 * USED(a)));
+
+    pa = MP_DIGITS(a);
+    pr = MP_DIGITS(r);
+    a_used = MP_USED(a);
+    MP_USED(r) = 2 * a_used;
+
+    for (ia = 0; ia < a_used; ia++) { /* each input digit yields two output digits */
+        a_i = *pa++;
+        *pr++ = gf2m_SQR0(a_i); /* low output digit first */
+        *pr++ = gf2m_SQR1(a_i);
+    }
+
+    MP_CHECKOK(mp_bmod(r, p, r));
+    s_mp_clamp(r);
+    SIGN(r) = ZPOS;
+
+CLEANUP:
+    mp_clear(&tmp);
+    return res;
+}
+
+/* Compute binary polynomial y/x mod p, y divided by x, reduce modulo p.
+ * Store the result in r. r could be x or y, and x could equal y.
+ * Returns MP_RANGE if x is zero modulo p (no quotient exists).
+ * Uses algorithm Modular_Division_GF(2^m) from Chang-Shantz, S.,
+ *     "From Euclid's GCD to Montgomery Multiplication to the Great Divide".
+ */
+mp_err
+mp_bdivmod(const mp_int *y, const mp_int *x, const mp_int *pp,
+           const unsigned int p[], mp_int *r)
+{
+    mp_int aa, bb, uu;
+    mp_int *a = &aa, *b = &bb, *u = &uu, *v = r; /* working copies of x, pp, y; v lives in r */
+    mp_err res = MP_OKAY;
+
+    MP_DIGITS(&aa) = 0;
+    MP_DIGITS(&bb) = 0;
+    MP_DIGITS(&uu) = 0;
+
+    MP_CHECKOK(mp_init_copy(&aa, x));
+    MP_CHECKOK(mp_init_copy(&uu, y));
+    MP_CHECKOK(mp_init_copy(&bb, pp));
+    MP_CHECKOK(s_mp_pad(r, USED(pp)));
+    MP_USED(r) = 1; /* v starts out as 0; x and y were copied above, so r may alias them */
+    MP_DIGIT(r, 0) = 0;
+
+    /* reduce x and y mod p */
+    MP_CHECKOK(mp_bmod(a, p, a));
+    MP_CHECKOK(mp_bmod(u, p, u));
+    if (mp_cmp_z(a) == MP_EQ) { /* zero divisor: the halving loop below would never end */
+        res = MP_RANGE;
+        goto CLEANUP;
+    }
+
+    while (!mp_isodd(a)) {
+        s_mp_div2(a);
+        if (mp_isodd(u)) {
+            MP_CHECKOK(mp_badd(u, pp, u));
+        }
+        s_mp_div2(u);
+    }
+
+    do {
+        if (mp_cmp_mag(b, a) > 0) {
+            MP_CHECKOK(mp_badd(b, a, b));
+            MP_CHECKOK(mp_badd(v, u, v));
+            do {
+                s_mp_div2(b);
+                if (mp_isodd(v)) {
+                    MP_CHECKOK(mp_badd(v, pp, v));
+                }
+                s_mp_div2(v);
+            } while (!mp_isodd(b));
+        } else if ((MP_DIGIT(a, 0) == 1) && (MP_USED(a) == 1))
+            break;
+        else {
+            MP_CHECKOK(mp_badd(a, b, a));
+            MP_CHECKOK(mp_badd(u, v, u));
+            do {
+                s_mp_div2(a);
+                if (mp_isodd(u)) {
+                    MP_CHECKOK(mp_badd(u, pp, u));
+                }
+                s_mp_div2(u);
+            } while (!mp_isodd(a));
+        }
+    } while (1);
+
+    MP_CHECKOK(mp_copy(u, r));
+
+CLEANUP:
+    mp_clear(&aa);
+    mp_clear(&bb);
+    mp_clear(&uu);
+    return res;
+}
+
+/* List the exponents of the non-zero terms of polynomial a, highest first.
+ * The first max entries of p[] are zeroed, then filled with at most max
+ * exponents.  The return value counts every non-zero term, including those
+ * that did not fit, so a result larger than max means p[] was too small.
+ */
+int
+mp_bpoly2arr(const mp_int *a, unsigned int p[], int max)
+{
+    int dig, bit, idx;
+    int found = 0;
+    mp_digit word;
+
+    /* start from an all-zero output array */
+    for (idx = 0; idx < max; idx++)
+        p[idx] = 0;
+
+    /* scan digits from most to least significant, and bits likewise */
+    for (dig = MP_USED(a) - 1; dig >= 0; dig--) {
+        word = MP_DIGITS(a)[dig];
+        for (bit = MP_DIGIT_BIT - 1; bit >= 0; bit--) {
+            if ((word >> bit) & 1) {
+                /* store while there is room, but always keep counting */
+                if (found < max)
+                    p[found] = MP_DIGIT_BIT * dig + bit;
+                found++;
+            }
+        }
+    }
+
+    return found;
+}
+
+
+/* Build in a the bit-string form of the polynomial whose exponents are listed
+ * in p[].  The first 0 entry ends the list; bit 0 is then always set.
+ */
+mp_err
+mp_barr2poly(const unsigned int p[], mp_int *a)
+{
+    const unsigned int *term = p;
+    mp_err res = MP_OKAY;
+
+    mp_zero(a);
+    while (*term != 0) {
+        MP_CHECKOK(mpl_set_bit(a, *term, 1));
+        term++;
+    }
+    MP_CHECKOK(mpl_set_bit(a, 0, 1));
+
+CLEANUP:
+    return res;
+}
diff --git a/security/nss/lib/freebl/mpi/mp_gf2m.h b/security/nss/lib/freebl/mpi/mp_gf2m.h
new file mode 100644
index 000000000..ed2c85493
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_gf2m.h
@@ -0,0 +1,28 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _MP_GF2M_H_
+#define _MP_GF2M_H_
+
+#include "mpi.h"
+
+mp_err mp_badd(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err mp_bmul(const mp_int *a, const mp_int *b, mp_int *c);
+
+/* For modular arithmetic, the irreducible polynomial f(t) is represented
+ * as an array of unsigned int exponents p[], where f(t) is of the form:
+ *     f(t) = t^p[0] + t^p[1] + ... + t^p[k]
+ * where m = p[0] > p[1] > ... > p[k] = 0; the final 0 also ends the array.
+ */
+mp_err mp_bmod(const mp_int *a, const unsigned int p[], mp_int *r);
+mp_err mp_bmulmod(const mp_int *a, const mp_int *b, const unsigned int p[],
+                  mp_int *r);
+mp_err mp_bsqrmod(const mp_int *a, const unsigned int p[], mp_int *r);
+mp_err mp_bdivmod(const mp_int *y, const mp_int *x, const mp_int *pp,
+                  const unsigned int p[], mp_int *r);
+
+int mp_bpoly2arr(const mp_int *a, unsigned int p[], int max);
+mp_err mp_barr2poly(const unsigned int p[], mp_int *a);
+
+#endif /* _MP_GF2M_H_ */
diff --git a/security/nss/lib/freebl/mpi/mpcpucache.c b/security/nss/lib/freebl/mpi/mpcpucache.c
new file mode 100644
index 000000000..6fed35239
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpcpucache.c
@@ -0,0 +1,808 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+#include "prtypes.h"
+
+/*
+ * This file implements a single function: s_mpi_getProcessorLineSize();
+ * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
+ * if a cache exists, or zero if there is no cache. If more than one
+ * cache line exists, it should return the smallest line size (which is
+ * usually the L1 cache).
+ *
+ * mp_modexp uses this information to make sure that private key information
+ * isn't being leaked through the cache.
+ *
+ * Currently the file returns good data for most modern x86 processors, and
+ * reasonable data on 64-bit ppc processors. All other processors are assumed
+ * to have a cache line size of 32 bytes unless modified by target.mk.
+ *
+ */
+
+#if defined(i386) || defined(__i386) || defined(__X86__) || defined(_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
+/* X86 processors have special instructions that tell us about the cache */
+#include "string.h"
+
+#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
+#define AMD_64 1
+#endif
+
+/* Generic CPUID function */
+#if defined(AMD_64)
+
+#if defined(__GNUC__)
+
+void
+freebl_cpuid(unsigned long op, unsigned long *eax,
+             unsigned long *ebx, unsigned long *ecx,
+             unsigned long *edx)
+{ /* Run CPUID with op in eax and store the four result registers through the pointers. */
+    __asm__("cpuid\n\t"
+            : "=a"(*eax),
+              "=b"(*ebx),
+              "=c"(*ecx),
+              "=d"(*edx)
+            : "0"(op)); /* "0": op shares the register of output 0 (eax); NOTE(review): ecx is not set on input here */
+}
+
+#elif defined(_MSC_VER)
+
+#include <intrin.h>
+
+void
+freebl_cpuid(unsigned long op, unsigned long *eax,
+             unsigned long *ebx, unsigned long *ecx,
+             unsigned long *edx)
+{
+    int intrinsic_out[4]; /* filled by __cpuid; copied out below as eax, ebx, ecx, edx in that order */
+
+    __cpuid(intrinsic_out, op);
+    *eax = intrinsic_out[0];
+    *ebx = intrinsic_out[1];
+    *ecx = intrinsic_out[2];
+    *edx = intrinsic_out[3];
+}
+
+#endif
+
+#else /* !defined(AMD_64) */
+
+/* x86 */
+
+#if defined(__GNUC__)
+void
+freebl_cpuid(unsigned long op, unsigned long *eax,
+             unsigned long *ebx, unsigned long *ecx,
+             unsigned long *edx)
+{
+    /* Some older processors don't fill the ecx register with cpuid, so clear it
+     * before calling cpuid, so that there's no risk of picking random bits that
+     * erroneously indicate that absent CPU features are present.
+     * Also, GCC isn't smart enough to save the ebx PIC register on its own
+     * in this case, so do it by hand. Use edi to store ebx and pass the
+     * value returned in ebx from cpuid through edi. */
+    __asm__("xor %%ecx, %%ecx\n\t"
+            "mov %%ebx,%%edi\n\t"
+            "cpuid\n\t"
+            "xchgl %%ebx,%%edi\n\t"
+            : "=a"(*eax),
+              "=D"(*ebx), /* after the xchgl, edi holds cpuid's ebx and ebx is restored */
+              "=c"(*ecx),
+              "=d"(*edx)
+            : "0"(op)); /* "0": op shares the register of output 0 (eax) */
+}
+
+/* Try to toggle the processor flag bit(s) given in 'flag', then put the
+ * original flags back.  Returns the flag bits that actually changed, so a
+ * result of 0 means the processor ignored the attempt. */
+static unsigned long
+changeFlag(unsigned long flag)
+{
+    unsigned long changedFlags, originalFlags;
+    __asm__("pushfl\n\t" /* get the flags */
+            "popl %0\n\t"
+            "movl %0,%1\n\t" /* save the original flags */
+            "xorl %2,%0\n\t" /* flip the bit */
+            "pushl %0\n\t"   /* set the flags */
+            "popfl\n\t"
+            "pushfl\n\t" /* get the flags again (for return) */
+            "popl %0\n\t"
+            "pushl %1\n\t" /* restore the original flags */
+            "popfl\n\t"
+            : "=r"(changedFlags),
+              "=r"(originalFlags),
+              "=r"(flag)
+            : "2"(flag)); /* "2": flag is input and output operand 2 at once */
+    return changedFlags ^ originalFlags; /* only the bits that differ from the original */
+}
+
+#elif defined(_MSC_VER)
+
+/*
+ * windows versions of the above assembler; wcpuid emits the bytes 0F A2 (CPUID)
+ */
+#define wcpuid __asm __emit 0fh __asm __emit 0a2h
+void
+freebl_cpuid(unsigned long op, unsigned long *Reax,
+             unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
+{
+    unsigned long Leax, Lebx, Lecx, Ledx; /* locals written by the asm block, copied out afterwards */
+    __asm {
+        pushad
+        xor     ecx,ecx
+        mov     eax,op
+        wcpuid
+        mov     Leax,eax
+        mov     Lebx,ebx
+        mov     Lecx,ecx
+        mov     Ledx,edx
+        popad
+    }
+    *Reax = Leax;
+    *Rebx = Lebx;
+    *Recx = Lecx;
+    *Redx = Ledx;
+}
+
+static unsigned long
+changeFlag(unsigned long flag)
+{ /* Try to toggle the bit(s) in 'flag' in the processor flags; returns the bits that changed. */
+    unsigned long changedFlags, originalFlags;
+    __asm {
+        push eax
+        push ebx
+        pushfd /* get the flags */
+            pop  eax
+        push eax /* save the flags on the stack */
+            mov  originalFlags,eax /* save the original flags */
+        mov  ebx,flag
+            xor  eax,ebx /* flip the bit */
+        push eax /* set the flags */
+            popfd
+        pushfd /* get the flags again (for return) */
+        pop  eax
+        popfd /* restore the original flags */
+        mov changedFlags,eax
+        pop ebx
+        pop eax
+    }
+    return changedFlags ^ originalFlags; /* only the bits that differ from the original */
+}
+#endif
+
+#endif
+
+#if !defined(AMD_64)
+#define AC_FLAG 0x40000
+#define ID_FLAG 0x200000
+
+/* Non-zero when AC_FLAG cannot be toggled; per Intel AP Note AP-485 that marks a 386. */
+static int
+is386(void)
+{
+    return changeFlag(AC_FLAG) == 0; /* changeFlag yields the bits that actually flipped */
+}
+
+/* Non-zero when ID_FLAG cannot be toggled; per Intel AP Note AP-485 that marks a 486. */
+static int
+is486(void)
+{
+    return changeFlag(ID_FLAG) == 0; /* changeFlag yields the bits that actually flipped */
+}
+#endif
+
+/*
+ * table for Intel Cache.
+ * See Intel Application Note AP-485 for more information
+ */
+
+typedef unsigned char CacheTypeEntry; /* one-byte storage for a CacheType value */
+
+typedef enum { /* kind of structure a table row describes */
+    Cache_NONE = 0,
+    Cache_UNKNOWN = 1,
+    Cache_TLB = 2,
+    Cache_TLBi = 3,
+    Cache_TLBd = 4,
+    Cache_Trace = 5,
+    Cache_L1 = 6,
+    Cache_L1i = 7,
+    Cache_L1d = 8,
+    Cache_L2 = 9,
+    Cache_L2i = 10,
+    Cache_L2d = 11,
+    Cache_L3 = 12,
+    Cache_L3i = 13,
+    Cache_L3d = 14
+} CacheType;
+
+struct _cache { /* one table row */
+    CacheTypeEntry type; /* a CacheType value, stored in a byte */
+    unsigned char lineSize; /* presumably the line size in bytes; 0 on rows that give none */
+};
+static const struct _cache CacheMap[256] = { /* one row per byte value; the hex tag is the row index */
+    /* 00 */ { Cache_NONE, 0 },
+    /* 01 */ { Cache_TLBi, 0 },
+    /* 02 */ { Cache_TLBi, 0 },
+    /* 03 */ { Cache_TLBd, 0 },
+    /* 04 */ {
+        Cache_TLBd, /* lineSize is omitted here and therefore zero-initialized */
+    },
+    /* 05 */ { Cache_UNKNOWN, 0 },
+    /* 06 */ { Cache_L1i, 32 },
+    /* 07 */ { Cache_UNKNOWN, 0 },
+    /* 08 */ { Cache_L1i, 32 },
+    /* 09 */ { Cache_UNKNOWN, 0 },
+    /* 0a */ { Cache_L1d, 32 },
+    /* 0b */ { Cache_UNKNOWN, 0 },
+    /* 0c */ { Cache_L1d, 32 },
+    /* 0d */ { Cache_UNKNOWN, 0 },
+    /* 0e */ { Cache_UNKNOWN, 0 },
+    /* 0f */ { Cache_UNKNOWN, 0 },
+    /* 10 */ { Cache_UNKNOWN, 0 },
+    /* 11 */ { Cache_UNKNOWN, 0 },
+    /* 12 */ { Cache_UNKNOWN, 0 },
+    /* 13 */ { Cache_UNKNOWN, 0 },
+    /* 14 */ { Cache_UNKNOWN, 0 },
+    /* 15 */ { Cache_UNKNOWN, 0 },
+    /* 16 */ { Cache_UNKNOWN, 0 },
+    /* 17 */ { Cache_UNKNOWN, 0 },
+    /* 18 */ { Cache_UNKNOWN, 0 },
+    /* 19 */ { Cache_UNKNOWN, 0 },
+    /* 1a */ { Cache_UNKNOWN, 0 },
+    /* 1b */ { Cache_UNKNOWN, 0 },
+    /* 1c */ { Cache_UNKNOWN, 0 },
+    /* 1d */ { Cache_UNKNOWN, 0 },
+    /* 1e */ { Cache_UNKNOWN, 0 },
+    /* 1f */ { Cache_UNKNOWN, 0 },
+    /* 20 */ { Cache_UNKNOWN, 0 },
+    /* 21 */ { Cache_UNKNOWN, 0 },
+    /* 22 */ { Cache_L3, 64 },
+    /* 23 */ { Cache_L3, 64 },
+    /* 24 */ { Cache_UNKNOWN, 0 },
+    /* 25 */ { Cache_L3, 64 },
+    /* 26 */ { Cache_UNKNOWN, 0 },
+    /* 27 */ { Cache_UNKNOWN, 0 },
+    /* 28 */ { Cache_UNKNOWN, 0 },
+    /* 29 */ { Cache_L3, 64 },
+    /* 2a */ { Cache_UNKNOWN, 0 },
+    /* 2b */ { Cache_UNKNOWN, 0 },
+    /* 2c */ { Cache_L1d, 64 },
+    /* 2d */ { Cache_UNKNOWN, 0 },
+    /* 2e */ { Cache_UNKNOWN, 0 },
+    /* 2f */ { Cache_UNKNOWN, 0 },
+    /* 30 */ { Cache_L1i, 64 },
+    /* 31 */ { Cache_UNKNOWN, 0 },
+    /* 32 */ { Cache_UNKNOWN, 0 },
+    /* 33 */ { Cache_UNKNOWN, 0 },
+    /* 34 */ { Cache_UNKNOWN, 0 },
+    /* 35 */ { Cache_UNKNOWN, 0 },
+    /* 36 */ { Cache_UNKNOWN, 0 },
+    /* 37 */ { Cache_UNKNOWN, 0 },
+    /* 38 */ { Cache_UNKNOWN, 0 },
+    /* 39 */ { Cache_L2, 64 },
+    /* 3a */ { Cache_UNKNOWN, 0 },
+    /* 3b */ { Cache_L2, 64 },
+    /* 3c */ { Cache_L2, 64 },
+    /* 3d */ { Cache_UNKNOWN, 0 },
+    /* 3e */ { Cache_UNKNOWN, 0 },
+    /* 3f */ { Cache_UNKNOWN, 0 },
+    /* 40 */ { Cache_L2, 0 },
+    /* 41 */ { Cache_L2, 32 },
+    /* 42 */ { Cache_L2, 32 },
+    /* 43 */ { Cache_L2, 32 },
+    /* 44 */ { Cache_L2, 32 },
+    /* 45 */ { Cache_L2, 32 },
+    /* 46 */ { Cache_UNKNOWN, 0 },
+    /* 47 */ { Cache_UNKNOWN, 0 },
+    /* 48 */ { Cache_UNKNOWN, 0 },
+    /* 49 */ { Cache_UNKNOWN, 0 },
+    /* 4a */ { Cache_UNKNOWN, 0 },
+    /* 4b */ { Cache_UNKNOWN, 0 },
+    /* 4c */ { Cache_UNKNOWN, 0 },
+    /* 4d */ { Cache_UNKNOWN, 0 },
+    /* 4e */ { Cache_UNKNOWN, 0 },
+    /* 4f */ { Cache_UNKNOWN, 0 },
+    /* 50 */ { Cache_TLBi, 0 },
+    /* 51 */ { Cache_TLBi, 0 },
+    /* 52 */ { Cache_TLBi, 0 },
+    /* 53 */ { Cache_UNKNOWN, 0 },
+    /* 54 */ { Cache_UNKNOWN, 0 },
+    /* 55 */ { Cache_UNKNOWN, 0 },
+    /* 56 */ { Cache_UNKNOWN, 0 },
+    /* 57 */ { Cache_UNKNOWN, 0 },
+    /* 58 */ { Cache_UNKNOWN, 0 },
+    /* 59 */ { Cache_UNKNOWN, 0 },
+    /* 5a */ { Cache_UNKNOWN, 0 },
+    /* 5b */ { Cache_TLBd, 0 },
+    /* 5c */ { Cache_TLBd, 0 },
+    /* 5d */ { Cache_TLBd, 0 },
+    /* 5e */ { Cache_UNKNOWN, 0 },
+    /* 5f */ { Cache_UNKNOWN, 0 },
+    /* 60 */ { Cache_UNKNOWN, 0 },
+    /* 61 */ { Cache_UNKNOWN, 0 },
+    /* 62 */ { Cache_UNKNOWN, 0 },
+    /* 63 */ { Cache_UNKNOWN, 0 },
+    /* 64 */ { Cache_UNKNOWN, 0 },
+    /* 65 */ { Cache_UNKNOWN, 0 },
+    /* 66 */ { Cache_L1d, 64 },
+    /* 67 */ { Cache_L1d, 64 },
+    /* 68 */ { Cache_L1d, 64 },
+    /* 69 */ { Cache_UNKNOWN, 0 },
+    /* 6a */ { Cache_UNKNOWN, 0 },
+    /* 6b */ { Cache_UNKNOWN, 0 },
+    /* 6c */ { Cache_UNKNOWN, 0 },
+    /* 6d */ { Cache_UNKNOWN, 0 },
+    /* 6e */ { Cache_UNKNOWN, 0 },
+    /* 6f */ { Cache_UNKNOWN, 0 },
+    /* 70 */ { Cache_Trace, 1 },
+    /* 71 */ { Cache_Trace, 1 },
+    /* 72 */ { Cache_Trace, 1 },
+    /* 73 */ { Cache_UNKNOWN, 0 },
+    /* 74 */ { Cache_UNKNOWN, 0 },
+    /* 75 */ { Cache_UNKNOWN, 0 },
+    /* 76 */ { Cache_UNKNOWN, 0 },
+    /* 77 */ { Cache_UNKNOWN, 0 },
+    /* 78 */ { Cache_UNKNOWN, 0 },
+    /* 79 */ { Cache_L2, 64 },
+    /* 7a */ { Cache_L2, 64 },
+    /* 7b */ { Cache_L2, 64 },
+    /* 7c */ { Cache_L2, 64 },
+    /* 7d */ { Cache_UNKNOWN, 0 },
+    /* 7e */ { Cache_UNKNOWN, 0 },
+    /* 7f */ { Cache_UNKNOWN, 0 },
+    /* 80 */ { Cache_UNKNOWN, 0 },
+    /* 81 */ { Cache_UNKNOWN, 0 },
+    /* 82 */ { Cache_L2, 32 },
+    /* 83 */ { Cache_L2, 32 },
+    /* 84 */ { Cache_L2, 32 },
+    /* 85 */ { Cache_L2, 32 },
+    /* 86 */ { Cache_L2, 64 },
+    /* 87 */ { Cache_L2, 64 },
+    /* 88 */ { Cache_UNKNOWN, 0 },
+    /* 89 */ { Cache_UNKNOWN, 0 },
+    /* 8a */ { Cache_UNKNOWN, 0 },
+    /* 8b */ { Cache_UNKNOWN, 0 },
+    /* 8c */ { Cache_UNKNOWN, 0 },
+    /* 8d */ { Cache_UNKNOWN, 0 },
+    /* 8e */ { Cache_UNKNOWN, 0 },
+    /* 8f */ { Cache_UNKNOWN, 0 },
+    /* 90 */ { Cache_UNKNOWN, 0 },
+    /* 91 */ { Cache_UNKNOWN, 0 },
+    /* 92 */ { Cache_UNKNOWN, 0 },
+    /* 93 */ { Cache_UNKNOWN, 0 },
+    /* 94 */ { Cache_UNKNOWN, 0 },
+    /* 95 */ { Cache_UNKNOWN, 0 },
+    /* 96 */ { Cache_UNKNOWN, 0 },
+    /* 97 */ { Cache_UNKNOWN, 0 },
+    /* 98 */ { Cache_UNKNOWN, 0 },
+    /* 99 */ { Cache_UNKNOWN, 0 },
+    /* 9a */ { Cache_UNKNOWN, 0 },
+    /* 9b */ { Cache_UNKNOWN, 0 },
+    /* 9c */ { Cache_UNKNOWN, 0 },
+    /* 9d */ { Cache_UNKNOWN, 0 },
+    /* 9e */ { Cache_UNKNOWN, 0 },
+    /* 9f */ { Cache_UNKNOWN, 0 },
+    /* a0 */ { Cache_UNKNOWN, 0 },
+    /* a1 */ { Cache_UNKNOWN, 0 },
+    /* a2 */ { Cache_UNKNOWN, 0 },
+    /* a3 */ { Cache_UNKNOWN, 0 },
+    /* a4 */ { Cache_UNKNOWN, 0 },
+    /* a5 */ { Cache_UNKNOWN, 0 },
+    /* a6 */ { Cache_UNKNOWN, 0 },
+    /* a7 */ { Cache_UNKNOWN, 0 },
+    /* a8 */ { Cache_UNKNOWN, 0 },
+    /* a9 */ { Cache_UNKNOWN, 0 },
+    /* aa */ { Cache_UNKNOWN, 0 },
+    /* ab */ { Cache_UNKNOWN, 0 },
+    /* ac */ { Cache_UNKNOWN, 0 },
+    /* ad */ { Cache_UNKNOWN, 0 },
+    /* ae */ { Cache_UNKNOWN, 0 },
+    /* af */ { Cache_UNKNOWN, 0 },
+    /* b0 */ { Cache_TLBi, 0 },
+    /* b1 */ { Cache_UNKNOWN, 0 },
+    /* b2 */ { Cache_UNKNOWN, 0 },
+    /* b3 */ { Cache_TLBd, 0 },
+    /* b4 */ { Cache_UNKNOWN, 0 },
+    /* b5 */ { Cache_UNKNOWN, 0 },
+    /* b6 */ { Cache_UNKNOWN, 0 },
+    /* b7 */ { Cache_UNKNOWN, 0 },
+    /* b8 */ { Cache_UNKNOWN, 0 },
+    /* b9 */ { Cache_UNKNOWN, 0 },
+    /* ba */ { Cache_UNKNOWN, 0 },
+    /* bb */ { Cache_UNKNOWN, 0 },
+    /* bc */ { Cache_UNKNOWN, 0 },
+    /* bd */ { Cache_UNKNOWN, 0 },
+    /* be */ { Cache_UNKNOWN, 0 },
+    /* bf */ { Cache_UNKNOWN, 0 },
+    /* c0 */ { Cache_UNKNOWN, 0 },
+    /* c1 */ { Cache_UNKNOWN, 0 },
+    /* c2 */ { Cache_UNKNOWN, 0 },
+    /* c3 */ { Cache_UNKNOWN, 0 },
+    /* c4 */ { Cache_UNKNOWN, 0 },
+    /* c5 */ { Cache_UNKNOWN, 0 },
+    /* c6 */ { Cache_UNKNOWN, 0 },
+    /* c7 */ { Cache_UNKNOWN, 0 },
+    /* c8 */ { Cache_UNKNOWN, 0 },
+    /* c9 */ { Cache_UNKNOWN, 0 },
+    /* ca */ { Cache_UNKNOWN, 0 },
+    /* cb */ { Cache_UNKNOWN, 0 },
+    /* cc */ { Cache_UNKNOWN, 0 },
+    /* cd */ { Cache_UNKNOWN, 0 },
+    /* ce */ { Cache_UNKNOWN, 0 },
+    /* cf */ { Cache_UNKNOWN, 0 },
+    /* d0 */ { Cache_UNKNOWN, 0 },
+    /* d1 */ { Cache_UNKNOWN, 0 },
+    /* d2 */ { Cache_UNKNOWN, 0 },
+    /* d3 */ { Cache_UNKNOWN, 0 },
+    /* d4 */ { Cache_UNKNOWN, 0 },
+    /* d5 */ { Cache_UNKNOWN, 0 },
+    /* d6 */ { Cache_UNKNOWN, 0 },
+    /* d7 */ { Cache_UNKNOWN, 0 },
+    /* d8 */ { Cache_UNKNOWN, 0 },
+    /* d9 */ { Cache_UNKNOWN, 0 },
+    /* da */ { Cache_UNKNOWN, 0 },
+    /* db */ { Cache_UNKNOWN, 0 },
+    /* dc */ { Cache_UNKNOWN, 0 },
+    /* dd */ { Cache_UNKNOWN, 0 },
+    /* de */ { Cache_UNKNOWN, 0 },
+    /* df */ { Cache_UNKNOWN, 0 },
+    /* e0 */ { Cache_UNKNOWN, 0 },
+    /* e1 */ { Cache_UNKNOWN, 0 },
+    /* e2 */ { Cache_UNKNOWN, 0 },
+    /* e3 */ { Cache_UNKNOWN, 0 },
+    /* e4 */ { Cache_UNKNOWN, 0 },
+    /* e5 */ { Cache_UNKNOWN, 0 },
+    /* e6 */ { Cache_UNKNOWN, 0 },
+    /* e7 */ { Cache_UNKNOWN, 0 },
+    /* e8 */ { Cache_UNKNOWN, 0 },
+    /* e9 */ { Cache_UNKNOWN, 0 },
+    /* ea */ { Cache_UNKNOWN, 0 },
+    /* eb */ { Cache_UNKNOWN, 0 },
+    /* ec */ { Cache_UNKNOWN, 0 },
+    /* ed */ { Cache_UNKNOWN, 0 },
+    /* ee */ { Cache_UNKNOWN, 0 },
+    /* ef */ { Cache_UNKNOWN, 0 },
+    /* f0 */ { Cache_UNKNOWN, 0 },
+    /* f1 */ { Cache_UNKNOWN, 0 },
+    /* f2 */ { Cache_UNKNOWN, 0 },
+    /* f3 */ { Cache_UNKNOWN, 0 },
+    /* f4 */ { Cache_UNKNOWN, 0 },
+    /* f5 */ { Cache_UNKNOWN, 0 },
+    /* f6 */ { Cache_UNKNOWN, 0 },
+    /* f7 */ { Cache_UNKNOWN, 0 },
+    /* f8 */ { Cache_UNKNOWN, 0 },
+    /* f9 */ { Cache_UNKNOWN, 0 },
+    /* fa */ { Cache_UNKNOWN, 0 },
+    /* fb */ { Cache_UNKNOWN, 0 },
+    /* fc */ { Cache_UNKNOWN, 0 },
+    /* fd */ { Cache_UNKNOWN, 0 },
+    /* fe */ { Cache_UNKNOWN, 0 },
+    /* ff */ { Cache_UNKNOWN, 0 }
+};
+
+/*
+ * Fold one cpuid(2) descriptor byte into the running result: '*level' is the
+ * lowest cache level recorded so far, '*lineSize' that cache's line size.
+ */
+static void
+getIntelCacheEntryLineSize(unsigned long val, int *level,
+                           unsigned long *lineSize)
+{
+    const CacheType kind = CacheMap[val].type;
+    const unsigned long bytes = CacheMap[val].lineSize;
+    int found = 0;
+
+    /* Entries without a line size carry nothing usable. That also drops
+     * val = 0x40, the special "no L2 or L3 cache" value, which would
+     * otherwise have to be rejected explicitly. */
+    if (bytes == 0) {
+        return;
+    }
+    /* Only the Cache_Ln and Cache_Lnd (data) types are of interest. An L2
+     * or L3 entry is skipped once a lower level has been recorded. */
+    if (kind == Cache_L1 || kind == Cache_L1d) {
+        found = 1;
+    } else if (*level >= 2 && (kind == Cache_L2 || kind == Cache_L2d)) {
+        found = 2;
+    } else if (*level >= 3 && (kind == Cache_L3 || kind == Cache_L3d)) {
+        found = 3;
+    }
+    if (found != 0) {
+        *level = found;
+        *lineSize = bytes;
+    }
+}
+
+static void
+getIntelRegisterCacheLineSize(unsigned long val,
+                              int *level, unsigned long *lineSize)
+{
+    int shift; /* walk the four descriptor bytes, most significant first */
+    for (shift = 24; shift >= 0; shift -= 8) {
+        getIntelCacheEntryLineSize((val >> shift) & 0xff, level, lineSize);
+    }
+}
+
+/*
+ * Line size of the lowest level data cache that cpuid command '2' reports.
+ * returns '0' if no recognized cache is found, or if the cache
+ * information is not supported by this processor (cpuidLevel < 2)
+ */
+static unsigned long
+getIntelCacheLineSize(int cpuidLevel)
+{
+    unsigned long regs[4]; /* eax, ebx, ecx, edx as filled by freebl_cpuid */
+    unsigned long lineSize = 0;
+    int level = 4; /* higher than any level the table lookup can record */
+    int passes, pass, r;
+
+    if (cpuidLevel < 2) {
+        return 0;
+    }
+
+    /* Command '2' of cpuid is Intel's cache info call. Every byte of the
+     * four result registers is a potential cache descriptor, and CacheMap
+     * maps a descriptor to a processor cache. A register holds valid
+     * descriptors only when its high bit is zero. The low byte of eax is
+     * not a descriptor: it is the count of cpuid '2' calls needed to see
+     * every descriptor (only its low four bits are used here), so it is
+     * masked off before eax is decoded. Valid registers are handed to
+     * getIntelRegisterCacheLineSize(), which splits them into descriptors;
+     * the line size of the lowest level data cache found is returned. */
+    freebl_cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
+    passes = regs[0] & 0xf;
+    pass = 0;
+    while (pass < passes) {
+        for (r = 0; r < 4; r++) {
+            unsigned long descriptors = regs[r];
+            if ((descriptors & 0x80000000) != 0) {
+                continue; /* high bit set: no valid descriptors here */
+            }
+            if (r == 0) {
+                descriptors &= 0xffffff00; /* drop the count byte of eax */
+            }
+            getIntelRegisterCacheLineSize(descriptors, &level, &lineSize);
+        }
+        pass++;
+        if (pass != passes) {
+            freebl_cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
+        }
+    }
+    return lineSize;
+}
+
+/*
+ * returns '0' if the cache info is not supported by this processor.
+ * This is based on the AMD extended cache commands for cpuid.
+ * (see "AMD Processor Recognition Application Note" Publication 20734).
+ * Some other processors use the identical scheme.
+ * (see "Processor Recognition, Transmeta Corporation").
+ */
+static unsigned long
+getOtherCacheLineSize(unsigned long cpuidLevel)
+{
+    unsigned long a, b, c, d; /* eax, ebx, ecx, edx from freebl_cpuid */
+
+    /* The level passed in is not consulted: it is overwritten with the
+     * Extended CPUID level, which command 0x80000000 returns in eax. */
+    freebl_cpuid(0x80000000, &a, &b, &c, &d);
+    cpuidLevel = a;
+    if (cpuidLevel < 0x80000005) {
+        return 0; /* extended cache command not available */
+    }
+    /* command 0x80000005: low byte of ecx is the L1 Data Cache line size */
+    freebl_cpuid(0x80000005, &a, &b, &c, &d);
+    return c & 0xff;
+}
+
+static const char *const manMap[] = { /* each macro is the index of the string below it */
+#define INTEL 0
+    "GenuineIntel",
+#define AMD 1
+    "AuthenticAMD",
+#define CYRIX 2
+    "CyrixInstead",
+#define CENTAUR 3
+    "CentaurHauls",
+#define NEXGEN 4
+    "NexGenDriven",
+#define TRANSMETA 5
+    "GenuineTMx86",
+#define RISE 6
+    "RiseRiseRise",
+#define UMC 7
+    "UMC UMC UMC ",
+#define SIS 8
+    "Sis Sis Sis ",
+#define NATIONAL 9
+    "Geode by NSC",
+};
+/* number of vendor strings in manMap */
+static const int n_manufacturers = sizeof(manMap) / sizeof(manMap[0]);
+/* "no match" marker: one past the last manMap index so it names no vendor */
+#define MAN_UNKNOWN 10
+
+#if !defined(AMD_64)
+#define SSE2_FLAG (1 << 26)
+/* Returns 1 when cpuid command 1 reports SSE2_FLAG in edx, otherwise 0. */
+unsigned long
+s_mpi_is_sse2()
+{
+    unsigned long maxLeaf, features, rb, rc, rd;
+
+    if (is386() || is486()) {
+        return 0;
+    }
+    /* command 0 leaves the cpuid level in eax; a level of 0 means there
+     * is no command 1 to query, so the processor has no SSE2 extensions */
+    freebl_cpuid(0, &maxLeaf, &rb, &rc, &rd);
+    if (maxLeaf != 0) {
+        freebl_cpuid(1, &maxLeaf, &rb, &rc, &features);
+        return (features & SSE2_FLAG) != 0;
+    }
+    return 0;
+}
+#endif
+
+/* Cache line size in bytes for x86: 0 on a 386, otherwise the size that
+ * cpuid reports for the lowest-level data cache, or 32 if it reports none. */
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+    unsigned long eax, ebx, ecx, edx;
+    PRUint32 cpuid[3];
+    unsigned long cpuidLevel;
+    unsigned long cacheLineSize = 0;
+    int manufacturer = MAN_UNKNOWN;
+    int i;
+    char string[13];
+
+#if !defined(AMD_64)
+    if (is386()) {
+        return 0; /* 386 had no cache */
+    }
+    if (is486()) {
+        return 32; /* really? need more info */
+    }
+#endif
+
+    /* Pentium, cpuid command is available */
+    freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
+    cpuidLevel = eax;
+    /* The twelve character ASCII manufacturer ID is returned in ebx, edx,
+     * ecx -- in that order -- so edx has to be copied ahead of ecx to
+     * rebuild strings such as "GenuineIntel". */
+    cpuid[0] = ebx;
+    cpuid[1] = edx;
+    cpuid[2] = ecx;
+    memcpy(string, cpuid, sizeof(cpuid));
+    string[12] = 0;
+
+    for (i = 0; i < n_manufacturers; i++) {
+        if (strcmp(manMap[i], string) == 0) {
+            manufacturer = i;
+        }
+    }
+
+    if (manufacturer == INTEL) {
+        cacheLineSize = getIntelCacheLineSize(cpuidLevel);
+    } else {
+        cacheLineSize = getOtherCacheLineSize(cpuidLevel);
+    }
+    /* doesn't support cache info based on cpuid. This means
+     * an old pentium class processor, which have cache lines of
+     * 32. If we learn differently, we can use a switch based on
+     * the Manufacturer id  */
+    if (cacheLineSize == 0) {
+        cacheLineSize = 32;
+    }
+    return cacheLineSize;
+}
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
+#endif
+
+#if defined(__ppc64__)
+/*
+ *  Sigh, The PPC has some really nice features to help us determine cache
+ *  size, since it had lots of direct control functions to do so. The POWER
+ *  processor even has an instruction to do this, but it was dropped in
+ *  PowerPC. Unfortunately most of them are not available in user mode.
+ *
+ *  The dcbz function would be a great way to determine cache line size except
+ *  1) it only works on write-back memory (it throws an exception otherwise),
+ *  and 2) because so many mac programs 'knew' the processor cache size was
+ *  32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new
+ *  G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep
+ *  these programs happy. dcbzl works if 64-bit instructions are supported.
+ *  If you know 64 bit instructions are supported, and that stack is
+ *  write-back, you can use this code.
+ */
+#include "memory.h"
+
+/* clear the cache line that contains 'array' (issues one dcbzl instruction) */
+static inline void
+dcbzl(char *array)
+{
+    register char *a asm("r2") = array; /* keep the address in register r2 */
+    __asm__ __volatile__("dcbzl %0,r0"
+                         : "=r"(a)  /* operand 0: the register holding 'a' */
+                         : "0"(a)); /* input shares operand 0's register */
+}
+
+#define PPC_DO_ALIGN(x, y) ((char *)((((long long)(x)) + ((y)-1)) & ~((y)-1)))
+
+#define PPC_MAX_LINE_SIZE 256
+/* Measure the cache block size: zero one block with dcbzl, then see how
+ * many bytes were cleared. Returns 0 if no candidate size was cleared. */
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+    char scratch[2 * PPC_MAX_LINE_SIZE + 1];
+    char *probe = PPC_DO_ALIGN(scratch, PPC_MAX_LINE_SIZE);
+    int span = PPC_MAX_LINE_SIZE;
+
+    /* 'probe' sits on a maximum line size boundary, so the clear starts at
+     * its first byte; fill with 1 bits, then zero one cache block */
+    memset(probe, 0xff, PPC_MAX_LINE_SIZE);
+    dcbzl(probe);
+
+    /* try candidate sizes from the maximum downward, halving each time;
+     * the first one whose last byte was cleared is the block size */
+    while (span != 0) {
+        if (probe[span - 1] == 0) {
+            return span;
+        }
+        span /= 2;
+    }
+    return 0;
+}
+
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
+#endif
+
+/*
+ * put other processor and platform specific cache code here
+ * return the smallest cache line size in bytes on the processor
+ * (usually the L1 cache). If the OS has a call, this would be
+ * a great place to put it.
+ *
+ * If there is no cache, return 0;
+ *
+ * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
+ * below aren't compiled.
+ *
+ */
+
+/* target.mk can define MPI_CACHE_LINE_SIZE if it's common for the family or
+ * OS */
+#if defined(MPI_CACHE_LINE_SIZE) && !defined(MPI_GET_PROCESSOR_LINE_SIZE_DEFINED)
+/* Build-time override: report MPI_CACHE_LINE_SIZE without probing the CPU. */
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+    return MPI_CACHE_LINE_SIZE;
+}
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 /* disables the 32 byte default below */
+#endif
+
+/* Last resort: no section above defined MPI_GET_PROCESSOR_LINE_SIZE_DEFINED,
+ * so assume 32 bytes (most common value, does not significantly impact
+ * performance). */
+#ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+    return 32; /* assumed cache line size in bytes */
+}
+#endif
+
+#ifdef TEST_IT /* standalone smoke test: prints the detected line size */
+#include <stdio.h>
+int main(void)
+{
+    printf("line size = %lu\n", s_mpi_getProcessorLineSize()); /* %lu: unsigned long */
+    return 0;
+}
+#endif
diff --git a/security/nss/lib/freebl/mpi/mpcpucache_amd64.s b/security/nss/lib/freebl/mpi/mpcpucache_amd64.s
new file mode 100644
index 000000000..d493b4762
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpcpucache_amd64.s
@@ -0,0 +1,861 @@
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+	.file	"mpcpucache.c"
+/	.section	.rodata.str1.1,"aMS",@progbits,1
+	.section	.rodata
+.LC0:
+	.string	"GenuineIntel"
+.LC1:
+	.string	"AuthenticAMD"
+.LC2:
+	.string	"CyrixInstead"
+.LC3:
+	.string	"CentaurHauls"
+.LC4:
+	.string	"NexGenDriven"
+.LC5:
+	.string	"GenuineTMx86"
+.LC6:
+	.string	"RiseRiseRise"
+.LC7:
+	.string	"UMC UMC UMC "
+.LC8:
+	.string	"Sis Sis Sis "
+.LC9:
+	.string	"Geode by NSC"
+	.section	.data.rel.ro.local,"aw",@progbits
+	.align 32
+	.type	manMap, @object
+	.size	manMap, 80
+manMap:
+	.quad	.LC0
+	.quad	.LC1
+	.quad	.LC2
+	.quad	.LC3
+	.quad	.LC4
+	.quad	.LC5
+	.quad	.LC6
+	.quad	.LC7
+	.quad	.LC8
+	.quad	.LC9
+	.section	.rodata
+	.align 32
+	.type	CacheMap, @object
+	.size	CacheMap, 512
+CacheMap:
+	.byte	0
+	.byte	0
+	.byte	3
+	.byte	0
+	.byte	3
+	.byte	0
+	.byte	4
+	.byte	0
+	.byte	4
+	.zero	1
+	.byte	1
+	.byte	0
+	.byte	7
+	.byte	32
+	.byte	1
+	.byte	0
+	.byte	7
+	.byte	32
+	.byte	1
+	.byte	0
+	.byte	8
+	.byte	32
+	.byte	1
+	.byte	0
+	.byte	8
+	.byte	32
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	12
+	.byte	64
+	.byte	12
+	.byte	64
+	.byte	1
+	.byte	0
+	.byte	12
+	.byte	64
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	12
+	.byte	64
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	8
+	.byte	64
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	7
+	.byte	64
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	9
+	.byte	64
+	.byte	1
+	.byte	0
+	.byte	9
+	.byte	64
+	.byte	9
+	.byte	64
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	9
+	.byte	0
+	.byte	9
+	.byte	32
+	.byte	9
+	.byte	32
+	.byte	9
+	.byte	32
+	.byte	9
+	.byte	32
+	.byte	9
+	.byte	32
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	3
+	.byte	0
+	.byte	3
+	.byte	0
+	.byte	3
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	4
+	.byte	0
+	.byte	4
+	.byte	0
+	.byte	4
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	8
+	.byte	64
+	.byte	8
+	.byte	64
+	.byte	8
+	.byte	64
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	5
+	.byte	1
+	.byte	5
+	.byte	1
+	.byte	5
+	.byte	1
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	9
+	.byte	64
+	.byte	9
+	.byte	64
+	.byte	9
+	.byte	64
+	.byte	9
+	.byte	64
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	9
+	.byte	32
+	.byte	9
+	.byte	32
+	.byte	9
+	.byte	32
+	.byte	9
+	.byte	32
+	.byte	9
+	.byte	64
+	.byte	9
+	.byte	64
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	3
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	4
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.text
+	.align	16
+.globl freebl_cpuid
+	.type	freebl_cpuid, @function
+freebl_cpuid:	/ run cpuid for the function in arg 1; store eax, ebx, ecx, edx through args 2-5
+.LFB2:
+	movq	%rdx, %r10	/ cpuid overwrites rdx: keep the 3rd argument in r10
+	pushq	%rbx	/ cpuid overwrites rbx: saved here, restored by the popq below
+.LCFI0:
+	movq	%rcx, %r11	/ cpuid overwrites rcx: keep the 4th argument in r11
+	movq	%rdi, %rax	/ 1st argument selects the cpuid function
+/APP
+	cpuid
+	
+/NO_APP
+	movq	%rax, (%rsi)	/ eax result -> *(2nd argument)
+	movq	%rbx, (%r10)	/ ebx result -> *(3rd argument)
+	popq	%rbx
+	movq	%rcx, (%r11)	/ ecx result -> *(4th argument)
+	movq	%rdx, (%r8)	/ edx result -> *(5th argument)
+	ret
+.LFE2:
+	.size	freebl_cpuid, .-freebl_cpuid
+	.align	16
+	.type	getIntelCacheEntryLineSize, @function
+getIntelCacheEntryLineSize:	/ rdi = descriptor byte, rsi = &level, rdx = &lineSize
+.LFB3:
+	leaq	CacheMap(%rip), %r9
+	movq	%rdx, %r10	/ r10 = &lineSize (dl is used as scratch below)
+	movzbl	1(%r9,%rdi,2), %ecx	/ ecx = line size byte of the 2-byte CacheMap entry
+	movzbl	(%r9,%rdi,2), %r8d	/ r8d = type byte of the entry
+	testb	%cl, %cl
+	je	.L2	/ line size 0: ignore this descriptor
+	cmpl	$6, %r8d
+	sete	%dl
+	cmpl	$8, %r8d
+	sete	%al
+	orl	%edx, %eax
+	testb	$1, %al
+	je	.L4	/ type is neither 6 nor 8
+	movl	$1, (%rsi)	/ types 6 and 8: *level = 1
+.L9:
+	movzbl	%cl, %eax 
+	movq	%rax, (%r10)	/ *lineSize = line size from the table
+	ret
+	.align	16
+.L4:
+	movl	(%rsi), %r11d	/ r11d = current *level
+	cmpl	$1, %r11d
+	jg	.L11	/ *level >= 2: check for types 9 and 11
+.L6:
+	cmpl	$2, %r11d
+	jle	.L2	/ *level < 3: nothing more to check
+	cmpl	$12, %r8d
+	sete	%dl
+	cmpl	$14, %r8d
+	sete	%al
+	orl	%edx, %eax
+	testb	$1, %al
+	je	.L2
+	movzbq	1(%r9,%rdi,2), %rax
+	movl	$3, (%rsi)	/ types 12 and 14: *level = 3
+	movq	%rax, (%r10)
+	.align	16
+.L2:
+	rep ; ret
+	.align	16
+.L11:
+	cmpl	$9, %r8d
+	sete	%dl
+	cmpl	$11, %r8d
+	sete	%al
+	orl	%edx, %eax
+	testb	$1, %al
+	je	.L6
+	movl	$2, (%rsi)	/ types 9 and 11: *level = 2
+	jmp	.L9
+.LFE3:
+	.size	getIntelCacheEntryLineSize, .-getIntelCacheEntryLineSize
+	.align	16
+	.type	getIntelRegisterCacheLineSize, @function
+getIntelRegisterCacheLineSize:	/ rdi = register value, rsi = &level, rdx = &lineSize
+.LFB4:
+	pushq	%rbp
+.LCFI1:
+	movq	%rsp, %rbp
+.LCFI2:
+	movq	%rbx, -24(%rbp)
+.LCFI3:
+	movq	%rdi, %rbx	/ rbx keeps the full register value across the calls
+	shrq	$24, %rdi
+	movq	%r12, -16(%rbp)
+.LCFI4:
+	movq	%r13, -8(%rbp)
+.LCFI5:
+	andl	$255, %edi	/ edi = bits 24-31
+	subq	$24, %rsp
+.LCFI6:
+	movq	%rsi, %r13	/ r13 = &level
+	movq	%rdx, %r12	/ r12 = &lineSize
+	call	getIntelCacheEntryLineSize
+	movq	%rbx, %rdi
+	movq	%r12, %rdx
+	movq	%r13, %rsi
+	shrq	$16, %rdi
+	andl	$255, %edi	/ edi = bits 16-23
+	call	getIntelCacheEntryLineSize
+	movq	%rbx, %rdi
+	movq	%r12, %rdx
+	movq	%r13, %rsi
+	shrq	$8, %rdi
+	andl	$255, %ebx	/ ebx = bits 0-7, used by the final call
+	andl	$255, %edi	/ edi = bits 8-15
+	call	getIntelCacheEntryLineSize
+	movq	%r12, %rdx
+	movq	%r13, %rsi
+	movq	%rbx, %rdi
+	movq	8(%rsp), %r12
+	movq	(%rsp), %rbx
+	movq	16(%rsp), %r13
+	leave
+	jmp	getIntelCacheEntryLineSize	/ tail call for the low byte
+.LFE4:
+	.size	getIntelRegisterCacheLineSize, .-getIntelRegisterCacheLineSize
+	.align	16
+.globl s_mpi_getProcessorLineSize
+	.type	s_mpi_getProcessorLineSize, @function
+s_mpi_getProcessorLineSize:	/ returns the cache line size in rax (32 when cpuid yields none)
+.LFB7:
+	pushq	%rbp
+.LCFI7:
+	xorl	%edi, %edi	/ cpuid function 0
+	movq	%rsp, %rbp
+.LCFI8:
+	pushq	%r15
+.LCFI9:
+	leaq	-136(%rbp), %r8	/ -136(%rbp) receives edx
+	leaq	-144(%rbp), %rcx	/ -144(%rbp) receives ecx
+	leaq	-152(%rbp), %rdx	/ -152(%rbp) receives ebx
+	pushq	%r14
+.LCFI10:
+	leaq	-160(%rbp), %rsi	/ -160(%rbp) receives eax
+	leaq	-128(%rbp), %r14	/ r14 = vendor string buffer
+	pushq	%r13
+.LCFI11:
+	leaq	manMap(%rip), %r13
+	pushq	%r12
+.LCFI12:
+	movl	$9, %r12d	/ r12d = matched manMap index, initially 9
+	pushq	%rbx
+.LCFI13:
+	xorl	%ebx, %ebx	/ ebx = loop index into manMap
+	subq	$200, %rsp
+.LCFI14:
+	call	freebl_cpuid
+	movq	-152(%rbp), %rax
+	movq	-160(%rbp), %r15	/ r15 = eax from function 0
+	movb	$0, -116(%rbp)	/ terminate the 12 byte string
+	movl	%eax, -128(%rbp)	/ bytes 0-3 = ebx
+	movq	-136(%rbp), %rax
+	movl	%eax, -124(%rbp)	/ bytes 4-7 = edx
+	movq	-144(%rbp), %rax
+	movl	%eax, -120(%rbp)	/ bytes 8-11 = ecx
+	.align	16
+.L18:
+	movslq	%ebx,%rax
+	movq	%r14, %rsi
+	movq	(%r13,%rax,8), %rdi
+	call	strcmp@PLT
+	testl	%eax, %eax
+	cmove	%ebx, %r12d	/ equal strings: remember this index
+	incl	%ebx
+	cmpl	$9, %ebx
+	jle	.L18	/ compare against manMap entries 0 through 9
+	testl	%r12d, %r12d
+	jne	.L19	/ index 0 ("GenuineIntel") did not match
+	xorl	%eax, %eax
+	decl	%r15d	/ flags from this decrement are tested by the jle below
+	movl	$4, -204(%rbp)	/ level = 4
+	movq	$0, -200(%rbp)	/ lineSize = 0
+	jle	.L21	/ eax from function 0 was below 2: skip function 2
+	leaq	-168(%rbp), %r8
+	leaq	-176(%rbp), %rcx
+	leaq	-184(%rbp), %rdx
+	leaq	-192(%rbp), %rsi
+	movl	$2, %edi	/ cpuid function 2
+	xorl	%ebx, %ebx	/ ebx = pass counter
+	call	freebl_cpuid
+	movq	-192(%rbp), %rdi
+	movl	%edi, %r12d
+	andl	$15, %r12d	/ r12d = pass count, low four bits of eax
+	cmpl	%r12d, %ebx
+	jl	.L30
+	jmp	.L38
+	.align	16
+.L25:
+	movq	-184(%rbp), %rdi
+	testl	$2147483648, %edi 
+	je	.L40
+.L26:
+	movq	-176(%rbp), %rdi
+	testl	$2147483648, %edi 
+	je	.L41
+.L27:
+	movq	-168(%rbp), %rdi
+	testl	$2147483648, %edi 
+	je	.L42
+.L28:
+	incl	%ebx
+	cmpl	%r12d, %ebx
+	je	.L24	/ last pass done: no further cpuid call
+	leaq	-168(%rbp), %r8
+	leaq	-176(%rbp), %rcx
+	leaq	-184(%rbp), %rdx
+	leaq	-192(%rbp), %rsi
+	movl	$2, %edi
+	call	freebl_cpuid
+.L24:
+	cmpl	%r12d, %ebx
+	jge	.L38
+	movq	-192(%rbp), %rdi
+.L30:
+	testl	$2147483648, %edi 
+	jne	.L25	/ bit 31 of eax set: skip this register
+	leaq	-200(%rbp), %rdx
+	leaq	-204(%rbp), %rsi
+	andl	$4294967040, %edi	/ clear the low byte of eax before decoding
+	call	getIntelRegisterCacheLineSize
+	movq	-184(%rbp), %rdi
+	testl	$2147483648, %edi 
+	jne	.L26
+.L40:
+	leaq	-200(%rbp), %rdx
+	leaq	-204(%rbp), %rsi
+	call	getIntelRegisterCacheLineSize
+	movq	-176(%rbp), %rdi
+	testl	$2147483648, %edi 
+	jne	.L27
+.L41:
+	leaq	-200(%rbp), %rdx
+	leaq	-204(%rbp), %rsi
+	call	getIntelRegisterCacheLineSize
+	movq	-168(%rbp), %rdi
+	testl	$2147483648, %edi 
+	jne	.L28
+.L42:
+	leaq	-200(%rbp), %rdx
+	leaq	-204(%rbp), %rsi
+	call	getIntelRegisterCacheLineSize
+	jmp	.L28
+.L38:
+	movq	-200(%rbp), %rax	/ rax = lineSize collected above
+.L21:
+	movq	%rax, %rdx
+	movl	$32, %eax
+	testq	%rdx, %rdx
+	cmoveq	%rax, %rdx	/ a line size of 0 becomes 32
+	addq	$200, %rsp
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	leave
+	movq	%rdx, %rax
+	ret
+.L19:
+	leaq	-216(%rbp), %r8
+	leaq	-224(%rbp), %rcx
+	leaq	-232(%rbp), %rdx
+	leaq	-240(%rbp), %rsi
+	movl	$2147483648, %edi	/ cpuid function 0x80000000
+	xorl	%ebx, %ebx	/ rbx = line size, 0 so far
+	call	freebl_cpuid
+	movl	$2147483652, %eax
+	cmpq	%rax, -240(%rbp)
+	ja	.L43	/ returned eax is above 0x80000004
+.L32:
+	movq	%rbx, %rdx
+	movl	$32, %eax
+	testq	%rdx, %rdx
+	cmoveq	%rax, %rdx	/ a line size of 0 becomes 32
+	addq	$200, %rsp
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	leave
+	movq	%rdx, %rax
+	ret
+.L43:
+	leaq	-216(%rbp), %r8
+	leaq	-224(%rbp), %rcx
+	leaq	-232(%rbp), %rdx
+	leaq	-240(%rbp), %rsi
+	movl	$2147483653, %edi	/ cpuid function 0x80000005
+	call	freebl_cpuid
+	movzbq	-224(%rbp), %rbx	/ line size = low byte of the ecx result
+	jmp	.L32
+.LFE7:
+	.size	s_mpi_getProcessorLineSize, .-s_mpi_getProcessorLineSize
diff --git a/security/nss/lib/freebl/mpi/mpcpucache_x86.s b/security/nss/lib/freebl/mpi/mpcpucache_x86.s
new file mode 100644
index 000000000..af17ebcb4
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpcpucache_x86.s
@@ -0,0 +1,902 @@
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+	.file	"mpcpucache.c"
+/	.section	.rodata.str1.1,"aMS",@progbits,1
+	.section	.rodata
+.LC0:
+	.string	"GenuineIntel"
+.LC1:
+	.string	"AuthenticAMD"
+.LC2:
+	.string	"CyrixInstead"
+.LC3:
+	.string	"CentaurHauls"
+.LC4:
+	.string	"NexGenDriven"
+.LC5:
+	.string	"GenuineTMx86"
+.LC6:
+	.string	"RiseRiseRise"
+.LC7:
+	.string	"UMC UMC UMC "
+.LC8:
+	.string	"Sis Sis Sis "
+.LC9:
+	.string	"Geode by NSC"
+	.section	.data.rel.ro.local,"aw",@progbits
+	.align 32
+	.type	manMap, @object
+	.size	manMap, 40
+manMap:
+	.long	.LC0
+	.long	.LC1
+	.long	.LC2
+	.long	.LC3
+	.long	.LC4
+	.long	.LC5
+	.long	.LC6
+	.long	.LC7
+	.long	.LC8
+	.long	.LC9
+	.section	.rodata
+	.align 32
+	.type	CacheMap, @object
+	.size	CacheMap, 512
+CacheMap:
+	.byte	0
+	.byte	0
+	.byte	3
+	.byte	0
+	.byte	3
+	.byte	0
+	.byte	4
+	.byte	0
+	.byte	4
+	.zero	1
+	.byte	1
+	.byte	0
+	.byte	7
+	.byte	32
+	.byte	1
+	.byte	0
+	.byte	7
+	.byte	32
+	.byte	1
+	.byte	0
+	.byte	8
+	.byte	32
+	.byte	1
+	.byte	0
+	.byte	8
+	.byte	32
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	12
+	.byte	64
+	.byte	12
+	.byte	64
+	.byte	1
+	.byte	0
+	.byte	12
+	.byte	64
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	12
+	.byte	64
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	8
+	.byte	64
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	7
+	.byte	64
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	9
+	.byte	64
+	.byte	1
+	.byte	0
+	.byte	9
+	.byte	64
+	.byte	9
+	.byte	64
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	9
+	.byte	0
+	.byte	9
+	.byte	32
+	.byte	9
+	.byte	32
+	.byte	9
+	.byte	32
+	.byte	9
+	.byte	32
+	.byte	9
+	.byte	32
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	3
+	.byte	0
+	.byte	3
+	.byte	0
+	.byte	3
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	4
+	.byte	0
+	.byte	4
+	.byte	0
+	.byte	4
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	8
+	.byte	64
+	.byte	8
+	.byte	64
+	.byte	8
+	.byte	64
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	5
+	.byte	1
+	.byte	5
+	.byte	1
+	.byte	5
+	.byte	1
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	9
+	.byte	64
+	.byte	9
+	.byte	64
+	.byte	9
+	.byte	64
+	.byte	9
+	.byte	64
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	9
+	.byte	32
+	.byte	9
+	.byte	32
+	.byte	9
+	.byte	32
+	.byte	9
+	.byte	32
+	.byte	9
+	.byte	64
+	.byte	9
+	.byte	64
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	3
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	4
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	1
+	.byte	0
+	.text
+	.align	4
+.globl freebl_cpuid
+	.type	freebl_cpuid, @function
+freebl_cpuid:	/ eax = cpuid function, edx = pointer for the eax result; 3 more pointers on the stack
+	pushl	%ebp
+	pushl	%edi
+	pushl	%esi
+	subl	$8, %esp
+	movl	%edx, %ebp	/ cpuid overwrites edx: keep that pointer in ebp
+/APP
+	pushl %ebx
+	xorl %ecx, %ecx
+	cpuid
+	mov %ebx,%esi
+	popl %ebx
+	
+/NO_APP
+	movl	%eax, (%ebp)	/ store the eax result
+	movl	24(%esp), %eax
+	movl	%esi, (%eax)	/ store the ebx result (copied to esi above)
+	movl	28(%esp), %eax
+	movl	%ecx, (%eax)	/ store the ecx result
+	movl	32(%esp), %eax
+	movl	%edx, (%eax)	/ store the edx result
+	addl	$8, %esp
+	popl	%esi
+	popl	%edi
+	popl	%ebp
+	ret
+	.size	freebl_cpuid, .-freebl_cpuid
+	.align	4
+	.type	changeFlag, @function
+changeFlag:	/ eax = flag mask; returns the mask bits that could actually be toggled
+/APP
+	pushfl
+	popl %edx
+	movl %edx,%ecx
+	xorl %eax,%edx
+	pushl %edx
+	popfl
+	pushfl
+	popl %edx
+	pushl %ecx
+	popfl
+	
+/NO_APP
+	xorl	%ecx, %edx	/ edx = bits that differ between original and re-read flags
+	movl	%edx, %eax
+	ret
+	.size	changeFlag, .-changeFlag
+	.align	4
+	.type	getIntelCacheEntryLineSize, @function
+getIntelCacheEntryLineSize:	/ eax = descriptor byte, edx = &level, stack argument = &lineSize
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebx
+	call	.L17
+.L17:
+	popl	%ebx
+	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L17], %ebx
+	movzbl	CacheMap@GOTOFF(%ebx,%eax,2), %ecx	/ ecx = type byte of the 2-byte entry
+	movb	1+CacheMap@GOTOFF(%ebx,%eax,2), %al	/ al = line size byte of the entry
+	testb	%al, %al
+	movl	16(%esp), %edi	/ edi = &lineSize
+	je	.L3	/ line size 0: ignore this descriptor
+	cmpl	$6, %ecx
+	je	.L6
+	cmpl	$8, %ecx
+	je	.L6
+	movl	(%edx), %esi	/ esi = current *level
+	cmpl	$1, %esi
+	jg	.L15	/ *level >= 2: check for types 9 and 11
+.L8:
+	cmpl	$2, %esi
+	jle	.L3	/ *level < 3: nothing more to check
+	cmpl	$12, %ecx
+	je	.L12
+	cmpl	$14, %ecx
+	je	.L12
+	.align	4
+.L3:
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+	.align	4
+.L6:
+	movzbl	%al, %eax
+	movl	$1, (%edx)	/ types 6 and 8: *level = 1
+	movl	%eax, (%edi)
+.L16:
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+	.align	4
+.L15:
+	cmpl	$9, %ecx
+	je	.L9
+	cmpl	$11, %ecx
+	jne	.L8
+.L9:
+	movzbl	%al, %eax
+	movl	$2, (%edx)	/ types 9 and 11: *level = 2
+	movl	%eax, (%edi)
+	jmp	.L16
+.L12:
+	movzbl	%al, %eax
+	movl	$3, (%edx)	/ types 12 and 14: *level = 3
+	movl	%eax, (%edi)
+	jmp	.L16
+	.size	getIntelCacheEntryLineSize, .-getIntelCacheEntryLineSize
+	.align	4
+	.type	getIntelRegisterCacheLineSize, @function
+getIntelRegisterCacheLineSize:	/ eax = register value, edx = &level, stack argument = &lineSize
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%edi
+	pushl	%esi
+	pushl	%ecx
+	movl	8(%ebp), %edi	/ edi = &lineSize
+	movl	%eax, %esi	/ esi keeps the full register value across the calls
+	movl	%edx, -12(%ebp)	/ -12(%ebp) = &level
+	shrl	$24, %eax	/ eax = bits 24-31
+	pushl	%edi
+	call	getIntelCacheEntryLineSize
+	movl	%esi, %eax
+	pushl	%edi
+	shrl	$16, %eax
+	movl	-12(%ebp), %edx
+	andl	$255, %eax	/ eax = bits 16-23
+	call	getIntelCacheEntryLineSize
+	pushl	%edi
+	movl	%esi, %edx
+	movzbl	%dh, %eax	/ eax = bits 8-15
+	movl	-12(%ebp), %edx
+	call	getIntelCacheEntryLineSize
+	andl	$255, %esi	/ esi = bits 0-7
+	movl	%edi, 8(%ebp)
+	movl	-12(%ebp), %edx
+	addl	$12, %esp
+	leal	-8(%ebp), %esp
+	movl	%esi, %eax
+	popl	%esi
+	popl	%edi
+	leave
+	jmp	getIntelCacheEntryLineSize	/ tail call for the low byte
+	.size	getIntelRegisterCacheLineSize, .-getIntelRegisterCacheLineSize
+	.align	4
+.globl s_mpi_getProcessorLineSize
+	.type	s_mpi_getProcessorLineSize, @function
+s_mpi_getProcessorLineSize:	/ returns the cache line size in eax
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebx
+	subl	$188, %esp
+	call	.L52
+.L52:
+	popl	%ebx
+	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L52], %ebx
+	movl	$9, -168(%ebp)	/ matched manMap index, initially 9
+	movl	$262144, %eax	/ flag mask 0x40000 (bit 18)
+	call	changeFlag
+	xorl	%edx, %edx
+	testl	%eax, %eax
+	jne	.L50	/ the bit could be toggled: keep probing
+.L19:
+	leal	-12(%ebp), %esp
+	popl	%ebx
+	popl	%esi
+	movl	%edx, %eax	/ early return value: 0 or 32, set by the caller of this label
+	popl	%edi
+	leave
+	ret
+	.align	4
+.L50:
+	movl	$2097152, %eax	/ flag mask 0x200000 (bit 21)
+	call	changeFlag
+	testl	%eax, %eax
+	movl	$32, %edx
+	je	.L19	/ the bit could not be toggled: return 32
+	leal	-108(%ebp), %eax	/ -108(%ebp) receives edx
+	pushl	%eax
+	leal	-112(%ebp), %eax	/ -112(%ebp) receives ecx
+	pushl	%eax
+	leal	-116(%ebp), %eax	/ -116(%ebp) receives ebx
+	pushl	%eax
+	leal	-120(%ebp), %edx	/ -120(%ebp) receives eax
+	xorl	%eax, %eax	/ cpuid function 0
+	call	freebl_cpuid
+	movl	-120(%ebp), %eax
+	movl	%eax, -164(%ebp)	/ -164(%ebp) = eax from function 0
+	movl	-116(%ebp), %eax
+	movl	%eax, -104(%ebp)	/ vendor string bytes 0-3 = ebx
+	movl	-108(%ebp), %eax
+	movl	%eax, -100(%ebp)	/ bytes 4-7 = edx
+	movl	-112(%ebp), %eax
+	movl	%eax, -96(%ebp)	/ bytes 8-11 = ecx
+	movb	$0, -92(%ebp)	/ terminate the 12 byte string
+	xorl	%esi, %esi	/ esi = loop index into manMap
+	addl	$12, %esp
+	leal	-104(%ebp), %edi	/ edi = vendor string buffer
+	.align	4
+.L28:
+	subl	$8, %esp
+	pushl	%edi
+	pushl	manMap@GOTOFF(%ebx,%esi,4)
+	call	strcmp@PLT
+	addl	$16, %esp
+	testl	%eax, %eax
+	jne	.L26
+	movl	%esi, -168(%ebp)	/ equal strings: remember this index
+.L26:
+	incl	%esi
+	cmpl	$9, %esi
+	jle	.L28	/ compare against manMap entries 0 through 9
+	movl	-168(%ebp), %eax
+	testl	%eax, %eax
+	jne	.L29	/ index 0 ("GenuineIntel") did not match
+	xorl	%eax, %eax
+	cmpl	$1, -164(%ebp)
+	movl	$4, -144(%ebp)	/ level = 4
+	movl	$0, -140(%ebp)	/ lineSize = 0
+	jle	.L41	/ eax from function 0 was below 2: skip function 2
+	leal	-124(%ebp), %edx
+	movl	%edx, -188(%ebp)
+	leal	-128(%ebp), %eax
+	pushl	%edx
+	movl	%eax, -184(%ebp)
+	leal	-132(%ebp), %edx
+	pushl	%eax
+	movl	%edx, -180(%ebp)
+	movl	$2, %eax	/ cpuid function 2
+	pushl	%edx
+	leal	-136(%ebp), %edx
+	call	freebl_cpuid
+	movl	-136(%ebp), %eax
+	movl	%eax, %edi
+	andl	$15, %edi	/ edi = pass count, low four bits of eax
+	xorl	%esi, %esi	/ esi = pass counter
+	addl	$12, %esp
+	leal	-140(%ebp), %edx
+	cmpl	%edi, %esi
+	movl	%edx, -176(%ebp)	/ -176(%ebp) = &lineSize
+	jl	.L40
+	jmp	.L48
+	.align	4
+.L49:
+	movl	-136(%ebp), %eax
+.L40:
+	testl	%eax, %eax
+	js	.L35	/ bit 31 of eax set: skip this register
+	xorb	%al, %al	/ clear the low byte of eax before decoding
+	pushl	-176(%ebp)
+	leal	-144(%ebp), %edx
+	call	getIntelRegisterCacheLineSize
+	popl	%eax
+.L35:
+	movl	-132(%ebp), %eax
+	testl	%eax, %eax
+	js	.L36
+	pushl	-176(%ebp)
+	leal	-144(%ebp), %edx
+	call	getIntelRegisterCacheLineSize
+	popl	%eax
+.L36:
+	movl	-128(%ebp), %eax
+	testl	%eax, %eax
+	js	.L37
+	pushl	-176(%ebp)
+	leal	-144(%ebp), %edx
+	call	getIntelRegisterCacheLineSize
+	popl	%eax
+.L37:
+	movl	-124(%ebp), %eax
+	testl	%eax, %eax
+	js	.L38
+	pushl	-176(%ebp)
+	leal	-144(%ebp), %edx
+	call	getIntelRegisterCacheLineSize
+	popl	%eax
+.L38:
+	incl	%esi
+	cmpl	%edi, %esi
+	je	.L34	/ last pass done: no further cpuid call
+	pushl	-188(%ebp)
+	pushl	-184(%ebp)
+	pushl	-180(%ebp)
+	leal	-136(%ebp), %edx
+	movl	$2, %eax
+	call	freebl_cpuid
+	addl	$12, %esp
+.L34:
+	cmpl	%edi, %esi
+	jl	.L49
+.L48:
+	movl	-140(%ebp), %eax	/ eax = lineSize collected above
+.L41:
+	testl	%eax, %eax
+	jne	.L44
+	movb	$32, %al	/ a line size of 0 becomes 32
+.L44:
+	leal	-12(%ebp), %esp
+	popl	%ebx
+	popl	%esi
+	movl	%eax, %edx
+	movl	%edx, %eax
+	popl	%edi
+	leave
+	ret
+.L29:
+	leal	-148(%ebp), %eax
+	movl	%eax, -192(%ebp)
+	movl	$0, -172(%ebp)	/ line size = 0 so far
+	leal	-152(%ebp), %edi
+	pushl	%eax
+	pushl	%edi
+	leal	-156(%ebp), %esi
+	pushl	%esi
+	leal	-160(%ebp), %edx
+	movl	$-2147483648, %eax	/ cpuid function 0x80000000
+	call	freebl_cpuid
+	addl	$12, %esp
+	cmpl	$-2147483644, -160(%ebp)
+	ja	.L51	/ returned eax is above 0x80000004
+.L42:
+	movl	-172(%ebp), %eax
+	jmp	.L41
+.L51:
+	pushl	-192(%ebp)
+	pushl	%edi
+	pushl	%esi
+	leal	-160(%ebp), %edx
+	movl	$-2147483643, %eax	/ cpuid function 0x80000005
+	call	freebl_cpuid
+	movzbl	-152(%ebp), %edx	/ line size = low byte of the ecx result
+	addl	$12, %esp
+	movl	%edx, -172(%ebp)
+	jmp	.L42
+	.size	s_mpi_getProcessorLineSize, .-s_mpi_getProcessorLineSize
diff --git a/security/nss/lib/freebl/mpi/mpi-config.h b/security/nss/lib/freebl/mpi/mpi-config.h
new file mode 100644
index 000000000..f365592a4
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi-config.h
@@ -0,0 +1,68 @@
+/* Default configuration for MPI library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MPI_CONFIG_H_
+#define MPI_CONFIG_H_
+
+/*
+  For boolean options,
+  0 = no
+  1 = yes
+
+  Other options are documented individually.
+
+ */
+
+#ifndef MP_IOFUNC
+#define MP_IOFUNC 0 /* include mp_print() ?                */
+#endif
+
+#ifndef MP_MODARITH
+#define MP_MODARITH 1 /* include modular arithmetic ?        */
+#endif
+
+#ifndef MP_NUMTH
+#define MP_NUMTH 1 /* include number theoretic functions? */
+#endif
+
+#ifndef MP_LOGTAB
+#define MP_LOGTAB 1 /* use table of logs instead of log()? */
+#endif
+
+#ifndef MP_MEMSET
+#define MP_MEMSET 1 /* use memset() to zero buffers?       */
+#endif
+
+#ifndef MP_MEMCPY
+#define MP_MEMCPY 1 /* use memcpy() to copy buffers?       */
+#endif
+
+#ifndef MP_ARGCHK
+/*
+  0 = no parameter checks
+  1 = runtime checks, continue execution and return an error to caller
+  2 = assertions; dump core on parameter errors
+ */
+#ifdef DEBUG
+#define MP_ARGCHK 2 /* how to check input arguments        */
+#else
+#define MP_ARGCHK 1 /* how to check input arguments        */
+#endif
+#endif
+
+#ifndef MP_DEBUG
+#define MP_DEBUG 0 /* print diagnostic output?            */
+#endif
+
+#ifndef MP_DEFPREC
+#define MP_DEFPREC 64 /* default precision, in digits        */
+#endif
+
+#ifndef MP_SQUARE
+#define MP_SQUARE 1 /* use separate squaring code?         */
+#endif
+
+#endif /* ifndef MPI_CONFIG_H_ */
diff --git a/security/nss/lib/freebl/mpi/mpi-priv.h b/security/nss/lib/freebl/mpi/mpi-priv.h
new file mode 100644
index 000000000..b34452c48
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi-priv.h
@@ -0,0 +1,243 @@
+/*
+ *  mpi-priv.h  - Private header file for MPI
+ *  Arbitrary precision integer arithmetic library
+ *
+ *  NOTE WELL: the content of this header file is NOT part of the "public"
+ *  API for the MPI library, and may change at any time.
+ *  Application programs that use libmpi should NOT include this header file.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef _MPI_PRIV_H_
+#define _MPI_PRIV_H_ 1
+
+#include "mpi.h"
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#if MP_DEBUG
+#include <stdio.h>
+
+#define DIAG(T, V)           \
+    {                        \
+        fprintf(stderr, T);  \
+        mp_print(V, stderr); \
+        fputc('\n', stderr); \
+    }
+#else
+#define DIAG(T, V)
+#endif
+
+/* If we aren't using a wired-in logarithm table, we need to include
+   the math library to get the log() function
+ */
+
+/* {{{ s_logv_2[] - log table for 2 in various bases */
+
+#if MP_LOGTAB
+/*
+  A table of the logs of 2 for various bases (the 0 and 1 entries of
+  this table are meaningless and should not be referenced).
+
+  This table is used to compute output lengths for the mp_toradix()
+  function.  Since a number n in radix r takes up about log_r(n)
+  digits, we estimate the output size by taking the least integer
+  greater than log_r(n), where:
+
+  log_r(n) = log_2(n) * log_r(2)
+
+  This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
+  which are the output bases supported.
+ */
+
+extern const float s_logv_2[];
+#define LOG_V_2(R) s_logv_2[(R)]
+
+#else
+
+/*
+   If MP_LOGTAB is not defined, use the math library to compute the
+   logarithms on the fly.  Otherwise, use the table.
+   Pick which works best for your system.
+ */
+
+#include <math.h>
+#define LOG_V_2(R) (log(2.0) / log(R))
+
+#endif /* if MP_LOGTAB */
+
+/* }}} */
+
+/* {{{ Digit arithmetic macros */
+
+/*
+  When adding and multiplying digits, the results can be larger than
+  can be contained in an mp_digit.  Thus, an mp_word is used.  These
+  macros mask off the upper and lower digits of the mp_word (the
+  mp_word may be more than 2 mp_digits wide, but we only concern
+  ourselves with the low-order 2 mp_digits)
+ */
+
+#define CARRYOUT(W) (mp_digit)((W) >> DIGIT_BIT)
+#define ACCUM(W) (mp_digit)(W)
+
+#define MP_MIN(a, b) (((a) < (b)) ? (a) : (b))
+#define MP_MAX(a, b) (((a) > (b)) ? (a) : (b))
+#define MP_HOWMANY(a, b) (((a) + (b)-1) / (b))
+#define MP_ROUNDUP(a, b) (MP_HOWMANY(a, b) * (b))
+
+/* }}} */
+
+/* {{{ Comparison constants */
+
+#define MP_LT -1
+#define MP_EQ 0
+#define MP_GT 1
+
+/* }}} */
+
+/* {{{ private function declarations */
+
+void s_mp_setz(mp_digit *dp, mp_size count);                     /* zero digits           */
+void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count); /* copy */
+void *s_mp_alloc(size_t nb, size_t ni);                          /* general allocator     */
+void s_mp_free(void *ptr);                                       /* general free function */
+
+mp_err s_mp_grow(mp_int *mp, mp_size min); /* increase allocated size */
+mp_err s_mp_pad(mp_int *mp, mp_size min);  /* left pad with zeroes    */
+
+void s_mp_clamp(mp_int *mp); /* clip leading zeroes     */
+
+void s_mp_exch(mp_int *a, mp_int *b); /* swap a and b in place   */
+
+mp_err s_mp_lshd(mp_int *mp, mp_size p);    /* left-shift by p digits  */
+void s_mp_rshd(mp_int *mp, mp_size p);      /* right-shift by p digits */
+mp_err s_mp_mul_2d(mp_int *mp, mp_digit d); /* multiply by 2^d in place */
+void s_mp_div_2d(mp_int *mp, mp_digit d);   /* divide by 2^d in place  */
+void s_mp_mod_2d(mp_int *mp, mp_digit d);   /* modulo 2^d in place     */
+void s_mp_div_2(mp_int *mp);                /* divide by 2 in place    */
+mp_err s_mp_mul_2(mp_int *mp);              /* multiply by 2 in place  */
+mp_err s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd);
+/* normalize for division  */
+mp_err s_mp_add_d(mp_int *mp, mp_digit d); /* unsigned digit addition */
+mp_err s_mp_sub_d(mp_int *mp, mp_digit d); /* unsigned digit subtract */
+mp_err s_mp_mul_d(mp_int *mp, mp_digit d); /* unsigned digit multiply */
+mp_err s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r);
+/* unsigned digit divide   */
+mp_err s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu);
+/* Barrett reduction       */
+mp_err s_mp_add(mp_int *a, const mp_int *b); /* magnitude addition      */
+mp_err s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err s_mp_sub(mp_int *a, const mp_int *b); /* magnitude subtract      */
+mp_err s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset);
+/* a += b * RADIX^offset   */
+mp_err s_mp_mul(mp_int *a, const mp_int *b); /* magnitude multiply      */
+#if MP_SQUARE
+mp_err s_mp_sqr(mp_int *a); /* magnitude square        */
+#else
+#define s_mp_sqr(a) s_mp_mul(a, a)
+#endif
+mp_err s_mp_div(mp_int *rem, mp_int *div, mp_int *quot); /* magnitude div */
+mp_err s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
+mp_err s_mp_2expt(mp_int *a, mp_digit k);       /* a = 2^k                 */
+int s_mp_cmp(const mp_int *a, const mp_int *b); /* magnitude comparison */
+int s_mp_cmp_d(const mp_int *a, mp_digit d);    /* magnitude digit compare */
+int s_mp_ispow2(const mp_int *v);               /* is v a power of 2?      */
+int s_mp_ispow2d(mp_digit d);                   /* is d a power of 2?      */
+
+int s_mp_tovalue(char ch, int r);                /* convert ch to value    */
+char s_mp_todigit(mp_digit val, int r, int low); /* convert val to digit */
+int s_mp_outlen(int bits, int r);                /* output length in bytes */
+mp_digit s_mp_invmod_radix(mp_digit P);          /* returns (P ** -1) mod RADIX */
+mp_err s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c);
+mp_err s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c);
+mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c);
+
+#ifdef NSS_USE_COMBA
+
+#define IS_POWER_OF_2(a) ((a) && !((a) & ((a)-1)))
+
+void s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C);
+void s_mp_mul_comba_8(const mp_int *A, const mp_int *B, mp_int *C);
+void s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C);
+void s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C);
+
+void s_mp_sqr_comba_4(const mp_int *A, mp_int *B);
+void s_mp_sqr_comba_8(const mp_int *A, mp_int *B);
+void s_mp_sqr_comba_16(const mp_int *A, mp_int *B);
+void s_mp_sqr_comba_32(const mp_int *A, mp_int *B);
+
+#endif /* end NSS_USE_COMBA */
+
+/* ------ mpv functions, operate on arrays of digits, not on mp_int's ------ */
+#if defined(__OS2__) && defined(__IBMC__)
+#define MPI_ASM_DECL __cdecl
+#else
+#define MPI_ASM_DECL
+#endif
+
+#ifdef MPI_AMD64
+
+mp_digit MPI_ASM_DECL s_mpv_mul_set_vec64(mp_digit *, mp_digit *, mp_size, mp_digit);
+mp_digit MPI_ASM_DECL s_mpv_mul_add_vec64(mp_digit *, const mp_digit *, mp_size, mp_digit);
+
+/* c = a * b; the returned digit is stored at c[a_len] (c, a_len used twice) */
+#define s_mpv_mul_d(a, a_len, b, c) \
+    (((mp_digit *)(c))[(a_len)] = s_mpv_mul_set_vec64((c), (a), (a_len), (b)))
+
+/* c += a * b; the returned digit is stored at c[a_len] (c, a_len used twice) */
+#define s_mpv_mul_d_add(a, a_len, b, c) \
+    (((mp_digit *)(c))[(a_len)] = s_mpv_mul_add_vec64((c), (a), (a_len), (b)))
+
+#else
+
+void MPI_ASM_DECL s_mpv_mul_d(const mp_digit *a, mp_size a_len,
+                              mp_digit b, mp_digit *c);
+void MPI_ASM_DECL s_mpv_mul_d_add(const mp_digit *a, mp_size a_len,
+                                  mp_digit b, mp_digit *c);
+
+#endif
+
+void MPI_ASM_DECL s_mpv_mul_d_add_prop(const mp_digit *a,
+                                       mp_size a_len, mp_digit b,
+                                       mp_digit *c);
+void MPI_ASM_DECL s_mpv_sqr_add_prop(const mp_digit *a,
+                                     mp_size a_len,
+                                     mp_digit *sqrs);
+
+mp_err MPI_ASM_DECL s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo,
+                                    mp_digit divisor, mp_digit *quot, mp_digit *rem);
+
+/* c += a * b * (MP_RADIX ** offset);  */
+/* Callers of this macro should be aware that the return type might vary;
+ * it should be treated as a void function. */
+#define s_mp_mul_d_add_offset(a, b, c, off) \
+    s_mpv_mul_d_add_prop(MP_DIGITS(a), MP_USED(a), b, MP_DIGITS(c) + off)
+
+typedef struct {
+    mp_int N;         /* modulus N */
+    mp_digit n0prime; /* n0' = - (n0 ** -1) mod MP_RADIX */
+} mp_mont_modulus;
+
+mp_err s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
+                     mp_mont_modulus *mmm);
+mp_err s_mp_redc(mp_int *T, mp_mont_modulus *mmm);
+
+/*
+ * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
+ * if a cache exists, or zero if there is no cache. If more than one
+ * cache line size exists, it should return the smallest one (which is
+ * usually that of the L1 cache).
+ *
+ * mp_modexp uses this information to make sure that private key information
+ * isn't being leaked through the cache.
+ *
+ * see mpcpucache.c for the implementation.
+ */
+unsigned long s_mpi_getProcessorLineSize(void);
+
+/* }}} */
+#endif
diff --git a/security/nss/lib/freebl/mpi/mpi.c b/security/nss/lib/freebl/mpi/mpi.c
new file mode 100644
index 000000000..f6f75439c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi.c
@@ -0,0 +1,4839 @@
+/*
+ *  mpi.c
+ *
+ *  Arbitrary precision integer arithmetic library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+#if defined(OSF1)
+#include <c_asm.h>
+#endif
+
+#if defined(__arm__) && \
+    ((defined(__thumb__) && !defined(__thumb2__)) || defined(__ARM_ARCH_3__))
+/* The inlined assembler version doesn't work on 16-bit Thumb or ARM v3 */
+#undef MP_ASSEMBLY_MULTIPLY
+#undef MP_ASSEMBLY_SQUARE
+#endif
+
+#if MP_LOGTAB
+/*
+  A table of the logs of 2 for various bases (the 0 and 1 entries of
+  this table are meaningless and should not be referenced).
+
+  This table is used to compute output lengths for the mp_toradix()
+  function.  Since a number n in radix r takes up about log_r(n)
+  digits, we estimate the output size by taking the least integer
+  greater than log_r(n), where:
+
+  log_r(n) = log_2(n) * log_r(2)
+
+  This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
+  which are the output bases supported.
+ */
+#include "logtab.h"
+#endif
+
+#ifdef CT_VERIF
+#include <valgrind/memcheck.h>
+#endif
+
+/* {{{ Constant strings */
+
+/* Constant strings returned by mp_strerror() */
+static const char *mp_err_string[] = {
+    "unknown result code",     /* say what?            */
+    "boolean true",            /* MP_OKAY, MP_YES      */
+    "boolean false",           /* MP_NO                */
+    "out of memory",           /* MP_MEM               */
+    "argument out of range",   /* MP_RANGE             */
+    "invalid input parameter", /* MP_BADARG            */
+    "result is undefined"      /* MP_UNDEF             */
+};
+
+/* Value to digit maps for radix conversion   */
+
+/* s_dmap_1 - standard digits and letters */
+static const char *s_dmap_1 =
+    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
+
+/* }}} */
+
+/* {{{ Default precision manipulation */
+
+/* Default precision for newly created mp_int's      */
+static mp_size s_mp_defprec = MP_DEFPREC;
+
+mp_size
+mp_get_prec(void)
+{
+    return s_mp_defprec; /* current default precision, in digits */
+
+} /* end mp_get_prec() */
+
+void
+mp_set_prec(mp_size prec)
+{
+    /*
+      A prec of zero selects the compiled-in MP_DEFPREC; any other value
+      is stored as given.
+     */
+    s_mp_defprec = (prec == 0) ? MP_DEFPREC : prec;
+} /* end mp_set_prec() */
+
+/* }}} */
+
+#ifdef CT_VERIF
+void
+mp_taint(mp_int *mp)
+{ /* CT_VERIF only: hand each used digit of mp to VALGRIND_MAKE_MEM_UNDEFINED */
+    size_t i; /* index of the digit being marked */
+    for (i = 0; i < mp->used; ++i) { /* digits past mp->used are not touched */
+        VALGRIND_MAKE_MEM_UNDEFINED(&(mp->dp[i]), sizeof(mp_digit));
+    }
+}
+
+void
+mp_untaint(mp_int *mp)
+{ /* CT_VERIF only: hand each used digit of mp to VALGRIND_MAKE_MEM_DEFINED */
+    size_t i; /* index of the digit being marked */
+    for (i = 0; i < mp->used; ++i) { /* digits past mp->used are not touched */
+        VALGRIND_MAKE_MEM_DEFINED(&(mp->dp[i]), sizeof(mp_digit));
+    }
+}
+#endif
+
+/*------------------------------------------------------------------------*/
+/* {{{ mp_init(mp) */
+
+/*
+  mp_init(mp)
+
+  Initialize a new zero-valued mp_int.  Returns MP_OKAY if successful,
+  MP_MEM if memory could not be allocated for the structure.
+ */
+
+mp_err
+mp_init(mp_int *mp)
+{
+    return mp_init_size(mp, s_mp_defprec); /* use the current default precision */
+
+} /* end mp_init() */
+
+/* }}} */
+
+/* {{{ mp_init_size(mp, prec) */
+
+/*
+  mp_init_size(mp, prec)
+
+  Initialize a new zero-valued mp_int with at least the given
+  precision; returns MP_OKAY if successful, or MP_MEM if memory could
+  not be allocated for the structure.
+ */
+
+mp_err
+mp_init_size(mp_int *mp, mp_size prec)
+{
+    ARGCHK(mp != NULL && prec > 0, MP_BADARG);
+
+    /* Allocate a whole multiple of s_mp_defprec digits. */
+    prec = MP_ROUNDUP(prec, s_mp_defprec);
+    DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit));
+    if (DIGITS(mp) == NULL)
+        return MP_MEM;
+
+    ALLOC(mp) = prec;
+    USED(mp) = 1;
+    SIGN(mp) = ZPOS;
+    return MP_OKAY;
+} /* end mp_init_size() */
+
+/* }}} */
+
+/* {{{ mp_init_copy(mp, from) */
+
+/*
+  mp_init_copy(mp, from)
+
+  Initialize mp as an exact copy of from.  Returns MP_OKAY if
+  successful, MP_MEM if memory could not be allocated for the new
+  structure.
+ */
+
+mp_err
+mp_init_copy(mp_int *mp, const mp_int *from)
+{
+    ARGCHK(mp != NULL && from != NULL, MP_BADARG);
+
+    /* Self-copy: mp already is 'from', so nothing is allocated. */
+    if (mp != from) {
+        DIGITS(mp) = s_mp_alloc(ALLOC(from), sizeof(mp_digit));
+        if (DIGITS(mp) == NULL)
+            return MP_MEM;
+
+        /* Mirror the source's bookkeeping, then its used digits. */
+        SIGN(mp) = SIGN(from);
+        ALLOC(mp) = ALLOC(from);
+        USED(mp) = USED(from);
+        s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
+    }
+    return MP_OKAY;
+} /* end mp_init_copy() */
+
+/* }}} */
+
+/* {{{ mp_copy(from, to) */
+
+/*
+  mp_copy(from, to)
+
+  Copies the mp_int 'from' to the mp_int 'to'.  It is presumed that
+  'to' has already been initialized (if not, use mp_init_copy()
+  instead). If 'from' and 'to' are identical, nothing happens.
+ */
+
+mp_err
+mp_copy(const mp_int *from, mp_int *to)
+{
+    ARGCHK(from != NULL && to != NULL, MP_BADARG);
+
+    if (from == to)
+        return MP_OKAY; /* copying onto itself: nothing to do */
+
+    { /* copy */
+        mp_digit *tmp;
+
+        /*
+          If the allocated buffer in 'to' already has enough space to hold
+          all the used digits of 'from', we'll re-use it to avoid hitting
+          the memory allocator more than necessary; otherwise, we'd have
+          to grow anyway, so we just allocate a hunk and make the copy as
+          usual
+         */
+        if (ALLOC(to) >= USED(from)) {
+            s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
+            s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
+            /* (the digits of 'to' beyond USED(from) were zeroed just above) */
+        } else {
+            if ((tmp = s_mp_alloc(ALLOC(from), sizeof(mp_digit))) == NULL)
+                return MP_MEM; /* 'to' is left unmodified on failure */
+
+            s_mp_copy(DIGITS(from), tmp, USED(from));
+
+            if (DIGITS(to) != NULL) {
+                s_mp_setz(DIGITS(to), ALLOC(to)); /* wipe old digits before freeing */
+                s_mp_free(DIGITS(to));
+            }
+
+            DIGITS(to) = tmp;
+            ALLOC(to) = ALLOC(from);
+        }
+
+        /* Copy the precision and sign from the original */
+        USED(to) = USED(from);
+        SIGN(to) = SIGN(from);
+    } /* end copy */
+
+    return MP_OKAY;
+
+} /* end mp_copy() */
+
+/* }}} */
+
+/* {{{ mp_exch(mp1, mp2) */
+
+/*
+  mp_exch(mp1, mp2)
+
+  Exchange mp1 and mp2 without allocating any intermediate memory
+  (well, unless you count the stack space needed for this call and the
+  locals it creates...).  This cannot fail.
+ */
+
+void
+mp_exch(mp_int *mp1, mp_int *mp2)
+{
+#if MP_ARGCHK == 2
+    assert(mp1 != NULL && mp2 != NULL); /* assertion mode: fail hard on NULL */
+#else
+    if (mp1 == NULL || mp2 == NULL)
+        return; /* every other MP_ARGCHK level: silently ignore the call */
+#endif
+
+    s_mp_exch(mp1, mp2); /* the actual swap is done by the private helper */
+
+} /* end mp_exch() */
+
+/* }}} */
+
+/* {{{ mp_clear(mp) */
+
+/*
+  mp_clear(mp)
+
+  Release the storage used by an mp_int, and void its fields so that
+  if someone calls mp_clear() again for the same int later, we won't
+  get tollchocked.
+ */
+
+void
+mp_clear(mp_int *mp)
+{
+    if (mp == NULL)
+        return; /* nothing to release */
+
+    if (DIGITS(mp) != NULL) {
+        s_mp_setz(DIGITS(mp), ALLOC(mp)); /* zero the whole allocation first */
+        s_mp_free(DIGITS(mp));
+        DIGITS(mp) = NULL; /* so a repeated mp_clear() skips this branch */
+    }
+
+    USED(mp) = 0;
+    ALLOC(mp) = 0;
+
+} /* end mp_clear() */
+
+/* }}} */
+
+/* {{{ mp_zero(mp) */
+
+/*
+  mp_zero(mp)
+
+  Set mp to zero.  Does not change the allocated size of the structure,
+  and therefore cannot fail (except on a bad argument, which we ignore)
+ */
+void
+mp_zero(mp_int *mp)
+{
+    if (mp != NULL) {
+        /* Wipe the whole allocation, then describe the value 0. */
+        s_mp_setz(DIGITS(mp), ALLOC(mp));
+        SIGN(mp) = ZPOS;
+        USED(mp) = 1;
+    }
+
+} /* end mp_zero() */
+
+/* }}} */
+
+/* {{{ mp_set(mp, d) */
+
+void
+mp_set(mp_int *mp, mp_digit d)
+{
+    /* Quietly ignore a NULL target; otherwise mp becomes the value d. */
+    if (mp != NULL) {
+        mp_zero(mp);      /* one zero digit, sign ZPOS */
+        DIGIT(mp, 0) = d; /* then overwrite that digit */
+    }
+
+} /* end mp_set() */
+
+/* }}} */
+
+/* {{{ mp_set_int(mp, z) */
+
+/* Set mp to the signed value z.  |z| is formed in unsigned arithmetic because
+   labs(LONG_MIN) overflows.  Returns MP_OKAY, or the first error reported. */
+mp_err
+mp_set_int(mp_int *mp, long z)
+{
+    int ix;
+    unsigned long v = (z < 0) ? 0UL - (unsigned long)z : (unsigned long)z;
+    mp_err res;
+
+    ARGCHK(mp != NULL, MP_BADARG);
+
+    mp_zero(mp);
+    if (z == 0)
+        return MP_OKAY; /* shortcut for zero */
+
+    if (sizeof v <= sizeof(mp_digit)) {
+        DIGIT(mp, 0) = v; /* magnitude fits in a single digit */
+    } else {
+        /* Shift in the magnitude one byte at a time, high byte first. */
+        for (ix = sizeof(long) - 1; ix >= 0; ix--) {
+            if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
+                return res;
+            res = s_mp_add_d(mp, (mp_digit)((v >> (ix * CHAR_BIT)) & UCHAR_MAX));
+            if (res != MP_OKAY)
+                return res;
+        }
+    }
+    if (z < 0)
+        SIGN(mp) = NEG;
+    return MP_OKAY;
+} /* end mp_set_int() */
+
+/* }}} */
+
+/* {{{ mp_set_ulong(mp, z) */
+
+mp_err
+mp_set_ulong(mp_int *mp, unsigned long z)
+{ /* Set mp to the unsigned value z; returns MP_OKAY or the first error seen */
+    int ix;
+    mp_err res;
+
+    ARGCHK(mp != NULL, MP_BADARG);
+
+    mp_zero(mp);
+    if (z == 0)
+        return MP_OKAY; /* shortcut for zero */
+
+    if (sizeof z <= sizeof(mp_digit)) {
+        DIGIT(mp, 0) = z; /* the whole value fits in one digit */
+    } else {
+        for (ix = sizeof(long) - 1; ix >= 0; ix--) {
+            if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
+                return res;
+            /* ...then add in the next byte of z, most significant byte first */
+            res = s_mp_add_d(mp, (mp_digit)((z >> (ix * CHAR_BIT)) & UCHAR_MAX));
+            if (res != MP_OKAY)
+                return res;
+        }
+    }
+    return MP_OKAY;
+} /* end mp_set_ulong() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Digit arithmetic */
+
+/* {{{ mp_add_d(a, d, b) */
+
+/*
+  mp_add_d(a, d, b)
+
+  Compute the sum b = a + d, for a single digit d.  Respects the sign of
+  its primary addend (single digits are unsigned anyway).
+ */
+
+mp_err
+mp_add_d(const mp_int *a, mp_digit d, mp_int *b)
+{
+    mp_int tmp; /* working copy of a; swapped into b on success */
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
+        return res;
+
+    if (SIGN(&tmp) == ZPOS) { /* a >= 0: magnitudes add */
+        if ((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
+            goto CLEANUP;
+    } else if (s_mp_cmp_d(&tmp, d) >= 0) { /* a < 0, |a| >= d: shrink |a| */
+        if ((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
+            goto CLEANUP;
+    } else {
+        mp_neg(&tmp, &tmp);
+        /* a < 0 and |a| < d: the sum is the non-negative digit d - |a| */
+        DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
+    }
+
+    if (s_mp_cmp_d(&tmp, 0) == 0)
+        SIGN(&tmp) = ZPOS; /* never leave a negative zero */
+
+    s_mp_exch(&tmp, b);
+
+CLEANUP:
+    mp_clear(&tmp); /* after the swap this releases b's previous digits */
+    return res;
+
+} /* end mp_add_d() */
+
+/* }}} */
+
+/* {{{ mp_sub_d(a, d, b) */
+
+/*
+  mp_sub_d(a, d, b)
+
+  Compute the difference b = a - d, for a single digit d.  Respects the
+  sign of its minuend (single digits are unsigned anyway).
+ */
+
+mp_err
+mp_sub_d(const mp_int *a, mp_digit d, mp_int *b)
+{
+    mp_int tmp; /* working copy of a; swapped into b on success */
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
+        return res;
+
+    if (SIGN(&tmp) == NEG) { /* a < 0: the magnitude grows by d */
+        if ((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
+            goto CLEANUP;
+    } else if (s_mp_cmp_d(&tmp, d) >= 0) { /* a >= d: plain subtraction */
+        if ((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
+            goto CLEANUP;
+    } else {
+        mp_neg(&tmp, &tmp);
+        /* 0 <= a < d: the result is -(d - a), built as a single digit */
+        DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
+        SIGN(&tmp) = NEG;
+    }
+
+    if (s_mp_cmp_d(&tmp, 0) == 0)
+        SIGN(&tmp) = ZPOS; /* never leave a negative zero */
+
+    s_mp_exch(&tmp, b);
+
+CLEANUP:
+    mp_clear(&tmp); /* after the swap this releases b's previous digits */
+    return res;
+
+} /* end mp_sub_d() */
+
+/* }}} */
+
+/* {{{ mp_mul_d(a, d, b) */
+
+/*
+  mp_mul_d(a, d, b)
+
+  Compute the product b = a * d, for a single digit d.  Respects the sign
+  of its multiplicand (single digits are unsigned anyway)
+ */
+
+mp_err
+mp_mul_d(const mp_int *a, mp_digit d, mp_int *b)
+{
+    mp_err status;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    /* Multiplying by zero: clear b without touching a. */
+    if (d == 0) {
+        mp_zero(b);
+        return MP_OKAY;
+    }
+
+    /* Otherwise seed b with a and scale it in place. */
+    status = mp_copy(a, b);
+    if (status == MP_OKAY)
+        status = s_mp_mul_d(b, d);
+
+    return status;
+} /* end mp_mul_d() */
+
+/* }}} */
+
+/* {{{ mp_mul_2(a, c) */
+
+mp_err
+mp_mul_2(const mp_int *a, mp_int *c)
+{
+    mp_err status;
+
+    ARGCHK(a != NULL && c != NULL, MP_BADARG);
+
+    /* c = 2 * a: copy a into c, then double c in place. */
+    status = mp_copy(a, c);
+    if (status == MP_OKAY)
+        status = s_mp_mul_2(c);
+    return status;
+} /* end mp_mul_2() */
+
+/* }}} */
+
+/* {{{ mp_div_d(a, d, q, r) */
+
+/*
+  mp_div_d(a, d, q, r)
+
+  Compute the quotient q = a / d and remainder r = a mod d, for a single
+  digit d; the quotient keeps the sign of the dividend a.  q and r may be
+  NULL.  Returns MP_RANGE if d is 0.  A zero quotient is stored as ZPOS.
+ */
+
+mp_err
+mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
+{
+    mp_err res;
+    mp_int qp;
+    mp_digit rem = 0;
+    int pow;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    if (d == 0)
+        return MP_RANGE;
+
+    /* Shortcut for powers of two ... */
+    if ((pow = s_mp_ispow2d(d)) >= 0) {
+        /* The remainder is just the low 'pow' bits of the lowest digit. */
+        mp_digit mask = ((mp_digit)1 << pow) - 1;
+
+        rem = DIGIT(a, 0) & mask;
+        if (q) {
+            if ((res = mp_copy(a, q)) != MP_OKAY)
+                return res;
+            s_mp_div_2d(q, pow);
+            if (s_mp_cmp_d(q, 0) == 0)
+                SIGN(q) = ZPOS;
+        }
+        if (r)
+            *r = rem;
+
+        return MP_OKAY;
+    }
+
+    /* General case: divide a private copy so 'a' is never modified. */
+    if ((res = mp_init_copy(&qp, a)) != MP_OKAY)
+        return res;
+
+    res = s_mp_div_d(&qp, d, &rem);
+
+    /* Normalize the sign on qp itself: q may be NULL, and the swap below
+       would otherwise hand the caller qp's stale sign. */
+    if (s_mp_cmp_d(&qp, 0) == 0)
+        SIGN(&qp) = ZPOS;
+
+    if (r)
+        *r = rem;
+
+    if (q)
+        s_mp_exch(&qp, q);
+
+    mp_clear(&qp);
+    return res;
+} /* end mp_div_d() */
+
+/* }}} */
+
+/* {{{ mp_div_2(a, c) */
+
+/*
+  mp_div_2(a, c)
+
+  Compute c = a / 2, disregarding the remainder.
+ */
+
+mp_err
+mp_div_2(const mp_int *a, mp_int *c)
+{
+    mp_err status;
+
+    ARGCHK(a != NULL && c != NULL, MP_BADARG);
+
+    /* Halve a copy of a that lives in c; the copy is the only step that
+       can fail, so its status is the function's result. */
+    status = mp_copy(a, c);
+    if (status == MP_OKAY)
+        s_mp_div_2(c);
+
+    return status;
+} /* end mp_div_2() */
+
+/* }}} */
+
+/* {{{ mp_expt_d(a, d, c) */
+
+mp_err
+mp_expt_d(const mp_int *a, mp_digit d, mp_int *c)
+{ /* c = a ** d for a single-digit exponent d, by repeated squaring */
+    mp_int s, x; /* s: running product; x: a raised to successive powers of 2 */
+    mp_err res;
+
+    ARGCHK(a != NULL && c != NULL, MP_BADARG);
+
+    if ((res = mp_init(&s)) != MP_OKAY)
+        return res;
+    if ((res = mp_init_copy(&x, a)) != MP_OKAY)
+        goto X; /* only s needs releasing at this point */
+
+    DIGIT(&s, 0) = 1; /* s = 1 */
+
+    while (d != 0) {
+        if (d & 1) { /* low exponent bit set: fold the current power into s */
+            if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
+                goto CLEANUP;
+        }
+
+        d /= 2; /* move on to the next exponent bit */
+
+        if ((res = s_mp_sqr(&x)) != MP_OKAY)
+            goto CLEANUP;
+    }
+
+    s_mp_exch(&s, c); /* hand the result to c; s now holds c's old digits */
+
+CLEANUP:
+    mp_clear(&x);
+X:
+    mp_clear(&s);
+
+    return res;
+
+} /* end mp_expt_d() */
+
+/* }}} */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Full arithmetic */
+
+/* {{{ mp_abs(a, b) */
+
+/*
+  mp_abs(a, b)
+
+  Compute b = |a|.  'a' and 'b' may be identical.
+ */
+
+mp_err
+mp_abs(const mp_int *a, mp_int *b)
+{
+    mp_err status;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    /* b = |a|: take a copy of a (a no-op when a and b are the same
+       object) and force the copy's sign to non-negative. */
+    status = mp_copy(a, b);
+    if (status == MP_OKAY)
+        SIGN(b) = ZPOS;
+
+    return status;
+} /* end mp_abs() */
+
+/* }}} */
+
+/* {{{ mp_neg(a, b) */
+
+/*
+  mp_neg(a, b)
+
+  Compute b = -a.  'a' and 'b' may be identical.
+ */
+
+mp_err
+mp_neg(const mp_int *a, mp_int *b)
+{
+    mp_err status;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    status = mp_copy(a, b);
+    if (status != MP_OKAY)
+        return status;
+
+    /* Zero never becomes negative; any other value has its sign flipped. */
+    if (s_mp_cmp_d(b, 0) != MP_EQ && SIGN(b) != NEG)
+        SIGN(b) = NEG;
+    else
+        SIGN(b) = ZPOS;
+    return MP_OKAY;
+} /* end mp_neg() */
+
+/* }}} */
+
+/* {{{ mp_add(a, b, c) */
+
+/*
+  mp_add(a, b, c)
+
+  Compute c = a + b.  All parameters may be identical.
+ */
+
+mp_err
+mp_add(const mp_int *a, const mp_int *b, mp_int *c)
+{
+    mp_err res; /* presumably assigned by MP_CHECKOK in every branch below */
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (SIGN(a) == SIGN(b)) { /* same sign:  add values, keep sign */
+        MP_CHECKOK(s_mp_add_3arg(a, b, c));
+    } else if (s_mp_cmp(a, b) >= 0) { /* different sign: |a| >= |b|   */
+        MP_CHECKOK(s_mp_sub_3arg(a, b, c));
+    } else { /* different sign: |a|  < |b|   */
+        MP_CHECKOK(s_mp_sub_3arg(b, a, c));
+    }
+    /* a zero result is always reported as non-negative */
+    if (s_mp_cmp_d(c, 0) == MP_EQ)
+        SIGN(c) = ZPOS;
+
+CLEANUP:
+    return res;
+
+} /* end mp_add() */
+
+/* }}} */
+
+/* {{{ mp_sub(a, b, c) */
+
+/*
+  mp_sub(a, b, c)
+
+  Compute c = a - b.  All parameters may be identical.
+ */
+
+mp_err
+mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
+{
+    mp_err res;
+    int magDiff; /* result of comparing the magnitudes of a and b */
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (a == b) { /* same object: the difference is zero */
+        mp_zero(c);
+        return MP_OKAY;
+    }
+
+    if (MP_SIGN(a) != MP_SIGN(b)) { /* opposite signs: magnitudes add */
+        MP_CHECKOK(s_mp_add_3arg(a, b, c));
+    } else if (!(magDiff = s_mp_cmp(a, b))) { /* same sign, equal magnitude */
+        mp_zero(c);
+        res = MP_OKAY;
+    } else if (magDiff > 0) { /* same sign, |a| > |b| */
+        MP_CHECKOK(s_mp_sub_3arg(a, b, c));
+    } else { /* same sign, |a| < |b|: subtract the other way round */
+        MP_CHECKOK(s_mp_sub_3arg(b, a, c));
+        MP_SIGN(c) = !MP_SIGN(a);
+    }
+    /* a zero result is always reported as non-negative */
+    if (s_mp_cmp_d(c, 0) == MP_EQ)
+        MP_SIGN(c) = MP_ZPOS;
+
+CLEANUP:
+    return res;
+
+} /* end mp_sub() */
+
+/* }}} */
+
+/* {{{ mp_mul(a, b, c) */
+
+/*
+  mp_mul(a, b, c)
+
+  Compute c = a * b.  All parameters may be identical.
+ */
+mp_err
+mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
+{
+    mp_digit *pb; /* walks the digits of b */
+    mp_int tmp;   /* private copy of an input that aliases c, if any */
+    mp_err res;
+    mp_size ib;
+    mp_size useda, usedb;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+    /* If the output aliases an input, multiply from a private copy instead. */
+    if (a == c) {
+        if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
+            return res;
+        if (a == b)
+            b = &tmp;
+        a = &tmp;
+    } else if (b == c) {
+        if ((res = mp_init_copy(&tmp, b)) != MP_OKAY)
+            return res;
+        b = &tmp;
+    } else {
+        MP_DIGITS(&tmp) = 0; /* no copy made: mp_clear(&tmp) will skip the free */
+    }
+
+    if (MP_USED(a) < MP_USED(b)) {
+        const mp_int *xch = b; /* switch a and b, to do fewer outer loops */
+        b = a;
+        a = xch;
+    }
+    /* Reset c to a single zero digit, then pad it to USED(a) + USED(b) digits. */
+    MP_USED(c) = 1;
+    MP_DIGIT(c, 0) = 0;
+    if ((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
+        goto CLEANUP;
+
+#ifdef NSS_USE_COMBA
+    if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
+        if (MP_USED(a) == 4) {
+            s_mp_mul_comba_4(a, b, c);
+            goto CLEANUP;
+        }
+        if (MP_USED(a) == 8) {
+            s_mp_mul_comba_8(a, b, c);
+            goto CLEANUP;
+        }
+        if (MP_USED(a) == 16) {
+            s_mp_mul_comba_16(a, b, c);
+            goto CLEANUP;
+        }
+        if (MP_USED(a) == 32) {
+            s_mp_mul_comba_32(a, b, c);
+            goto CLEANUP;
+        }
+    }
+#endif
+    /* First digit of b: c = a * b[0] */
+    pb = MP_DIGITS(b);
+    s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
+
+    /* Outer loop:  Digits of b */
+    useda = MP_USED(a);
+    usedb = MP_USED(b);
+    for (ib = 1; ib < usedb; ib++) {
+        mp_digit b_i = *pb++;
+
+        /* Inner product:  Digits of a */
+        if (b_i)
+            s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
+        else
+            MP_DIGIT(c, ib + useda) = b_i; /* b_i is 0 here: store a zero digit */
+    }
+
+    s_mp_clamp(c);
+    /* The product is negative only if the signs differ and it is nonzero. */
+    if (SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
+        SIGN(c) = ZPOS;
+    else
+        SIGN(c) = NEG;
+
+CLEANUP:
+    mp_clear(&tmp);
+    return res;
+} /* end mp_mul() */
+
+/* }}} */
+
+/* {{{ mp_sqr(a, sqr) */
+
+#if MP_SQUARE
+/*
+  Computes the square of a.  This can be done more
+  efficiently than a general multiplication, because many of the
+  computation steps are redundant when squaring.  The inner product
+  step is a bit more complicated, but we save a fair number of
+  iterations of the multiplication loop.
+ */
+
+/* sqr = a^2;   Caller provides both a and sqr, which may be identical. */
+mp_err
+mp_sqr(const mp_int *a, mp_int *sqr)
+{
+    mp_digit *pa; /* walks the digits of a */
+    mp_digit d;
+    mp_err res;
+    mp_size ix;
+    mp_int tmp; /* private copy of a when squaring in place */
+    int count;
+
+    ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
+    /* Squaring in place: read from a private copy so sqr can be overwritten. */
+    if (a == sqr) {
+        if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
+            return res;
+        a = &tmp;
+    } else {
+        DIGITS(&tmp) = 0; /* no copy made: mp_clear(&tmp) will skip the free */
+        res = MP_OKAY;
+    }
+    /* The result is given twice as many digits as a. */
+    ix = 2 * MP_USED(a);
+    if (ix > MP_ALLOC(sqr)) {
+        MP_USED(sqr) = 1;
+        MP_CHECKOK(s_mp_grow(sqr, ix));
+    }
+    MP_USED(sqr) = ix;
+    MP_DIGIT(sqr, 0) = 0;
+
+#ifdef NSS_USE_COMBA
+    if (IS_POWER_OF_2(MP_USED(a))) {
+        if (MP_USED(a) == 4) {
+            s_mp_sqr_comba_4(a, sqr);
+            goto CLEANUP;
+        }
+        if (MP_USED(a) == 8) {
+            s_mp_sqr_comba_8(a, sqr);
+            goto CLEANUP;
+        }
+        if (MP_USED(a) == 16) {
+            s_mp_sqr_comba_16(a, sqr);
+            goto CLEANUP;
+        }
+        if (MP_USED(a) == 32) {
+            s_mp_sqr_comba_32(a, sqr);
+            goto CLEANUP;
+        }
+    }
+#endif
+    /* Multiply each digit of a by the digits above it (each pair once). */
+    pa = MP_DIGITS(a);
+    count = MP_USED(a) - 1;
+    if (count > 0) {
+        d = *pa++;
+        s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
+        for (ix = 3; --count > 0; ix += 2) {
+            d = *pa++;
+            s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
+        }                                    /* for(ix ...) */
+        MP_DIGIT(sqr, MP_USED(sqr) - 1) = 0; /* above loop stopped short of this. */
+
+        /* now sqr *= 2 */
+        s_mp_mul_2(sqr);
+    } else {
+        MP_DIGIT(sqr, 1) = 0; /* single-digit a: no cross products to add */
+    }
+
+    /* now add the squares of the digits of a to sqr. */
+    s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr));
+
+    SIGN(sqr) = ZPOS; /* a square is never negative */
+    s_mp_clamp(sqr);
+
+CLEANUP:
+    mp_clear(&tmp);
+    return res;
+
+} /* end mp_sqr() */
+#endif
+
+/* }}} */
+
+/* {{{ mp_div(a, b, q, r) */
+
+/*
+  mp_div(a, b, q, r)
+
+  Compute q = a / b and r = a mod b.  Input parameters may be re-used
+  as output parameters.  If q or r is NULL, that portion of the
+  computation will be discarded (although it will still be computed)
+
+  Returns MP_RANGE when b is zero.  The remainder takes the sign of a;
+  the quotient is non-negative when a and b have the same sign and
+  negative otherwise; a zero quotient or remainder is always
+  non-negative.
+ */
+mp_err
+mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r)
+{
+    mp_err res;
+    mp_int *pQ, *pR; /* where quotient / remainder are built: the caller's object or a temporary */
+    mp_int qtmp, rtmp, btmp;
+    int cmp;
+    mp_sign signA;
+    mp_sign signB;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+    /* Capture the input signs now; a or b may alias an output written below. */
+    signA = MP_SIGN(a);
+    signB = MP_SIGN(b);
+
+    if (mp_cmp_z(b) == MP_EQ)
+        return MP_RANGE;
+    /* Null the digit pointers so CLEANUP can clear all three temporaries unconditionally (presumably a no-op when unset). */
+    DIGITS(&qtmp) = 0;
+    DIGITS(&rtmp) = 0;
+    DIGITS(&btmp) = 0;
+
+    /* Set up some temporaries... */
+    if (!r || r == a || r == b) {
+        MP_CHECKOK(mp_init_copy(&rtmp, a)); /* r missing or aliased: work in rtmp */
+        pR = &rtmp;
+    } else {
+        MP_CHECKOK(mp_copy(a, r)); /* remainder starts out as a copy of a */
+        pR = r;
+    }
+
+    if (!q || q == a || q == b) {
+        MP_CHECKOK(mp_init_size(&qtmp, MP_USED(a))); /* q missing or aliased: work in qtmp */
+        pQ = &qtmp;
+    } else {
+        MP_CHECKOK(s_mp_pad(q, MP_USED(a)));
+        pQ = q;
+        mp_zero(pQ);
+    }
+
+    /*
+      If |a| <= |b|, we can compute the solution without division;
+      otherwise, we actually do the work required.
+     */
+    if ((cmp = s_mp_cmp(a, b)) <= 0) {
+        if (cmp) {
+            /* r was set to a above. */
+            mp_zero(pQ);
+        } else {
+            mp_set(pQ, 1); /* |a| == |b|: quotient magnitude 1, remainder 0 */
+            mp_zero(pR);
+        }
+    } else {
+        MP_CHECKOK(mp_init_copy(&btmp, b)); /* s_mp_div gets a private copy of b -- presumably because it modifies its divisor */
+        MP_CHECKOK(s_mp_div(pR, &btmp, pQ));
+    }
+
+    /* Compute the signs for the output  */
+    MP_SIGN(pR) = signA;        /* Sr = Sa              */
+    /* Sq = ZPOS if Sa == Sb */ /* Sq = NEG if Sa != Sb */
+    MP_SIGN(pQ) = (signA == signB) ? ZPOS : NEG;
+    /* Never leave a zero value flagged negative. */
+    if (s_mp_cmp_d(pQ, 0) == MP_EQ)
+        SIGN(pQ) = ZPOS;
+    if (s_mp_cmp_d(pR, 0) == MP_EQ)
+        SIGN(pR) = ZPOS;
+
+    /* Copy output, if it is needed      */
+    if (q && q != pQ)
+        s_mp_exch(pQ, q); /* swap: the caller's old digits end up in qtmp and are released at CLEANUP */
+
+    if (r && r != pR)
+        s_mp_exch(pR, r);
+
+CLEANUP:
+    mp_clear(&btmp);
+    mp_clear(&rtmp);
+    mp_clear(&qtmp);
+
+    return res;
+
+} /* end mp_div() */
+
+/* }}} */
+
+/* {{{ mp_div_2d(a, d, q, r) */
+
+/*
+  mp_div_2d(a, d, q, r)
+
+  Works on copies of a: q (if non-NULL) is passed through s_mp_div_2d()
+  and r (if non-NULL) through s_mp_mod_2d(), both with argument d --
+  presumably yielding q = a / 2^d and r = a mod 2^d.  Both copies are
+  made before either one is reduced.  Returns the mp_copy() error on
+  failure, MP_OKAY otherwise.
+ */
+mp_err
+mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    /* Phase 1: seed each requested output with a copy of a. */
+    if (q != NULL) {
+        res = mp_copy(a, q);
+        if (res != MP_OKAY)
+            return res;
+    }
+    if (r != NULL) {
+        res = mp_copy(a, r);
+        if (res != MP_OKAY)
+            return res;
+    }
+
+    /* Phase 2: reduce the copies in place. */
+    if (q != NULL)
+        s_mp_div_2d(q, d);
+    if (r != NULL)
+        s_mp_mod_2d(r, d);
+
+    return MP_OKAY;
+
+} /* end mp_div_2d() */
+
+/* }}} */
+
+/* {{{ mp_expt(a, b, c) */
+
+/*
+  mp_expt(a, b, c)
+
+  Compute c = a ** b, that is, raise a to the b power.  Uses a
+  standard iterative square-and-multiply technique, scanning the bits
+  of b from least to most significant.
+
+  Returns MP_RANGE if b is negative, otherwise MP_OKAY or the error
+  reported by one of the allocation / arithmetic steps.  c is written
+  only once, after the whole computation has succeeded.
+ */
+
+mp_err
+mp_expt(mp_int *a, mp_int *b, mp_int *c)
+{
+    mp_int s, x; /* s: running product, x: a raised to successive powers of two */
+    mp_err res;
+    mp_digit d;
+    unsigned int dig, bit;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (mp_cmp_z(b) < 0)
+        return MP_RANGE;
+
+    if ((res = mp_init(&s)) != MP_OKAY)
+        return res;
+
+    mp_set(&s, 1);
+
+    if ((res = mp_init_copy(&x, a)) != MP_OKAY)
+        goto X;
+
+    /* Loop over low-order digits in ascending order */
+    for (dig = 0; dig < (USED(b) - 1); dig++) {
+        d = DIGIT(b, dig);
+
+        /* Loop over bits of each non-maximal digit */
+        for (bit = 0; bit < DIGIT_BIT; bit++) {
+            if (d & 1) {
+                if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
+                    goto CLEANUP;
+            }
+
+            d >>= 1;
+
+            if ((res = s_mp_sqr(&x)) != MP_OKAY)
+                goto CLEANUP;
+        }
+    }
+
+    /* Consider now the last digit... (stop as soon as no set bits remain) */
+    d = DIGIT(b, dig);
+
+    while (d) {
+        if (d & 1) {
+            if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
+                goto CLEANUP;
+        }
+
+        d >>= 1;
+
+        if ((res = s_mp_sqr(&x)) != MP_OKAY)
+            goto CLEANUP;
+    }
+
+    /*
+      Set the sign explicitly: a ** b is negative only when a is
+      negative, b is odd and the result is non-zero.  (The previous
+      code copied a's sign for *even* b, which made e.g. (-2) ** 2
+      negative.)
+     */
+    if (mp_isodd(b) && s_mp_cmp_d(&s, 0) != MP_EQ)
+        SIGN(&s) = SIGN(a);
+    else
+        SIGN(&s) = ZPOS;
+
+    res = mp_copy(&s, c);
+
+CLEANUP:
+    mp_clear(&x);
+X:
+    mp_clear(&s);
+
+    return res;
+
+} /* end mp_expt() */
+
+/* }}} */
+
+/* {{{ mp_2expt(a, k) */
+
+/* Compute a = 2^k: argument-checked public wrapper around s_mp_2expt(). */
+
+mp_err
+mp_2expt(mp_int *a, mp_digit k)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    res = s_mp_2expt(a, k);
+    return res;
+
+} /* end mp_2expt() */
+
+/* }}} */
+
+/* {{{ mp_mod(a, m, c) */
+
+/*
+  mp_mod(a, m, c)
+
+  Compute c = a (mod m).  Result will always be 0 <= c < m.
+
+  A negative modulus is rejected with MP_RANGE.  Other failures are
+  the error codes handed back by mp_div(), mp_copy() or mp_add().
+ */
+
+mp_err
+mp_mod(const mp_int *a, const mp_int *m, mp_int *c)
+{
+    mp_err res;
+    int order;
+
+    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    if (SIGN(m) == NEG)
+        return MP_RANGE;
+
+    /* Compare magnitudes once, then handle the three outcomes in turn. */
+    order = s_mp_cmp(a, m);
+
+    /* |a| == m: the residue is zero. */
+    if (order == 0) {
+        mp_zero(c);
+        return MP_OKAY;
+    }
+
+    /* |a| > m: a real division is needed; lift a negative remainder by m. */
+    if (order > 0) {
+        res = mp_div(a, m, NULL, c);
+        if (res != MP_OKAY)
+            return res;
+        if (SIGN(c) != NEG)
+            return MP_OKAY;
+        return mp_add(c, m, c);
+    }
+
+    /* |a| < m: a copy is enough, shifted up by m when a is negative. */
+    res = mp_copy(a, c);
+    if (res != MP_OKAY)
+        return res;
+    if (mp_cmp_z(a) < 0)
+        return mp_add(c, m, c);
+
+    return MP_OKAY;
+
+} /* end mp_mod() */
+
+/* }}} */
+
+/* {{{ mp_mod_d(a, d, c) */
+
+/*
+  mp_mod_d(a, d, c)
+
+  Compute c = a (mod d).  Result will always be 0 <= c < d
+
+  The remainder is stored through c.  Returns MP_OKAY, or the error
+  reported by mp_div_d() when |a| > d.
+ */
+mp_err
+mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c)
+{
+    mp_err res;
+    mp_digit rem;
+
+    ARGCHK(a != NULL && c != NULL, MP_BADARG);
+
+    if (s_mp_cmp_d(a, d) > 0) {
+        /*
+          NOTE(review): the remainder from mp_div_d() is used as-is, even
+          when a is negative -- confirm whether mp_div_d() already folds
+          the sign in, otherwise negative a with |a| > d needs d - rem.
+         */
+        if ((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
+            return res;
+
+    } else {
+        /* |a| <= d here, so the low digit is compared directly. */
+        rem = DIGIT(a, 0);
+
+        if (rem == d)
+            rem = 0; /* |a| == d is a multiple of d; previously d itself was returned */
+        else if (SIGN(a) == NEG && rem != 0)
+            rem = d - rem; /* map -|a| into the range (0, d) */
+    }
+
+    /* Kept in case ARGCHK is compiled out. */
+    if (c)
+        *c = rem;
+
+    return MP_OKAY;
+
+} /* end mp_mod_d() */
+
+/* }}} */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Modular arithmetic */
+
+#if MP_MODARITH
+/* {{{ mp_addmod(a, b, m, c) */
+
+/*
+  mp_addmod(a, b, m, c)
+
+  c = (a + b) mod m: the sum is formed in c by mp_add() and then
+  reduced in place by mp_mod().  The error code of the first failing
+  step is returned; MP_OKAY otherwise.
+ */
+
+mp_err
+mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    res = mp_add(a, b, c);
+    if (res == MP_OKAY)
+        res = mp_mod(c, m, c);
+
+    return res;
+}
+
+/* }}} */
+
+/* {{{ mp_submod(a, b, m, c) */
+
+/*
+  mp_submod(a, b, m, c)
+
+  c = (a - b) mod m: the difference is formed in c by mp_sub() and
+  then reduced in place by mp_mod().  The error code of the first
+  failing step is returned; MP_OKAY otherwise.
+ */
+
+mp_err
+mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    res = mp_sub(a, b, c);
+    if (res == MP_OKAY)
+        res = mp_mod(c, m, c);
+
+    return res;
+}
+
+/* }}} */
+
+/* {{{ mp_mulmod(a, b, m, c) */
+
+/*
+  mp_mulmod(a, b, m, c)
+
+  c = (a * b) mod m: the product is formed in c by mp_mul() and then
+  reduced in place by mp_mod().  The error code of the first failing
+  step is returned; MP_OKAY otherwise.
+ */
+
+mp_err
+mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    res = mp_mul(a, b, c);
+    if (res == MP_OKAY)
+        res = mp_mod(c, m, c);
+
+    return res;
+}
+
+/* }}} */
+
+/* {{{ mp_sqrmod(a, m, c) */
+
+#if MP_SQUARE
+/* c = (a * a) mod m: square into c with mp_sqr(), then reduce c in place. */
+mp_err
+mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    res = mp_sqr(a, c);
+    if (res == MP_OKAY)
+        res = mp_mod(c, m, c);
+
+    return res;
+
+} /* end mp_sqrmod() */
+#endif
+
+/* }}} */
+
+/* {{{ s_mp_exptmod(a, b, m, c) */
+
+/*
+  s_mp_exptmod(a, b, m, c)
+
+  Compute c = (a ** b) mod m.  Uses a standard square-and-multiply
+  method with modular reductions at each step. (This is basically the
+  same code as mp_expt(), except for the addition of the reductions)
+
+  The modular reductions are done using Barrett's algorithm (see
+  s_mp_reduce() below for details)
+
+  Returns MP_RANGE if b is negative or m is not positive; otherwise
+  MP_OKAY or the error code of the first failing step.  c is written
+  only after the whole computation has succeeded.
+ */
+
+mp_err
+s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
+{
+    mp_int s, x, mu; /* s: running result, x: repeatedly squared base, mu: Barrett constant */
+    mp_err res;
+    mp_digit d;
+    unsigned int dig, bit;
+
+    /* m is dereferenced below, so it is validated along with the others. */
+    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    if (mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
+        return MP_RANGE;
+
+    if ((res = mp_init(&s)) != MP_OKAY)
+        return res;
+    if ((res = mp_init_copy(&x, a)) != MP_OKAY)
+        goto X;
+    /* x is live from here on: a failure must release it (label MU), not skip it. */
+    if ((res = mp_mod(&x, m, &x)) != MP_OKAY)
+        goto MU;
+    if ((res = mp_init(&mu)) != MP_OKAY)
+        goto MU;
+
+    mp_set(&s, 1);
+
+    /* mu = b^2k / m   (b = digit radix, k = USED(m)) */
+    if ((res = s_mp_add_d(&mu, 1)) != MP_OKAY)
+        goto CLEANUP;
+    if ((res = s_mp_lshd(&mu, 2 * USED(m))) != MP_OKAY)
+        goto CLEANUP;
+    if ((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
+        goto CLEANUP;
+
+    /* Loop over digits of b in ascending order, except highest order */
+    for (dig = 0; dig < (USED(b) - 1); dig++) {
+        d = DIGIT(b, dig);
+
+        /* Loop over the bits of the lower-order digits */
+        for (bit = 0; bit < DIGIT_BIT; bit++) {
+            if (d & 1) {
+                if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
+                    goto CLEANUP;
+                if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
+                    goto CLEANUP;
+            }
+
+            d >>= 1;
+
+            if ((res = s_mp_sqr(&x)) != MP_OKAY)
+                goto CLEANUP;
+            if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
+                goto CLEANUP;
+        }
+    }
+
+    /* Now do the last digit... (stop as soon as no set bits remain) */
+    d = DIGIT(b, dig);
+
+    while (d) {
+        if (d & 1) {
+            if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
+                goto CLEANUP;
+            if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
+                goto CLEANUP;
+        }
+
+        d >>= 1;
+
+        if ((res = s_mp_sqr(&x)) != MP_OKAY)
+            goto CLEANUP;
+        if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
+            goto CLEANUP;
+    }
+
+    /* Hand the result to the caller; c's old contents are freed with s below. */
+    s_mp_exch(&s, c);
+
+CLEANUP:
+    mp_clear(&mu);
+MU:
+    mp_clear(&x);
+X:
+    mp_clear(&s);
+
+    return res;
+
+} /* end s_mp_exptmod() */
+
+/* }}} */
+
+/* {{{ mp_exptmod_d(a, d, m, c) */
+
+/*
+  mp_exptmod_d(a, d, m, c)
+
+  c = (a ** d) mod m for a single-digit exponent d, by square-and-
+  multiply over the bits of d (least significant first) with an
+  mp_mod() reduction after every multiplication and squaring.
+ */
+mp_err
+mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c)
+{
+    mp_int acc, base; /* acc: running result, base: repeatedly squared a */
+    mp_err res;
+
+    ARGCHK(a != NULL && c != NULL, MP_BADARG);
+
+    res = mp_init(&acc);
+    if (res != MP_OKAY)
+        return res;
+    res = mp_init_copy(&base, a);
+    if (res != MP_OKAY)
+        goto DROP_ACC;
+
+    mp_set(&acc, 1);
+
+    for (; d != 0; d /= 2) {
+        if (d & 1) {
+            res = s_mp_mul(&acc, &base);
+            if (res != MP_OKAY)
+                goto DROP_BASE;
+            res = mp_mod(&acc, m, &acc);
+            if (res != MP_OKAY)
+                goto DROP_BASE;
+        }
+
+        res = s_mp_sqr(&base);
+        if (res != MP_OKAY)
+            goto DROP_BASE;
+        res = mp_mod(&base, m, &base);
+        if (res != MP_OKAY)
+            goto DROP_BASE;
+    }
+
+    s_mp_exch(&acc, c);
+
+DROP_BASE:
+    mp_clear(&base);
+DROP_ACC:
+    mp_clear(&acc);
+
+    return res;
+
+} /* end mp_exptmod_d() */
+
+/* }}} */
+#endif /* if MP_MODARITH */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Comparison functions */
+
+/* {{{ mp_cmp_z(a) */
+
+/*
+  mp_cmp_z(a)
+
+  Three-way comparison of a against zero: MP_LT when a carries the
+  negative sign, MP_EQ when a is the single digit 0, MP_GT otherwise.
+ */
+
+int
+mp_cmp_z(const mp_int *a)
+{
+    if (SIGN(a) == NEG)
+        return MP_LT;
+
+    return (USED(a) == 1 && DIGIT(a, 0) == 0) ? MP_EQ : MP_GT;
+
+} /* end mp_cmp_z() */
+
+/* }}} */
+
+/* {{{ mp_cmp_d(a, d) */
+
+/*
+  mp_cmp_d(a, d)
+
+  Three-way comparison of a against the single digit d.  A negative a
+  is always MP_LT; otherwise the answer comes from s_mp_cmp_d().
+ */
+
+int
+mp_cmp_d(const mp_int *a, mp_digit d)
+{
+    ARGCHK(a != NULL, MP_EQ);
+
+    return (SIGN(a) == NEG) ? MP_LT : s_mp_cmp_d(a, d);
+
+} /* end mp_cmp_d() */
+
+/* }}} */
+
+/* {{{ mp_cmp(a, b) */
+
+/*
+  Signed three-way comparison of a and b.  Differing signs decide the
+  result outright; equal signs fall back to the s_mp_cmp() result,
+  negated when both values are negative.
+ */
+int
+mp_cmp(const mp_int *a, const mp_int *b)
+{
+    int mag;
+
+    ARGCHK(a != NULL && b != NULL, MP_EQ);
+
+    if (SIGN(a) != SIGN(b))
+        return (SIGN(a) == ZPOS) ? MP_GT : MP_LT;
+
+    mag = s_mp_cmp(a, b);
+    if (mag == MP_EQ)
+        return MP_EQ;
+
+    return (SIGN(a) == ZPOS) ? mag : -mag;
+
+} /* end mp_cmp() */
+
+/* }}} */
+
+/* {{{ mp_cmp_mag(a, b) */
+
+/*
+  mp_cmp_mag(a, b)
+
+  Compares |a| <=> |b|: an argument-checked wrapper that returns the
+  s_mp_cmp() result unchanged.
+ */
+
+int
+mp_cmp_mag(const mp_int *a, const mp_int *b)
+{
+    int order;
+
+    ARGCHK(a != NULL && b != NULL, MP_EQ);
+
+    order = s_mp_cmp(a, b);
+    return order;
+
+} /* end mp_cmp_mag() */
+
+/* }}} */
+
+/* {{{ mp_isodd(a) */
+
+/*
+  mp_isodd(a)
+
+  Returns 1 when the lowest bit of a's least significant digit is set
+  (a is odd), and 0 otherwise.
+ */
+int
+mp_isodd(const mp_int *a)
+{
+    ARGCHK(a != NULL, 0);
+
+    return (DIGIT(a, 0) & 1) != 0;
+
+} /* end mp_isodd() */
+
+/* }}} */
+
+/* {{{ mp_iseven(a) */
+
+/* Logical negation of mp_isodd(): 1 when a is even, 0 when it is odd. */
+int
+mp_iseven(const mp_int *a)
+{
+    return mp_isodd(a) == 0;
+
+} /* end mp_iseven() */
+
+/* }}} */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Number theoretic functions */
+
+#if MP_NUMTH
+/* {{{ mp_gcd(a, b, c) */
+
+/*
+  mp_gcd(a, b, c)
+
+  Like the old mp_gcd() function, except computes the GCD using the
+  binary algorithm due to Josef Stein in 1961 (via Knuth).
+
+  Returns MP_RANGE when both a and b are zero.  When exactly one of
+  them is zero the other is copied to c unchanged.  Otherwise c is set
+  to the gcd of |a| and |b|, and the result is MP_OKAY or the error of
+  the first failing step.
+ */
+mp_err
+mp_gcd(mp_int *a, mp_int *b, mp_int *c)
+{
+    mp_err res;
+    mp_int u, v, t;
+    mp_size k = 0; /* number of factors of two common to a and b */
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (mp_cmp_z(a) == MP_EQ && mp_cmp_z(b) == MP_EQ)
+        return MP_RANGE;
+    if (mp_cmp_z(a) == MP_EQ) {
+        return mp_copy(b, c);
+    } else if (mp_cmp_z(b) == MP_EQ) {
+        return mp_copy(a, c);
+    }
+
+    if ((res = mp_init(&t)) != MP_OKAY)
+        return res;
+    if ((res = mp_init_copy(&u, a)) != MP_OKAY)
+        goto U;
+    if ((res = mp_init_copy(&v, b)) != MP_OKAY)
+        goto V;
+
+    /* Work on magnitudes only. */
+    SIGN(&u) = ZPOS;
+    SIGN(&v) = ZPOS;
+
+    /* Divide out common factors of 2 until at least 1 of a, b is odd */
+    while (mp_iseven(&u) && mp_iseven(&v)) {
+        s_mp_div_2(&u);
+        s_mp_div_2(&v);
+        ++k;
+    }
+
+    /* Initialize t */
+    if (mp_isodd(&u)) {
+        if ((res = mp_copy(&v, &t)) != MP_OKAY)
+            goto CLEANUP;
+
+        /* t = -v */
+        if (SIGN(&v) == ZPOS)
+            SIGN(&t) = NEG;
+        else
+            SIGN(&t) = ZPOS;
+
+    } else {
+        if ((res = mp_copy(&u, &t)) != MP_OKAY)
+            goto CLEANUP;
+    }
+
+    for (;;) {
+        /* Strip the remaining factors of two from t. */
+        while (mp_iseven(&t)) {
+            s_mp_div_2(&t);
+        }
+
+        /* A positive t replaces u; otherwise -t replaces v. */
+        if (mp_cmp_z(&t) == MP_GT) {
+            if ((res = mp_copy(&t, &u)) != MP_OKAY)
+                goto CLEANUP;
+
+        } else {
+            if ((res = mp_copy(&t, &v)) != MP_OKAY)
+                goto CLEANUP;
+
+            /* v = -t */
+            if (SIGN(&t) == ZPOS)
+                SIGN(&v) = NEG;
+            else
+                SIGN(&v) = ZPOS;
+        }
+
+        if ((res = mp_sub(&u, &v, &t)) != MP_OKAY)
+            goto CLEANUP;
+
+        /* u == v: the odd part of the gcd has been found. */
+        if (s_mp_cmp_d(&t, 0) == MP_EQ)
+            break;
+    }
+
+    /* v = 2^k -- this can fail, so the result is checked before use. */
+    if ((res = s_mp_2expt(&v, k)) != MP_OKAY)
+        goto CLEANUP;
+    res = mp_mul(&u, &v, c); /* c = u * v */
+
+CLEANUP:
+    mp_clear(&v);
+V:
+    mp_clear(&u);
+U:
+    mp_clear(&t);
+
+    return res;
+
+} /* end mp_gcd() */
+
+/* }}} */
+
+/* {{{ mp_lcm(a, b, c) */
+
+/* The least common multiple is obtained from the identity
+
+   ab = [a, b](a, b)
+
+   i.e. c = (a * b) / gcd(a, b), using mp_mul(), mp_gcd() and mp_div().
+ */
+
+mp_err
+mp_lcm(mp_int *a, mp_int *b, mp_int *c)
+{
+    mp_int gcd, prod;
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    res = mp_init(&gcd);
+    if (res != MP_OKAY)
+        return res;
+
+    res = mp_init(&prod);
+    if (res == MP_OKAY) {
+        /* Each step runs only if everything before it succeeded. */
+        res = mp_mul(a, b, &prod);
+        if (res == MP_OKAY)
+            res = mp_gcd(a, b, &gcd);
+        if (res == MP_OKAY)
+            res = mp_div(&prod, &gcd, c, NULL);
+
+        mp_clear(&prod);
+    }
+
+    mp_clear(&gcd);
+
+    return res;
+
+} /* end mp_lcm() */
+
+/* }}} */
+
+/* {{{ mp_xgcd(a, b, g, x, y) */
+
+/*
+  mp_xgcd(a, b, g, x, y)
+
+  Compute g = (a, b) and values x and y satisfying Bezout's identity
+  (that is, ax + by = g).  This uses the binary extended GCD algorithm
+  based on the Stein algorithm used for mp_gcd()
+  See algorithm 14.61 in Handbook of Applied Cryptography.
+
+  The computation is carried out on |a| and |b|.  Any of g, x and y may
+  be NULL, in which case that output is not stored.  Returns MP_RANGE
+  when a or b is zero, otherwise MP_OKAY or the error of the first
+  failing step.
+ */
+
+mp_err
+mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y)
+{
+    mp_int gx, xc, yc, u, v, A, B, C, D;
+    mp_int *clean[9]; /* temporaries initialised so far, for CLEANUP */
+    mp_err res;
+    int last = -1;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    /*
+      Zero operands are rejected.  With a == 0 the main loop below would
+      never terminate: u starts out as 0 and halving it never makes it
+      odd.
+     */
+    if (mp_cmp_z(a) == 0 || mp_cmp_z(b) == 0)
+        return MP_RANGE;
+
+    /* Initialize all these variables we need */
+    MP_CHECKOK(mp_init(&u));
+    clean[++last] = &u;
+    MP_CHECKOK(mp_init(&v));
+    clean[++last] = &v;
+    MP_CHECKOK(mp_init(&gx));
+    clean[++last] = &gx;
+    MP_CHECKOK(mp_init(&A));
+    clean[++last] = &A;
+    MP_CHECKOK(mp_init(&B));
+    clean[++last] = &B;
+    MP_CHECKOK(mp_init(&C));
+    clean[++last] = &C;
+    MP_CHECKOK(mp_init(&D));
+    clean[++last] = &D;
+    MP_CHECKOK(mp_init_copy(&xc, a));
+    clean[++last] = &xc;
+    mp_abs(&xc, &xc);
+    MP_CHECKOK(mp_init_copy(&yc, b));
+    clean[++last] = &yc;
+    mp_abs(&yc, &yc);
+
+    mp_set(&gx, 1);
+
+    /* Divide by two until at least one of them is odd */
+    while (mp_iseven(&xc) && mp_iseven(&yc)) {
+        mp_size nx = mp_trailing_zeros(&xc);
+        mp_size ny = mp_trailing_zeros(&yc);
+        mp_size n = MP_MIN(nx, ny);
+        s_mp_div_2d(&xc, n);
+        s_mp_div_2d(&yc, n);
+        MP_CHECKOK(s_mp_mul_2d(&gx, n)); /* gx collects the common power of two */
+    }
+
+    MP_CHECKOK(mp_copy(&xc, &u));
+    MP_CHECKOK(mp_copy(&yc, &v));
+    mp_set(&A, 1);
+    mp_set(&D, 1);
+
+    /* Loop through binary GCD algorithm */
+    do {
+        while (mp_iseven(&u)) {
+            s_mp_div_2(&u);
+
+            if (mp_iseven(&A) && mp_iseven(&B)) {
+                s_mp_div_2(&A);
+                s_mp_div_2(&B);
+            } else {
+                MP_CHECKOK(mp_add(&A, &yc, &A));
+                s_mp_div_2(&A);
+                MP_CHECKOK(mp_sub(&B, &xc, &B));
+                s_mp_div_2(&B);
+            }
+        }
+
+        while (mp_iseven(&v)) {
+            s_mp_div_2(&v);
+
+            if (mp_iseven(&C) && mp_iseven(&D)) {
+                s_mp_div_2(&C);
+                s_mp_div_2(&D);
+            } else {
+                MP_CHECKOK(mp_add(&C, &yc, &C));
+                s_mp_div_2(&C);
+                MP_CHECKOK(mp_sub(&D, &xc, &D));
+                s_mp_div_2(&D);
+            }
+        }
+
+        if (mp_cmp(&u, &v) >= 0) {
+            MP_CHECKOK(mp_sub(&u, &v, &u));
+            MP_CHECKOK(mp_sub(&A, &C, &A));
+            MP_CHECKOK(mp_sub(&B, &D, &B));
+        } else {
+            MP_CHECKOK(mp_sub(&v, &u, &v));
+            MP_CHECKOK(mp_sub(&C, &A, &C));
+            MP_CHECKOK(mp_sub(&D, &B, &D));
+        }
+    } while (mp_cmp_z(&u) != 0);
+
+    /* copy results to output */
+    if (x)
+        MP_CHECKOK(mp_copy(&C, x));
+
+    if (y)
+        MP_CHECKOK(mp_copy(&D, y));
+
+    if (g)
+        MP_CHECKOK(mp_mul(&gx, &v, g));
+
+CLEANUP:
+    /* Release exactly the temporaries that were initialised. */
+    while (last >= 0)
+        mp_clear(clean[last--]);
+
+    return res;
+
+} /* end mp_xgcd() */
+
+/* }}} */
+
+/*
+  mp_trailing_zeros(mp)
+
+  Returns the number of consecutive zero bits at the least significant
+  end of mp.  Returns 0 when mp is NULL, has no digit buffer, or
+  compares equal to zero.
+ */
+mp_size
+mp_trailing_zeros(const mp_int *mp)
+{
+    mp_digit d = 0;
+    mp_size n = 0;
+    unsigned int ix;
+
+    if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp))
+        return n;
+
+    /*
+      Skip whole zero digits.  The bound is tested before the digit is
+      read, so a value whose used digits are all zero can no longer
+      cause a read past the end of the digit array.
+     */
+    for (ix = 0; (ix < MP_USED(mp)) && !(d = MP_DIGIT(mp, ix)); ++ix)
+        n += MP_DIGIT_BIT;
+    if (!d)
+        return 0; /* shouldn't happen, but ... */
+
+    /* Binary search for the lowest set bit of the first non-zero digit. */
+#if !defined(MP_USE_UINT_DIGIT)
+    if (!(d & 0xffffffffU)) {
+        d >>= 32;
+        n += 32;
+    }
+#endif
+    if (!(d & 0xffffU)) {
+        d >>= 16;
+        n += 16;
+    }
+    if (!(d & 0xffU)) {
+        d >>= 8;
+        n += 8;
+    }
+    if (!(d & 0xfU)) {
+        d >>= 4;
+        n += 4;
+    }
+    if (!(d & 0x3U)) {
+        d >>= 2;
+        n += 2;
+    }
+    if (!(d & 0x1U)) {
+        d >>= 1;
+        n += 1;
+    }
+#if MP_ARGCHK == 2
+    assert(0 != (d & 1));
+#endif
+    return n;
+}
+
+/* Given a and prime p, computes c and k such that a*c == 2**k (mod p).
+** Returns k (positive) or error (negative).
+** This technique from the paper "Fast Modular Reciprocals" (unpublished)
+** by Richard Schroeppel (a.k.a. Captain Nemo).
+**
+** MP_UNDEF is returned when a is zero or when the iteration reaches
+** f == g (a and p are not relatively prime).  On success c is made
+** non-negative by adding p until its sign is MP_ZPOS.
+*/
+mp_err
+s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c)
+{
+    mp_err res;
+    mp_err k = 0; /* running count of the factors of two removed from f */
+    mp_int d, f, g;
+
+    ARGCHK(a && p && c, MP_BADARG);
+    /* Null the digit pointers so CLEANUP can clear all three unconditionally (presumably a no-op when unset). */
+    MP_DIGITS(&d) = 0;
+    MP_DIGITS(&f) = 0;
+    MP_DIGITS(&g) = 0;
+    MP_CHECKOK(mp_init(&d));
+    MP_CHECKOK(mp_init_copy(&f, a)); /* f = a */
+    MP_CHECKOK(mp_init_copy(&g, p)); /* g = p */
+
+    mp_set(c, 1);
+    mp_zero(&d);
+
+    if (mp_cmp_z(&f) == 0) {
+        res = MP_UNDEF; /* zero has no inverse */
+    } else
+        for (;;) {
+            int diff_sign;
+            while (mp_iseven(&f)) { /* strip factors of two from f, doubling d to match */
+                mp_size n = mp_trailing_zeros(&f);
+                if (!n) {
+                    res = MP_UNDEF; /* even but no trailing zero bits reported: bail out */
+                    goto CLEANUP;
+                }
+                s_mp_div_2d(&f, n);
+                MP_CHECKOK(s_mp_mul_2d(&d, n));
+                k += n;
+            }
+            if (mp_cmp_d(&f, 1) == MP_EQ) { /* f == 1 */
+                res = k;
+                break;
+            }
+            diff_sign = mp_cmp(&f, &g);
+            if (diff_sign < 0) { /* f < g */
+                s_mp_exch(&f, &g);
+                s_mp_exch(c, &d);
+            } else if (diff_sign == 0) { /* f == g */
+                res = MP_UNDEF;          /* a and p are not relatively prime */
+                break;
+            }
+            if ((MP_DIGIT(&f, 0) % 4) == (MP_DIGIT(&g, 0) % 4)) { /* same low two bits: subtract, else add */
+                MP_CHECKOK(mp_sub(&f, &g, &f)); /* f = f - g */
+                MP_CHECKOK(mp_sub(c, &d, c));   /* c = c - d */
+            } else {
+                MP_CHECKOK(mp_add(&f, &g, &f)); /* f = f + g */
+                MP_CHECKOK(mp_add(c, &d, c));   /* c = c + d */
+            }
+        }
+    if (res >= 0) { /* success: normalise c to a non-negative value and report k */
+        while (MP_SIGN(c) != MP_ZPOS) {
+            MP_CHECKOK(mp_add(c, p, c));
+        }
+        res = k; /* restore k: MP_CHECKOK above overwrote res */
+    }
+
+CLEANUP:
+    mp_clear(&d);
+    mp_clear(&f);
+    mp_clear(&g);
+    return res;
+}
+
+/* Compute T = (P ** -1) mod MP_RADIX.  Also works for 16-bit mp_digits.
+** This technique from the paper "Fast Modular Reciprocals" (unpublished)
+** by Richard Schroeppel (a.k.a. Captain Nemo).
+**
+** Starting from T = P, the update T *= 2 - P*T is applied four times,
+** or six times when MP_USE_UINT_DIGIT is not defined.
+*/
+mp_digit
+s_mp_invmod_radix(mp_digit P)
+{
+#if !defined(MP_USE_UINT_DIGIT)
+    int passes = 6;
+#else
+    int passes = 4;
+#endif
+    mp_digit T = P;
+
+    while (passes-- > 0)
+        T *= 2 - (P * T);
+
+    return T;
+}
+
+/* Given c, k, and prime p, where a*c == 2**k (mod p),
+** Compute x = (a ** -1) mod p.  This is similar to Montgomery reduction.
+** This technique from the paper "Fast Modular Reciprocals" (unpublished)
+** by Richard Schroeppel (a.k.a. Captain Nemo).
+**
+** x starts as c (or c + p when c is negative).  Multiples of p are then
+** added, up to MP_DIGIT_BIT bits per pass, and finally x is shifted
+** right by the original k bits.  Returns MP_OKAY or the error from
+** mp_add() / mp_copy() / s_mp_pad().
+*/
+mp_err
+s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x)
+{
+    int k_orig = k; /* k is consumed by the loop; keep the original for the final shift */
+    mp_digit r;
+    mp_size ix;
+    mp_err res;
+
+    if (mp_cmp_z(c) < 0) {           /* c < 0 */
+        MP_CHECKOK(mp_add(c, p, x)); /* x = c + p */
+    } else {
+        MP_CHECKOK(mp_copy(c, x)); /* x = c */
+    }
+
+    /* make sure x is large enough */
+    ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1;
+    ix = MP_MAX(ix, MP_USED(x));
+    MP_CHECKOK(s_mp_pad(x, ix));
+    /* r = -(p[0] ** -1) mod RADIX, computed from the low digit of p */
+    r = 0 - s_mp_invmod_radix(MP_DIGIT(p, 0));
+
+    for (ix = 0; k > 0; ix++) { /* one digit position per pass, handling up to MP_DIGIT_BIT bits of k */
+        int j = MP_MIN(k, MP_DIGIT_BIT);
+        mp_digit v = r * MP_DIGIT(x, ix);
+        if (j < MP_DIGIT_BIT) {
+            v &= ((mp_digit)1 << j) - 1; /* v = v mod (2 ** j) */
+        }
+        s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */
+        k -= j;
+    }
+    s_mp_clamp(x);
+    s_mp_div_2d(x, k_orig); /* x /= 2 ** k_orig */
+    res = MP_OKAY;
+
+CLEANUP:
+    return res;
+}
+
+/* Modular inverse by Schroeppel's method; usable only when m is odd.
+** Returns MP_RANGE if a or m is zero and MP_UNDEF if m is even.  When c
+** aliases a or m, the aliased input is first copied into a local
+** temporary so that writing c cannot disturb it.
+*/
+mp_err
+s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c)
+{
+    int k;
+    mp_err res;
+    mp_int x; /* private copy of whichever input aliases c */
+
+    ARGCHK(a && m && c, MP_BADARG);
+
+    if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
+        return MP_RANGE;
+    if (mp_iseven(m))
+        return MP_UNDEF;
+
+    MP_DIGITS(&x) = 0;
+
+    if (a == c) {
+        res = mp_init_copy(&x, a);
+        if (res != MP_OKAY)
+            return res;
+        if (a == m)
+            m = &x;
+        a = &x;
+    } else if (m == c) {
+        res = mp_init_copy(&x, m);
+        if (res != MP_OKAY)
+            return res;
+        m = &x;
+    }
+
+    /* Step 1: c and k with a*c == 2**k (mod m); step 2: remove the 2**k. */
+    MP_CHECKOK(s_mp_almost_inverse(a, m, c));
+    k = res;
+    MP_CHECKOK(s_mp_fixup_reciprocal(c, m, k, c));
+CLEANUP:
+    mp_clear(&x);
+    return res;
+}
+
+/* Known good algorithm for computing modular inverse.  But slow.
+** Runs mp_xgcd() on (a, m); unless the gcd is 1 there is no inverse and
+** MP_UNDEF is returned.  Otherwise c = x mod m, and c is then given the
+** sign of a.  Returns MP_RANGE if a or m is zero.
+*/
+mp_err
+mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c)
+{
+    mp_int g, x; /* g: gcd(a, m), x: Bezout coefficient belonging to a */
+    mp_err res;
+
+    ARGCHK(a && m && c, MP_BADARG);
+
+    if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
+        return MP_RANGE;
+
+    MP_DIGITS(&x) = 0;
+    MP_DIGITS(&g) = 0;
+    MP_CHECKOK(mp_init(&x));
+    MP_CHECKOK(mp_init(&g));
+
+    MP_CHECKOK(mp_xgcd(a, m, &g, &x, NULL));
+
+    if (mp_cmp_d(&g, 1) == MP_EQ) {
+        res = mp_mod(&x, m, c);
+        SIGN(c) = SIGN(a);
+    } else {
+        res = MP_UNDEF;
+    }
+
+CLEANUP:
+    mp_clear(&x);
+    mp_clear(&g);
+
+    return res;
+}
+
+/* modular inverse where modulus is 2**k. */
+/* c = a**-1 mod 2**k
+**
+** Returns MP_UNDEF when a is even.  For k <= MP_DIGIT_BIT the answer is
+** s_mp_invmod_radix() of a's low digit, masked to k bits when
+** k < MP_DIGIT_BIT.  Otherwise t1 = t1 * (2 - val * t1) mod 2**k is
+** iterated until t1 stops changing; MP_UNDEF is returned if that has
+** not happened after k + 4 rounds.
+*/
+mp_err
+s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c)
+{
+    mp_err res;
+    mp_size ix = k + 4; /* iteration budget */
+    mp_int t0, t1, val, tmp, two2k;
+
+    static const mp_digit d2 = 2;
+    static const mp_int two = { MP_ZPOS, 1, 1, (mp_digit *)&d2 }; /* the constant 2 as a read-only mp_int */
+
+    if (mp_iseven(a))
+        return MP_UNDEF;
+    if (k <= MP_DIGIT_BIT) {
+        mp_digit i = s_mp_invmod_radix(MP_DIGIT(a, 0));
+        if (k < MP_DIGIT_BIT)
+            i &= ((mp_digit)1 << k) - (mp_digit)1; /* keep only the low k bits */
+        mp_set(c, i);
+        return MP_OKAY;
+    }
+    MP_DIGITS(&t0) = 0;
+    MP_DIGITS(&t1) = 0;
+    MP_DIGITS(&val) = 0;
+    MP_DIGITS(&tmp) = 0;
+    MP_DIGITS(&two2k) = 0;
+    MP_CHECKOK(mp_init_copy(&val, a));
+    s_mp_mod_2d(&val, k); /* val = a mod 2**k */
+    MP_CHECKOK(mp_init_copy(&t0, &val)); /* t0: previous estimate */
+    MP_CHECKOK(mp_init_copy(&t1, &t0)); /* t1: current estimate, starting at val */
+    MP_CHECKOK(mp_init(&tmp));
+    MP_CHECKOK(mp_init(&two2k));
+    MP_CHECKOK(s_mp_2expt(&two2k, k)); /* two2k = 2**k */
+    do {
+        MP_CHECKOK(mp_mul(&val, &t1, &tmp));
+        MP_CHECKOK(mp_sub(&two, &tmp, &tmp)); /* tmp = 2 - val * t1 */
+        MP_CHECKOK(mp_mul(&t1, &tmp, &t1));
+        s_mp_mod_2d(&t1, k);
+        while (MP_SIGN(&t1) != MP_ZPOS) { /* bring a negative t1 back up by adding 2**k */
+            MP_CHECKOK(mp_add(&t1, &two2k, &t1));
+        }
+        if (mp_cmp(&t1, &t0) == MP_EQ) /* estimate unchanged: done */
+            break;
+        MP_CHECKOK(mp_copy(&t1, &t0));
+    } while (--ix > 0);
+    if (!ix) {
+        res = MP_UNDEF; /* budget exhausted without reaching a fixed point */
+    } else {
+        mp_exch(c, &t1); /* c's old contents end up in t1 and are released below */
+    }
+
+CLEANUP:
+    mp_clear(&t0);
+    mp_clear(&t1);
+    mp_clear(&val);
+    mp_clear(&tmp);
+    mp_clear(&two2k);
+    return res;
+}
+
+/* Modular inverse for an even modulus: c = a**-1 mod m.
+** If s_mp_ispow2(m) reports a power of two, the work is delegated to
+** s_mp_invmod_2d().  Otherwise m is split into oddFactor * 2**k, a is
+** inverted modulo each factor, and the two results are combined with
+** the Chinese Remainder Theorem.
+*/
+mp_err
+s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c)
+{
+    mp_err res;
+    mp_size k;
+    mp_int oddFactor, evenFactor; /* factors of the modulus */
+    mp_int oddPart, evenPart;     /* parts to combine via CRT. */
+    mp_int C2, tmp1, tmp2;
+
+    /*static const mp_digit d1 = 1; */
+    /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */
+
+    if ((res = s_mp_ispow2(m)) >= 0) { /* m == 2**res */
+        k = res;
+        return s_mp_invmod_2d(a, k, c);
+    }
+    MP_DIGITS(&oddFactor) = 0;
+    MP_DIGITS(&evenFactor) = 0;
+    MP_DIGITS(&oddPart) = 0;
+    MP_DIGITS(&evenPart) = 0;
+    MP_DIGITS(&C2) = 0;
+    MP_DIGITS(&tmp1) = 0;
+    MP_DIGITS(&tmp2) = 0;
+
+    MP_CHECKOK(mp_init_copy(&oddFactor, m)); /* oddFactor = m */
+    MP_CHECKOK(mp_init(&evenFactor));
+    MP_CHECKOK(mp_init(&oddPart));
+    MP_CHECKOK(mp_init(&evenPart));
+    MP_CHECKOK(mp_init(&C2));
+    MP_CHECKOK(mp_init(&tmp1));
+    MP_CHECKOK(mp_init(&tmp2));
+
+    k = mp_trailing_zeros(m);
+    s_mp_div_2d(&oddFactor, k); /* oddFactor = m / 2**k */
+    MP_CHECKOK(s_mp_2expt(&evenFactor, k)); /* evenFactor = 2**k */
+
+    /* compute a**-1 mod oddFactor. */
+    MP_CHECKOK(s_mp_invmod_odd_m(a, &oddFactor, &oddPart));
+    /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */
+    MP_CHECKOK(s_mp_invmod_2d(a, k, &evenPart));
+
+    /* Use Chinese Remainder theorem to compute a**-1 mod m. */
+    /* let m1 = oddFactor,  v1 = oddPart,
+     * let m2 = evenFactor, v2 = evenPart.
+     */
+
+    /* Compute C2 = m1**-1 mod m2. */
+    MP_CHECKOK(s_mp_invmod_2d(&oddFactor, k, &C2));
+
+    /* compute u = (v2 - v1)*C2 mod m2 */
+    MP_CHECKOK(mp_sub(&evenPart, &oddPart, &tmp1));
+    MP_CHECKOK(mp_mul(&tmp1, &C2, &tmp2));
+    s_mp_mod_2d(&tmp2, k);
+    while (MP_SIGN(&tmp2) != MP_ZPOS) { /* bring a negative u back up by adding m2 */
+        MP_CHECKOK(mp_add(&tmp2, &evenFactor, &tmp2));
+    }
+
+    /* compute answer = v1 + u*m1 */
+    MP_CHECKOK(mp_mul(&tmp2, &oddFactor, c));
+    MP_CHECKOK(mp_add(&oddPart, c, c));
+    /* not sure this is necessary, but it's low cost if not. */
+    MP_CHECKOK(mp_mod(c, m, c));
+
+CLEANUP:
+    mp_clear(&oddFactor);
+    mp_clear(&evenFactor);
+    mp_clear(&oddPart);
+    mp_clear(&evenPart);
+    mp_clear(&C2);
+    mp_clear(&tmp1);
+    mp_clear(&tmp2);
+    return res;
+}
+
+/* {{{ mp_invmod(a, m, c) */
+
+/*
+  mp_invmod(a, m, c)
+
+  Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
+  This is equivalent to the question of whether (a, m) = 1.  If not,
+  MP_UNDEF is returned, and there is no inverse.
+
+  Dispatches on the parity of m: an odd modulus goes to
+  s_mp_invmod_odd_m(), an even one to s_mp_invmod_even_m().  MP_RANGE
+  is returned when a or m is zero.
+ */
+
+mp_err
+mp_invmod(const mp_int *a, const mp_int *m, mp_int *c)
+{
+
+    ARGCHK(a && m && c, MP_BADARG);
+
+    if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
+        return MP_RANGE;
+
+    if (mp_isodd(m))
+        return s_mp_invmod_odd_m(a, m, c);
+
+    /* m is even here, so an even a shares the factor 2 with it. */
+    return mp_iseven(a) ? MP_UNDEF /* not invertable */
+                        : s_mp_invmod_even_m(a, m, c);
+
+} /* end mp_invmod() */
+
+/* }}} */
+#endif /* if MP_NUMTH */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ mp_print(mp, ofp) */
+
+#if MP_IOFUNC
+/*
+  mp_print(mp, ofp)
+
+  Writes mp to the stream 'ofp': a '-' or '+' sign character, then each
+  used digit formatted with DIGIT_FMT, most significant digit first.
+  Does nothing when either argument is NULL.
+ */
+
+void
+mp_print(mp_int *mp, FILE *ofp)
+{
+    int ix;
+    int sign_ch;
+
+    if (mp == NULL || ofp == NULL)
+        return;
+
+    if (SIGN(mp) == NEG)
+        sign_ch = '-';
+    else
+        sign_ch = '+';
+    fputc(sign_ch, ofp);
+
+    ix = USED(mp) - 1;
+    while (ix >= 0) {
+        fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
+        ix--;
+    }
+
+} /* end mp_print() */
+
+#endif /* if MP_IOFUNC */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ More I/O Functions */
+
+/* {{{ mp_read_raw(mp, str, len) */
+
+/*
+   mp_read_raw(mp, str, len)
+   Read a raw value into the given mp_int: str[0] is a sign flag (non-zero
+   means negative); str[1..len-1] hold the magnitude, base 256, high first.
+ */
+
+mp_err
+mp_read_raw(mp_int *mp, char *str, int len)
+{
+    int ix;
+    mp_err res;
+    unsigned char *ustr = (unsigned char *)str;
+
+    ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
+
+    mp_zero(mp);
+
+    /* Accumulate the magnitude with the unsigned helpers, like mp_read_radix() */
+    for (ix = 1; ix < len; ix++) {
+        if ((res = s_mp_mul_d(mp, 256)) != MP_OKAY)
+            return res;
+        if ((res = s_mp_add_d(mp, ustr[ix])) != MP_OKAY)
+            return res;
+    }
+
+    /* Apply the sign from the first byte last; zero is always ZPOS */
+    if (ustr[0] && s_mp_cmp_d(mp, 0) != MP_EQ)
+        SIGN(mp) = NEG;
+    else
+        SIGN(mp) = ZPOS;
+
+    return MP_OKAY;
+
+} /* end mp_read_raw() */
+
+/* }}} */
+
+/* {{{ mp_raw_size(mp) */
+
+int
+mp_raw_size(mp_int *mp)
+{
+    ARGCHK(mp != NULL, 0);
+    /* one leading sign byte plus sizeof(mp_digit) bytes per used digit */
+    return (USED(mp) * sizeof(mp_digit)) + 1;
+
+} /* end mp_raw_size() */
+
+/* }}} */
+
+/* {{{ mp_toraw(mp, str) */
+
+mp_err
+mp_toraw(mp_int *mp, char *str)
+{
+    int ix, jx, pos = 1;
+    /* pos starts at 1 because byte 0 is reserved for the sign */
+    ARGCHK(mp != NULL && str != NULL, MP_BADARG);
+    /* No length is checked: str must hold 1 + USED(mp) * sizeof(mp_digit) bytes */
+    str[0] = (char)SIGN(mp);
+
+    /* Iterate over each digit... */
+    for (ix = USED(mp) - 1; ix >= 0; ix--) {
+        mp_digit d = DIGIT(mp, ix);
+
+        /* Unpack digit bytes, high order first */
+        for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
+            str[pos++] = (char)(d >> (jx * CHAR_BIT));
+        }
+    }
+
+    return MP_OKAY;
+
+} /* end mp_toraw() */
+
+/* }}} */
+
+/* {{{ mp_read_radix(mp, str, radix) */
+
+/*
+  mp_read_radix(mp, str, radix)
+
+  Read an integer from the given string, and set mp to the resulting
+  value.  The input is interpreted in base 'radix' (2..MAX_RADIX).  Leading
+  non-digit characters are skipped up to an optional '-' or '+' sign, and
+  digits are then read until a non-digit character or the end of the string.
+ */
+
+mp_err
+mp_read_radix(mp_int *mp, const char *str, int radix)
+{
+    int ix = 0, val = 0;
+    mp_err res;
+    mp_sign sig = ZPOS;
+
+    ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX,
+           MP_BADARG);
+
+    mp_zero(mp);
+
+    /* Skip leading non-digit characters until a digit or '-' or '+' */
+    while (str[ix] &&
+           (s_mp_tovalue(str[ix], radix) < 0) &&
+           str[ix] != '-' &&
+           str[ix] != '+') {
+        ++ix;
+    }
+
+    if (str[ix] == '-') {
+        sig = NEG;
+        ++ix;
+    } else if (str[ix] == '+') {
+        sig = ZPOS; /* this is the default anyway... */
+        ++ix;
+    }
+    /* Accumulate digits on the magnitude: value = value * radix + digit */
+    while ((val = s_mp_tovalue(str[ix], radix)) >= 0) {
+        if ((res = s_mp_mul_d(mp, radix)) != MP_OKAY)
+            return res;
+        if ((res = s_mp_add_d(mp, val)) != MP_OKAY)
+            return res;
+        ++ix;
+    }
+    /* Zero is always stored as ZPOS, whatever sign character was parsed */
+    if (s_mp_cmp_d(mp, 0) == MP_EQ)
+        SIGN(mp) = ZPOS;
+    else
+        SIGN(mp) = sig;
+
+    return MP_OKAY;
+
+} /* end mp_read_radix() */
+/* Like mp_read_radix(), but a prefix in str may override default_radix. */
+mp_err
+mp_read_variable_radix(mp_int *a, const char *str, int default_radix)
+{
+    int radix = default_radix;
+    int cx;
+    mp_sign sig = ZPOS;
+    mp_err res;
+    ARGCHK(a != NULL && str != NULL, MP_BADARG); /* str is read just below */
+    /* Skip leading non-digit characters until a digit or '-' or '+' */
+    while ((cx = *str) != 0 &&
+           (s_mp_tovalue(cx, radix) < 0) &&
+           cx != '-' &&
+           cx != '+') {
+        ++str;
+    }
+
+    if (cx == '-') {
+        sig = NEG;
+        ++str;
+    } else if (cx == '+') {
+        sig = ZPOS; /* this is the default anyway... */
+        ++str;
+    }
+    /* "0x"/"0X" selects base 16; any other leading '0' selects base 8 */
+    if (str[0] == '0') {
+        if ((str[1] | 0x20) == 'x') {
+            radix = 16;
+            str += 2;
+        } else {
+            radix = 8;
+            str++;
+        }
+    }
+    res = mp_read_radix(a, str, radix);
+    if (res == MP_OKAY) {
+        MP_SIGN(a) = (s_mp_cmp_d(a, 0) == MP_EQ) ? ZPOS : sig;
+    }
+    return res;
+}
+
+/* }}} */
+
+/* {{{ mp_radix_size(mp, radix) */
+
+int
+mp_radix_size(mp_int *mp, int radix)
+{
+    int bits;
+    /* 0 signals a missing mp or a radix outside 2..MAX_RADIX */
+    if (!mp || radix < 2 || radix > MAX_RADIX)
+        return 0;
+    /* Sized from the number of used digits, not from the actual value */
+    bits = USED(mp) * DIGIT_BIT - 1;
+    /* NOTE(review): presumably maps a bit count to a string length -- confirm */
+    return s_mp_outlen(bits, radix);
+
+} /* end mp_radix_size() */
+
+/* }}} */
+
+/* {{{ mp_toradix(mp, str, radix) */
+
+mp_err
+mp_toradix(mp_int *mp, char *str, int radix)
+{
+    int ix, pos = 0;
+    /* str is not length-checked here; the caller must provide enough room */
+    ARGCHK(mp != NULL && str != NULL, MP_BADARG);
+    ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
+
+    if (mp_cmp_z(mp) == MP_EQ) {
+        str[0] = '0';
+        str[1] = '\0';
+    } else {
+        mp_err res;
+        mp_int tmp;
+        mp_sign sgn;
+        mp_digit rem, rdx = (mp_digit)radix;
+        char ch;
+        /* Work on a private copy so that mp itself is never modified */
+        if ((res = mp_init_copy(&tmp, mp)) != MP_OKAY)
+            return res;
+
+        /* Save sign for later, and take absolute value */
+        sgn = SIGN(&tmp);
+        SIGN(&tmp) = ZPOS;
+
+        /* Generate output digits in reverse order      */
+        while (mp_cmp_z(&tmp) != 0) {
+            if ((res = mp_div_d(&tmp, rdx, &tmp, &rem)) != MP_OKAY) {
+                mp_clear(&tmp);
+                return res;
+            }
+
+            /* Generate digits, use capital letters */
+            ch = s_mp_todigit(rem, radix, 0);
+
+            str[pos++] = ch;
+        }
+
+        /* Add - sign if original value was negative */
+        if (sgn == NEG)
+            str[pos++] = '-';
+
+        /* Add trailing NUL to end the string        */
+        str[pos--] = '\0';
+
+        /* Reverse the digits and sign indicator     */
+        ix = 0;
+        while (ix < pos) {
+            char swap = str[ix]; /* distinct name: must not shadow mp_int tmp */
+
+            str[ix] = str[pos];
+            str[pos] = swap;
+            ++ix;
+            --pos;
+        }
+
+        mp_clear(&tmp);
+    }
+
+    return MP_OKAY;
+
+} /* end mp_toradix() */
+
+/* }}} */
+
+/* {{{ mp_tovalue(ch, r) */
+
+int
+mp_tovalue(char ch, int r)
+{
+    return s_mp_tovalue(ch, r); /* thin wrapper over the internal helper */
+
+} /* end mp_tovalue() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ mp_strerror(ec) */
+
+/*
+  mp_strerror(ec)
+
+  Return a string describing the meaning of error code 'ec'.  The
+  string returned is allocated in static memory, so the caller should
+  not attempt to modify or free the memory associated with this
+  string.
+ */
+const char *
+mp_strerror(mp_err ec)
+{
+    int aec = (ec < 0) ? -ec : ec; /* magnitude of the code, used as index */
+
+    /* Code values are negative, so the senses of these comparisons
+     are accurate */
+    if (ec < MP_LAST_CODE || ec > MP_OKAY) {
+        return mp_err_string[0]; /* unknown error code */
+    } else {
+        return mp_err_string[aec + 1]; /* +1: slot 0 is the "unknown" text */
+    }
+
+} /* end mp_strerror() */
+
+/* }}} */
+
+/*========================================================================*/
+/*------------------------------------------------------------------------*/
+/* Static function definitions (internal use only)                        */
+
+/* {{{ Memory management */
+
+/* {{{ s_mp_grow(mp, min) */
+
+/* Make sure there are at least 'min' digits allocated to mp              */
+mp_err
+s_mp_grow(mp_int *mp, mp_size min)
+{
+    if (min > ALLOC(mp)) {
+        mp_digit *tmp;
+
+        /* Set min to next nearest default precision block size */
+        min = MP_ROUNDUP(min, s_mp_defprec);
+
+        if ((tmp = s_mp_alloc(min, sizeof(mp_digit))) == NULL)
+            return MP_MEM; /* mp has not been modified at this point */
+        /* Carry the used digits over; USED(mp) itself is left unchanged */
+        s_mp_copy(DIGITS(mp), tmp, USED(mp));
+        /* Zero the old buffer before releasing it, then adopt the new one */
+        s_mp_setz(DIGITS(mp), ALLOC(mp));
+        s_mp_free(DIGITS(mp));
+        DIGITS(mp) = tmp;
+        ALLOC(mp) = min;
+    }
+
+    return MP_OKAY;
+
+} /* end s_mp_grow() */
+
+/* }}} */
+
+/* {{{ s_mp_pad(mp, min) */
+
+/* Make sure the used size of mp is at least 'min', growing if needed     */
+mp_err
+s_mp_pad(mp_int *mp, mp_size min)
+{
+    if (min > USED(mp)) {
+        mp_err res;
+
+        /* Make sure there is room to increase precision  */
+        if (min > ALLOC(mp)) {
+            if ((res = s_mp_grow(mp, min)) != MP_OKAY)
+                return res;
+        } else {
+            s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
+        }
+        /* New digits are zero either way: fresh buffer, or setz above */
+        /* Increase precision; should already be 0-filled */
+        USED(mp) = min;
+    }
+
+    return MP_OKAY; /* USED(mp) is never reduced by this function */
+
+} /* end s_mp_pad() */
+
+/* }}} */
+
+/* {{{ s_mp_setz(dp, count) */
+
+/* Set 'count' digits pointed to by dp to be zeroes                       */
+void
+s_mp_setz(mp_digit *dp, mp_size count)
+{
+#if MP_MEMSET == 0
+    mp_size ix; /* same type as count: no signed/unsigned comparison */
+
+    for (ix = 0; ix < count; ix++)
+        dp[ix] = 0;
+#else
+    memset(dp, 0, count * sizeof(mp_digit));
+#endif
+
+} /* end s_mp_setz() */
+
+/* }}} */
+
+/* {{{ s_mp_copy(sp, dp, count) */
+
+/* Copy 'count' digits from sp to dp; the two ranges must not overlap      */
+void
+s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count)
+{
+#if MP_MEMCPY == 0
+    mp_size ix; /* same type as count: no signed/unsigned comparison */
+
+    for (ix = 0; ix < count; ix++)
+        dp[ix] = sp[ix];
+#else
+    memcpy(dp, sp, count * sizeof(mp_digit));
+#endif
+} /* end s_mp_copy() */
+
+/* }}} */
+
+/* {{{ s_mp_alloc(nb, ni) */
+
+/* Allocate a zero-filled array of nb records of ni bytes each (calloc)   */
+void *
+s_mp_alloc(size_t nb, size_t ni)
+{
+    return calloc(nb, ni); /* NULL on failure; s_mp_grow() checks for it */
+
+} /* end s_mp_alloc() */
+
+/* }}} */
+
+/* {{{ s_mp_free(ptr) */
+
+/* Release the memory pointed to by ptr; a null ptr is accepted           */
+void
+s_mp_free(void *ptr)
+{
+    /* free() already treats a null pointer as a no-op, so no guard is
+     * required before handing ptr back to the allocator. */
+    free(ptr);
+} /* end s_mp_free() */
+
+/* }}} */
+
+/* {{{ s_mp_clamp(mp) */
+
+/* Drop high-order zero digits from mp, always keeping at least one digit */
+void
+s_mp_clamp(mp_int *mp)
+{
+    mp_size top;
+    for (top = MP_USED(mp); top > 1 && DIGIT(mp, top - 1) == 0; --top)
+        ; /* walk down from the highest used digit while it is zero */
+    MP_USED(mp) = top;
+} /* end s_mp_clamp() */
+
+/* }}} */
+
+/* {{{ s_mp_exch(a, b) */
+
+/* Swap the complete contents of a and b; (b, a) = (a, b)                 */
+void
+s_mp_exch(mp_int *a, mp_int *b)
+{
+    /* Whole-structure assignment: every field of the mp_int is exchanged */
+    mp_int held = *b;
+
+    *b = *a;
+    *a = held;
+
+} /* end s_mp_exch() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Arithmetic helpers */
+
+/* {{{ s_mp_lshd(mp, p) */
+
+/*
+   Shift mp leftward by p digits, growing if needed, and zero-filling
+   the in-shifted digits at the right end.  This is a convenient
+   alternative to multiplication by powers of the radix
+ */
+
+mp_err
+s_mp_lshd(mp_int *mp, mp_size p)
+{
+    mp_err res;
+    unsigned int ix;
+
+    if (p == 0)
+        return MP_OKAY;
+    /* A value of zero stays zero and needs no extra digits */
+    if (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0)
+        return MP_OKAY;
+    /* Make room: USED(mp) grows by p digits */
+    if ((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY)
+        return res;
+
+    /* Shift all the significant figures over as needed */
+    for (ix = USED(mp) - p; ix-- > 0;) { /* high to low: no digit is overwritten before it is moved */
+        DIGIT(mp, ix + p) = DIGIT(mp, ix);
+    }
+
+    /* Fill the bottom digits with zeroes */
+    for (ix = 0; (mp_size)ix < p; ix++)
+        DIGIT(mp, ix) = 0;
+
+    return MP_OKAY;
+
+} /* end s_mp_lshd() */
+
+/* }}} */
+
+/* {{{ s_mp_mul_2d(mp, d) */
+
+/*
+  Multiply the integer by 2^d, where d is a number of bits.  This
+  amounts to a bitwise shift of the value.
+ */
+mp_err
+s_mp_mul_2d(mp_int *mp, mp_digit d)
+{
+    mp_err res;
+    mp_digit dshift, bshift;
+    mp_digit mask;
+
+    ARGCHK(mp != NULL, MP_BADARG);
+    /* Split the shift into whole digits (dshift) and leftover bits (bshift) */
+    dshift = d / MP_DIGIT_BIT;
+    bshift = d % MP_DIGIT_BIT;
+    /* bits to be shifted out of the top word */
+    if (bshift) {
+        mask = (mp_digit)~0 << (MP_DIGIT_BIT - bshift);
+        mask &= MP_DIGIT(mp, MP_USED(mp) - 1);
+    } else {
+        mask = 0;
+    }
+    /* Reserve dshift digits, plus one more if bits spill out of the top digit */
+    if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (mask != 0))))
+        return res;
+    /* NOTE(review): s_mp_lshd() pads by dshift again; the clamp below trims it */
+    if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift)))
+        return res;
+    /* Bit-shift the digits above the zeroed low ones, carrying bits upward */
+    if (bshift) {
+        mp_digit *pa = MP_DIGITS(mp);
+        mp_digit *alim = pa + MP_USED(mp);
+        mp_digit prev = 0;
+
+        for (pa += dshift; pa < alim;) {
+            mp_digit x = *pa;
+            *pa++ = (x << bshift) | prev;
+            prev = x >> (DIGIT_BIT - bshift);
+        }
+    }
+
+    s_mp_clamp(mp);
+    return MP_OKAY;
+} /* end s_mp_mul_2d() */
+
+/* {{{ s_mp_rshd(mp, p) */
+
+/*
+   Shift mp rightward by p digits.  Maintains the invariant that
+   digits above the precision are all zero.  Digits shifted off the
+   end are lost.  Cannot fail.
+ */
+
+void
+s_mp_rshd(mp_int *mp, mp_size p)
+{
+    mp_size ix;
+    mp_digit *src, *dst;
+
+    if (p == 0)
+        return;
+
+    /* Shortcut when all digits are to be shifted off */
+    if (p >= USED(mp)) {
+        s_mp_setz(DIGITS(mp), ALLOC(mp));
+        USED(mp) = 1;
+        SIGN(mp) = ZPOS; /* the result is zero, stored as non-negative */
+        return;
+    }
+
+    /* Shift all the significant figures over as needed */
+    dst = MP_DIGITS(mp);
+    src = dst + p;
+    for (ix = USED(mp) - p; ix > 0; ix--)
+        *dst++ = *src++;
+    /* dst now points just past the surviving digits */
+    MP_USED(mp) -= p;
+    /* Fill the top digits with zeroes */
+    while (p-- > 0)
+        *dst++ = 0;
+
+} /* end s_mp_rshd() */
+
+/* }}} */
+
+/* {{{ s_mp_div_2(mp) */
+
+/* Divide by two -- take advantage of radix properties to do it fast      */
+void
+s_mp_div_2(mp_int *mp)
+{
+    s_mp_div_2d(mp, 1); /* a one-bit right shift */
+
+} /* end s_mp_div_2() */
+
+/* }}} */
+
+/* {{{ s_mp_mul_2(mp) */
+
+mp_err
+s_mp_mul_2(mp_int *mp)
+{
+    mp_digit *pd;
+    unsigned int ix, used;
+    mp_digit kin = 0; /* bit carried in from the digit below */
+
+    /* Shift digits leftward by 1 bit */
+    used = MP_USED(mp);
+    pd = MP_DIGITS(mp);
+    for (ix = 0; ix < used; ix++) {
+        mp_digit d = *pd;
+        *pd++ = (d << 1) | kin;
+        kin = (d >> (DIGIT_BIT - 1));
+    }
+
+    /* Deal with rollover from last digit */
+    if (kin) {
+        if (ix >= ALLOC(mp)) {
+            mp_err res;
+            if ((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
+                return res; /* NOTE: the digits have already been shifted */
+        }
+        /* ix equals the old USED(mp) here, so this appends a new top digit */
+        DIGIT(mp, ix) = kin;
+        USED(mp) += 1;
+    }
+
+    return MP_OKAY;
+
+} /* end s_mp_mul_2() */
+
+/* }}} */
+
+/* {{{ s_mp_mod_2d(mp, d) */
+
+/*
+  Remainder the integer by 2^d, where d is a number of bits.  This
+  amounts to a bitwise AND of the value, and does not require the full
+  division code
+ */
+void
+s_mp_mod_2d(mp_int *mp, mp_digit d)
+{
+    mp_size ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT);
+    mp_size ix;
+    mp_digit dmask;
+    /* No used digit reaches bit d, so the value is already reduced */
+    if (ndig >= USED(mp))
+        return;
+
+    /* Flush all the bits above 2^d in its digit */
+    dmask = ((mp_digit)1 << nbit) - 1;
+    DIGIT(mp, ndig) &= dmask;
+
+    /* Flush all digits above the one with 2^d in it */
+    for (ix = ndig + 1; ix < USED(mp); ix++)
+        DIGIT(mp, ix) = 0;
+    /* Only magnitude digits are cleared; the sign field is not touched */
+    s_mp_clamp(mp);
+
+} /* end s_mp_mod_2d() */
+
+/* }}} */
+
+/* {{{ s_mp_div_2d(mp, d) */
+
+/*
+  Divide the integer by 2^d, where d is a number of bits.  This
+  amounts to a bitwise shift of the value, and does not require the
+  full division code (used in Barrett reduction, see below)
+ */
+void
+s_mp_div_2d(mp_int *mp, mp_digit d)
+{
+    int ix;
+    mp_digit save, next, mask;
+    /* Whole-digit part of the shift first, then the remaining bits */
+    s_mp_rshd(mp, d / DIGIT_BIT);
+    d %= DIGIT_BIT;
+    if (d) {
+        mask = ((mp_digit)1 << d) - 1;
+        save = 0;
+        for (ix = USED(mp) - 1; ix >= 0; ix--) {
+            next = DIGIT(mp, ix) & mask; /* low bits that drop into the digit below */
+            DIGIT(mp, ix) = (DIGIT(mp, ix) >> d) | (save << (DIGIT_BIT - d));
+            save = next;
+        }
+    }
+    s_mp_clamp(mp);
+
+} /* end s_mp_div_2d() */
+
+/* }}} */
+
+/* {{{ s_mp_norm(a, b, *d) */
+
+/*
+  s_mp_norm(a, b, pd)
+
+  Normalize a and b for division, where b is the divisor.  In order
+  that we might make good guesses for quotient digits, we want the
+  leading digit of b to be at least half the radix, which we
+  accomplish by multiplying a and b by a power of 2.  The exponent
+  (shift count) is placed in *pd, so that the remainder can be shifted
+  back at the end of the division process.
+ */
+
+mp_err
+s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd)
+{
+    mp_digit d;
+    mp_digit mask;
+    mp_digit b_msd;
+    mp_err res = MP_OKAY;
+    /* Count leading zero bits of b's top digit (a zero digit would never exit) */
+    d = 0;
+    mask = DIGIT_MAX & ~(DIGIT_MAX >> 1); /* mask is msb of digit */
+    b_msd = DIGIT(b, USED(b) - 1);
+    while (!(b_msd & mask)) {
+        b_msd <<= 1;
+        ++d;
+    }
+    /* Scale both operands by 2^d; nothing to do when b is already normalized */
+    if (d) {
+        MP_CHECKOK(s_mp_mul_2d(a, d));
+        MP_CHECKOK(s_mp_mul_2d(b, d));
+    }
+
+    *pd = d;
+CLEANUP:
+    return res;
+
+} /* end s_mp_norm() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Primitive digit arithmetic */
+
+/* {{{ s_mp_add_d(mp, d) */
+
+/* Add d to |mp| in place                                                 */
+mp_err s_mp_add_d(mp_int *mp, mp_digit d) /* unsigned digit addition */
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+    mp_word w, k = 0;
+    mp_size ix = 1;
+    /* Double-width path: add d into digit 0, then ripple the carry k upward */
+    w = (mp_word)DIGIT(mp, 0) + d;
+    DIGIT(mp, 0) = ACCUM(w);
+    k = CARRYOUT(w);
+
+    while (ix < USED(mp) && k) {
+        w = (mp_word)DIGIT(mp, ix) + k;
+        DIGIT(mp, ix) = ACCUM(w);
+        k = CARRYOUT(w);
+        ++ix;
+    }
+    /* Carry out of the top digit: extend mp by one digit to hold it */
+    if (k != 0) {
+        mp_err res;
+
+        if ((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY)
+            return res;
+
+        DIGIT(mp, ix) = (mp_digit)k;
+    }
+
+    return MP_OKAY;
+#else
+    mp_digit *pmp = MP_DIGITS(mp);
+    mp_digit sum, mp_i, carry = 0;
+    mp_err res = MP_OKAY;
+    int used = (int)MP_USED(mp);
+    /* Digit-only path: a wrapped sum (sum < d, or sum == 0) signals a carry */
+    mp_i = *pmp;
+    *pmp++ = sum = d + mp_i;
+    carry = (sum < d);
+    while (carry && --used > 0) {
+        mp_i = *pmp;
+        *pmp++ = sum = carry + mp_i;
+        carry = !sum;
+    }
+    if (carry && !used) {
+        /* mp is growing */
+        used = MP_USED(mp);
+        MP_CHECKOK(s_mp_pad(mp, used + 1));
+        MP_DIGIT(mp, used) = carry;
+    }
+CLEANUP:
+    return res;
+#endif
+} /* end s_mp_add_d() */
+
+/* }}} */
+
+/* {{{ s_mp_sub_d(mp, d) */
+
+/* Subtract d from |mp| in place, assumes |mp| >= d (else MP_RANGE)       */
+mp_err s_mp_sub_d(mp_int *mp, mp_digit d) /* unsigned digit subtract */
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+    mp_word w, b = 0;
+    mp_size ix = 1;
+    /* RADIX is added before subtracting; b is set when CARRYOUT(w) is clear */
+    /* Compute initial subtraction    */
+    w = (RADIX + (mp_word)DIGIT(mp, 0)) - d;
+    b = CARRYOUT(w) ? 0 : 1;
+    DIGIT(mp, 0) = ACCUM(w);
+
+    /* Propagate borrows leftward     */
+    while (b && ix < USED(mp)) {
+        w = (RADIX + (mp_word)DIGIT(mp, ix)) - b;
+        b = CARRYOUT(w) ? 0 : 1;
+        DIGIT(mp, ix) = ACCUM(w);
+        ++ix;
+    }
+
+    /* Remove leading zeroes          */
+    s_mp_clamp(mp);
+
+    /* If we have a borrow out, it's a violation of the input invariant */
+    if (b)
+        return MP_RANGE;
+    else
+        return MP_OKAY;
+#else
+    mp_digit *pmp = MP_DIGITS(mp);
+    mp_digit mp_i, diff, borrow;
+    mp_size used = MP_USED(mp);
+    /* Digit-only path: a wrapped difference (diff > mp_i) signals a borrow */
+    mp_i = *pmp;
+    *pmp++ = diff = mp_i - d;
+    borrow = (diff > mp_i);
+    while (borrow && --used) {
+        mp_i = *pmp;
+        *pmp++ = diff = mp_i - borrow;
+        borrow = (diff > mp_i);
+    }
+    s_mp_clamp(mp);
+    return (borrow && !used) ? MP_RANGE : MP_OKAY;
+#endif
+} /* end s_mp_sub_d() */
+
+/* }}} */
+
+/* {{{ s_mp_mul_d(a, d) */
+
+/* Compute a = a * d, single digit multiplication                         */
+mp_err
+s_mp_mul_d(mp_int *a, mp_digit d)
+{
+    mp_err res;
+    mp_size used;
+    int pow;
+    /* Fast paths: d == 0, d == 1, and d a power of two (done as a shift) */
+    if (!d) {
+        mp_zero(a);
+        return MP_OKAY;
+    }
+    if (d == 1)
+        return MP_OKAY;
+    if (0 <= (pow = s_mp_ispow2d(d))) {
+        return s_mp_mul_2d(a, (mp_digit)pow);
+    }
+    /* General case: reserve one extra digit above the current top */
+    used = MP_USED(a);
+    MP_CHECKOK(s_mp_pad(a, used + 1));
+    /* NOTE(review): presumably writes used + 1 digits in place -- confirm */
+    s_mpv_mul_d(MP_DIGITS(a), used, d, MP_DIGITS(a));
+
+    s_mp_clamp(a);
+
+CLEANUP:
+    return res;
+
+} /* end s_mp_mul_d() */
+
+/* }}} */
+
+/* {{{ s_mp_div_d(mp, d, r) */
+
+/*
+  s_mp_div_d(mp, d, r)
+
+  Compute the quotient mp = mp / d and remainder r = mp mod d, for a single
+  digit d.  If r is null, the remainder is discarded.  MP_RANGE if d == 0.
+ */
+
+mp_err
+s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
+    mp_word w = 0, q;
+#else
+    mp_digit w = 0, q;
+#endif
+    int ix;
+    mp_err res;
+    mp_int quot;
+    mp_int rem;
+
+    if (d == 0)
+        return MP_RANGE;
+    if (d == 1) {
+        if (r)
+            *r = 0;
+        return MP_OKAY;
+    }
+    /* could check for power of 2 here, but mp_div_d does that. */
+    if (MP_USED(mp) == 1) {
+        mp_digit n = MP_DIGIT(mp, 0);
+        mp_digit rdig; /* named apart from the mp_int 'rem' declared above */
+
+        q = n / d;
+        rdig = n % d;
+        MP_DIGIT(mp, 0) = q;
+        if (r)
+            *r = rdig;
+        return MP_OKAY;
+    }
+    /* Null both digit pointers first: CLEANUP clears quot and rem on every path */
+    MP_DIGITS(&rem) = 0;
+    MP_DIGITS(&quot) = 0;
+    /* Make room for the quotient */
+    MP_CHECKOK(mp_init_size(&quot, USED(mp)));
+
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
+    for (ix = USED(mp) - 1; ix >= 0; ix--) {
+        w = (w << DIGIT_BIT) | DIGIT(mp, ix);
+        /* w now holds the running remainder followed by the next digit */
+        if (w >= d) {
+            q = w / d;
+            w = w % d;
+        } else {
+            q = 0;
+        }
+        /* Check the shift, as the digit-only path does; never ignore a failure */
+        MP_CHECKOK(s_mp_lshd(&quot, 1));
+        DIGIT(&quot, 0) = (mp_digit)q;
+    }
+#else
+    {
+        mp_digit p;
+#if !defined(MP_ASSEMBLY_DIV_2DX1D)
+        mp_digit norm;
+#endif
+
+        MP_CHECKOK(mp_init_copy(&rem, mp));
+
+#if !defined(MP_ASSEMBLY_DIV_2DX1D)
+        MP_DIGIT(&quot, 0) = d;
+        MP_CHECKOK(s_mp_norm(&rem, &quot, &norm));
+        if (norm)
+            d <<= norm;
+        MP_DIGIT(&quot, 0) = 0;
+#endif
+
+        p = 0;
+        for (ix = USED(&rem) - 1; ix >= 0; ix--) {
+            w = DIGIT(&rem, ix);
+
+            if (p) {
+                MP_CHECKOK(s_mpv_div_2dx1d(p, w, d, &q, &w));
+            } else if (w >= d) {
+                q = w / d;
+                w = w % d;
+            } else {
+                q = 0;
+            }
+
+            MP_CHECKOK(s_mp_lshd(&quot, 1));
+            DIGIT(&quot, 0) = q;
+            p = w;
+        }
+#if !defined(MP_ASSEMBLY_DIV_2DX1D)
+        if (norm)
+            w >>= norm;
+#endif
+    }
+#endif
+
+    /* Deliver the remainder, if desired */
+    if (r) {
+        *r = (mp_digit)w;
+    }
+
+    s_mp_clamp(&quot);
+    mp_exch(&quot, mp);
+CLEANUP:
+    mp_clear(&quot);
+    mp_clear(&rem);
+
+    return res;
+} /* end s_mp_div_d() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Primitive full arithmetic */
+
+/* {{{ s_mp_add(a, b) */
+
+/* Compute a = |a| + |b|                                                  */
+mp_err s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition      */
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+    mp_word w = 0;
+#else
+    mp_digit d, sum, carry = 0;
+#endif
+    mp_digit *pa, *pb;
+    mp_size ix;
+    mp_size used;
+    mp_err res;
+    /* Signs are neither read nor written here: only the digits are summed */
+    /* Make sure a has enough precision for the output value */
+    if ((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY)
+        return res;
+
+    /*
+      Add up all digits up to the precision of b.  If b had initially
+      the same precision as a, or greater, we took care of it by the
+      padding step above, so there is no problem.  If b had initially
+      less precision, we'll have to make sure the carry out is duly
+      propagated upward among the higher-order digits of the sum.
+     */
+    pa = MP_DIGITS(a);
+    pb = MP_DIGITS(b);
+    used = MP_USED(b);
+    for (ix = 0; ix < used; ix++) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+        w = w + *pa + *pb++;
+        *pa++ = ACCUM(w);
+        w = CARRYOUT(w);
+#else
+        d = *pa;
+        sum = d + *pb++;
+        d = (sum < d); /* detect overflow */
+        *pa++ = sum += carry;
+        carry = d + (sum < carry); /* detect overflow */
+#endif
+    }
+
+    /* If we run out of 'b' digits before we're actually done, make
+       sure the carries get propagated upward...
+     */
+    used = MP_USED(a);
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+    while (w && ix < used) {
+        w = w + *pa;
+        *pa++ = ACCUM(w);
+        w = CARRYOUT(w);
+        ++ix;
+    }
+#else
+    while (carry && ix < used) {
+        sum = carry + *pa;
+        *pa++ = sum;
+        carry = !sum;
+        ++ix;
+    }
+#endif
+
+/* If there's an overall carry out, increase precision and include
+     it.  We could have done this initially, but why touch the memory
+     allocator unless we're sure we have to?
+   */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+    if (w) {
+        if ((res = s_mp_pad(a, used + 1)) != MP_OKAY)
+            return res;
+
+        DIGIT(a, ix) = (mp_digit)w; /* ix == used once the carry loop ends */
+    }
+#else
+    if (carry) {
+        if ((res = s_mp_pad(a, used + 1)) != MP_OKAY)
+            return res;
+
+        DIGIT(a, used) = carry;
+    }
+#endif
+
+    return MP_OKAY;
+} /* end s_mp_add() */
+
+/* }}} */
+
+/* Compute c = |a| + |b|         */ /* magnitude addition      */
+mp_err
+s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)
+{
+    mp_digit *pa, *pb, *pc;
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+    mp_word w = 0;
+#else
+    mp_digit sum, carry = 0, d;
+#endif
+    mp_size ix;
+    mp_size used;
+    mp_err res;
+
+    MP_SIGN(c) = MP_SIGN(a); /* c takes the sign of the caller's a */
+    if (MP_USED(a) < MP_USED(b)) { /* make a the operand with more digits */
+        const mp_int *xch = a;
+        a = b;
+        b = xch;
+    }
+
+    /* Make sure c has enough precision for the longer operand */
+    if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
+        return res;
+
+    /*
+     Add up all digits up to the precision of b.  If b had initially
+     the same precision as a, or greater, we took care of it by the
+     exchange step above, so there is no problem.  If b had initially
+     less precision, we'll have to make sure the carry out is duly
+     propagated upward among the higher-order digits of the sum.
+    */
+    pa = MP_DIGITS(a);
+    pb = MP_DIGITS(b);
+    pc = MP_DIGITS(c);
+    used = MP_USED(b);
+    for (ix = 0; ix < used; ix++) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+        w = w + *pa++ + *pb++;
+        *pc++ = ACCUM(w);
+        w = CARRYOUT(w);
+#else
+        d = *pa++;
+        sum = d + *pb++;
+        d = (sum < d); /* detect overflow */
+        *pc++ = sum += carry;
+        carry = d + (sum < carry); /* detect overflow */
+#endif
+    }
+
+    /* If we run out of 'b' digits before we're actually done, make
+     sure the carries get propagated upward...
+   */
+    for (used = MP_USED(a); ix < used; ++ix) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+        w = w + *pa++;
+        *pc++ = ACCUM(w);
+        w = CARRYOUT(w);
+#else
+        *pc++ = sum = carry + *pa++;
+        carry = (sum < carry);
+#endif
+    }
+
+/* If there's an overall carry out, increase precision and include
+     it.  We could have done this initially, but why touch the memory
+     allocator unless we're sure we have to?
+   */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+    if (w) {
+        if ((res = s_mp_pad(c, used + 1)) != MP_OKAY)
+            return res;
+
+        DIGIT(c, used) = (mp_digit)w;
+        ++used;
+    }
+#else
+    if (carry) {
+        if ((res = s_mp_pad(c, used + 1)) != MP_OKAY)
+            return res;
+
+        DIGIT(c, used) = carry;
+        ++used;
+    }
+#endif
+    MP_USED(c) = used; /* set explicitly: c may have had more used digits before */
+    return MP_OKAY;
+}
+/* {{{ s_mp_add_offset(a, b, offset) */
+
+/* Compute a = |a| + ( |b| * (RADIX ** offset) )             */
+mp_err
+s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+    mp_word w, k = 0;
+#else
+    mp_digit d, sum, carry = 0;
+#endif
+    mp_size ib;
+    mp_size ia;
+    mp_size lim;
+    mp_err res;
+    /* Digit ib of b is added into digit ib + offset of a */
+    /* Make sure a has enough precision for the output value */
+    lim = MP_USED(b) + offset;
+    if ((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY)
+        return res;
+
+    /*
+    Add up all digits up to the precision of b.  If b had initially
+    the same precision as a, or greater, we took care of it by the
+    padding step above, so there is no problem.  If b had initially
+    less precision, we'll have to make sure the carry out is duly
+    propagated upward among the higher-order digits of the sum.
+   */
+    lim = USED(b);
+    for (ib = 0, ia = offset; ib < lim; ib++, ia++) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+        w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k;
+        DIGIT(a, ia) = ACCUM(w);
+        k = CARRYOUT(w);
+#else
+        d = MP_DIGIT(a, ia);
+        sum = d + MP_DIGIT(b, ib);
+        d = (sum < d);
+        MP_DIGIT(a, ia) = sum += carry;
+        carry = d + (sum < carry);
+#endif
+    }
+
+/* If we run out of 'b' digits before we're actually done, make
+     sure the carries get propagated upward...
+   */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+    for (lim = MP_USED(a); k && (ia < lim); ++ia) {
+        w = (mp_word)DIGIT(a, ia) + k;
+        DIGIT(a, ia) = ACCUM(w);
+        k = CARRYOUT(w);
+    }
+#else
+    for (lim = MP_USED(a); carry && (ia < lim); ++ia) {
+        d = MP_DIGIT(a, ia);
+        MP_DIGIT(a, ia) = sum = d + carry;
+        carry = (sum < d);
+    }
+#endif
+
+/* If there's an overall carry out, increase precision and include
+     it.  We could have done this initially, but why touch the memory
+     allocator unless we're sure we have to?
+   */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+    if (k) {
+        if ((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
+            return res;
+
+        DIGIT(a, ia) = (mp_digit)k; /* ia is the old USED(a) at this point */
+    }
+#else
+    if (carry) {
+        if ((res = s_mp_pad(a, lim + 1)) != MP_OKAY)
+            return res;
+
+        DIGIT(a, lim) = carry;
+    }
+#endif
+    s_mp_clamp(a);
+
+    return MP_OKAY;
+
+} /* end s_mp_add_offset() */
+
+/* }}} */
+
+/* {{{ s_mp_sub(a, b) */
+
+/* Compute a = |a| - |b|, assumes |a| >= |b|                              */
+mp_err s_mp_sub(mp_int *a, const mp_int *b) /* magnitude subtract      */
+{
+    mp_digit *pa, *pb, *limit;
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+    mp_sword w = 0;
+#else
+    mp_digit d, diff, borrow = 0;
+#endif
+    /* NOTE(review): a is not padded; assumes USED(a) >= USED(b) -- confirm */
+    /*
+    Subtract and propagate borrow.  Up to the precision of b, this
+    accounts for the digits of b; after that, we just make sure the
+    carries get to the right place.  This saves having to pad b out to
+    the precision of a just to make the loops work right...
+   */
+    pa = MP_DIGITS(a);
+    pb = MP_DIGITS(b);
+    limit = pb + MP_USED(b);
+    while (pb < limit) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+        w = w + *pa - *pb++;
+        *pa++ = ACCUM(w);
+        w >>= MP_DIGIT_BIT; /* presumably keeps the sign: non-zero w is a borrow */
+#else
+        d = *pa;
+        diff = d - *pb++;
+        d = (diff > d); /* detect borrow */
+        if (borrow && --diff == MP_DIGIT_MAX)
+            ++d;
+        *pa++ = diff;
+        borrow = d;
+#endif
+    }
+    limit = MP_DIGITS(a) + MP_USED(a);
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+    while (w && pa < limit) {
+        w = w + *pa;
+        *pa++ = ACCUM(w);
+        w >>= MP_DIGIT_BIT;
+    }
+#else
+    while (borrow && pa < limit) {
+        d = *pa;
+        *pa++ = diff = d - borrow;
+        borrow = (diff > d);
+    }
+#endif
+
+    /* Clobber any leading zeroes we created    */
+    s_mp_clamp(a);
+
+/*
+     If there was a borrow out, then |b| > |a| in violation
+     of our input invariant.  We've already done the work,
+     but we'll at least complain about it...
+   */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+    return w ? MP_RANGE : MP_OKAY;
+#else
+    return borrow ? MP_RANGE : MP_OKAY;
+#endif
+} /* end s_mp_sub() */
+
+/* }}} */
+
+/* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract      */
+mp_err
+s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)
+{
+    mp_digit *pa, *pb, *pc;
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+    mp_sword w = 0;
+#else
+    mp_digit d, diff, borrow = 0;
+#endif
+    int ix, limit;
+    mp_err res;
+
+    MP_SIGN(c) = MP_SIGN(a); /* c takes the sign of a */
+
+    /* Make sure c has enough precision for the output value */
+    if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
+        return res;
+
+    /*
+    Subtract and propagate borrow.  Up to the precision of b, this
+    accounts for the digits of b; after that, we just make sure the
+    carries get to the right place.  This saves having to pad b out to
+    the precision of a just to make the loops work right...
+   */
+    pa = MP_DIGITS(a);
+    pb = MP_DIGITS(b);
+    pc = MP_DIGITS(c);
+    limit = MP_USED(b);
+    for (ix = 0; ix < limit; ++ix) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+        w = w + *pa++ - *pb++;
+        *pc++ = ACCUM(w);
+        w >>= MP_DIGIT_BIT;
+#else
+        d = *pa++;
+        diff = d - *pb++;
+        d = (diff > d);
+        if (borrow && --diff == MP_DIGIT_MAX)
+            ++d;
+        *pc++ = diff;
+        borrow = d;
+#endif
+    }
+    for (limit = MP_USED(a); ix < limit; ++ix) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+        w = w + *pa++;
+        *pc++ = ACCUM(w);
+        w >>= MP_DIGIT_BIT;
+#else
+        d = *pa++;
+        *pc++ = diff = d - borrow;
+        borrow = (diff > d);
+#endif
+    }
+
+    /* Clobber any leading zeroes we created    */
+    MP_USED(c) = ix; /* every digit of a was visited, so ix == USED(a) */
+    s_mp_clamp(c);
+
+/*
+     If there was a borrow out, then |b| > |a| in violation
+     of our input invariant.  We've already done the work,
+     but we'll at least complain about it...
+   */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+    return w ? MP_RANGE : MP_OKAY;
+#else
+    return borrow ? MP_RANGE : MP_OKAY;
+#endif
+}
+/* {{{ s_mp_mul(a, b) */
+
+/* Compute a = a * b via mp_mul(), with a serving as both input and output */
+mp_err s_mp_mul(mp_int *a, const mp_int *b)
+{
+    mp_err rv = mp_mul(a, b, a);
+    return rv;
+} /* end s_mp_mul() */
+
+/* }}} */
+
+#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
+/* (Phi,Plo) = a * b, high and low digit. This trick works on Sparc V8 CPUs with the Workshop compilers. */
+#define MP_MUL_DxD(a, b, Phi, Plo)                                  \
+    {                                                               \
+        unsigned long long product = (unsigned long long)(a) * (b); \
+        Plo = (mp_digit)product;                                    \
+        Phi = (mp_digit)(product >> MP_DIGIT_BIT);                  \
+    }
+#elif defined(OSF1) /* compiler asm() intrinsics supply the low and high halves */
+#define MP_MUL_DxD(a, b, Phi, Plo)              \
+    {                                           \
+        Plo = asm("mulq %a0, %a1, %v0", a, b);  \
+        Phi = asm("umulh %a0, %a1, %v0", a, b); \
+    }
+#else /* portable form: four half-digit products; arguments are evaluated more than once */
+#define MP_MUL_DxD(a, b, Phi, Plo)                                     \
+    {                                                                  \
+        mp_digit a0b1, a1b0;                                           \
+        Plo = ((a) & MP_HALF_DIGIT_MAX) * ((b) & MP_HALF_DIGIT_MAX);   \
+        Phi = ((a) >> MP_HALF_DIGIT_BIT) * ((b) >> MP_HALF_DIGIT_BIT); \
+        a0b1 = ((a) & MP_HALF_DIGIT_MAX) * ((b) >> MP_HALF_DIGIT_BIT); \
+        a1b0 = ((a) >> MP_HALF_DIGIT_BIT) * ((b) & MP_HALF_DIGIT_MAX); \
+        a1b0 += a0b1;                                                  \
+        Phi += a1b0 >> MP_HALF_DIGIT_BIT;                              \
+        if (a1b0 < a0b1)                                               \
+            Phi += MP_HALF_RADIX;                                      \
+        a1b0 <<= MP_HALF_DIGIT_BIT;                                    \
+        Plo += a1b0;                                                   \
+        if (Plo < a1b0)                                                \
+            ++Phi;                                                     \
+    }
+#endif
+
+#if !defined(MP_ASSEMBLY_MULTIPLY)
+/* c[0 .. a_len] = a[0 .. a_len-1] * b; the last carry is stored in c[a_len] */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_size ix;
+    mp_digit carry = 0; /* high digit handed on to the next position */
+
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
+    /* One multiply in the wider mp_word type per digit of a */
+    for (ix = 0; ix < a_len; ++ix) {
+        mp_word prod = ((mp_word)b * a[ix]) + carry;
+        c[ix] = ACCUM(prod);
+        carry = CARRYOUT(prod);
+    }
+#else
+    /* No mp_word multiply available: MP_MUL_DxD yields the two halves */
+    for (ix = 0; ix < a_len; ++ix) {
+        mp_digit a_i = a[ix];
+        mp_digit lo, hi;
+
+        MP_MUL_DxD(a_i, b, hi, lo);
+
+        lo += carry;
+        if (lo < carry) /* the add wrapped, so bump the high digit */
+            ++hi;
+        c[ix] = lo;
+        carry = hi;
+    }
+#endif
+    c[a_len] = carry;
+}
+
+/* c[0 .. a_len-1] += a * b; the final carry is stored (not added) in c[a_len] */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
+                mp_digit *c)
+{
+    mp_size ix;
+    mp_digit carry = 0; /* high digit handed on to the next position */
+
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
+    /* Multiply one digit of a, then add the old digit of c and the carry */
+    for (ix = 0; ix < a_len; ++ix) {
+        mp_word sum = ((mp_word)b * a[ix]) + c[ix] + carry;
+        c[ix] = ACCUM(sum);
+        carry = CARRYOUT(sum);
+    }
+#else
+    for (ix = 0; ix < a_len; ++ix) {
+        mp_digit a_i = a[ix];
+        mp_digit c_i = c[ix];
+        mp_digit lo, hi;
+
+        MP_MUL_DxD(a_i, b, hi, lo);
+
+        lo += carry;
+        if (lo < carry) /* each wrapped add bumps the high digit */
+            ++hi;
+        lo += c_i;
+        if (lo < c_i)
+            ++hi;
+        c[ix] = lo;
+        carry = hi;
+    }
+#endif
+    c[a_len] = carry;
+}
+
+/* Presently, this is only used by the Montgomery arithmetic code. */
+/* c += a * b, with the carry rippled into c[a_len] and above as needed */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_size ix;
+    mp_digit carry = 0; /* high digit handed on to the next position */
+
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
+    /* Multiply-accumulate across the digits of a */
+    for (ix = 0; ix < a_len; ++ix) {
+        mp_word sum = ((mp_word)b * a[ix]) + c[ix] + carry;
+        c[ix] = ACCUM(sum);
+        carry = CARRYOUT(sum);
+    }
+
+    /* Keep adding the carry into higher digits of c until it dies out */
+    for (; carry; ++ix) {
+        mp_word sum = (mp_word)c[ix] + carry;
+        c[ix] = ACCUM(sum);
+        carry = CARRYOUT(sum);
+    }
+#else
+    for (ix = 0; ix < a_len; ++ix) {
+        mp_digit a_i = a[ix];
+        mp_digit c_i = c[ix];
+        mp_digit lo, hi;
+
+        MP_MUL_DxD(a_i, b, hi, lo);
+
+        lo += carry;
+        if (lo < carry) /* each wrapped add bumps the high digit */
+            ++hi;
+        lo += c_i;
+        if (lo < c_i)
+            ++hi;
+        c[ix] = lo;
+        carry = hi;
+    }
+    /* Keep adding the carry into higher digits of c until it dies out */
+    for (; carry; ++ix) {
+        mp_digit c_i = c[ix];
+        c[ix] = carry + c_i;
+        carry = (c[ix] < c_i);
+    }
+#endif
+}
+#endif
+
+#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
+/* (Phi,Plo) = a * a, high and low digit. This trick works on Sparc V8 CPUs with the Workshop compilers. */
+#define MP_SQR_D(a, Phi, Plo)                                      \
+    {                                                              \
+        unsigned long long square = (unsigned long long)(a) * (a); \
+        Plo = (mp_digit)square;                                    \
+        Phi = (mp_digit)(square >> MP_DIGIT_BIT);                  \
+    }
+#elif defined(OSF1) /* compiler asm() intrinsics supply the low and high halves */
+#define MP_SQR_D(a, Phi, Plo)                \
+    {                                        \
+        Plo = asm("mulq  %a0, %a0, %v0", a); \
+        Phi = asm("umulh %a0, %a0, %v0", a); \
+    }
+#else /* portable form: half-digit products, the cross term counted twice via the shifts */
+#define MP_SQR_D(a, Phi, Plo)                                          \
+    {                                                                  \
+        mp_digit Pmid;                                                 \
+        Plo = ((a) & MP_HALF_DIGIT_MAX) * ((a) & MP_HALF_DIGIT_MAX);   \
+        Phi = ((a) >> MP_HALF_DIGIT_BIT) * ((a) >> MP_HALF_DIGIT_BIT); \
+        Pmid = ((a) & MP_HALF_DIGIT_MAX) * ((a) >> MP_HALF_DIGIT_BIT); \
+        Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1);                        \
+        Pmid <<= (MP_HALF_DIGIT_BIT + 1);                              \
+        Plo += Pmid;                                                   \
+        if (Plo < Pmid)                                                \
+            ++Phi;                                                     \
+    }
+#endif
+
+#if !defined(MP_ASSEMBLY_SQUARE)
+/* For each digit pa[i], add pa[i]^2 into ps[2i] and ps[2i+1] in place, then ripple any final carry upward. */
+void
+s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
+    mp_word w;
+    mp_digit d;
+    mp_size ix;
+    /* w holds the running sum and, after each shift, the carry into the next digit */
+    w = 0;
+#define ADD_SQUARE(n)                     \
+    d = pa[n];                            \
+    w += (d * (mp_word)d) + ps[2 * n];    \
+    ps[2 * n] = ACCUM(w);                 \
+    w = (w >> DIGIT_BIT) + ps[2 * n + 1]; \
+    ps[2 * n + 1] = ACCUM(w);             \
+    w = (w >> DIGIT_BIT)
+    /* Main loop squares four digits per pass; the switch below handles the 1-3 left over */
+    for (ix = a_len; ix >= 4; ix -= 4) {
+        ADD_SQUARE(0);
+        ADD_SQUARE(1);
+        ADD_SQUARE(2);
+        ADD_SQUARE(3);
+        pa += 4;
+        ps += 8;
+    }
+    if (ix) {
+        ps += 2 * ix; /* advance past the leftover digits first ... */
+        pa += ix;     /* ... so they are addressed with negative indices below */
+        switch (ix) {
+            case 3:
+                ADD_SQUARE(-3); /* FALLTHRU */
+            case 2:
+                ADD_SQUARE(-2); /* FALLTHRU */
+            case 1:
+                ADD_SQUARE(-1); /* FALLTHRU */
+            case 0:
+                break;
+        }
+    }
+    while (w) { /* propagate the remaining carry into higher digits of ps */
+        w += *ps;
+        *ps++ = ACCUM(w);
+        w = (w >> DIGIT_BIT);
+    }
+#else
+    mp_digit carry = 0;
+    while (a_len--) {
+        mp_digit a_i = *pa++;
+        mp_digit a0a0, a1a1; /* low and high digit of a_i squared */
+
+        MP_SQR_D(a_i, a1a1, a0a0);
+
+        /* here a1a1 and a0a0 constitute a_i ** 2 */
+        a0a0 += carry;
+        if (a0a0 < carry)
+            ++a1a1;
+
+        /* now add to ps */
+        a0a0 += a_i = *ps; /* a_i is reused as scratch for the old ps digit */
+        if (a0a0 < a_i)
+            ++a1a1;
+        *ps++ = a0a0;
+        a1a1 += a_i = *ps;
+        carry = (a1a1 < a_i); /* wrap-around here is the carry into the next pair */
+        *ps++ = a1a1;
+    }
+    while (carry) { /* propagate the remaining carry into higher digits of ps */
+        mp_digit s_i = *ps;
+        carry += s_i;
+        *ps++ = carry;
+        carry = carry < s_i;
+    }
+#endif
+}
+#endif
+
+#if (defined(MP_NO_MP_WORD) || defined(MP_NO_DIV_WORD)) && !defined(MP_ASSEMBLY_DIV_2DX1D)
+/*
+** Divide the two-digit value (Nhi,Nlo) by divisor, which must be normalized
+** so its high bit is 1.  Quotient -> *qp, remainder -> *rp.  From NSPR.
+*/
+mp_err
+s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+                mp_digit *qp, mp_digit *rp)
+{
+    mp_digit d1, d0, q1, q0; /* halves of the divisor and of the quotient */
+    mp_digit r1, r0, m;      /* partial remainders; m = quotient half * d0 */
+
+    d1 = divisor >> MP_HALF_DIGIT_BIT; /* high half of divisor */
+    d0 = divisor & MP_HALF_DIGIT_MAX;  /* low half of divisor */
+    r1 = Nhi % d1;
+    q1 = Nhi / d1; /* first guess at the high half of the quotient */
+    m = q1 * d0;
+    r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT);
+    if (r1 < m) { /* guess too large: step it down, at most twice */
+        q1--, r1 += divisor;
+        if (r1 >= divisor && r1 < m) { /* r1 >= divisor: the add did not wrap */
+            q1--, r1 += divisor;
+        }
+    }
+    r1 -= m;
+    r0 = r1 % d1;
+    q0 = r1 / d1; /* same procedure for the low half of the quotient */
+    m = q0 * d0;
+    r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX);
+    if (r0 < m) {
+        q0--, r0 += divisor;
+        if (r0 >= divisor && r0 < m) {
+            q0--, r0 += divisor;
+        }
+    }
+    if (qp) /* either output pointer may be NULL */
+        *qp = (q1 << MP_HALF_DIGIT_BIT) | q0;
+    if (rp)
+        *rp = r0 - m;
+    return MP_OKAY; /* no failure path in this implementation */
+}
+#endif
+
+#if MP_SQUARE
+/* {{{ s_mp_sqr(a) */
+
+mp_err
+s_mp_sqr(mp_int *a)
+{
+    mp_int sq; /* scratch for the square, sized at twice a's digit count */
+    mp_err rv = mp_init_size(&sq, 2 * USED(a));
+
+    if (rv != MP_OKAY)
+        return rv;
+    /* Square into the scratch value, then trade contents with a on success */
+    if ((rv = mp_sqr(a, &sq)) == MP_OKAY)
+        s_mp_exch(&sq, a);
+
+    mp_clear(&sq); /* after the exchange this holds a's previous contents */
+    return rv;
+}
+
+/* }}} */
+#endif
+
+/* {{{ s_mp_div(a, b) */
+
+/*
+  s_mp_div(rem, div, quot)
+
+  Compute quot = rem / div and rem = rem mod div.  MP_RANGE if div is 0.
+ */
+
+mp_err s_mp_div(mp_int *rem,  /* i: dividend, o: remainder */
+                mp_int *div,  /* i: divisor                */
+                mp_int *quot) /* i: 0;        o: quotient  */
+{
+    mp_int part, t; /* part: window onto rem's top digits; t: q_msd * div */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
+    mp_word q_msd;
+#else
+    mp_digit q_msd;
+#endif
+    mp_err res;
+    mp_digit d; /* filled in by s_mp_norm(); undone on rem at the end */
+    mp_digit div_msd;
+    int ix;
+
+    if (mp_cmp_z(div) == 0)
+        return MP_RANGE; /* division by zero */
+
+    DIGITS(&t) = 0; /* presumably makes mp_clear(&t) at CLEANUP safe before t is initialized */
+    /* Shortcut if divisor is power of two */
+    if ((ix = s_mp_ispow2(div)) >= 0) {
+        MP_CHECKOK(mp_copy(rem, quot));
+        s_mp_div_2d(quot, (mp_digit)ix);
+        s_mp_mod_2d(rem, (mp_digit)ix);
+
+        return MP_OKAY;
+    }
+
+    MP_SIGN(rem) = ZPOS; /* from here on only magnitudes are used */
+    MP_SIGN(div) = ZPOS;
+    MP_SIGN(&part) = ZPOS;
+
+    /* A working temporary for division     */
+    MP_CHECKOK(mp_init_size(&t, MP_ALLOC(rem)));
+
+    /* Normalize to optimize guessing       */
+    MP_CHECKOK(s_mp_norm(rem, div, &d));
+
+    /* Perform the division itself...woo!   */
+    MP_USED(quot) = MP_ALLOC(quot); /* clamped back down after the loop */
+
+    /* Find a partial substring of rem which is at least div */
+    /* If we didn't find one, we're finished dividing    */
+    while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) {
+        int i;
+        int unusedRem;
+        int partExtended = 0; /* set to true if we need to extend part */
+
+        unusedRem = MP_USED(rem) - MP_USED(div);
+        MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem; /* part aliases rem's storage */
+        MP_ALLOC(&part) = MP_ALLOC(rem) - unusedRem;
+        MP_USED(&part) = MP_USED(div);
+
+        /* We have now truncated the part of the remainder to the same length as
+     * the divisor. If part is smaller than div, extend part by one digit. */
+        if (s_mp_cmp(&part, div) < 0) {
+            --unusedRem;
+#if MP_ARGCHK == 2
+            assert(unusedRem >= 0);
+#endif
+            --MP_DIGITS(&part);
+            ++MP_USED(&part);
+            ++MP_ALLOC(&part);
+            partExtended = 1;
+        }
+
+        /* Compute a guess for the next quotient digit       */
+        q_msd = MP_DIGIT(&part, MP_USED(&part) - 1);
+        div_msd = MP_DIGIT(div, MP_USED(div) - 1);
+        if (!partExtended) {
+            /* In this case, q_msd /= div_msd is always 1. First, since div_msd is
+       * normalized to have the high bit set, 2*div_msd > MP_DIGIT_MAX. Since
+       * we didn't extend part, q_msd >= div_msd. Therefore we know that
+       * div_msd <= q_msd <= MP_DIGIT_MAX < 2*div_msd. Dividing by div_msd we
+       * get 1 <= q_msd/div_msd < 2. So q_msd /= div_msd must be 1. */
+            q_msd = 1;
+        } else {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
+            q_msd = (q_msd << MP_DIGIT_BIT) | MP_DIGIT(&part, MP_USED(&part) - 2);
+            q_msd /= div_msd;
+            if (q_msd == RADIX)
+                --q_msd;
+#else
+            if (q_msd == div_msd) {
+                q_msd = MP_DIGIT_MAX;
+            } else {
+                mp_digit r;
+                MP_CHECKOK(s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2),
+                                           div_msd, &q_msd, &r));
+            }
+#endif
+        }
+#if MP_ARGCHK == 2
+        assert(q_msd > 0); /* This case should never occur any more. */
+#endif
+        if (q_msd <= 0)
+            break;
+
+        /* See what that multiplies out to (the copy can fail, so check it) */
+        MP_CHECKOK(mp_copy(div, &t));
+        MP_CHECKOK(s_mp_mul_d(&t, (mp_digit)q_msd));
+
+        /*
+       If it's too big, back it off.  We should not have to do this
+       more than once, or, in rare cases, twice.  Knuth describes a
+       method by which this could be reduced to a maximum of once, but
+       I didn't implement that here.
+     * When using s_mpv_div_2dx1d, we may have to do this 3 times.
+     */
+        for (i = 4; s_mp_cmp(&t, &part) > 0; --i) {
+            if (i <= 0) { /* four corrections were not enough: give up */
+                res = MP_RANGE;
+                goto CLEANUP;
+            }
+            --q_msd;
+            MP_CHECKOK(s_mp_sub(&t, div)); /* t -= div */
+        }
+
+        /* At this point, q_msd should be the right next digit   */
+        MP_CHECKOK(s_mp_sub(&part, &t)); /* part -= t */
+        s_mp_clamp(rem); /* part wrote into rem's digits, so re-trim rem */
+
+        /*
+      Include the digit in the quotient.  We allocated enough memory
+      for any quotient we could ever possibly get, so we should not
+      have to check for failures here
+     */
+        MP_DIGIT(quot, unusedRem) = (mp_digit)q_msd;
+    }
+
+    /* Denormalize remainder                */
+    if (d) {
+        s_mp_div_2d(rem, d);
+    }
+
+    s_mp_clamp(quot);
+
+CLEANUP:
+    mp_clear(&t);
+
+    return res;
+
+} /* end s_mp_div() */
+
+/* }}} */
+
+/* {{{ s_mp_2expt(a, k) */
+
+mp_err
+s_mp_2expt(mp_int *a, mp_digit k)
+{
+    /* Split the exponent into a digit index and a bit offset inside it */
+    const mp_size digit_ix = k / DIGIT_BIT;
+    const mp_size bit_ix = k % DIGIT_BIT;
+    mp_err rv;
+
+    mp_zero(a);
+
+    /* Pad a out to digit_ix + 1 digits; on success set the single bit */
+    rv = s_mp_pad(a, digit_ix + 1);
+    if (rv == MP_OKAY)
+        DIGIT(a, digit_ix) |= ((mp_digit)1 << bit_ix);
+
+    return rv;
+
+} /* end s_mp_2expt() */
+
+/* }}} */
+
+/* {{{ s_mp_reduce(x, m, mu) */
+
+/*
+  Compute Barrett reduction, x (mod m), given a precomputed value for
+  mu = b^2k / m, where b = RADIX and k = #digits(m).  This should be
+  faster than straight division, when many reductions by the same
+  value of m are required (such as in modular exponentiation).  This
+  can nearly halve the time required to do modular exponentiation,
+  as compared to using the full integer divide to reduce.
+
+  This algorithm was derived from the _Handbook of Applied
+  Cryptography_ by Menezes, Oorschot and VanStone, Ch. 14,
+  pp. 603-604.
+ */
+
+mp_err
+s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
+{
+    mp_int q;
+    mp_err res;
+    /* Returns the first helper error; x may be partly reduced in that case */
+    if ((res = mp_init_copy(&q, x)) != MP_OKAY)
+        return res;
+
+    s_mp_rshd(&q, USED(m) - 1); /* q1 = x / b^(k-1)  */
+    if ((res = s_mp_mul(&q, mu)) != MP_OKAY) /* q2 = q1 * mu */
+        goto CLEANUP;
+    s_mp_rshd(&q, USED(m) + 1); /* q3 = q2 / b^(k+1) */
+
+    /* x = x mod b^(k+1), quick (no division) */
+    s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1));
+
+    /* q = q * m mod b^(k+1), quick (no division) */
+    if ((res = s_mp_mul(&q, m)) != MP_OKAY)
+        goto CLEANUP;
+    s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1));
+
+    /* x = x - q */
+    if ((res = mp_sub(x, &q, x)) != MP_OKAY)
+        goto CLEANUP;
+
+    /* If x < 0, add b^(k+1) to it */
+    if (mp_cmp_z(x) < 0) {
+        mp_set(&q, 1);
+        if ((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY)
+            goto CLEANUP;
+        if ((res = mp_add(x, &q, x)) != MP_OKAY)
+            goto CLEANUP;
+    }
+
+    /* Back off if it's too big */
+    while (mp_cmp(x, m) >= 0) {
+        if ((res = s_mp_sub(x, m)) != MP_OKAY)
+            break;
+    }
+
+CLEANUP:
+    mp_clear(&q); /* q is scratch on every path */
+    return res;
+} /* end s_mp_reduce() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Primitive comparisons */
+
+/* {{{ s_mp_cmp(a, b) */
+
+/* Compare |a| <=> |b| (signs ignored): returns MP_EQ, MP_LT or MP_GT      */
+int
+s_mp_cmp(const mp_int *a, const mp_int *b)
+{
+    mp_size used_a = MP_USED(a);
+    { /* a different number of used digits decides the result immediately */
+        mp_size used_b = MP_USED(b);
+
+        if (used_a > used_b)
+            goto IS_GT;
+        if (used_a < used_b)
+            goto IS_LT;
+    }
+    { /* same length: scan from the most significant digit downward */
+        mp_digit *pa, *pb;
+        mp_digit da = 0, db = 0; /* equal if the loops never find a difference */
+
+#define CMP_AB(n)                     \
+    if ((da = pa[n]) != (db = pb[n])) \
+    goto done
+
+        pa = MP_DIGITS(a) + used_a; /* one past the top digit of a */
+        pb = MP_DIGITS(b) + used_a; /* one past the top digit of b */
+        while (used_a >= 4) { /* unrolled: four digits per pass, highest first */
+            pa -= 4;
+            pb -= 4;
+            used_a -= 4;
+            CMP_AB(3);
+            CMP_AB(2);
+            CMP_AB(1);
+            CMP_AB(0);
+        }
+        while (used_a-- > 0 && ((da = *--pa) == (db = *--pb)))
+            /* do nothing */;
+    done: /* da/db hold the first differing pair, or the last equal pair */
+        if (da > db)
+            goto IS_GT;
+        if (da < db)
+            goto IS_LT;
+    }
+    return MP_EQ;
+IS_LT:
+    return MP_LT;
+IS_GT:
+    return MP_GT;
+} /* end s_mp_cmp() */
+
+/* }}} */
+
+/* {{{ s_mp_cmp_d(a, d) */
+
+/* Three-way compare of |a| against the single digit d: MP_LT/MP_EQ/MP_GT   */
+int
+s_mp_cmp_d(const mp_int *a, mp_digit d)
+{
+    mp_digit low;
+
+    /* More than one digit in use: a is reported as larger than any digit */
+    if (USED(a) > 1)
+        return MP_GT;
+
+    low = DIGIT(a, 0);
+    if (low == d)
+        return MP_EQ;
+    return (low < d) ? MP_LT : MP_GT;
+} /* end s_mp_cmp_d() */
+
+/* }}} */
+
+/* {{{ s_mp_ispow2(v) */
+
+/*
+  If v is an exact power of two, 2^k, return k; otherwise return -1.
+  The most significant digit is classified by s_mp_ispow2d(); every
+  digit below it must then be zero.
+ */
+int
+s_mp_ispow2(const mp_int *v)
+{
+    int top = MP_USED(v) - 1; /* index of the most significant digit */
+    int log2v = s_mp_ispow2d(MP_DIGIT(v, top));
+    int ix;
+
+    /* Top digit is not a power of two, or it is the only digit */
+    if (log2v < 0 || top == 0)
+        return log2v;
+
+    /* Each lower digit must be zero and contributes a full digit of bits */
+    for (ix = top - 1; ix >= 0; --ix) {
+        if (DIGIT(v, ix) != 0)
+            return -1;
+        log2v += MP_DIGIT_BIT;
+    }
+
+    return log2v;
+
+} /* end s_mp_ispow2() */
+
+/* }}} */
+
+/* {{{ s_mp_ispow2d(d) */
+
+int
+s_mp_ispow2d(mp_digit d)
+{ /* Returns k when d == 2^k, or -1 when d is zero or has more than one bit set */
+    if ((d != 0) && ((d & (d - 1)) == 0)) { /* d is a power of 2 */
+        int pow = 0; /* built one binary place at a time: each mask selects the positions whose index has that place set */
+#if defined(MP_USE_UINT_DIGIT)
+        if (d & 0xffff0000U)
+            pow += 16;
+        if (d & 0xff00ff00U)
+            pow += 8;
+        if (d & 0xf0f0f0f0U)
+            pow += 4;
+        if (d & 0xccccccccU)
+            pow += 2;
+        if (d & 0xaaaaaaaaU)
+            pow += 1;
+#elif defined(MP_USE_LONG_LONG_DIGIT)
+        if (d & 0xffffffff00000000ULL)
+            pow += 32;
+        if (d & 0xffff0000ffff0000ULL)
+            pow += 16;
+        if (d & 0xff00ff00ff00ff00ULL)
+            pow += 8;
+        if (d & 0xf0f0f0f0f0f0f0f0ULL)
+            pow += 4;
+        if (d & 0xccccccccccccccccULL)
+            pow += 2;
+        if (d & 0xaaaaaaaaaaaaaaaaULL)
+            pow += 1;
+#elif defined(MP_USE_LONG_DIGIT)
+        if (d & 0xffffffff00000000UL)
+            pow += 32;
+        if (d & 0xffff0000ffff0000UL)
+            pow += 16;
+        if (d & 0xff00ff00ff00ff00UL)
+            pow += 8;
+        if (d & 0xf0f0f0f0f0f0f0f0UL)
+            pow += 4;
+        if (d & 0xccccccccccccccccUL)
+            pow += 2;
+        if (d & 0xaaaaaaaaaaaaaaaaUL)
+            pow += 1;
+#else
+#error "unknown type for mp_digit"
+#endif
+        return pow;
+    }
+    return -1; /* zero, or more than one bit set */
+
+} /* end s_mp_ispow2d() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Primitive I/O helpers */
+
+/* {{{ s_mp_tovalue(ch, r) */
+
+/*
+  Convert the given character to its digit value, in the given radix.
+  If the given character is not understood in the given radix, -1 is
+  returned.  Otherwise the digit's numeric value is returned.
+
+  The results will be odd if you use a radix < 2 or > 64, you are
+  expected to know what you're up to.
+ */
+int
+s_mp_tovalue(char ch, int r)
+{
+    int val;
+    /* <ctype.h> functions require an unsigned char value (or EOF) */
+    int xch = (unsigned char)ch;
+
+    /* Radices up to 36 are case-insensitive; above that, case matters */
+    if (r <= 36)
+        xch = toupper(xch);
+
+    if (isdigit(xch))
+        val = xch - '0';
+    else if (isupper(xch))
+        val = xch - 'A' + 10;
+    else if (islower(xch))
+        val = xch - 'a' + 36;
+    else if (xch == '+')
+        val = 62;
+    else if (xch == '/')
+        val = 63;
+    else
+        return -1;
+
+    if (val < 0 || val >= r)
+        return -1;
+
+    return val;
+} /* end s_mp_tovalue() */
+
+/* }}} */
+
+/* {{{ s_mp_todigit(val, r, low) */
+
+/*
+  Map val to the character that denotes it in radix r, taken from the
+  s_dmap_1 table.  Returns zero when val is not below r.  When low is
+  nonzero and r is at most 36, the character is passed through tolower().
+
+  The results may be odd if you use a radix < 2 or > 64, you are
+  expected to know what you're doing.
+ */
+
+char
+s_mp_todigit(mp_digit val, int r, int low)
+{
+    char digit_ch = 0; /* stays 0 when val is not a digit in radix r */
+
+    if (val < r) {
+        digit_ch = s_dmap_1[val];
+
+        /* Lower-casing happens only on request and only up to radix 36 */
+        if (low && r <= 36)
+            digit_ch = tolower(digit_ch);
+    }
+
+    return digit_ch;
+
+} /* end s_mp_todigit() */
+
+/* }}} */
+
+/* {{{ s_mp_outlen(bits, radix) */
+
+/*
+   Estimate the buffer length for a radix-r string of a value with 'bits'
+   significant bits: bits * LOG_V_2(r) (presumably digits per bit; defined
+   elsewhere), rounded via +1.5 and truncation, plus 1 for the terminator.
+ */
+int
+s_mp_outlen(int bits, int r)
+{
+    return (int)((double)bits * LOG_V_2(r) + 1.5) + 1; /* final +1: zero terminator */
+
+} /* end s_mp_outlen() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ mp_read_unsigned_octets(mp, str, len) */
+/* mp_read_unsigned_octets(mp, str, len)
+   Load a big-endian (base 256) octet string into mp.  The value is
+   non-negative by construction; leading zero octets contribute nothing.
+ */
+
+mp_err
+mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len)
+{
+    mp_size lead, nb;
+    mp_digit acc;
+    mp_err res;
+
+    ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
+
+    mp_zero(mp);
+
+    /* A short leading group when len is not a multiple of the digit size */
+    lead = len % sizeof(mp_digit);
+    if (lead) {
+        for (acc = 0, nb = 0; nb < lead; ++nb)
+            acc = (acc << 8) | *str++;
+        len -= lead;
+        MP_DIGIT(mp, 0) = acc;
+    }
+
+    /* Everything after that arrives in whole-digit groups */
+    while (len > 0) {
+        for (acc = 0, nb = 0; nb < sizeof(mp_digit); ++nb)
+            acc = (acc << 8) | *str++;
+        len -= sizeof(mp_digit);
+        if (MP_EQ != mp_cmp_z(mp)) {
+            if ((res = s_mp_lshd(mp, 1)) != MP_OKAY)
+                return res;
+        } else if (!acc) {
+            continue; /* still skipping leading zero digits */
+        }
+        MP_DIGIT(mp, 0) = acc;
+    }
+    return MP_OKAY;
+} /* end mp_read_unsigned_octets() */
+/* }}} */
+
+/* {{{ mp_unsigned_octet_size(mp) */
+unsigned int
+mp_unsigned_octet_size(const mp_int *mp)
+{
+    unsigned int nbytes;
+    int top, shift;
+    mp_digit msd = 0;
+
+    ARGCHK(mp != NULL, MP_BADARG);
+    ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG);
+
+    /* Begin with the full width of the used digits and trim from the top */
+    nbytes = (USED(mp) * sizeof(mp_digit));
+
+    /* Discard whole digits that are zero; msd ends up holding the first
+     * nonzero digit met (or zero if there is none) */
+    top = USED(mp) - 1;
+    while (top >= 0 && (msd = DIGIT(mp, top)) == 0) {
+        nbytes -= sizeof(msd);
+        --top;
+    }
+    if (nbytes == 0)
+        return 1; /* an all-zero value is still reported as one octet */
+
+    /* Discard the zero octets at the high end of that leading digit */
+    shift = (sizeof(mp_digit) - 1) * CHAR_BIT;
+    while (shift >= 0 && (unsigned char)(msd >> shift) == 0) {
+        --nbytes;
+        shift -= CHAR_BIT;
+    }
+
+    return nbytes;
+} /* end mp_unsigned_octet_size() */
+/* }}} */
+
+/* {{{ mp_to_unsigned_octets(mp, str) */
+/* Write mp as big-endian octets with leading zeros stripped; returns the octet count */
+mp_err
+mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
+{
+    int dig, shift, pos = 0;
+    unsigned int bytes;
+
+    ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
+
+    /* The second ARGCHK guards against a value too long for maxlen */
+    bytes = mp_unsigned_octet_size(mp);
+    ARGCHK(bytes <= maxlen, MP_BADARG);
+
+    /* Walk digits from most to least significant, top octet first */
+    for (dig = USED(mp) - 1; dig >= 0; --dig) {
+        const mp_digit cur = DIGIT(mp, dig);
+
+        shift = (sizeof(mp_digit) - 1) * CHAR_BIT;
+        for (; shift >= 0; shift -= CHAR_BIT) {
+            unsigned char octet = (unsigned char)(cur >> shift);
+            if (pos || octet) /* skip zeros until something is stored */
+                str[pos++] = octet;
+        }
+    }
+
+    if (pos == 0) /* nothing stored: the value is zero */
+        str[pos++] = 0;
+    return pos;
+} /* end mp_to_unsigned_octets() */
+/* }}} */
+
+/* {{{ mp_to_signed_octets(mp, str) */
+/* Big-endian octets of mp, with a 0x00 prefix if the first octet has its high bit set; returns the count */
+mp_err
+mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
+{
+    int dig, shift, pos = 0;
+    unsigned int bytes;
+
+    ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
+
+    bytes = mp_unsigned_octet_size(mp);
+    ARGCHK(bytes <= maxlen, MP_BADARG);
+
+    /* Walk digits from most to least significant, top octet first */
+    for (dig = USED(mp) - 1; dig >= 0; --dig) {
+        const mp_digit cur = DIGIT(mp, dig);
+
+        shift = (sizeof(mp_digit) - 1) * CHAR_BIT;
+        for (; shift >= 0; shift -= CHAR_BIT) {
+            unsigned char octet = (unsigned char)(cur >> shift);
+
+            if (pos == 0 && octet == 0)
+                continue; /* still in the leading zeros */
+            if (pos == 0 && (octet & 0x80)) {
+                /* First octet has its high bit set: one more octet needed */
+                ARGCHK(bytes + 1 <= maxlen, MP_BADARG);
+                if (bytes + 1 > maxlen)
+                    return MP_BADARG;
+                str[pos++] = 0; /* keeps the encoding non-negative */
+            }
+            str[pos++] = octet;
+        }
+    }
+
+    if (pos == 0) /* nothing stored: the value is zero */
+        str[pos++] = 0;
+    return pos;
+} /* end mp_to_signed_octets() */
+/* }}} */
+
+/* {{{ mp_to_fixlen_octets(mp, str) */
+/* Write mp as exactly `length` big-endian octets, zero-padded on the left */
+mp_err
+mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length)
+{
+    int dig, shift, pos = 0;
+    unsigned int bytes;
+
+    ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
+
+    bytes = mp_unsigned_octet_size(mp);
+    ARGCHK(bytes <= length, MP_BADARG);
+
+    /* Left padding: one zero octet for each position beyond the value */
+    while (length > bytes) {
+        *str++ = 0;
+        --length;
+    }
+
+    /* Then the value itself, most significant octet first */
+    for (dig = USED(mp) - 1; dig >= 0; --dig) {
+        const mp_digit cur = DIGIT(mp, dig);
+
+        shift = (sizeof(mp_digit) - 1) * CHAR_BIT;
+        for (; shift >= 0; shift -= CHAR_BIT) {
+            unsigned char octet = (unsigned char)(cur >> shift);
+            if (pos || octet) /* skip zeros until something is stored */
+                str[pos++] = octet;
+        }
+    }
+
+    if (pos == 0) /* nothing stored: the value is zero */
+        str[pos++] = 0;
+    return MP_OKAY;
+} /* end mp_to_fixlen_octets() */
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS                                                  */
diff --git a/security/nss/lib/freebl/mpi/mpi.h b/security/nss/lib/freebl/mpi/mpi.h
new file mode 100644
index 000000000..64ffe75d5
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi.h
@@ -0,0 +1,313 @@
+/*
+ *  mpi.h
+ *
+ *  Arbitrary precision integer arithmetic library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _H_MPI_
+#define _H_MPI_
+
+#include "mpi-config.h"
+
+#include "seccomon.h"
+SEC_BEGIN_PROTOS
+
+#if MP_DEBUG
+#undef MP_IOFUNC
+#define MP_IOFUNC 1
+#endif
+
+#if MP_IOFUNC
+#include <stdio.h>
+#include <ctype.h>
+#endif
+
+#include <limits.h>
+
+#if defined(BSDI)
+#undef ULLONG_MAX
+#endif
+
+#include <sys/types.h>
+
+#define MP_NEG 1  /* sign value: negative */
+#define MP_ZPOS 0 /* sign value: zero or positive */
+
+#define MP_OKAY 0    /* no error, all is well */
+#define MP_YES 0     /* yes (boolean result); same value as MP_OKAY */
+#define MP_NO -1     /* no (boolean result)   */
+#define MP_MEM -2    /* out of memory         */
+#define MP_RANGE -3  /* argument out of range */
+#define MP_BADARG -4 /* invalid parameter     */
+#define MP_UNDEF -5  /* answer is undefined   */
+#define MP_LAST_CODE MP_UNDEF /* alias for the lowest code listed above */
+
+typedef unsigned int mp_sign; /* type of mp_int.sign */
+typedef unsigned int mp_size; /* type of mp_int.alloc and mp_int.used */
+typedef int mp_err;           /* signed, so it can hold the MP_* codes above */
+
+#define MP_32BIT_MAX 4294967295U
+
+#if !defined(ULONG_MAX)
+#error "ULONG_MAX not defined"
+#elif !defined(UINT_MAX)
+#error "UINT_MAX not defined"
+#elif !defined(USHRT_MAX)
+#error "USHRT_MAX not defined"
+#endif
+
+#if defined(ULLONG_MAX) /* C99, Solaris */
+#define MP_ULONG_LONG_MAX ULLONG_MAX
+/* MP_ULONG_LONG_MAX was defined to be ULLONG_MAX */
+#elif defined(ULONG_LONG_MAX) /* HPUX */
+#define MP_ULONG_LONG_MAX ULONG_LONG_MAX
+#elif defined(ULONGLONG_MAX) /* IRIX, AIX */
+#define MP_ULONG_LONG_MAX ULONGLONG_MAX
+#endif
+
+/* Pick mp_digit.  unsigned long is used only if long is more than 32 bits. */
+#if !defined(MP_USE_UINT_DIGIT) && ULONG_MAX > MP_32BIT_MAX
+typedef unsigned long mp_digit;
+#define MP_DIGIT_MAX ULONG_MAX
+#define MP_DIGIT_FMT "%016lX" /* printf() format for 1 digit (16 hex chars) */
+#define MP_HALF_DIGIT_MAX UINT_MAX /* assumes unsigned int is half a digit */
+#undef MP_NO_MP_WORD
+#define MP_NO_MP_WORD 1 /* the mp_word selection below is skipped */
+#undef MP_USE_LONG_DIGIT
+#define MP_USE_LONG_DIGIT 1
+#undef MP_USE_LONG_LONG_DIGIT
+
+#elif !defined(MP_USE_UINT_DIGIT) && defined(MP_ULONG_LONG_MAX)
+typedef unsigned long long mp_digit; /* long is 32 bits; use long long */
+#define MP_DIGIT_MAX MP_ULONG_LONG_MAX
+#define MP_DIGIT_FMT "%016llX" /* printf() format for 1 digit (16 hex chars) */
+#define MP_HALF_DIGIT_MAX UINT_MAX /* assumes unsigned int is half a digit */
+#undef MP_NO_MP_WORD
+#define MP_NO_MP_WORD 1 /* the mp_word selection below is skipped */
+#undef MP_USE_LONG_LONG_DIGIT
+#define MP_USE_LONG_LONG_DIGIT 1
+#undef MP_USE_LONG_DIGIT
+
+#else /* MP_USE_UINT_DIGIT was requested, or no wider type was found */
+typedef unsigned int mp_digit;
+#define MP_DIGIT_MAX UINT_MAX
+#define MP_DIGIT_FMT "%08X" /* printf() format for 1 digit (8 hex chars) */
+#define MP_HALF_DIGIT_MAX USHRT_MAX /* assumes unsigned short is half a digit */
+#undef MP_USE_UINT_DIGIT
+#define MP_USE_UINT_DIGIT 1
+#undef MP_USE_LONG_LONG_DIGIT
+#undef MP_USE_LONG_DIGIT
+#endif /* exactly one MP_USE_*_DIGIT macro is defined from here on */
+
+#if !defined(MP_NO_MP_WORD) /* pick mp_word/mp_sword unless told not to */
+#if defined(MP_USE_UINT_DIGIT) && \
+    (defined(MP_ULONG_LONG_MAX) || (ULONG_MAX > UINT_MAX))
+/* mp_digit is unsigned int here; prefer long when it is wider than int. */
+#if (ULONG_MAX > UINT_MAX)
+typedef unsigned long mp_word;
+typedef long mp_sword;
+#define MP_WORD_MAX ULONG_MAX
+
+#else /* long is no wider than int: fall back to long long */
+typedef unsigned long long mp_word;
+typedef long long mp_sword;
+#define MP_WORD_MAX MP_ULONG_LONG_MAX
+#endif
+
+#else /* no candidate type for mp_word */
+#define MP_NO_MP_WORD 1
+#endif
+#endif /* !defined(MP_NO_MP_WORD) */
+
+#if !defined(MP_WORD_MAX) && defined(MP_DEFINE_SMALL_WORD)
+typedef unsigned int mp_word; /* opt-in fallback when no mp_word was chosen */
+typedef int mp_sword;
+#define MP_WORD_MAX UINT_MAX
+#endif
+
+#define MP_DIGIT_BIT (CHAR_BIT * sizeof(mp_digit)) /* bits in one mp_digit */
+#define MP_WORD_BIT (CHAR_BIT * sizeof(mp_word)) /* usable only if mp_word exists */
+#define MP_RADIX (1 + (mp_word)MP_DIGIT_MAX) /* usable only if mp_word exists */
+
+#define MP_HALF_DIGIT_BIT (MP_DIGIT_BIT / 2)
+#define MP_HALF_RADIX (1 + (mp_digit)MP_HALF_DIGIT_MAX)
+/* MP_HALF_RADIX really ought to be called MP_SQRT_RADIX, but it's named
+** MP_HALF_RADIX because it's the radix for MP_HALF_DIGITs, and it's
+** consistent with the other _HALF_ names.
+*/
+
+/* Macros for accessing the mp_int internals; MP is a pointer to an mp_int */
+#define MP_SIGN(MP) ((MP)->sign)
+#define MP_USED(MP) ((MP)->used)
+#define MP_ALLOC(MP) ((MP)->alloc)
+#define MP_DIGITS(MP) ((MP)->dp)
+#define MP_DIGIT(MP, N) (MP)->dp[(N)] /* digit N; unlike the others, not wrapped in () */
+
+/* This defines the maximum I/O base (minimum is 2)   */
+#define MP_MAX_RADIX 64
+
+typedef struct { /* the arbitrary precision integer all mp_* functions take */
+    mp_sign sign;  /* sign of this quantity (MP_ZPOS or MP_NEG) */
+    mp_size alloc; /* how many digits allocated  */
+    mp_size used;  /* how many digits used       */
+    mp_digit *dp;  /* the digits themselves; octet output reads dp[used-1] first */
+} mp_int;
+
+/* Default precision (get/set pair)                   */
+mp_size mp_get_prec(void);
+void mp_set_prec(mp_size prec);
+
+/* Memory management       */
+mp_err mp_init(mp_int *mp);
+mp_err mp_init_size(mp_int *mp, mp_size prec);
+mp_err mp_init_copy(mp_int *mp, const mp_int *from);
+mp_err mp_copy(const mp_int *from, mp_int *to);
+void mp_exch(mp_int *mp1, mp_int *mp2);
+void mp_clear(mp_int *mp);
+void mp_zero(mp_int *mp);
+void mp_set(mp_int *mp, mp_digit d);
+mp_err mp_set_int(mp_int *mp, long z);
+#define mp_set_long(mp, z) mp_set_int(mp, z) /* alias: mp_set_int takes a long */
+mp_err mp_set_ulong(mp_int *mp, unsigned long z);
+
+/* Single digit arithmetic: the second operand is one mp_digit */
+mp_err mp_add_d(const mp_int *a, mp_digit d, mp_int *b);
+mp_err mp_sub_d(const mp_int *a, mp_digit d, mp_int *b);
+mp_err mp_mul_d(const mp_int *a, mp_digit d, mp_int *b);
+mp_err mp_mul_2(const mp_int *a, mp_int *c);
+mp_err mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r);
+mp_err mp_div_2(const mp_int *a, mp_int *c);
+mp_err mp_expt_d(const mp_int *a, mp_digit d, mp_int *c);
+
+/* Sign manipulations      */
+mp_err mp_abs(const mp_int *a, mp_int *b);
+mp_err mp_neg(const mp_int *a, mp_int *b);
+
+/* Full arithmetic: both operands are mp_ints */
+mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c);
+#if MP_SQUARE
+mp_err mp_sqr(const mp_int *a, mp_int *b);
+#else /* no dedicated squaring routine; note the macro expands `a` twice */
+#define mp_sqr(a, b) mp_mul(a, a, b)
+#endif
+mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r);
+mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r);
+mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c);
+mp_err mp_2expt(mp_int *a, mp_digit k);
+
+/* Modular arithmetic (declared only when MP_MODARITH is nonzero) */
+#if MP_MODARITH
+mp_err mp_mod(const mp_int *a, const mp_int *m, mp_int *c);
+mp_err mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c);
+mp_err mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
+mp_err mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
+mp_err mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
+#if MP_SQUARE
+mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c);
+#else /* no dedicated routine; note the macro expands `a` twice */
+#define mp_sqrmod(a, m, c) mp_mulmod(a, a, m, c)
+#endif
+mp_err mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
+mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c);
+#endif /* MP_MODARITH */
+
+/* Comparisons (these return plain int, not mp_err) */
+int mp_cmp_z(const mp_int *a);
+int mp_cmp_d(const mp_int *a, mp_digit d);
+int mp_cmp(const mp_int *a, const mp_int *b);
+int mp_cmp_mag(const mp_int *a, const mp_int *b);
+int mp_isodd(const mp_int *a);
+int mp_iseven(const mp_int *a);
+
+/* Number theoretic (declared only when MP_NUMTH is nonzero) */
+#if MP_NUMTH
+mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c);
+mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c);
+mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y);
+mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c);
+mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c);
+#endif /* end MP_NUMTH */
+
+/* Input and output (<stdio.h> is included above under the same condition) */
+#if MP_IOFUNC
+void mp_print(mp_int *mp, FILE *ofp);
+#endif /* end MP_IOFUNC */
+
+/* Base conversion         */
+mp_err mp_read_raw(mp_int *mp, char *str, int len);
+int mp_raw_size(mp_int *mp);
+mp_err mp_toraw(mp_int *mp, char *str);
+mp_err mp_read_radix(mp_int *mp, const char *str, int radix);
+mp_err mp_read_variable_radix(mp_int *a, const char *str, int default_radix);
+int mp_radix_size(mp_int *mp, int radix);
+mp_err mp_toradix(mp_int *mp, char *str, int radix);
+int mp_tovalue(char ch, int r);
+/* Fixed-radix shorthands for mp_toradix() */
+#define mp_tobinary(M, S) mp_toradix((M), (S), 2)
+#define mp_tooctal(M, S) mp_toradix((M), (S), 8)
+#define mp_todecimal(M, S) mp_toradix((M), (S), 10)
+#define mp_tohex(M, S) mp_toradix((M), (S), 16)
+
+/* Error strings           */
+const char *mp_strerror(mp_err ec);
+
+/* Octet string conversion; mp_to_signed_octets returns the octet count on success */
+mp_err mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len);
+unsigned int mp_unsigned_octet_size(const mp_int *mp);
+mp_err mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen);
+mp_err mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen);
+mp_err mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size len);
+
+/* Miscellaneous */
+mp_size mp_trailing_zeros(const mp_int *mp);
+void freebl_cpuid(unsigned long op, unsigned long *eax,
+                  unsigned long *ebx, unsigned long *ecx,
+                  unsigned long *edx);
+
+#define MP_CHECKOK(x)          \
+    if (MP_OKAY > (res = (x))) \
+    goto CLEANUP /* caller supplies `res` and a CLEANUP label; bare `if`, so a following `else` binds to it */
+#define MP_CHECKERR(x)         \
+    if (MP_OKAY > (res = (x))) \
+    goto CLEANUP /* identical expansion to MP_CHECKOK */
+
+#define NEG MP_NEG /* unprefixed short aliases for the MP_ names above */
+#define ZPOS MP_ZPOS
+#define DIGIT_MAX MP_DIGIT_MAX
+#define DIGIT_BIT MP_DIGIT_BIT
+#define DIGIT_FMT MP_DIGIT_FMT
+#define RADIX MP_RADIX
+#define MAX_RADIX MP_MAX_RADIX
+#define SIGN(MP) MP_SIGN(MP)
+#define USED(MP) MP_USED(MP)
+#define ALLOC(MP) MP_ALLOC(MP)
+#define DIGITS(MP) MP_DIGITS(MP)
+#define DIGIT(MP, N) MP_DIGIT(MP, N)
+
+#if MP_ARGCHK == 1 /* return Y from the enclosing function when X is false */
+#define ARGCHK(X, Y)    \
+    {                   \
+        if (!(X)) {     \
+            return (Y); \
+        }               \
+    }
+#elif MP_ARGCHK == 2 /* assert X instead; Y is unused */
+#include <assert.h>
+#define ARGCHK(X, Y) assert(X)
+#else /* no checking at all: callers must not rely on ARGCHK for safety */
+#define ARGCHK(X, Y) /* expands to nothing; X is never evaluated */
+#endif
+
+#ifdef CT_VERIF
+void mp_taint(mp_int *mp);
+void mp_untaint(mp_int *mp);
+#endif
+
+SEC_END_PROTOS
+
+#endif /* end _H_MPI_ */
diff --git a/security/nss/lib/freebl/mpi/mpi_amd64.c b/security/nss/lib/freebl/mpi/mpi_amd64.c
new file mode 100644
index 000000000..9e538bb6a
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_amd64.c
@@ -0,0 +1,32 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MPI_AMD64
+#error This file only works on AMD64 platforms.
+#endif
+
+#include <mpi-priv.h>
+
+/*
+ * MPI glue
+ *
+ */
+
+/* Presently, this is only used by the Montgomery arithmetic code. */
+/* c += a * b */
+void MPI_ASM_DECL
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len,
+                     mp_digit b, mp_digit *c)
+{
+    /* Fold a * b into c[0 .. a_len-1]; the helper hands back the carry. */
+    mp_digit carry = s_mpv_mul_add_vec64(c, a, a_len, b);
+    mp_digit *dst = c + a_len;
+
+    /* Ripple the carry upward; nothing bounds this, so c must have room. */
+    for (; carry; ++dst) {
+        mp_digit sum = *dst + carry;
+        carry = (sum < *dst || sum < carry);
+        *dst = sum;
+    }
+}
diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_gas.s b/security/nss/lib/freebl/mpi/mpi_amd64_gas.s
new file mode 100644
index 000000000..ad6e2b9d7
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_amd64_gas.s
@@ -0,0 +1,389 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+# ------------------------------------------------------------------------
+#
+#  Implementation of s_mpv_mul_set_vec which exploits
+#  the 64X64->128 bit  unsigned multiply instruction.
+#
+# ------------------------------------------------------------------------
+
+# r = a * digit, r and a are vectors of length len
+# returns the carry digit
+# r and a are 64 bit aligned.
+#
+# uint64_t
+# s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+#
+
+.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:
+	# %rdi = r, %rsi = a, %edx = len, %rcx = digit; the carry is returned in %rax
+	xorq	%r9, %r9		# cy = 0; this is also the result if len == 0
+	testl	%edx, %edx		# len is a 32-bit argument: ignore %rdx bits 63:32
+	jz	.L17			# if (len == 0) return (0)
+
+	movl	%edx, %r8d		# Use r8 for len; %rdx is used by mul
+					# (the 32-bit move zero-extends len into %r8)
+
+.L15:					# main loop: 8 digits per pass
+	cmpq	$8, %r8			# len - 8
+	jb	.L16			# fewer than 8 digits left: finish in the tail
+	movq	0(%rsi), %rax		# rax = a[0]
+	movq	8(%rsi), %r11		# prefetch a[1]
+	mulq	%rcx			# p = a[0] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 0(%rdi)		# r[0] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+
+	movq	%r11, %rax
+	movq	16(%rsi), %r11		# prefetch a[2]
+	mulq	%rcx			# p = a[1] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 8(%rdi)		# r[1] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+
+	movq	%r11, %rax
+	movq	24(%rsi), %r11		# prefetch a[3]
+	mulq	%rcx			# p = a[2] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 16(%rdi)		# r[2] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+
+	movq	%r11, %rax
+	movq	32(%rsi), %r11		# prefetch a[4]
+	mulq	%rcx			# p = a[3] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 24(%rdi)		# r[3] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+
+	movq	%r11, %rax
+	movq	40(%rsi), %r11		# prefetch a[5]
+	mulq	%rcx			# p = a[4] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 32(%rdi)		# r[4] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+
+	movq	%r11, %rax
+	movq	48(%rsi), %r11		# prefetch a[6]
+	mulq	%rcx			# p = a[5] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 40(%rdi)		# r[5] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+
+	movq	%r11, %rax
+	movq	56(%rsi), %r11		# prefetch a[7]
+	mulq	%rcx			# p = a[6] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 48(%rdi)		# r[6] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+
+	movq	%r11, %rax
+	mulq	%rcx			# p = a[7] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 56(%rdi)		# r[7] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+
+	addq	$64, %rsi		# a += 8
+	addq	$64, %rdi		# r += 8
+	subq	$8, %r8			# len -= 8
+
+	jz	.L17			# nothing left: done
+	jmp	.L15
+
+.L16:					# tail: 1 to 7 digits remain
+	movq	0(%rsi), %rax
+	mulq	%rcx			# p = a[0] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 0(%rdi)		# r[0] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+	decq	%r8
+	jz	.L17
+
+	movq	8(%rsi), %rax
+	mulq	%rcx			# p = a[1] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 8(%rdi)		# r[1] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+	decq	%r8
+	jz	.L17
+
+	movq	16(%rsi), %rax
+	mulq	%rcx			# p = a[2] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 16(%rdi)		# r[2] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+	decq	%r8
+	jz	.L17
+
+	movq	24(%rsi), %rax
+	mulq	%rcx			# p = a[3] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 24(%rdi)		# r[3] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+	decq	%r8
+	jz	.L17
+
+	movq	32(%rsi), %rax
+	mulq	%rcx			# p = a[4] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 32(%rdi)		# r[4] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+	decq	%r8
+	jz	.L17
+
+	movq	40(%rsi), %rax
+	mulq	%rcx			# p = a[5] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 40(%rdi)		# r[5] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+	decq	%r8
+	jz	.L17
+
+	movq	48(%rsi), %rax
+	mulq	%rcx			# p = a[6] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 48(%rdi)		# r[6] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+	decq	%r8
+	jz	.L17
+
+
+.L17:					# done: return the final carry
+	movq	%r9, %rax
+	ret
+
+.size s_mpv_mul_set_vec64, .-s_mpv_mul_set_vec64
+
+# ------------------------------------------------------------------------
+#
+#  Implementation of s_mpv_mul_add_vec which exploits
+#  the 64X64->128 bit  unsigned multiply instruction.
+#
+# ------------------------------------------------------------------------
+
+# r += a * digit, r and a are vectors of length len
+# returns the carry digit
+# r and a are 64 bit aligned.
+#
+# uint64_t
+# s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+#
+
+.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:
+	# %rdi = r, %rsi = a, %edx = len, %rcx = digit; the carry is returned in %rax
+	xorq	%r9, %r9		# cy = 0; this is also the result if len == 0
+	testl	%edx, %edx		# len is a 32-bit argument: ignore %rdx bits 63:32
+	jz	.L27			# if (len == 0) return (0)
+
+	movl	%edx, %r8d		# Use r8 for len; %rdx is used by mul
+					# (the 32-bit move zero-extends len into %r8)
+
+.L25:					# main loop: 8 digits per pass
+	cmpq	$8, %r8			# len - 8
+	jb	.L26			# fewer than 8 digits left: finish in the tail
+	movq	0(%rsi), %rax		# rax = a[0]
+	movq	0(%rdi), %r10		# r10 = r[0]
+	movq	8(%rsi), %r11		# prefetch a[1]
+	mulq	%rcx			# p = a[0] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		# p += r[0]
+	movq	8(%rdi), %r10		# prefetch r[1]
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 0(%rdi)		# r[0] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+
+	movq	%r11, %rax
+	movq	16(%rsi), %r11		# prefetch a[2]
+	mulq	%rcx			# p = a[1] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		# p += r[1]
+	movq	16(%rdi), %r10		# prefetch r[2]
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 8(%rdi)		# r[1] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+
+	movq	%r11, %rax
+	movq	24(%rsi), %r11		# prefetch a[3]
+	mulq	%rcx			# p = a[2] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		# p += r[2]
+	movq	24(%rdi), %r10		# prefetch r[3]
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 16(%rdi)		# r[2] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+
+	movq	%r11, %rax
+	movq	32(%rsi), %r11		# prefetch a[4]
+	mulq	%rcx			# p = a[3] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		# p += r[3]
+	movq	32(%rdi), %r10		# prefetch r[4]
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 24(%rdi)		# r[3] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+
+	movq	%r11, %rax
+	movq	40(%rsi), %r11		# prefetch a[5]
+	mulq	%rcx			# p = a[4] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		# p += r[4]
+	movq	40(%rdi), %r10		# prefetch r[5]
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 32(%rdi)		# r[4] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+
+	movq	%r11, %rax
+	movq	48(%rsi), %r11		# prefetch a[6]
+	mulq	%rcx			# p = a[5] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		# p += r[5]
+	movq	48(%rdi), %r10		# prefetch r[6]
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 40(%rdi)		# r[5] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+
+	movq	%r11, %rax
+	movq	56(%rsi), %r11		# prefetch a[7]
+	mulq	%rcx			# p = a[6] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		# p += r[6]
+	movq	56(%rdi), %r10		# prefetch r[7]
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 48(%rdi)		# r[6] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+
+	movq	%r11, %rax
+	mulq	%rcx			# p = a[7] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		# p += r[7]
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 56(%rdi)		# r[7] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+
+	addq	$64, %rsi		# a += 8
+	addq	$64, %rdi		# r += 8
+	subq	$8, %r8			# len -= 8
+
+	jz	.L27			# nothing left: done
+	jmp	.L25
+
+.L26:					# tail: 1 to 7 digits remain
+	movq	0(%rsi), %rax
+	movq	0(%rdi), %r10
+	mulq	%rcx			# p = a[0] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		# p += r[0]
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 0(%rdi)		# r[0] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+	decq	%r8
+	jz	.L27
+
+	movq	8(%rsi), %rax
+	movq	8(%rdi), %r10
+	mulq	%rcx			# p = a[1] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		# p += r[1]
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 8(%rdi)		# r[1] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+	decq	%r8
+	jz	.L27
+
+	movq	16(%rsi), %rax
+	movq	16(%rdi), %r10
+	mulq	%rcx			# p = a[2] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		# p += r[2]
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 16(%rdi)		# r[2] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+	decq	%r8
+	jz	.L27
+
+	movq	24(%rsi), %rax
+	movq	24(%rdi), %r10
+	mulq	%rcx			# p = a[3] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		# p += r[3]
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 24(%rdi)		# r[3] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+	decq	%r8
+	jz	.L27
+
+	movq	32(%rsi), %rax
+	movq	32(%rdi), %r10
+	mulq	%rcx			# p = a[4] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		# p += r[4]
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 32(%rdi)		# r[4] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+	decq	%r8
+	jz	.L27
+
+	movq	40(%rsi), %rax
+	movq	40(%rdi), %r10
+	mulq	%rcx			# p = a[5] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		# p += r[5]
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 40(%rdi)		# r[5] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+	decq	%r8
+	jz	.L27
+
+	movq	48(%rsi), %rax
+	movq	48(%rdi), %r10
+	mulq	%rcx			# p = a[6] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		# p += r[6]
+	addq	%r9, %rax
+	adcq	$0, %rdx		# p += cy
+	movq	%rax, 48(%rdi)		# r[6] = lo(p)
+	movq	%rdx, %r9		# cy = hi(p)
+	decq	%r8
+	jz	.L27
+
+
+.L27:					# done: return the final carry
+	movq	%r9, %rax
+	ret
+
+.size s_mpv_mul_add_vec64, .-s_mpv_mul_add_vec64
+
+# Magic indicating no need for an executable stack
+.section .note.GNU-stack, "", @progbits
+.previous
diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm b/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm
new file mode 100644
index 000000000..2120c18f9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm
@@ -0,0 +1,388 @@
+; This Source Code Form is subject to the terms of the Mozilla Public
+; License, v. 2.0. If a copy of the MPL was not distributed with this
+; file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+;
+; This code is converted from mpi_amd64_gas.s for MASM for x64.
+;
+
+; ------------------------------------------------------------------------
+;
+;  Implementation of s_mpv_mul_set_vec which exploits
+;  the 64X64->128 bit  unsigned multiply instruction.
+;
+; ------------------------------------------------------------------------
+
+; r = a * digit, r and a are vectors of length len
+; returns the carry digit
+; r and a are 64 bit aligned.
+;
+; uint64_t
+; s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+;
+
+.CODE
+
+s_mpv_mul_set_vec64 PROC
+
+        ; Remap the MASM-side parameter registers (rcx, rdx, r8d, r9)
+        ; onto the ones the GAS version uses (rdi, rsi, edx, rcx), so the
+        ; rest of the body follows the GAS code this was converted from.
+
+        push rdi                ; rdi and rsi are restored before ret
+        push rsi
+
+        mov rdi, rcx            ; rdi = r
+        mov rsi, rdx            ; rsi = a
+        mov edx, r8d            ; rdx = len, zero-extended from 32 bits
+        mov rcx, r9             ; rcx = digit
+
+        xor r9, r9              ; cy = 0; this is also the result if len == 0
+        test rdx, rdx
+        jz L17                  ; if (len == 0) return (0)
+        mov r8, rdx             ; use r8 for len; rdx is used by mul
+        ; (r9 is cleared before the jz: L17 returns r9, which held digit)
+
+L15:                            ; main loop: 8 digits per pass
+        cmp r8, 8
+        jb  L16                 ; fewer than 8 digits left: finish in the tail
+        mov rax, [rsi]          ; rax = a[0]
+        mov r11, [8+rsi]        ; preload a[1]
+        mul rcx                 ; p = a[0] * digit
+        add rax, r9
+        adc rdx, 0              ; p += cy
+        mov [0+rdi], rax        ; r[0] = lo(p)
+        mov r9, rdx             ; cy = hi(p)
+        mov rax,r11
+        mov r11, [16+rsi]
+        mul rcx                 ; p = a[1] * digit
+        add rax,r9
+        adc rdx,0
+        mov [8+rdi],rax         ; r[1] = lo(p)
+        mov r9,rdx
+        mov rax,r11
+        mov r11, [24+rsi]
+        mul rcx                 ; p = a[2] * digit
+        add rax,r9
+        adc rdx,0
+        mov [16+rdi],rax        ; r[2] = lo(p)
+        mov r9,rdx
+        mov rax,r11
+        mov r11, [32+rsi]
+        mul rcx                 ; p = a[3] * digit
+        add rax,r9
+        adc rdx,0
+        mov [24+rdi],rax        ; r[3] = lo(p)
+        mov r9,rdx
+        mov rax,r11
+        mov r11, [40+rsi]
+        mul rcx                 ; p = a[4] * digit
+        add rax,r9
+        adc rdx,0
+        mov [32+rdi],rax        ; r[4] = lo(p)
+        mov r9,rdx
+        mov rax,r11
+        mov r11, [48+rsi]
+        mul rcx                 ; p = a[5] * digit
+        add rax,r9
+        adc rdx,0
+        mov [40+rdi],rax        ; r[5] = lo(p)
+        mov r9,rdx
+        mov rax,r11
+        mov r11, [56+rsi]
+        mul rcx                 ; p = a[6] * digit
+        add rax,r9
+        adc rdx,0
+        mov [48+rdi],rax        ; r[6] = lo(p)
+        mov r9,rdx
+        mov rax,r11
+        mul rcx                 ; p = a[7] * digit
+        add rax,r9
+        adc rdx,0
+        mov [56+rdi],rax        ; r[7] = lo(p)
+        mov r9,rdx
+        add rsi, 64             ; a += 8
+        add rdi, 64             ; r += 8
+        sub r8, 8               ; len -= 8
+        jz L17                  ; nothing left: done
+        jmp L15
+
+L16:                            ; tail: 1 to 7 digits remain
+        mov rax, [0+rsi]
+        mul rcx                 ; p = a[0] * digit
+        add rax, r9
+        adc rdx,0               ; p += cy
+        mov [0+rdi],rax         ; r[0] = lo(p)
+        mov r9,rdx              ; cy = hi(p)
+        dec r8
+        jz L17
+        mov rax, [8+rsi]
+        mul rcx                 ; p = a[1] * digit
+        add rax,r9
+        adc rdx,0
+        mov [8+rdi], rax        ; r[1] = lo(p)
+        mov r9, rdx
+        dec r8
+        jz L17
+        mov rax, [16+rsi]
+        mul rcx                 ; p = a[2] * digit
+        add rax, r9
+        adc rdx, 0
+        mov [16+rdi],rax        ; r[2] = lo(p)
+        mov r9,rdx
+        dec r8
+        jz L17
+        mov rax, [24+rsi]
+        mul rcx                 ; p = a[3] * digit
+        add rax, r9
+        adc rdx, 0
+        mov [24+rdi], rax       ; r[3] = lo(p)
+        mov r9, rdx
+        dec r8
+        jz L17
+        mov rax, [32+rsi]
+        mul rcx                 ; p = a[4] * digit
+        add rax, r9
+        adc rdx, 0
+        mov [32+rdi],rax        ; r[4] = lo(p)
+        mov r9, rdx
+        dec r8
+        jz L17
+        mov rax, [40+rsi]
+        mul rcx                 ; p = a[5] * digit
+        add rax, r9
+        adc rdx, 0
+        mov [40+rdi], rax       ; r[5] = lo(p)
+        mov r9, rdx
+        dec r8
+        jz L17
+        mov rax, [48+rsi]
+        mul rcx                 ; p = a[6] * digit
+        add rax, r9
+        adc rdx, 0
+        mov [48+rdi], rax       ; r[6] = lo(p)
+        mov r9, rdx
+        dec r8
+        jz L17
+
+L17:                            ; done: return the final carry
+        mov rax, r9
+        pop rsi
+        pop rdi
+        ret
+
+s_mpv_mul_set_vec64 ENDP
+
+
+;------------------------------------------------------------------------
+;
+; Implementation of s_mpv_mul_add_vec which exploits
+; the 64X64->128 bit  unsigned multiply instruction.
+;
+;------------------------------------------------------------------------
+
+; r += a * digit, r and a are vectors of length len
+; returns the carry digit
+; r and a are 64 bit aligned.
+;
+; uint64_t
+; s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+; 
+
+s_mpv_mul_add_vec64 PROC
+
+        ; Remap the MASM-side parameter registers (rcx, rdx, r8d, r9)
+        ; onto the ones the GAS version uses (rdi, rsi, edx, rcx), so the
+        ; rest of the body follows the GAS code this was converted from.
+
+        push rdi                ; rdi and rsi are restored before ret
+        push rsi
+
+        mov rdi, rcx            ; rdi = r
+        mov rsi, rdx            ; rsi = a
+        mov edx, r8d            ; rdx = len, zero-extended from 32 bits
+        mov rcx, r9             ; rcx = digit
+
+        xor r9, r9              ; cy = 0; this is also the result if len == 0
+        test rdx, rdx
+        jz L27                  ; if (len == 0) return (0)
+        mov r8, rdx             ; use r8 for len; rdx is used by mul
+        ; (r9 is cleared before the jz: L27 returns r9, which held digit)
+
+L25:                            ; main loop: 8 digits per pass
+        cmp r8, 8
+        jb L26                  ; fewer than 8 digits left: finish in the tail
+        mov rax, [0+rsi]        ; rax = a[0]
+        mov r10, [0+rdi]        ; r10 = r[0]
+        mov r11, [8+rsi]        ; preload a[1]
+        mul rcx                 ; p = a[0] * digit
+        add rax,r10
+        adc rdx,0               ; p += r[0]
+        mov r10, [8+rdi]        ; preload r[1]
+        add rax,r9
+        adc rdx,0               ; p += cy
+        mov [0+rdi],rax         ; r[0] = lo(p)
+        mov r9,rdx              ; cy = hi(p)
+        mov rax,r11
+        mov r11, [16+rsi]
+        mul rcx                 ; p = a[1] * digit
+        add rax,r10
+        adc rdx,0
+        mov r10, [16+rdi]
+        add rax,r9
+        adc rdx,0
+        mov [8+rdi],rax         ; r[1] = lo(p)
+        mov r9,rdx
+        mov rax,r11
+        mov r11, [24+rsi]
+        mul rcx                 ; p = a[2] * digit
+        add rax,r10
+        adc rdx,0
+        mov r10, [24+rdi]
+        add rax,r9
+        adc rdx,0
+        mov [16+rdi],rax        ; r[2] = lo(p)
+        mov r9,rdx
+        mov rax,r11
+        mov r11, [32+rsi]
+        mul rcx                 ; p = a[3] * digit
+        add rax,r10
+        adc rdx,0
+        mov r10, [32+rdi]
+        add rax,r9
+        adc rdx,0
+        mov [24+rdi],rax        ; r[3] = lo(p)
+        mov r9,rdx
+        mov rax,r11
+        mov r11, [40+rsi]
+        mul rcx                 ; p = a[4] * digit
+        add rax,r10
+        adc rdx,0
+        mov r10, [40+rdi]
+        add rax,r9
+        adc rdx,0
+        mov [32+rdi],rax        ; r[4] = lo(p)
+        mov r9,rdx
+        mov rax,r11
+        mov r11, [48+rsi]
+        mul rcx                 ; p = a[5] * digit
+        add rax,r10
+        adc rdx,0
+        mov r10, [48+rdi]
+        add rax,r9
+        adc rdx,0
+        mov [40+rdi],rax        ; r[5] = lo(p)
+        mov r9,rdx
+        mov rax,r11
+        mov r11, [56+rsi]
+        mul rcx                 ; p = a[6] * digit
+        add rax,r10
+        adc rdx,0
+        mov r10, [56+rdi]
+        add rax,r9
+        adc rdx,0
+        mov [48+rdi],rax        ; r[6] = lo(p)
+        mov r9,rdx
+        mov rax,r11
+        mul rcx                 ; p = a[7] * digit
+        add rax,r10
+        adc rdx,0
+        add rax,r9
+        adc rdx,0
+        mov [56+rdi],rax        ; r[7] = lo(p)
+        mov r9,rdx
+        add rsi,64              ; a += 8
+        add rdi,64              ; r += 8
+        sub r8, 8               ; len -= 8
+        jz L27                  ; nothing left: done
+        jmp L25
+
+L26:                            ; tail: 1 to 7 digits remain
+        mov rax, [0+rsi]
+        mov r10, [0+rdi]
+        mul rcx                 ; p = a[0] * digit
+        add rax,r10
+        adc rdx,0               ; p += r[0]
+        add rax,r9
+        adc rdx,0               ; p += cy
+        mov [0+rdi],rax         ; r[0] = lo(p)
+        mov r9,rdx              ; cy = hi(p)
+        dec r8
+        jz L27
+        mov rax, [8+rsi]
+        mov r10, [8+rdi]
+        mul rcx                 ; p = a[1] * digit
+        add rax,r10
+        adc rdx,0
+        add rax,r9
+        adc rdx,0
+        mov [8+rdi],rax         ; r[1] = lo(p)
+        mov r9,rdx
+        dec r8
+        jz L27
+        mov rax, [16+rsi]
+        mov r10, [16+rdi]
+        mul rcx                 ; p = a[2] * digit
+        add rax,r10
+        adc rdx,0
+        add rax,r9
+        adc rdx,0
+        mov [16+rdi],rax        ; r[2] = lo(p)
+        mov r9,rdx
+        dec r8
+        jz L27
+        mov rax, [24+rsi]
+        mov r10, [24+rdi]
+        mul rcx                 ; p = a[3] * digit
+        add rax,r10
+        adc rdx,0
+        add rax,r9
+        adc rdx,0
+        mov [24+rdi],rax        ; r[3] = lo(p)
+        mov r9,rdx
+        dec r8
+        jz L27
+        mov rax, [32+rsi]
+        mov r10, [32+rdi]
+        mul rcx                 ; p = a[4] * digit
+        add rax,r10
+        adc rdx,0
+        add rax,r9
+        adc rdx,0
+        mov [32+rdi],rax        ; r[4] = lo(p)
+        mov r9,rdx
+        dec r8
+        jz L27
+        mov rax, [40+rsi]
+        mov r10, [40+rdi]
+        mul rcx                 ; p = a[5] * digit
+        add rax,r10
+        adc rdx,0
+        add rax,r9
+        adc rdx,0
+        mov [40+rdi],rax        ; r[5] = lo(p)
+        mov r9,rdx
+        dec r8
+        jz L27
+        mov rax, [48+rsi]
+        mov r10, [48+rdi]
+        mul rcx                 ; p = a[6] * digit
+        add rax,r10
+        adc rdx,0
+        add rax, r9
+        adc rdx, 0
+        mov [48+rdi], rax       ; r[6] = lo(p)
+        mov r9, rdx
+        dec r8
+        jz L27
+
+L27:                            ; done: return the final carry
+        mov rax, r9
+
+        pop rsi
+        pop rdi
+        ret
+
+s_mpv_mul_add_vec64 ENDP
+
+END
diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_sun.s b/security/nss/lib/freebl/mpi/mpi_amd64_sun.s
new file mode 100644
index 000000000..ddd5c40fd
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_amd64_sun.s
@@ -0,0 +1,385 @@
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+/ ------------------------------------------------------------------------
+/
+/  Implementation of s_mpv_mul_set_vec which exploits
+/  the 64X64->128 bit  unsigned multiply instruction.
+/
+/ ------------------------------------------------------------------------
+
+/ r = a * digit, r and a are vectors of length len
+/ returns the carry digit
+/ r and a are 64 bit aligned.
+/
+/ uint64_t
+/ s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+/
+
+.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:
+/ In: %rdi = r, %rsi = a, %edx = len, %rcx = digit.  Out: %rax = carry digit.
+	xorq	%r9, %r9		/ cy = 0; done first so the len == 0 exit returns 0
+	testl	%edx, %edx		/ len is a 32-bit int: ignore the upper half of %rdx
+	jz	.L17			/ if (len == 0) return (0)
+
+	movl	%edx, %r8d		/ Use r8 for len (zero-extended); %rdx is used by mul
+/ Each step below: p = a[i] * digit + cy; r[i] = lo(p); cy = hi(p).
+
+.L15:
+	cmpq	$8, %r8			/ len - 8
+	jb	.L16			/ fewer than 8 digits left: finish in .L16
+	movq	0(%rsi), %rax		/ rax = a[0]
+	movq	8(%rsi), %r11		/ prefetch a[1]
+	mulq	%rcx			/ p = a[0] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 0(%rdi)		/ r[0] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+
+	movq	%r11, %rax
+	movq	16(%rsi), %r11		/ prefetch a[2]
+	mulq	%rcx			/ p = a[1] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 8(%rdi)		/ r[1] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+
+	movq	%r11, %rax
+	movq	24(%rsi), %r11		/ prefetch a[3]
+	mulq	%rcx			/ p = a[2] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 16(%rdi)		/ r[2] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+
+	movq	%r11, %rax
+	movq	32(%rsi), %r11		/ prefetch a[4]
+	mulq	%rcx			/ p = a[3] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 24(%rdi)		/ r[3] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+
+	movq	%r11, %rax
+	movq	40(%rsi), %r11		/ prefetch a[5]
+	mulq	%rcx			/ p = a[4] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 32(%rdi)		/ r[4] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+
+	movq	%r11, %rax
+	movq	48(%rsi), %r11		/ prefetch a[6]
+	mulq	%rcx			/ p = a[5] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 40(%rdi)		/ r[5] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+
+	movq	%r11, %rax
+	movq	56(%rsi), %r11		/ prefetch a[7]
+	mulq	%rcx			/ p = a[6] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 48(%rdi)		/ r[6] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+
+	movq	%r11, %rax
+	mulq	%rcx			/ p = a[7] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 56(%rdi)		/ r[7] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+
+	addq	$64, %rsi		/ a += 8
+	addq	$64, %rdi		/ r += 8
+	subq	$8, %r8			/ len -= 8
+
+	jz	.L17			/ nothing left: return cy
+	jmp	.L15
+
+.L16:
+	movq	0(%rsi), %rax
+	mulq	%rcx			/ p = a[0] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 0(%rdi)		/ r[0] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+	decq	%r8
+	jz	.L17
+
+	movq	8(%rsi), %rax
+	mulq	%rcx			/ p = a[1] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 8(%rdi)		/ r[1] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+	decq	%r8
+	jz	.L17
+
+	movq	16(%rsi), %rax
+	mulq	%rcx			/ p = a[2] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 16(%rdi)		/ r[2] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+	decq	%r8
+	jz	.L17
+
+	movq	24(%rsi), %rax
+	mulq	%rcx			/ p = a[3] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 24(%rdi)		/ r[3] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+	decq	%r8
+	jz	.L17
+
+	movq	32(%rsi), %rax
+	mulq	%rcx			/ p = a[4] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 32(%rdi)		/ r[4] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+	decq	%r8
+	jz	.L17
+
+	movq	40(%rsi), %rax
+	mulq	%rcx			/ p = a[5] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 40(%rdi)		/ r[5] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+	decq	%r8
+	jz	.L17
+
+	movq	48(%rsi), %rax
+	mulq	%rcx			/ p = a[6] * digit
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 48(%rdi)		/ r[6] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+	decq	%r8
+	jz	.L17
+
+
+.L17:
+	movq	%r9, %rax		/ return cy
+	ret
+
+.size s_mpv_mul_set_vec64, .-s_mpv_mul_set_vec64
+
+/ ------------------------------------------------------------------------
+/
+/  Implementation of s_mpv_mul_add_vec which exploits
+/  the 64X64->128 bit  unsigned multiply instruction.
+/
+/ ------------------------------------------------------------------------
+
+/ r += a * digit, r and a are vectors of length len
+/ returns the carry digit
+/ r and a are 64 bit aligned.
+/
+/ uint64_t
+/ s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+/
+
+.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:
+/ In: %rdi = r, %rsi = a, %edx = len, %rcx = digit.  Out: %rax = carry digit.
+	xorq	%r9, %r9		/ cy = 0; done first so the len == 0 exit returns 0
+	testl	%edx, %edx		/ len is a 32-bit int: ignore the upper half of %rdx
+	jz	.L27			/ if (len == 0) return (0)
+
+	movl	%edx, %r8d		/ Use r8 for len (zero-extended); %rdx is used by mul
+/ Each step below: p = a[i] * digit + r[i] + cy; r[i] = lo(p); cy = hi(p).
+
+.L25:
+	cmpq	$8, %r8			/ len - 8
+	jb	.L26			/ fewer than 8 digits left: finish in .L26
+	movq	0(%rsi), %rax		/ rax = a[0]
+	movq	0(%rdi), %r10		/ r10 = r[0]
+	movq	8(%rsi), %r11		/ prefetch a[1]
+	mulq	%rcx			/ p = a[0] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		/ p += r[0]
+	movq	8(%rdi), %r10		/ prefetch r[1]
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 0(%rdi)		/ r[0] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+
+	movq	%r11, %rax
+	movq	16(%rsi), %r11		/ prefetch a[2]
+	mulq	%rcx			/ p = a[1] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		/ p += r[1]
+	movq	16(%rdi), %r10		/ prefetch r[2]
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 8(%rdi)		/ r[1] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+
+	movq	%r11, %rax
+	movq	24(%rsi), %r11		/ prefetch a[3]
+	mulq	%rcx			/ p = a[2] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		/ p += r[2]
+	movq	24(%rdi), %r10		/ prefetch r[3]
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 16(%rdi)		/ r[2] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+
+	movq	%r11, %rax
+	movq	32(%rsi), %r11		/ prefetch a[4]
+	mulq	%rcx			/ p = a[3] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		/ p += r[3]
+	movq	32(%rdi), %r10		/ prefetch r[4]
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 24(%rdi)		/ r[3] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+
+	movq	%r11, %rax
+	movq	40(%rsi), %r11		/ prefetch a[5]
+	mulq	%rcx			/ p = a[4] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		/ p += r[4]
+	movq	40(%rdi), %r10		/ prefetch r[5]
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 32(%rdi)		/ r[4] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+
+	movq	%r11, %rax
+	movq	48(%rsi), %r11		/ prefetch a[6]
+	mulq	%rcx			/ p = a[5] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		/ p += r[5]
+	movq	48(%rdi), %r10		/ prefetch r[6]
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 40(%rdi)		/ r[5] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+
+	movq	%r11, %rax
+	movq	56(%rsi), %r11		/ prefetch a[7]
+	mulq	%rcx			/ p = a[6] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		/ p += r[6]
+	movq	56(%rdi), %r10		/ prefetch r[7]
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 48(%rdi)		/ r[6] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+
+	movq	%r11, %rax
+	mulq	%rcx			/ p = a[7] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		/ p += r[7]
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 56(%rdi)		/ r[7] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+
+	addq	$64, %rsi		/ a += 8
+	addq	$64, %rdi		/ r += 8
+	subq	$8, %r8			/ len -= 8
+
+	jz	.L27			/ nothing left: return cy
+	jmp	.L25
+
+.L26:
+	movq	0(%rsi), %rax
+	movq	0(%rdi), %r10
+	mulq	%rcx			/ p = a[0] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		/ p += r[0]
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 0(%rdi)		/ r[0] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+	decq	%r8
+	jz	.L27
+
+	movq	8(%rsi), %rax
+	movq	8(%rdi), %r10
+	mulq	%rcx			/ p = a[1] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		/ p += r[1]
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 8(%rdi)		/ r[1] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+	decq	%r8
+	jz	.L27
+
+	movq	16(%rsi), %rax
+	movq	16(%rdi), %r10
+	mulq	%rcx			/ p = a[2] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		/ p += r[2]
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 16(%rdi)		/ r[2] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+	decq	%r8
+	jz	.L27
+
+	movq	24(%rsi), %rax
+	movq	24(%rdi), %r10
+	mulq	%rcx			/ p = a[3] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		/ p += r[3]
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 24(%rdi)		/ r[3] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+	decq	%r8
+	jz	.L27
+
+	movq	32(%rsi), %rax
+	movq	32(%rdi), %r10
+	mulq	%rcx			/ p = a[4] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		/ p += r[4]
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 32(%rdi)		/ r[4] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+	decq	%r8
+	jz	.L27
+
+	movq	40(%rsi), %rax
+	movq	40(%rdi), %r10
+	mulq	%rcx			/ p = a[5] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		/ p += r[5]
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 40(%rdi)		/ r[5] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+	decq	%r8
+	jz	.L27
+
+	movq	48(%rsi), %rax
+	movq	48(%rdi), %r10
+	mulq	%rcx			/ p = a[6] * digit
+	addq	%r10, %rax
+	adcq	$0, %rdx		/ p += r[6]
+	addq	%r9, %rax
+	adcq	$0, %rdx		/ p += cy
+	movq	%rax, 48(%rdi)		/ r[6] = lo(p)
+	movq	%rdx, %r9		/ cy = hi(p)
+	decq	%r8
+	jz	.L27
+
+
+.L27:
+	movq	%r9, %rax		/ return cy
+	ret
+        
+.size s_mpv_mul_add_vec64, .-s_mpv_mul_add_vec64
diff --git a/security/nss/lib/freebl/mpi/mpi_arm.c b/security/nss/lib/freebl/mpi/mpi_arm.c
new file mode 100644
index 000000000..b5139f28d
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_arm.c
@@ -0,0 +1,175 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This inlined version is for 32-bit ARM platform only */
+
+#if !defined(__arm__)
+#error "This is for ARM only"
+#endif
+
+/* The inline assembler version does not work with 16-bit Thumb */
+#if (!defined(__thumb__) || defined(__thumb2__)) && !defined(__ARM_ARCH_3__)
+
+#include "mpi-priv.h"
+
+#ifdef MP_ASSEMBLY_MULTIPLY
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    __asm__ __volatile__( /* c[0..a_len] = a[0..a_len-1] * b */
+        "mov     r5, #0\n" /* r5 = carry into the next digit */
+#ifdef __thumb2__
+        "cbz     %1, 2f\n"
+#else
+        "cmp     %1, r5\n" /* r5 is 0 now */
+        "beq     2f\n"
+#endif
+        /* %0 = a, %1 = a_len, %2 = c: all updated by the loop, hence "+r"; %3 = b */
+        "1:\n"
+        "mov     r4, #0\n"
+        "ldr     r6, [%0], #4\n"   /* r6 = *a++ */
+        "umlal   r5, r4, r6, %3\n" /* r4:r5 = r6 * b + carry */
+        "str     r5, [%2], #4\n"   /* *c++ = low word */
+        "mov     r5, r4\n"         /* high word becomes the next carry */
+
+        "subs    %1, #1\n"
+        "bne     1b\n"
+
+        "2:\n"
+        "str     r5, [%2]\n" /* digit after the last product digit = final carry */
+        : "+r"(a), "+r"(a_len), "+r"(c)
+        : "r"(b)
+        : "memory", "cc", "%r4", "%r5", "%r6");
+}
+
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    __asm__ __volatile__( /* c[0..a_len-1] += a * b; the final carry is stored after it */
+        "mov     r5, #0\n" /* r5 = carry into the next digit */
+#ifdef __thumb2__
+        "cbz     %1, 2f\n"
+#else
+        "cmp     %1, r5\n" /* r5 is 0 now */
+        "beq     2f\n"
+#endif
+        /* %0 = a, %1 = a_len, %2 = c: all updated by the loop, hence "+r"; %3 = b */
+        "1:\n"
+        "mov     r4, #0\n"
+        "ldr     r6, [%2]\n"       /* r6 = *c */
+        "adds    r5, r6\n"         /* carry + *c ... */
+        "adc     r4, r4, #0\n"     /* ... with the overflow kept in r4 */
+
+        "ldr     r6, [%0], #4\n"   /* r6 = *a++ */
+        "umlal   r5, r4, r6, %3\n" /* r4:r5 += r6 * b */
+        "str     r5, [%2], #4\n"   /* *c++ = low word */
+        "mov     r5, r4\n"         /* high word becomes the next carry */
+
+        "subs    %1, #1\n"
+        "bne     1b\n"
+
+        "2:\n"
+        "str     r5, [%2]\n" /* overwrites (does not add to) the next digit of c */
+        : "+r"(a), "+r"(a_len), "+r"(c)
+        : "r"(b)
+        : "memory", "cc", "%r4", "%r5", "%r6");
+}
+
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    if (!a_len)
+        return;
+    /* c += a * b, then keep adding the carry into higher digits of c until it dies out. */
+    __asm__ __volatile__(
+        "mov     r5, #0\n" /* r5 = carry into the next digit */
+        /* %0 = a, %1 = a_len, %2 = c: all updated by the asm, hence "+r"; %3 = b */
+        "1:\n"
+        "mov     r4, #0\n"
+        "ldr     r6, [%2]\n"       /* r6 = *c */
+        "adds    r5, r6\n"         /* carry + *c ... */
+        "adc     r4, r4, #0\n"     /* ... with the overflow kept in r4 */
+        "ldr     r6, [%0], #4\n"   /* r6 = *a++ */
+        "umlal   r5, r4, r6, %3\n" /* r4:r5 += r6 * b */
+        "str     r5, [%2], #4\n"   /* *c++ = low word */
+        "mov     r5, r4\n"         /* high word becomes the next carry */
+
+        "subs    %1, #1\n"
+        "bne     1b\n"
+        /* r4 == r5 == carry out of the multiply; nothing to propagate if it is 0 */
+#ifdef __thumb2__
+        "cbz     r4, 3f\n"
+#else
+        "cmp     r4, #0\n"
+        "beq     3f\n"
+#endif
+
+        "2:\n"
+        "mov     r4, #0\n"
+        "ldr     r6, [%2]\n"
+        "adds    r5, r6\n"         /* *c + carry */
+        "adc     r4, r4, #0\n"     /* r4 = carry out of that addition */
+        "str     r5, [%2], #4\n"
+        "movs    r5, r4\n"         /* sets Z when the carry is gone */
+        "bne     2b\n"
+
+        "3:\n"
+        : "+r"(a), "+r"(a_len), "+r"(c)
+        : "r"(b)
+        : "memory", "cc", "%r4", "%r5", "%r6");
+}
+#endif
+
+#ifdef MP_ASSEMBLY_SQUARE
+void
+s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
+{
+    if (!a_len)
+        return;
+    /* Adds pa[i] * pa[i] into ps[2i], ps[2i+1] for each i, then propagates the carry. */
+    __asm__ __volatile__(
+        "mov     r3, #0\n" /* r3 = carry into the next digit */
+        /* %0 = pa, %1 = a_len, %2 = ps: all updated by the asm, hence "+r" */
+        "1:\n"
+        "mov     r4, #0\n"
+        "ldr     r6, [%0], #4\n"   /* r6 = *pa++ */
+        "ldr     r5, [%2]\n"
+        "adds    r3, r5\n"         /* carry + low result digit */
+        "adc     r4, r4, #0\n"
+        "umlal   r3, r4, r6, r6\n" /* w = r3:r4 */
+        "str     r3, [%2], #4\n"   /* *ps++ = low word of w */
+
+        "ldr     r5, [%2]\n"
+        "adds    r3, r4, r5\n"     /* high word of w + high result digit */
+        "mov     r4, #0\n"
+        "adc     r4, r4, #0\n"     /* r4 = carry out of that addition */
+        "str     r3, [%2], #4\n"
+        "mov     r3, r4\n"
+
+        "subs    %1, #1\n"
+        "bne     1b\n"
+        /* r3 = carry out of the last digit pair; nothing to propagate if it is 0 */
+#ifdef __thumb2__
+        "cbz     r3, 3f\n"
+#else
+        "cmp     r3, #0\n"
+        "beq     3f\n"
+#endif
+
+        "2:\n"
+        "mov     r4, #0\n"
+        "ldr     r5, [%2]\n"
+        "adds    r3, r5\n"         /* *ps + carry */
+        "adc     r4, r4, #0\n"
+        "str     r3, [%2], #4\n"
+        "movs    r3, r4\n"         /* sets Z when the carry is gone */
+        "bne     2b\n"
+
+        "3:"
+        : "+r"(pa), "+r"(a_len), "+r"(ps)
+        :
+        : "memory", "cc", "%r3", "%r4", "%r5", "%r6");
+}
+#endif
+#endif
diff --git a/security/nss/lib/freebl/mpi/mpi_hp.c b/security/nss/lib/freebl/mpi/mpi_hp.c
new file mode 100644
index 000000000..0cea7685d
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_hp.c
@@ -0,0 +1,81 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This file contains routines that perform vector multiplication.  */
+
+#include "mpi-priv.h"
+#include <unistd.h>
+
+#include <stddef.h>
+/* #include <sys/systeminfo.h> */
+#include <strings.h>
+
+extern void multacc512(           /* chosen by HP_MPY_ADD_FN when a_len == MULTACC512_LEN */
+    int length,                   /* doublewords in multiplicand vector. */
+    const mp_digit *scalaraddr,   /* Address of scalar. */
+    const mp_digit *multiplicand, /* The multiplicand vector. */
+    mp_digit *result);            /* Where to accumulate the result. */
+/* Same parameter list as multacc512; chosen by HP_MPY_ADD_FN for every other length. */
+extern void maxpy_little(
+    int length,                   /* doublewords in multiplicand vector. */
+    const mp_digit *scalaraddr,   /* Address of scalar. */
+    const mp_digit *multiplicand, /* The multiplicand vector. */
+    mp_digit *result);            /* Where to accumulate the result. */
+/* Called by s_mpv_sqr_add_prop in this file. */
+extern void add_diag_little(
+    int length,           /* doublewords in input vector. */
+    const mp_digit *root, /* The vector to square. */
+    mp_digit *result);    /* Where to accumulate the result. */
+
+void
+s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
+{
+    add_diag_little(a_len, pa, ps); /* thin wrapper; the external routine takes the length first */
+}
+
+#define MAX_STACK_DIGITS 258 /* size, in digits, of the on-stack temporary in s_mpv_mul_d */
+#define MULTACC512_LEN (512 / MP_DIGIT_BIT) /* number of digits in 512 bits */
+#define HP_MPY_ADD_FN (a_len == MULTACC512_LEN ? multacc512 : maxpy_little) /* reads the caller's a_len */
+
+/* c = a * b.  a and c may be the same vector; a is then copied aside first. */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit stack_copy[MAX_STACK_DIGITS];
+    mp_digit *heap_copy = NULL; /* set only when the stack buffer is too small */
+    size_t heap_bytes = 0;
+    if (a == c) { /* c is zeroed below, so the input has to be preserved */
+        mp_digit *copy = stack_copy;
+        if (a_len > MAX_STACK_DIGITS) {
+            heap_bytes = sizeof(mp_digit) * (a_len + 2);
+            copy = heap_copy = malloc(heap_bytes);
+            if (!heap_copy)
+                return; /* out of memory: c is left untouched */
+        }
+        memcpy(copy, a, a_len * sizeof(*a));
+        a = copy;
+    }
+    s_mp_setz(c, a_len + 1);
+    HP_MPY_ADD_FN(a_len, &b, a, c); /* accumulates a * b into the zeroed c */
+    if (heap_copy) {
+        memset(heap_copy, 0, heap_bytes); /* clear the copy before releasing it */
+        free(heap_copy);
+    }
+}
+
+/* c += a * b, where a is a_len words long; c[a_len] is overwritten, not added to. */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    c[a_len] = 0; /* so carry propagation stops here. */
+    HP_MPY_ADD_FN(a_len, &b, a, c); /* multacc512 or maxpy_little, chosen by a_len */
+}
+
+/* c += a * b, where a is a_len words long; unlike s_mpv_mul_d_add, c[a_len] is not cleared. */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
+                     mp_digit *c)
+{
+    HP_MPY_ADD_FN(a_len, &b, a, c); /* multacc512 or maxpy_little, chosen by a_len */
+}
diff --git a/security/nss/lib/freebl/mpi/mpi_i86pc.s b/security/nss/lib/freebl/mpi/mpi_i86pc.s
new file mode 100644
index 000000000..f80039659
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_i86pc.s
@@ -0,0 +1,313 @@
+/
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+.text
+
+ /  s_mpv_mul_d: c = a * b; a has a_len digits, b is a single digit.
+ /  c receives a_len product digits followed by the final carry digit.
+ /
+ /  ebp - 40:	caller's ebx
+ /  ebp - 36:	caller's esi
+ /  ebp - 32:	caller's edi
+ /  ebp - 28:	start of the 28 bytes reserved by "sub $28,%esp";
+ /  		nothing in ebp - 28 .. ebp - 4 is referenced
+ /
+ /  ebp + 0:	caller's ebp
+ /  ebp + 4:	return address
+ /  ebp + 8:	a	argument
+ /  ebp + 12:	a_len	argument
+ /  ebp + 16:	b	argument
+ /  ebp + 20:	c	argument
+ /  registers:
+ / 	eax:	a_i, then low half of a_i * b
+ /	ebx:	carry
+ /	ecx:	a_len
+ /	edx:	b, then high half of a_i * b
+ /	esi:	a ptr
+ /	edi:	c ptr
+.globl	s_mpv_mul_d
+.type	s_mpv_mul_d,@function
+s_mpv_mul_d:
+    push   %ebp
+    mov    %esp,%ebp
+    sub    $28,%esp
+    push   %edi
+    push   %esi
+    push   %ebx
+    movl   $0,%ebx		/ carry = 0
+    mov    12(%ebp),%ecx	/ ecx = a_len
+    mov    20(%ebp),%edi	/ edi = c
+    cmp    $0,%ecx
+    je     L2			/ jmp if a_len == 0
+    mov    8(%ebp),%esi		/ esi = a
+    cld				/ lodsl and stosl must step upwards
+L1:
+    lodsl			/ eax = [ds:esi]; esi += 4
+    mov    16(%ebp),%edx	/ edx = b
+    mull   %edx			/ edx:eax = Phi:Plo = a_i * b
+
+    add    %ebx,%eax		/ add carry (%ebx) to edx:eax
+    adc    $0,%edx
+    mov    %edx,%ebx		/ high half of product becomes next carry
+
+    stosl			/ [es:edi] = eax; edi += 4;
+    dec    %ecx			/ --a_len
+    jnz    L1			/ jmp if a_len != 0
+L2:
+    mov    %ebx,0(%edi)		/ *c = carry
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+
+ /  s_mpv_mul_d_add: c += a * b; a has a_len digits, b is a single digit.
+ /  The final carry is stored into (not added to) the digit after them.
+ /
+ /  ebp - 40:	caller's ebx
+ /  ebp - 36:	caller's esi
+ /  ebp - 32:	caller's edi
+ /  ebp - 28:	start of the 28 bytes reserved by "sub $28,%esp";
+ /  		nothing in ebp - 28 .. ebp - 4 is referenced
+ /
+ /  ebp + 0:	caller's ebp
+ /  ebp + 4:	return address
+ /  ebp + 8:	a	argument
+ /  ebp + 12:	a_len	argument
+ /  ebp + 16:	b	argument
+ /  ebp + 20:	c	argument
+ /  registers:
+ / 	eax:	a_i, then low half of the running sum
+ /	ebx:	carry (also scratch for the word read from *c)
+ /	ecx:	a_len
+ /	edx:	b, then high half of the running sum
+ /	esi:	a ptr
+ /	edi:	c ptr
+.globl	s_mpv_mul_d_add
+.type	s_mpv_mul_d_add,@function
+s_mpv_mul_d_add:
+    push   %ebp
+    mov    %esp,%ebp
+    sub    $28,%esp
+    push   %edi
+    push   %esi
+    push   %ebx
+    movl   $0,%ebx		/ carry = 0
+    mov    12(%ebp),%ecx	/ ecx = a_len
+    mov    20(%ebp),%edi	/ edi = c
+    cmp    $0,%ecx
+    je     L4			/ jmp if a_len == 0
+    mov    8(%ebp),%esi		/ esi = a
+    cld				/ lodsl and stosl must step upwards
+L3:
+    lodsl			/ eax = [ds:esi]; esi += 4
+    mov    16(%ebp),%edx	/ edx = b
+    mull   %edx			/ edx:eax = Phi:Plo = a_i * b
+
+    add    %ebx,%eax		/ add carry (%ebx) to edx:eax
+    adc    $0,%edx
+    mov    0(%edi),%ebx		/ add in current word from *c
+    add    %ebx,%eax		
+    adc    $0,%edx
+    mov    %edx,%ebx		/ high half of product becomes next carry
+
+    stosl			/ [es:edi] = eax; edi += 4;
+    dec    %ecx			/ --a_len
+    jnz    L3			/ jmp if a_len != 0
+L4:
+    mov    %ebx,0(%edi)		/ *c = carry
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+
+ /  s_mpv_mul_d_add_prop: c += a * b, then the carry is added into the
+ /  following digits of c until it no longer propagates.
+ /
+ /  ebp - 40:	caller's ebx
+ /  ebp - 36:	caller's esi
+ /  ebp - 32:	caller's edi
+ /  ebp - 28:	start of the 28 bytes reserved by "sub $28,%esp";
+ /  		nothing in ebp - 28 .. ebp - 4 is referenced
+ /
+ /  ebp + 0:	caller's ebp
+ /  ebp + 4:	return address
+ /  ebp + 8:	a	argument
+ /  ebp + 12:	a_len	argument
+ /  ebp + 16:	b	argument
+ /  ebp + 20:	c	argument
+ /  registers:
+ / 	eax:	a_i, then low half of the running sum
+ /	ebx:	carry (also scratch for the word read from *c)
+ /	ecx:	a_len
+ /	edx:	b, then high half of the running sum
+ /	esi:	a ptr
+ /	edi:	c ptr
+.globl	s_mpv_mul_d_add_prop
+.type	s_mpv_mul_d_add_prop,@function
+s_mpv_mul_d_add_prop:
+    push   %ebp
+    mov    %esp,%ebp
+    sub    $28,%esp
+    push   %edi
+    push   %esi
+    push   %ebx
+    movl   $0,%ebx		/ carry = 0
+    mov    12(%ebp),%ecx	/ ecx = a_len
+    mov    20(%ebp),%edi	/ edi = c
+    cmp    $0,%ecx
+    je     L6			/ jmp if a_len == 0
+    cld				/ lodsl and stosl must step upwards
+    mov    8(%ebp),%esi		/ esi = a
+L5:
+    lodsl			/ eax = [ds:esi]; esi += 4
+    mov    16(%ebp),%edx	/ edx = b
+    mull   %edx			/ edx:eax = Phi:Plo = a_i * b
+
+    add    %ebx,%eax		/ add carry (%ebx) to edx:eax
+    adc    $0,%edx
+    mov    0(%edi),%ebx		/ add in current word from *c
+    add    %ebx,%eax		
+    adc    $0,%edx
+    mov    %edx,%ebx		/ high half of product becomes next carry
+
+    stosl			/ [es:edi] = eax; edi += 4;
+    dec    %ecx			/ --a_len
+    jnz    L5			/ jmp if a_len != 0
+L6:
+    cmp    $0,%ebx		/ is carry zero?
+    jz     L8
+    mov    0(%edi),%eax		/ add in current word from *c
+    add	   %ebx,%eax
+    stosl			/ [es:edi] = eax; edi += 4; (flags are left untouched)
+    jnc    L8			/ done unless that addition carried out
+L7:
+    mov    0(%edi),%eax		/ add in current word from *c
+    adc	   $0,%eax		/ CF is set whenever this line is reached, so this adds 1
+    stosl			/ [es:edi] = eax; edi += 4;
+    jc     L7			/ keep going while the carry propagates
+L8:
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+
+ /  s_mpv_sqr_add_prop: adds the square of each of the a_len digits of pa
+ /  into two digits of ps, then propagates the final carry.
+ /  ebp - 24:	caller's ebx
+ /  ebp - 20:	caller's esi
+ /  ebp - 16:	caller's edi
+ /  ebp - 12:	(ebp - 12 .. ebp - 4: reserved by "sub $12,%esp", not referenced)
+ /  ebp + 0:	caller's ebp
+ /  ebp + 4:	return address
+ /  ebp + 8:	pa	argument
+ /  ebp + 12:	a_len	argument
+ /  ebp + 16:	ps	argument
+ /  registers:
+ / 	eax:	digit of pa, then low half of its square
+ /	ebx:	carry (also scratch for the words read from ps)
+ /	ecx:	a_len
+ /	edx:	high half of the square
+ /	esi:	pa ptr
+ /	edi:	ps ptr
+
+.globl	s_mpv_sqr_add_prop
+.type	s_mpv_sqr_add_prop,@function
+s_mpv_sqr_add_prop:
+     push   %ebp
+     mov    %esp,%ebp
+     sub    $12,%esp
+     push   %edi
+     push   %esi
+     push   %ebx
+     movl   $0,%ebx		/ carry = 0
+     mov    12(%ebp),%ecx	/ a_len
+     mov    16(%ebp),%edi	/ edi = ps
+     cmp    $0,%ecx
+     je     L11			/ jump if a_len == 0
+     cld
+     mov    8(%ebp),%esi	/ esi = pa
+L10:
+     lodsl			/ %eax = [ds:esi]; esi += 4;
+     mull   %eax		/ edx:eax = a_i * a_i
+
+     add    %ebx,%eax		/ add "carry"
+     adc    $0,%edx
+     mov    0(%edi),%ebx
+     add    %ebx,%eax		/ add low word from result
+     mov    4(%edi),%ebx	/ mov and stosl leave CF from the add intact
+     stosl			/ [es:edi] = %eax; edi += 4;
+     adc    %ebx,%edx		/ add high word from result
+     movl   $0,%ebx
+     mov    %edx,%eax
+     adc    $0,%ebx		/ ebx = carry out of the high word
+     stosl			/ [es:edi] = %eax; edi += 4;
+     dec    %ecx		/ --a_len
+     jnz    L10			/ jmp if a_len != 0
+L11:
+    cmp    $0,%ebx		/ is carry zero?
+    jz     L14
+    mov    0(%edi),%eax		/ add in current word from *c
+    add	   %ebx,%eax
+    stosl			/ [es:edi] = eax; edi += 4;
+    jnc    L14			/ done unless that addition carried out
+L12:
+    mov    0(%edi),%eax		/ add in current word from *c
+    adc	   $0,%eax		/ CF is set whenever this line is reached, so this adds 1
+    stosl			/ [es:edi] = eax; edi += 4;
+    jc     L12			/ keep going while the carry propagates
+L14:
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+
+ /
+ / Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ / so its high bit is 1.   This code is from NSPR.
+ / NOTE(review): divl presumably traps unless Nhi < divisor - confirm callers ensure it.
+ / mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ / 		          mp_digit *qp, mp_digit *rp)
+
+ /  esp +  0:   Caller's ebx
+ /  esp +  4:	return address
+ /  esp +  8:	Nhi	argument
+ /  esp + 12:	Nlo	argument
+ /  esp + 16:	divisor	argument
+ /  esp + 20:	qp	argument
+ /  esp + 24:   rp	argument
+ /  registers:
+ / 	eax:	Nlo, then the quotient, then the return value 0
+ /	ebx:	divisor, then qp, then rp
+ /	ecx:	not used
+ /	edx:	Nhi, then the remainder
+ /	esi:	not used
+ /	edi:	not used
+ / 
+
+.globl	s_mpv_div_2dx1d
+.type	s_mpv_div_2dx1d,@function
+s_mpv_div_2dx1d:
+       push   %ebx
+       mov    8(%esp),%edx		/ edx = Nhi
+       mov    12(%esp),%eax		/ eax = Nlo
+       mov    16(%esp),%ebx		/ ebx = divisor
+       div    %ebx			/ eax = quotient, edx = remainder of edx:eax / ebx
+       mov    20(%esp),%ebx
+       mov    %eax,0(%ebx)		/ *qp = quotient
+       mov    24(%esp),%ebx
+       mov    %edx,0(%ebx)		/ *rp = remainder
+       xor    %eax,%eax		/ return zero
+       pop    %ebx
+       ret    
+       nop
+  
diff --git a/security/nss/lib/freebl/mpi/mpi_mips.s b/security/nss/lib/freebl/mpi/mpi_mips.s
new file mode 100644
index 000000000..455792bbb
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_mips.s
@@ -0,0 +1,472 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include <regdef.h>
+        .set    noreorder
+        .set    noat
+
+        .section        .text, 1, 0x00000006, 4, 4
+.text:
+        .section        .text
+
+        .ent    s_mpv_mul_d_add
+        .globl  s_mpv_mul_d_add
+
+s_mpv_mul_d_add: 
+ #/* c += a * b; the final carry is stored into the digit after the last sum */
+ #void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, 
+ #			      mp_digit *c)
+ #{
+ #  mp_digit   a0, a1;	regs a4, a5
+ #  mp_digit   c0, c1;  regs a6, a7
+ #  mp_digit   cy = 0;  reg t2
+ #  mp_word    w0, w1;  regs t0, t1
+ #
+ #  if (a_len) {
+	beq	a1,zero,.L.1
+	move	t2,zero		# cy = 0 (branch delay slot)
+	dsll32	a2,a2,0		# "b" is sometimes negative (?!?!)
+	dsrl32	a2,a2,0		# This clears the upper 32 bits.
+ #    a0 = a[0];
+	lwu	a4,0(a0)
+ #    w0 = ((mp_word)b * a0);
+	dmultu	a2,a4
+ #    if (--a_len) {
+	addiu	a1,a1,-1
+	beq	a1,zero,.L.2
+ #      while (a_len >= 2) {
+	sltiu	t3,a1,2		# (delay slot) t3 = a_len < 2
+	bne	t3,zero,.L.3
+ #	  a1     = a[1];
+	lwu	a5,4(a0)	# (delay slot)
+.L.4:
+ #	  a_len -= 2;
+        addiu	a1,a1,-2
+ #	  c0     = c[0];
+	lwu	a6,0(a3)
+ #	  w0    += cy;
+	mflo	t0
+	daddu	t0,t0,t2
+ #	  w0    += c0;
+	daddu	t0,t0,a6
+ #	  w1     = (mp_word)b * a1; 
+	dmultu	a2,a5			#
+ #	  cy     = CARRYOUT(w0);
+	dsrl32	t2,t0,0
+ #	  c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #	  a0     = a[2];
+	lwu	a4,8(a0)
+ #	  a     += 2;
+	addiu	a0,a0,8
+ #	  c1     = c[1];
+	lwu	a7,4(a3)
+ #	  w1    += cy;
+	mflo	t1
+	daddu	t1,t1,t2
+ #	  w1    += c1;
+	daddu	t1,t1,a7
+ #	  w0     = (mp_word)b * a0;
+	dmultu	a2,a4			#
+ #	  cy     = CARRYOUT(w1);
+	dsrl32	t2,t1,0
+ #	  c[1]   = ACCUM(w1);
+	sw	t1,4(a3)
+ #	  c     += 2;
+	addiu	a3,a3,8
+	sltiu	t3,a1,2
+	beq	t3,zero,.L.4
+ #	  a1     = a[1];
+	lwu	a5,4(a0)	# (delay slot)
+ #      }
+.L.3:
+ #      c0       = c[0];
+	lwu	a6,0(a3)
+ #      w0      += cy;
+ #      if (a_len) {
+	mflo	t0
+	beq	a1,zero,.L.5
+	daddu	t0,t0,t2	# (delay slot) w0 += cy on both paths
+ #	  w1     = (mp_word)b * a1; 
+	dmultu	a2,a5
+ #	  w0    += c0;
+	daddu	t0,t0,a6		#
+ #	  cy     = CARRYOUT(w0);
+	dsrl32	t2,t0,0
+ #	  c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #	  c1     = c[1];
+	lwu	a7,4(a3)
+ #	  w1    += cy;
+	mflo	t1
+	daddu	t1,t1,t2
+ #	  w1    += c1;
+	daddu	t1,t1,a7
+ #	  c[1]   = ACCUM(w1);
+	sw	t1,4(a3)
+ #	  cy     = CARRYOUT(w1);
+	dsrl32	t2,t1,0
+ #	  c     += 1;
+	b	.L.6
+	addiu	a3,a3,4		# (delay slot)
+ #      } else {
+.L.5:
+ #	  w0    += c0;
+	daddu	t0,t0,a6
+ #	  c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #	  cy     = CARRYOUT(w0);
+	b	.L.6
+	dsrl32	t2,t0,0		# (delay slot)
+ #      }
+ #    } else {
+.L.2:
+ #      c0     = c[0];
+	lwu	a6,0(a3)
+ #      w0    += c0;
+	mflo	t0
+	daddu	t0,t0,a6
+ #      c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #      cy     = CARRYOUT(w0);
+	dsrl32	t2,t0,0
+ #    }
+.L.6:
+ #    c[1] = cy;
+	jr	ra
+	sw	t2,4(a3)	# (delay slot)
+ #  }
+.L.1:
+	jr	ra
+	nop
+ #}
+ #
+        .end    s_mpv_mul_d_add
+
+        .ent    s_mpv_mul_d_add_prop
+        .globl  s_mpv_mul_d_add_prop
+
+s_mpv_mul_d_add_prop: 
+ #/* c += a * b, then the carry is added into the following digits of c */
+ #void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, 
+ #			      mp_digit *c)
+ #{
+ #  mp_digit   a0, a1;	regs a4, a5
+ #  mp_digit   c0, c1;  regs a6, a7
+ #  mp_digit   cy = 0;  reg t2
+ #  mp_word    w0, w1;  regs t0, t1
+ #
+ #  if (a_len) {
+	beq	a1,zero,.M.1
+	move	t2,zero		# cy = 0 (branch delay slot)
+	dsll32	a2,a2,0		# "b" is sometimes negative (?!?!)
+	dsrl32	a2,a2,0		# This clears the upper 32 bits.
+ #    a0 = a[0];
+	lwu	a4,0(a0)
+ #    w0 = ((mp_word)b * a0);
+	dmultu	a2,a4
+ #    if (--a_len) {
+	addiu	a1,a1,-1
+	beq	a1,zero,.M.2
+ #      while (a_len >= 2) {
+	sltiu	t3,a1,2		# (delay slot) t3 = a_len < 2
+	bne	t3,zero,.M.3
+ #	  a1     = a[1];
+	lwu	a5,4(a0)	# (delay slot)
+.M.4:
+ #	  a_len -= 2;
+        addiu	a1,a1,-2
+ #	  c0     = c[0];
+	lwu	a6,0(a3)
+ #	  w0    += cy;
+	mflo	t0
+	daddu	t0,t0,t2
+ #	  w0    += c0;
+	daddu	t0,t0,a6
+ #	  w1     = (mp_word)b * a1; 
+	dmultu	a2,a5			#
+ #	  cy     = CARRYOUT(w0);
+	dsrl32	t2,t0,0
+ #	  c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #	  a0     = a[2];
+	lwu	a4,8(a0)
+ #	  a     += 2;
+	addiu	a0,a0,8
+ #	  c1     = c[1];
+	lwu	a7,4(a3)
+ #	  w1    += cy;
+	mflo	t1
+	daddu	t1,t1,t2
+ #	  w1    += c1;
+	daddu	t1,t1,a7
+ #	  w0     = (mp_word)b * a0;
+	dmultu	a2,a4			#
+ #	  cy     = CARRYOUT(w1);
+	dsrl32	t2,t1,0
+ #	  c[1]   = ACCUM(w1);
+	sw	t1,4(a3)
+ #	  c     += 2;
+	addiu	a3,a3,8
+	sltiu	t3,a1,2
+	beq	t3,zero,.M.4
+ #	  a1     = a[1];
+	lwu	a5,4(a0)	# (delay slot)
+ #      }
+.M.3:
+ #      c0       = c[0];
+	lwu	a6,0(a3)
+ #      w0      += cy;
+ #      if (a_len) {
+	mflo	t0
+	beq	a1,zero,.M.5
+	daddu	t0,t0,t2	# (delay slot) w0 += cy on both paths
+ #	  w1     = (mp_word)b * a1; 
+	dmultu	a2,a5
+ #	  w0    += c0;
+	daddu	t0,t0,a6		#
+ #	  cy     = CARRYOUT(w0);
+	dsrl32	t2,t0,0
+ #	  c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #	  c1     = c[1];
+	lwu	a7,4(a3)
+ #	  w1    += cy;
+	mflo	t1
+	daddu	t1,t1,t2
+ #	  w1    += c1;
+	daddu	t1,t1,a7
+ #	  c[1]   = ACCUM(w1);
+	sw	t1,4(a3)
+ #	  cy     = CARRYOUT(w1);
+	dsrl32	t2,t1,0
+ #	  c     += 2;
+	b	.M.6
+	addiu	a3,a3,8		# (delay slot)
+ #      } else {
+.M.5:
+ #	  w0    += c0;
+	daddu	t0,t0,a6
+ #	  c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #	  cy     = CARRYOUT(w0);
+	dsrl32	t2,t0,0
+	b	.M.6
+	addiu	a3,a3,4		# (delay slot) c += 1
+ #      }
+ #    } else {
+.M.2:
+ #      c0     = c[0];
+	lwu	a6,0(a3)
+ #      w0    += c0;
+	mflo	t0
+	daddu	t0,t0,a6
+ #      c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #      cy     = CARRYOUT(w0);
+	dsrl32	t2,t0,0
+	addiu	a3,a3,4		# c += 1
+ #    }
+.M.6:
+
+ #    while (cy) {
+	beq	t2,zero,.M.1
+	nop
+.M.7:
+ #      mp_word w = (mp_word)*c + cy;
+	lwu	a6,0(a3)
+	daddu	t2,t2,a6
+ #      *c++ = ACCUM(w);
+	sw	t2,0(a3)
+ #      cy = CARRYOUT(w);
+	dsrl32	t2,t2,0
+	bne	t2,zero,.M.7
+	addiu	a3,a3,4		# (delay slot) the c++ of the store above
+
+ #  }
+.M.1:
+	jr	ra
+	nop
+ #}
+ #
+        .end    s_mpv_mul_d_add_prop
+
+        .ent    s_mpv_mul_d
+        .globl  s_mpv_mul_d
+
+s_mpv_mul_d: 
+ #/* c = a * b; the final carry is stored into the digit after the last product */
+ #void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, 
+ #			      mp_digit *c)
+ #{
+ #  mp_digit   a0, a1;	regs a4, a5
+ #  mp_digit   cy = 0;  reg t2
+ #  mp_word    w0, w1;  regs t0, t1
+ #
+ #  if (a_len) {
+	beq	a1,zero,.N.1
+	move	t2,zero		# cy = 0 (branch delay slot)
+	dsll32	a2,a2,0		# "b" is sometimes negative (?!?!)
+	dsrl32	a2,a2,0		# This clears the upper 32 bits.
+ #    a0 = a[0];
+	lwu	a4,0(a0)
+ #    w0 = ((mp_word)b * a0);
+	dmultu	a2,a4
+ #    if (--a_len) {
+	addiu	a1,a1,-1
+	beq	a1,zero,.N.2
+ #      while (a_len >= 2) {
+	sltiu	t3,a1,2		# (delay slot) t3 = a_len < 2
+	bne	t3,zero,.N.3
+ #	  a1     = a[1];
+	lwu	a5,4(a0)	# (delay slot)
+.N.4:
+ #	  a_len -= 2;
+        addiu	a1,a1,-2
+ #	  w0    += cy;
+	mflo	t0
+	daddu	t0,t0,t2
+ #	  cy     = CARRYOUT(w0);
+	dsrl32	t2,t0,0
+ #	  w1     = (mp_word)b * a1; 
+	dmultu	a2,a5	
+ #	  c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #	  a0     = a[2];
+	lwu	a4,8(a0)
+ #	  a     += 2;
+	addiu	a0,a0,8
+ #	  w1    += cy;
+	mflo	t1
+	daddu	t1,t1,t2
+ #	  cy     = CARRYOUT(w1);
+	dsrl32	t2,t1,0
+ #	  w0     = (mp_word)b * a0;
+	dmultu	a2,a4	
+ #	  c[1]   = ACCUM(w1);
+	sw	t1,4(a3)
+ #	  c     += 2;
+	addiu	a3,a3,8
+	sltiu	t3,a1,2
+	beq	t3,zero,.N.4
+ #	  a1     = a[1];
+	lwu	a5,4(a0)	# (delay slot)
+ #      }
+.N.3:
+ #      w0      += cy;
+ #      if (a_len) {
+	mflo	t0
+	beq	a1,zero,.N.5
+	daddu	t0,t0,t2	# (delay slot) w0 += cy on both paths
+ #	  w1     = (mp_word)b * a1; 
+	dmultu	a2,a5			#
+ #	  cy     = CARRYOUT(w0);
+	dsrl32	t2,t0,0
+ #	  c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #	  w1    += cy;
+	mflo	t1
+	daddu	t1,t1,t2
+ #	  c[1]   = ACCUM(w1);
+	sw	t1,4(a3)
+ #	  cy     = CARRYOUT(w1);
+	dsrl32	t2,t1,0
+ #	  c     += 1;
+	b	.N.6
+	addiu	a3,a3,4		# (delay slot)
+ #      } else {
+.N.5:
+ #	  c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #	  cy     = CARRYOUT(w0);
+	b	.N.6
+	dsrl32	t2,t0,0		# (delay slot)
+ #      }
+ #    } else {
+.N.2:
+	mflo	t0
+ #      c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #      cy     = CARRYOUT(w0);
+	dsrl32	t2,t0,0
+ #    }
+.N.6:
+ #    c[1] = cy;
+	jr	ra
+	sw	t2,4(a3)	# (delay slot)
+ #  }
+.N.1:
+	jr	ra
+	nop
+ #}
+ #
+        .end    s_mpv_mul_d
+
+
+        .ent    s_mpv_sqr_add_prop
+        .globl  s_mpv_sqr_add_prop
+ #void   s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs);
+ #	registers
+ #	a0		*a
+ #	a1		a_len (must be nonzero: there is no a_len == 0 check)
+ #	a2		*sqr
+ #	a3		digit from *a, a_i
+ #	a4		square of digit from a, then the 64-bit sum being stored
+ #	a5,a6		next 2 digits in sqr
+ #	a7,t0		carry 
+s_mpv_sqr_add_prop:
+	move	a7,zero
+	move	t0,zero
+	lwu	a3,0(a0)
+	addiu	a1,a1,-1	# --a_len
+	dmultu	a3,a3
+	beq	a1,zero,.P.3	# jump if we've already done the only sqr
+	addiu	a0,a0,4		# ++a
+.P.2:
+        lwu	a5,0(a2)
+        lwu	a6,4(a2)
+	addiu	a2,a2,8		# sqrs += 2;
+	dsll32	a6,a6,0
+	daddu	a5,a5,a6	# a5 = the two sqrs digits as one 64-bit value
+	lwu	a3,0(a0)
+	addiu	a0,a0,4		# ++a
+	mflo	a4
+	daddu	a6,a5,a4
+	sltu	a7,a6,a5	# a7 = a6 < a5	detect overflow
+	dmultu	a3,a3
+	daddu	a4,a6,t0
+	sltu	t0,a4,a6
+	add	t0,t0,a7
+	sw	a4,-8(a2)
+	addiu	a1,a1,-1	# --a_len
+	dsrl32	a4,a4,0
+	bne	a1,zero,.P.2	# loop if a_len > 0
+	sw	a4,-4(a2)
+.P.3:
+        lwu	a5,0(a2)
+        lwu	a6,4(a2)
+	addiu	a2,a2,8		# sqrs += 2;
+	dsll32	a6,a6,0
+	daddu	a5,a5,a6
+	mflo	a4
+	daddu	a6,a5,a4
+	sltu	a7,a6,a5	# a7 = a6 < a5	detect overflow
+	daddu	a4,a6,t0
+	sltu	t0,a4,a6
+	add	t0,t0,a7
+	sw	a4,-8(a2)
+	beq	t0,zero,.P.9	# jump if no carry
+	dsrl32	a4,a4,0
+.P.8:
+	sw	a4,-4(a2)	# store the digit computed so far
+	/* propagate final carry */
+	lwu	a5,0(a2)
+	daddu	a4,a5,t0	# a4 = *sqrs + carry; .P.8 or .P.9 stores its low word
+	dsrl32	t0,a4,0		# carry out of the 32-bit digit
+	bne	t0,zero,.P.8	# loop if carry persists
+	addiu	a2,a2,4		# sqrs++
+.P.9:
+	jr	ra
+	sw	a4,-4(a2)
+
+        .end    s_mpv_sqr_add_prop
diff --git a/security/nss/lib/freebl/mpi/mpi_sparc.c b/security/nss/lib/freebl/mpi/mpi_sparc.c
new file mode 100644
index 000000000..1e88357af
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_sparc.c
@@ -0,0 +1,226 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Multiplication performance enhancements for sparc v8+vis CPUs. */
+
+#include "mpi-priv.h"
+#include <stddef.h>
+#include <sys/systeminfo.h>
+#include <strings.h>
+
+/* In the functions below, */
+/* vector y must be 8-byte aligned, and n must be even */
+/* returns carry out of high order word of result */
+/* maximum n is 256 */
+
+/* vector x += vector y * scalar a; where y is of length n words. */
+extern mp_digit mul_add_inp(mp_digit *x, const mp_digit *y, int n, mp_digit a);
+
+/* vector z = vector x + vector y * scalar a; where y is of length n words. */
+extern mp_digit mul_add(mp_digit *z, const mp_digit *x, const mp_digit *y,
+                        int n, mp_digit a);
+
+/* v8 versions of these functions run on any Sparc v8 CPU. */
+
+/* Phi:Plo = a * b via one 64-bit multiply (Sparc V8, Workshop compilers). */
+#define MP_MUL_DxD(a, b, Phi, Plo)                                  \
+    do {                                                            \
+        unsigned long long product = (unsigned long long)(a) * (b); \
+        (Plo) = (mp_digit)product;                                  \
+        (Phi) = (mp_digit)(product >> MP_DIGIT_BIT);                \
+    } while (0)
+
+/* c = a * b: writes a_len product digits plus the carry-out digit to c. */
+static void
+v8_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit carry = 0; /* high half of the previous partial product */
+    mp_size ix;
+
+#if !defined(MP_NO_MP_WORD)
+    /* Double-width path: product and carry are summed in one mp_word. */
+    for (ix = 0; ix < a_len; ++ix) {
+        mp_word prod = (mp_word)b * a[ix] + carry;
+        c[ix] = ACCUM(prod);
+        carry = CARRYOUT(prod);
+    }
+#else
+    /* Single-width path: split the product and ripple the carry by hand. */
+    for (ix = 0; ix < a_len; ++ix) {
+        mp_digit lo, hi;
+
+        MP_MUL_DxD(a[ix], b, hi, lo);
+
+        lo += carry;
+        hi += (lo < carry); /* the addition wrapped: bump the high half */
+
+        c[ix] = lo;
+        carry = hi;
+    }
+#endif
+    /* The final carry becomes the extra top digit. */
+    c[a_len] = carry;
+}
+
+/* c[0..a_len-1] += a * b; the carry out is stored (not added) in c[a_len]. */
+static void
+v8_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit carry = 0; /* carry into the next digit position */
+    mp_size ix;
+
+#if !defined(MP_NO_MP_WORD)
+    /* Double-width path: sum product, addend and carry in one mp_word. */
+    for (ix = 0; ix < a_len; ++ix) {
+        mp_word acc = (mp_word)b * a[ix] + c[ix] + carry;
+        c[ix] = ACCUM(acc);
+        carry = CARRYOUT(acc);
+    }
+#else
+    /* Single-width path: every wrapped addition bumps the high half. */
+    for (ix = 0; ix < a_len; ++ix) {
+        mp_digit lo, hi, addend;
+
+        MP_MUL_DxD(a[ix], b, hi, lo);
+
+        lo += carry;
+        hi += (lo < carry);
+
+        addend = c[ix];
+        lo += addend;
+        hi += (lo < addend);
+
+        c[ix] = lo;
+        carry = hi;
+    }
+#endif
+    c[a_len] = carry;
+}
+
+/* Presently, this is only used by the Montgomery arithmetic code. */
+/* c += a * b, with the carry out rippled into c[a_len] and beyond. */
+static void
+v8_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit carry = 0;        /* carry into the next digit position */
+    mp_digit *top = c + a_len; /* first digit above the product */
+    mp_size ix;
+
+#if !defined(MP_NO_MP_WORD)
+    /* Double-width path: sum product, addend and carry in one mp_word. */
+    for (ix = 0; ix < a_len; ++ix) {
+        mp_word acc = (mp_word)b * a[ix] + c[ix] + carry;
+        c[ix] = ACCUM(acc);
+        carry = CARRYOUT(acc);
+    }
+    /* Ripple the leftover carry upward until it is absorbed. */
+    for (; carry != 0; ++top) {
+        mp_word acc = (mp_word)*top + carry;
+        *top = ACCUM(acc);
+        carry = CARRYOUT(acc);
+    }
+#else
+    /* Single-width path: every wrapped addition bumps the high half. */
+    for (ix = 0; ix < a_len; ++ix) {
+        mp_digit lo, hi, addend;
+
+        MP_MUL_DxD(a[ix], b, hi, lo);
+
+        lo += carry;
+        hi += (lo < carry);
+
+        addend = c[ix];
+        lo += addend;
+        hi += (lo < addend);
+
+        c[ix] = lo;
+        carry = hi;
+    }
+    /* Ripple the leftover carry upward until it is absorbed. */
+    for (; carry != 0; ++top) {
+        mp_digit old = *top;
+        *top = old + carry;
+        carry = (*top < old);
+    }
+#endif
+}
+
+/* These functions run only on v8plus+vis or v9+vis CPUs. */
+
+/* c = a * b, where a is a_len words long and c gets a_len + 1 words. */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit scratch[258]; /* room for 256 digits, a pad and alignment slack */
+    mp_digit *aligned;
+    if (a_len > 256) {
+        /* Longer than mul_add_inp's stated maximum: use the plain v8 loop. */
+        v8_mpv_mul_d(a, a_len, b, c);
+        return;
+    }
+    /* mul_add_inp wants an 8-byte aligned, even-length source, and c is
+     * zeroed below, so an aliased, misaligned or odd a is copied first. */
+    if (a == c || ((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
+        aligned = (((ptrdiff_t)scratch & 0x7) != 0) ? scratch + 1 : scratch;
+        memcpy(aligned, a, a_len * sizeof(*a));
+        if ((a_len & 1) != 0)
+            aligned[a_len] = 0; /* zero the pad digit after the copy */
+        a = aligned;
+    }
+    s_mp_setz(c, a_len + 1);
+    c[a_len] = mul_add_inp(c, a, a_len, b);
+}
+
+/* c += a * b, where a is a_len words long; the carry out goes to c[a_len]. */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit scratch[258]; /* room for 256 digits, a pad and alignment slack */
+    mp_digit *aligned;
+
+    if (a_len > 256) {
+        /* Longer than mul_add_inp's stated maximum: use the plain v8 loop. */
+        v8_mpv_mul_d_add(a, a_len, b, c);
+        return;
+    }
+    /* mul_add_inp wants an 8-byte aligned, even-length source vector. */
+    if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
+        aligned = (((ptrdiff_t)scratch & 0x7) != 0) ? scratch + 1 : scratch;
+        memcpy(aligned, a, a_len * sizeof(*a));
+        if ((a_len & 1) != 0)
+            aligned[a_len] = 0; /* zero the pad digit after the copy */
+        a = aligned;
+    }
+    c[a_len] = mul_add_inp(c, a, a_len, b);
+}
+
+/* c += a * b, where a is a_len words long; the carry ripples past c[a_len]. */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit scratch[258]; /* room for 256 digits, a pad and alignment slack */
+    mp_digit *aligned;
+    mp_digit *top = c + a_len; /* first digit above the product */
+    mp_digit carry;
+
+    if (a_len > 256) {
+        /* Longer than mul_add_inp's stated maximum: use the plain v8 loop. */
+        v8_mpv_mul_d_add_prop(a, a_len, b, c);
+        return;
+    }
+    /* mul_add_inp wants an 8-byte aligned, even-length source vector. */
+    if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
+        aligned = (((ptrdiff_t)scratch & 0x7) != 0) ? scratch + 1 : scratch;
+        memcpy(aligned, a, a_len * sizeof(*a));
+        if ((a_len & 1) != 0)
+            aligned[a_len] = 0; /* zero the pad digit after the copy */
+        a = aligned;
+    }
+    /* Ripple the carry out of the product upward until it is absorbed. */
+    for (carry = mul_add_inp(c, a, a_len, b); carry != 0; ++top) {
+        mp_digit sum = carry + *top;
+        *top = sum;
+        carry = (sum < carry);
+    }
+}
diff --git a/security/nss/lib/freebl/mpi/mpi_sse2.s b/security/nss/lib/freebl/mpi/mpi_sse2.s
new file mode 100644
index 000000000..16a47019c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_sse2.s
@@ -0,0 +1,294 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifdef DARWIN
+#define s_mpv_mul_d          _s_mpv_mul_d
+#define s_mpv_mul_d_add      _s_mpv_mul_d_add
+#define s_mpv_mul_d_add_prop _s_mpv_mul_d_add_prop
+#define s_mpv_sqr_add_prop   _s_mpv_sqr_add_prop
+#define s_mpv_div_2dx1d      _s_mpv_div_2dx1d
+#define TYPE_FUNCTION(x)
+#else
+#define TYPE_FUNCTION(x) .type x, @function
+#endif
+
+.text
+
+ #  ebp - 8:    caller's esi
+ #  ebp - 4:    caller's edi
+ #  ebp + 0:    caller's ebp
+ #  ebp + 4:    return address
+ #  ebp + 8:    a       argument
+ #  ebp + 12:   a_len   argument
+ #  ebp + 16:   b       argument
+ #  ebp + 20:   c       argument
+ #  registers:  (mm0: product, mm1: b, mm2: running carry)
+ #      ebx:    (not used)
+ #      ecx:    a_len
+ #      esi:    a ptr
+ #      edi:    c ptr
+.globl s_mpv_mul_d
+.private_extern s_mpv_mul_d
+TYPE_FUNCTION(s_mpv_mul_d)
+s_mpv_mul_d:                    # c = a * b; the final carry digit is stored after the product
+    push   %ebp
+    mov    %esp, %ebp
+    push   %edi
+    push   %esi
+    psubq  %mm2, %mm2           # carry = 0
+    mov    12(%ebp), %ecx       # ecx = a_len
+    movd   16(%ebp), %mm1       # mm1 = b
+    mov    20(%ebp), %edi       # edi = c
+    cmp    $0, %ecx
+    je     2f                   # jmp if a_len == 0
+    mov    8(%ebp), %esi        # esi = a
+    cld
+1:
+    movd   0(%esi), %mm0        # mm0 = *a++
+    add    $4, %esi
+    pmuludq %mm1, %mm0          # mm0 = b * *a++
+    paddq  %mm0, %mm2           # add the carry
+    movd   %mm2, 0(%edi)        # store the 32bit result
+    add    $4, %edi
+    psrlq  $32, %mm2            # save the carry
+    dec    %ecx                 # --a_len
+    jnz    1b                   # jmp if a_len != 0
+2:
+    movd   %mm2, 0(%edi)        # *c = carry
+    emms                        # done with the MMX registers
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+
+ #  ebp - 8:    caller's esi
+ #  ebp - 4:    caller's edi
+ #  ebp + 0:    caller's ebp
+ #  ebp + 4:    return address
+ #  ebp + 8:    a       argument
+ #  ebp + 12:   a_len   argument
+ #  ebp + 16:   b       argument
+ #  ebp + 20:   c       argument
+ #  registers:  (mm0: product, then *c; mm1: b; mm2: running carry)
+ #      ebx:    (not used)
+ #      ecx:    a_len
+ #      esi:    a ptr
+ #      edi:    c ptr
+.globl s_mpv_mul_d_add
+.private_extern s_mpv_mul_d_add
+TYPE_FUNCTION(s_mpv_mul_d_add)
+s_mpv_mul_d_add:                # c += a * b; the final carry digit is stored after the sum
+    push   %ebp
+    mov    %esp, %ebp
+    push   %edi
+    push   %esi
+    psubq  %mm2, %mm2           # carry = 0
+    mov    12(%ebp), %ecx       # ecx = a_len
+    movd   16(%ebp), %mm1       # mm1 = b
+    mov    20(%ebp), %edi       # edi = c
+    cmp    $0, %ecx
+    je     2f                   # jmp if a_len == 0
+    mov    8(%ebp), %esi        # esi = a
+    cld
+1:
+    movd   0(%esi), %mm0        # mm0 = *a++
+    add    $4, %esi
+    pmuludq %mm1, %mm0          # mm0 = b * *a++
+    paddq  %mm0, %mm2           # add the carry
+    movd   0(%edi), %mm0        # mm0 = current word of *c
+    paddq  %mm0, %mm2           # add the current word of *c
+    movd   %mm2, 0(%edi)        # store the 32bit result
+    add    $4, %edi
+    psrlq  $32, %mm2            # save the carry
+    dec    %ecx                 # --a_len
+    jnz    1b                   # jmp if a_len != 0
+2:
+    movd   %mm2, 0(%edi)        # *c = carry
+    emms                        # done with the MMX registers
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+
+ #  ebp - 12:   caller's ebx
+ #  ebp - 8:    caller's esi
+ #  ebp - 4:    caller's edi
+ #  ebp + 0:    caller's ebp
+ #  ebp + 4:    return address
+ #  ebp + 8:    a       argument
+ #  ebp + 12:   a_len   argument
+ #  ebp + 16:   b       argument
+ #  ebp + 20:   c       argument
+ #  registers:  (mm0: product, mm1: b, mm2: running carry, mm3: word of *c)
+ #      eax:    word of *c during carry propagation
+ #      ebx:    carry
+ #      ecx:    a_len
+ #      esi:    a ptr
+ #      edi:    c ptr
+.globl s_mpv_mul_d_add_prop
+.private_extern s_mpv_mul_d_add_prop
+TYPE_FUNCTION(s_mpv_mul_d_add_prop)
+s_mpv_mul_d_add_prop:           # c += a * b, then ripple the carry into the words above
+    push   %ebp
+    mov    %esp, %ebp
+    push   %edi
+    push   %esi
+    push   %ebx
+    psubq  %mm2, %mm2           # carry = 0
+    mov    12(%ebp), %ecx       # ecx = a_len
+    movd   16(%ebp), %mm1       # mm1 = b
+    mov    20(%ebp), %edi       # edi = c
+    cmp    $0, %ecx
+    je     2f                   # jmp if a_len == 0
+    mov    8(%ebp), %esi        # esi = a
+    cld                         # stosl below must advance edi upward
+1:
+    movd   0(%esi), %mm0        # mm0 = *a++
+    movd   0(%edi), %mm3        # fetch the sum
+    add    $4, %esi
+    pmuludq %mm1, %mm0          # mm0 = b * *a++
+    paddq  %mm0, %mm2           # add the carry
+    paddq  %mm3, %mm2           # add *c++
+    movd   %mm2, 0(%edi)        # store the 32bit result
+    add    $4, %edi
+    psrlq  $32, %mm2            # save the carry
+    dec    %ecx                 # --a_len
+    jnz    1b                   # jmp if a_len != 0
+2:
+    movd   %mm2, %ebx           # ebx = carry out of the product
+    cmp    $0, %ebx             # is carry zero?
+    jz     4f
+    mov    0(%edi), %eax        # add the carry to the next word of *c
+    add    %ebx, %eax
+    stosl                       # [es:edi] = eax; edi += 4; (flags unchanged)
+    jnc    4f                   # done unless that addition carried out
+3:
+    mov    0(%edi), %eax        # add in current word from *c
+    adc    $0, %eax             # CF still holds the carry: mov and stosl leave flags alone
+    stosl                       # [es:edi] = eax; edi += 4;
+    jc     3b
+4:
+    emms                        # done with the MMX registers
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+
+ #  ebp - 12:   caller's ebx
+ #  ebp - 8:    caller's esi
+ #  ebp - 4:    caller's edi
+ #  ebp + 0:    caller's ebp
+ #  ebp + 4:    return address
+ #  ebp + 8:    pa      argument
+ #  ebp + 12:   a_len   argument
+ #  ebp + 16:   ps      argument
+ #  registers:  (mm0: square of digit, mm2: running carry, mm3: word of *ps)
+ #      eax:    word of *ps during carry propagation
+ #      ebx:    carry
+ #      ecx:    a_len
+ #      esi:    a ptr
+ #      edi:    c ptr
+.globl s_mpv_sqr_add_prop
+.private_extern s_mpv_sqr_add_prop
+TYPE_FUNCTION(s_mpv_sqr_add_prop)
+s_mpv_sqr_add_prop:             # add the square of each digit of pa to a word pair of ps
+    push   %ebp
+    mov    %esp, %ebp
+    push   %edi
+    push   %esi
+    push   %ebx
+    psubq  %mm2, %mm2           # carry = 0
+    mov    12(%ebp), %ecx       # ecx = a_len
+    mov    16(%ebp), %edi       # edi = ps
+    cmp    $0, %ecx
+    je     2f                   # jmp if a_len == 0
+    mov    8(%ebp), %esi        # esi = a
+    cld                         # stosl below must advance edi upward
+1:
+    movd   0(%esi), %mm0        # mm0 = *a
+    movd   0(%edi), %mm3        # fetch the sum
+    add    $4, %esi
+    pmuludq %mm0, %mm0          # mm0 = sqr(a)
+    paddq  %mm0, %mm2           # add the carry
+    paddq  %mm3, %mm2           # add the low word
+    movd   4(%edi), %mm3        # fetch the high word of the pair
+    movd   %mm2, 0(%edi)        # store the 32bit result
+    psrlq  $32, %mm2            # keep the upper half of the sum
+    paddq  %mm3, %mm2           # add the high word
+    movd   %mm2, 4(%edi)        # store the 32bit result
+    psrlq  $32, %mm2            # save the carry.
+    add    $8, %edi
+    dec    %ecx                 # --a_len
+    jnz    1b                   # jmp if a_len != 0
+2:
+    movd   %mm2, %ebx           # ebx = carry out of the last pair
+    cmp    $0, %ebx             # is carry zero?
+    jz     4f
+    mov    0(%edi), %eax        # add the carry to the next word of *ps
+    add    %ebx, %eax
+    stosl                       # [es:edi] = eax; edi += 4; (flags unchanged)
+    jnc    4f                   # done unless that addition carried out
+3:
+    mov    0(%edi), %eax        # add in current word from *c
+    adc    $0, %eax             # CF still holds the carry: mov and stosl leave flags alone
+    stosl                       #  [es:edi] = eax; edi += 4;
+    jc     3b
+4:
+    emms                        # done with the MMX registers
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+
+ #
+ # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ # so its high bit is 1.   This code is from NSPR.
+ #
+ # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ #                        mp_digit *qp, mp_digit *rp)
+
+ #  esp +  0:   Caller's ebx
+ #  esp +  4:   return address
+ #  esp +  8:   Nhi     argument
+ #  esp + 12:   Nlo     argument
+ #  esp + 16:   divisor argument
+ #  esp + 20:   qp      argument
+ #  esp + 24:   rp      argument
+ #  registers:
+ #      eax:    Nlo, then the quotient, then the return value (0)
+ #      ebx:    divisor, then qp, then rp
+ #      ecx:    (not used)
+ #      edx:    Nhi, then the remainder
+ #      esi:    (not used)
+ #      edi:    (not used)
+ #  NOTE(review): div faults unless Nhi < divisor; nothing here checks it - confirm callers.
+.globl s_mpv_div_2dx1d
+.private_extern s_mpv_div_2dx1d
+TYPE_FUNCTION(s_mpv_div_2dx1d)
+s_mpv_div_2dx1d:
+       push   %ebx
+       mov    8(%esp), %edx     # edx = Nhi
+       mov    12(%esp), %eax    # eax = Nlo
+       mov    16(%esp), %ebx    # ebx = divisor
+       div    %ebx              # eax = quotient, edx = remainder of edx:eax / ebx
+       mov    20(%esp), %ebx    # ebx = qp
+       mov    %eax, 0(%ebx)     # *qp = quotient
+       mov    24(%esp), %ebx    # ebx = rp
+       mov    %edx, 0(%ebx)     # *rp = remainder
+       xor    %eax, %eax        # return zero
+       pop    %ebx
+       ret    
+       nop
+
+#ifndef DARWIN
+ # Magic indicating no need for an executable stack
+.section .note.GNU-stack, "", @progbits
+.previous
+#endif
diff --git a/security/nss/lib/freebl/mpi/mpi_x86.s b/security/nss/lib/freebl/mpi/mpi_x86.s
new file mode 100644
index 000000000..8f7e2130c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_x86.s
@@ -0,0 +1,541 @@
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+.data
+.align 4
+ #
+ # -1 means to call s_mpi_is_sse2 to determine if we support sse2
+ #    instructions.
+ #  0 means to use x86 instructions
+ #  1 means to use sse2 instructions
+.type	is_sse,@object
+.size	is_sse,4
+is_sse: .long	-1 
+
+#
+# sigh, handle the difference between -fPIC and not PIC
+# default to pic, since this file seems to be exclusively
+# linux right now (solaris uses mpi_i86pc.s and windows uses
+# mpi_x86_asm.c)
+# NOTE(review): the PIC forms address is_sse off %ebx, which nothing in this file loads - confirm callers set it.
+.ifndef NO_PIC
+.macro GET   var,reg
+    movl   \var@GOTOFF(%ebx),\reg
+.endm
+.macro PUT   reg,var
+    movl   \reg,\var@GOTOFF(%ebx)
+.endm
+.else
+.macro GET   var,reg
+    movl   \var,\reg
+.endm
+.macro PUT   reg,var
+    movl   \reg,\var
+.endm
+.endif
+
+.text
+
+
+ #  ebp - 36:	caller's esi	(x86 path; the sse2 path saves it at ebp - 8)
+ #  ebp - 32:	caller's edi	(x86 path; the sse2 path saves it at ebp - 4)
+ #  ebp - 28:	(ebp - 28 .. ebp - 4: reserved by the x86 path, never referenced)
+ #  ebp - 24:	
+ #  ebp - 20:	
+ #  ebp - 16:	
+ #  ebp - 12:	
+ #  ebp - 8:	
+ #  ebp - 4:	
+ #  ebp + 0:	caller's ebp
+ #  ebp + 4:	return address
+ #  ebp + 8:	a	argument
+ #  ebp + 12:	a_len	argument
+ #  ebp + 16:	b	argument
+ #  ebp + 20:	c	argument
+ #  registers:
+ # 	eax:	is_sse during dispatch, then low half of the product
+ #	ebx:	carry
+ #	ecx:	a_len
+ #	edx:	b, then high half of the product
+ #	esi:	a ptr
+ #	edi:	c ptr
+.globl	s_mpv_mul_d
+.type	s_mpv_mul_d,@function
+s_mpv_mul_d:			# c = a * b; dispatch on the cached is_sse value
+    GET    is_sse,%eax
+    cmp    $0,%eax
+    je     s_mpv_mul_d_x86	# 0: plain x86 code
+    jg     s_mpv_mul_d_sse2	# > 0: sse2 code
+    call   s_mpi_is_sse2	# < 0: not probed yet
+    PUT    %eax,is_sse		# cache the answer
+    cmp    $0,%eax
+    jg     s_mpv_mul_d_sse2
+s_mpv_mul_d_x86:
+    push   %ebp
+    mov    %esp,%ebp
+    sub    $28,%esp
+    push   %edi
+    push   %esi
+    push   %ebx
+    movl   $0,%ebx		# carry = 0
+    mov    12(%ebp),%ecx	# ecx = a_len
+    mov    20(%ebp),%edi	# edi = c
+    cmp    $0,%ecx
+    je     2f			# jmp if a_len == 0
+    mov    8(%ebp),%esi		# esi = a
+    cld
+1:
+    lodsl			# eax = [ds:esi]; esi += 4
+    mov    16(%ebp),%edx	# edx = b
+    mull   %edx			# edx:eax = Phi:Plo = a_i * b
+
+    add    %ebx,%eax		# add carry (%ebx) to edx:eax
+    adc    $0,%edx
+    mov    %edx,%ebx		# high half of product becomes next carry
+
+    stosl			# [es:edi] = eax; edi += 4;
+    dec    %ecx			# --a_len
+    jnz    1b			# jmp if a_len != 0
+2:
+    mov    %ebx,0(%edi)		# *c = carry
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+s_mpv_mul_d_sse2:
+    push   %ebp
+    mov    %esp,%ebp
+    push   %edi
+    push   %esi
+    psubq  %mm2,%mm2		# carry = 0
+    mov    12(%ebp),%ecx	# ecx = a_len
+    movd   16(%ebp),%mm1	# mm1 = b
+    mov    20(%ebp),%edi	# edi = c
+    cmp    $0,%ecx
+    je     6f			# jmp if a_len == 0
+    mov    8(%ebp),%esi		# esi = a
+    cld
+5:
+    movd   0(%esi),%mm0         # mm0 = *a++
+    add    $4,%esi
+    pmuludq %mm1,%mm0           # mm0 = b * *a++
+    paddq  %mm0,%mm2            # add the carry
+    movd   %mm2,0(%edi)         # store the 32bit result
+    add    $4,%edi
+    psrlq  $32, %mm2		# save the carry
+    dec    %ecx			# --a_len
+    jnz    5b			# jmp if a_len != 0
+6:
+    movd   %mm2,0(%edi)		# *c = carry
+    emms			# done with the MMX registers
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+
+ #  ebp - 36:	caller's esi	(x86 path; the sse2 path saves it at ebp - 8)
+ #  ebp - 32:	caller's edi	(x86 path; the sse2 path saves it at ebp - 4)
+ #  ebp - 28:	(ebp - 28 .. ebp - 4: reserved by the x86 path, never referenced)
+ #  ebp - 24:	
+ #  ebp - 20:	
+ #  ebp - 16:	
+ #  ebp - 12:	
+ #  ebp - 8:	
+ #  ebp - 4:	
+ #  ebp + 0:	caller's ebp
+ #  ebp + 4:	return address
+ #  ebp + 8:	a	argument
+ #  ebp + 12:	a_len	argument
+ #  ebp + 16:	b	argument
+ #  ebp + 20:	c	argument
+ #  registers:
+ # 	eax:	is_sse during dispatch, then low half of the product
+ #	ebx:	carry (also scratch for the current word of *c)
+ #	ecx:	a_len
+ #	edx:	b, then high half of the product
+ #	esi:	a ptr
+ #	edi:	c ptr
+.globl	s_mpv_mul_d_add
+.type	s_mpv_mul_d_add,@function
+s_mpv_mul_d_add:		# c += a * b; dispatch on the cached is_sse value
+    GET    is_sse,%eax
+    cmp    $0,%eax
+    je     s_mpv_mul_d_add_x86	# 0: plain x86 code
+    jg     s_mpv_mul_d_add_sse2	# > 0: sse2 code
+    call   s_mpi_is_sse2	# < 0: not probed yet
+    PUT    %eax,is_sse		# cache the answer
+    cmp    $0,%eax
+    jg     s_mpv_mul_d_add_sse2
+s_mpv_mul_d_add_x86:
+    push   %ebp
+    mov    %esp,%ebp
+    sub    $28,%esp
+    push   %edi
+    push   %esi
+    push   %ebx
+    movl   $0,%ebx		# carry = 0
+    mov    12(%ebp),%ecx	# ecx = a_len
+    mov    20(%ebp),%edi	# edi = c
+    cmp    $0,%ecx
+    je     11f			# jmp if a_len == 0
+    mov    8(%ebp),%esi		# esi = a
+    cld
+10:
+    lodsl			# eax = [ds:esi]; esi += 4
+    mov    16(%ebp),%edx	# edx = b
+    mull   %edx			# edx:eax = Phi:Plo = a_i * b
+
+    add    %ebx,%eax		# add carry (%ebx) to edx:eax
+    adc    $0,%edx
+    mov    0(%edi),%ebx		# add in current word from *c
+    add    %ebx,%eax		# eax += current word of *c
+    adc    $0,%edx
+    mov    %edx,%ebx		# high half of product becomes next carry
+
+    stosl			# [es:edi] = eax; edi += 4;
+    dec    %ecx			# --a_len
+    jnz    10b			# jmp if a_len != 0
+11:
+    mov    %ebx,0(%edi)		# *c = carry
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+s_mpv_mul_d_add_sse2:
+    push   %ebp
+    mov    %esp,%ebp
+    push   %edi
+    push   %esi
+    psubq  %mm2,%mm2		# carry = 0
+    mov    12(%ebp),%ecx	# ecx = a_len
+    movd   16(%ebp),%mm1	# mm1 = b
+    mov    20(%ebp),%edi	# edi = c
+    cmp    $0,%ecx
+    je     16f			# jmp if a_len == 0
+    mov    8(%ebp),%esi		# esi = a
+    cld
+15:
+    movd   0(%esi),%mm0         # mm0 = *a++
+    add    $4,%esi
+    pmuludq %mm1,%mm0           # mm0 = b * *a++
+    paddq  %mm0,%mm2            # add the carry
+    movd   0(%edi),%mm0         # mm0 = current word of *c
+    paddq  %mm0,%mm2            # add the current word of *c
+    movd   %mm2,0(%edi)         # store the 32bit result
+    add    $4,%edi
+    psrlq  $32, %mm2		# save the carry
+    dec    %ecx			# --a_len
+    jnz    15b			# jmp if a_len != 0
+16:
+    movd   %mm2,0(%edi)		# *c = carry
+    emms			# done with the MMX registers
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+
+ #  ebp - 8:	caller's esi	(sse2 path; the x86 path saves it at ebp - 36)
+ #  ebp - 4:	caller's edi	(sse2 path; the x86 path saves it at ebp - 32)
+ #  ebp + 0:	caller's ebp
+ #  ebp + 4:	return address
+ #  ebp + 8:	a	argument
+ #  ebp + 12:	a_len	argument
+ #  ebp + 16:	b	argument
+ #  ebp + 20:	c	argument
+ #  registers:
+ # 	eax:	is_sse during dispatch, then low half of the product / word of *c
+ #	ebx:	carry
+ #	ecx:	a_len
+ #	edx:	b, then high half of the product
+ #	esi:	a ptr
+ #	edi:	c ptr
+.globl	s_mpv_mul_d_add_prop
+.type	s_mpv_mul_d_add_prop,@function
+s_mpv_mul_d_add_prop:		# c += a * b, then ripple the carry into the words above
+    GET    is_sse,%eax
+    cmp    $0,%eax
+    je     s_mpv_mul_d_add_prop_x86	# 0: plain x86 code
+    jg     s_mpv_mul_d_add_prop_sse2	# > 0: sse2 code
+    call   s_mpi_is_sse2	# < 0: not probed yet
+    PUT    %eax,is_sse		# cache the answer
+    cmp    $0,%eax
+    jg     s_mpv_mul_d_add_prop_sse2
+s_mpv_mul_d_add_prop_x86:
+    push   %ebp
+    mov    %esp,%ebp
+    sub    $28,%esp
+    push   %edi
+    push   %esi
+    push   %ebx
+    movl   $0,%ebx		# carry = 0
+    mov    12(%ebp),%ecx	# ecx = a_len
+    mov    20(%ebp),%edi	# edi = c
+    cmp    $0,%ecx
+    je     21f			# jmp if a_len == 0
+    cld
+    mov    8(%ebp),%esi		# esi = a
+20:
+    lodsl			# eax = [ds:esi]; esi += 4
+    mov    16(%ebp),%edx	# edx = b
+    mull   %edx			# edx:eax = Phi:Plo = a_i * b
+
+    add    %ebx,%eax		# add carry (%ebx) to edx:eax
+    adc    $0,%edx
+    mov    0(%edi),%ebx		# add in current word from *c
+    add    %ebx,%eax		# eax += current word of *c
+    adc    $0,%edx
+    mov    %edx,%ebx		# high half of product becomes next carry
+
+    stosl			# [es:edi] = eax; edi += 4;
+    dec    %ecx			# --a_len
+    jnz    20b			# jmp if a_len != 0
+21:
+    cmp    $0,%ebx		# is carry zero?
+    jz     23f
+    mov    0(%edi),%eax		# add in current word from *c
+    add	   %ebx,%eax
+    stosl			# [es:edi] = eax; edi += 4; (flags unchanged)
+    jnc    23f			# done unless that addition carried out
+22:
+    mov    0(%edi),%eax		# add in current word from *c
+    adc	   $0,%eax		# CF still holds the carry: mov and stosl leave flags alone
+    stosl			# [es:edi] = eax; edi += 4;
+    jc     22b
+23:
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+s_mpv_mul_d_add_prop_sse2:
+    push   %ebp
+    mov    %esp,%ebp
+    push   %edi
+    push   %esi
+    push   %ebx
+    psubq  %mm2,%mm2		# carry = 0
+    mov    12(%ebp),%ecx	# ecx = a_len
+    movd   16(%ebp),%mm1	# mm1 = b
+    mov    20(%ebp),%edi	# edi = c
+    cmp    $0,%ecx
+    je     26f			# jmp if a_len == 0
+    mov    8(%ebp),%esi		# esi = a
+    cld
+25:
+    movd   0(%esi),%mm0         # mm0 = *a++
+    movd   0(%edi),%mm3		# fetch the sum
+    add    $4,%esi
+    pmuludq %mm1,%mm0           # mm0 = b * *a++
+    paddq  %mm0,%mm2            # add the carry
+    paddq  %mm3,%mm2            # add *c++
+    movd   %mm2,0(%edi)         # store the 32bit result
+    add    $4,%edi
+    psrlq  $32, %mm2		# save the carry
+    dec    %ecx			# --a_len
+    jnz    25b			# jmp if a_len != 0
+26:
+    movd   %mm2,%ebx		# ebx = carry out of the product
+    cmp    $0,%ebx		# is carry zero?
+    jz     28f
+    mov    0(%edi),%eax		# add the carry to the next word of *c
+    add    %ebx, %eax
+    stosl			# [es:edi] = eax; edi += 4; (flags unchanged)
+    jnc    28f			# done unless that addition carried out
+27:
+    mov    0(%edi),%eax		# add in current word from *c
+    adc	   $0,%eax		# CF still holds the carry: mov and stosl leave flags alone
+    stosl			# [es:edi] = eax; edi += 4;
+    jc     27b
+28:
+    emms			# done with the MMX registers
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+
+
+ #  ebp - 20:	caller's esi	(x86 path; the sse2 path saves it at ebp - 8)
+ #  ebp - 16:	caller's edi	(x86 path; the sse2 path saves it at ebp - 4)
+ #  ebp - 12:	(ebp - 12 .. ebp - 4: reserved by the x86 path, never referenced)
+ #  ebp - 8:	carry	(slot only; the code keeps the carry in ebx)
+ #  ebp - 4:	a_len	local	(slot only; the code keeps a_len in ecx)
+ #  ebp + 0:	caller's ebp
+ #  ebp + 4:	return address
+ #  ebp + 8:	pa	argument
+ #  ebp + 12:	a_len	argument
+ #  ebp + 16:	ps	argument
+ #  ebp + 20:	
+ #  registers:
+ # 	eax:	is_sse during dispatch, then digit / low half of the square
+ #	ebx:	carry (also scratch for the words of *ps)
+ #	ecx:	a_len
+ #	edx:	high half of the square
+ #	esi:	a ptr
+ #	edi:	c ptr
+
+.globl	s_mpv_sqr_add_prop
+.type	s_mpv_sqr_add_prop,@function
+s_mpv_sqr_add_prop:		# add the square of each digit of pa to a word pair of ps
+     GET   is_sse,%eax
+     cmp    $0,%eax
+     je     s_mpv_sqr_add_prop_x86	# 0: plain x86 code
+     jg     s_mpv_sqr_add_prop_sse2	# > 0: sse2 code
+     call   s_mpi_is_sse2	# < 0: not probed yet
+     PUT    %eax,is_sse		# cache the answer
+     cmp    $0,%eax
+     jg     s_mpv_sqr_add_prop_sse2
+s_mpv_sqr_add_prop_x86:
+     push   %ebp
+     mov    %esp,%ebp
+     sub    $12,%esp
+     push   %edi
+     push   %esi
+     push   %ebx
+     movl   $0,%ebx		# carry = 0
+     mov    12(%ebp),%ecx	# a_len
+     mov    16(%ebp),%edi	# edi = ps
+     cmp    $0,%ecx
+     je     31f			# jump if a_len == 0
+     cld
+     mov    8(%ebp),%esi	# esi = pa
+30:
+     lodsl			# %eax = [ds:esi]; esi += 4;
+     mull   %eax		# edx:eax = a_i * a_i
+
+     add    %ebx,%eax		# add "carry"
+     adc    $0,%edx
+     mov    0(%edi),%ebx	# ebx = low word of the pair in *ps
+     add    %ebx,%eax		# add low word from result
+     mov    4(%edi),%ebx	# ebx = high word of the pair (mov keeps CF)
+     stosl			# [es:edi] = %eax; edi += 4; (stosl keeps CF too)
+     adc    %ebx,%edx		# add high word from result
+     movl   $0,%ebx		# mov, not xor, so CF from the adc survives
+     mov    %edx,%eax
+     adc    $0,%ebx		# ebx = carry out of the pair
+     stosl			# [es:edi] = %eax; edi += 4;
+     dec    %ecx		# --a_len
+     jnz    30b			# jmp if a_len != 0
+31:
+    cmp    $0,%ebx		# is carry zero?
+    jz     34f
+    mov    0(%edi),%eax		# add in current word from *c
+    add	   %ebx,%eax
+    stosl			# [es:edi] = eax; edi += 4; (flags unchanged)
+    jnc    34f			# done unless that addition carried out
+32:
+    mov    0(%edi),%eax		# add in current word from *c
+    adc	   $0,%eax		# CF still holds the carry: mov and stosl leave flags alone
+    stosl			# [es:edi] = eax; edi += 4;
+    jc     32b
+34:
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+s_mpv_sqr_add_prop_sse2:
+    push   %ebp
+    mov    %esp,%ebp
+    push   %edi
+    push   %esi
+    push   %ebx
+    psubq  %mm2,%mm2		# carry = 0
+    mov    12(%ebp),%ecx	# ecx = a_len
+    mov    16(%ebp),%edi	# edi = ps
+    cmp    $0,%ecx
+    je     36f			# jmp if a_len == 0
+    mov    8(%ebp),%esi		# esi = a
+    cld
+35:
+    movd   0(%esi),%mm0        # mm0 = *a
+    movd   0(%edi),%mm3	       # fetch the sum
+    add	   $4,%esi
+    pmuludq %mm0,%mm0          # mm0 = sqr(a)
+    paddq  %mm0,%mm2           # add the carry
+    paddq  %mm3,%mm2           # add the low word
+    movd   4(%edi),%mm3        # fetch the high word of the pair
+    movd   %mm2,0(%edi)        # store the 32bit result
+    psrlq  $32, %mm2	       # keep the upper half of the sum
+    paddq  %mm3,%mm2           # add the high word
+    movd   %mm2,4(%edi)        # store the 32bit result
+    psrlq  $32, %mm2	       # save the carry.
+    add    $8,%edi
+    dec    %ecx			# --a_len
+    jnz    35b			# jmp if a_len != 0
+36:
+    movd   %mm2,%ebx		# ebx = carry out of the last pair
+    cmp    $0,%ebx		# is carry zero?
+    jz     38f
+    mov    0(%edi),%eax		# add the carry to the next word of *ps
+    add    %ebx, %eax
+    stosl			# [es:edi] = eax; edi += 4; (flags unchanged)
+    jnc    38f			# done unless that addition carried out
+37:
+    mov    0(%edi),%eax		# add in current word from *c
+    adc	   $0,%eax		# CF still holds the carry: mov and stosl leave flags alone
+    stosl			# [es:edi] = eax; edi += 4;
+    jc     37b
+38:
+    emms			# done with the MMX registers
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+
+ #
+ # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ # so its high bit is 1.   This code is from NSPR.
+ #
+ # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ # 		          mp_digit *qp, mp_digit *rp)
+
+ #  esp +  0:   Caller's ebx
+ #  esp +  4:	return address
+ #  esp +  8:	Nhi	argument
+ #  esp + 12:	Nlo	argument
+ #  esp + 16:	divisor	argument
+ #  esp + 20:	qp	argument
+ #  esp + 24:   rp	argument
+ #  registers:
+ # 	eax:	Nlo, then the quotient, then the return value (0)
+ #	ebx:	divisor, then qp, then rp
+ #	ecx:	(not used)
+ #	edx:	Nhi, then the remainder
+ #	esi:	(not used)
+ #	edi:	(not used)
+ #  NOTE(review): div faults unless Nhi < divisor; nothing here checks it - confirm callers.
+
+.globl	s_mpv_div_2dx1d
+.type	s_mpv_div_2dx1d,@function
+s_mpv_div_2dx1d:
+       push   %ebx
+       mov    8(%esp),%edx	# edx = Nhi
+       mov    12(%esp),%eax	# eax = Nlo
+       mov    16(%esp),%ebx	# ebx = divisor
+       div    %ebx		# eax = quotient, edx = remainder of edx:eax / ebx
+       mov    20(%esp),%ebx	# ebx = qp
+       mov    %eax,0(%ebx)	# *qp = quotient
+       mov    24(%esp),%ebx	# ebx = rp
+       mov    %edx,0(%ebx)	# *rp = remainder
+       xor    %eax,%eax		# return zero
+       pop    %ebx
+       ret    
+       nop
+  
+ # Magic indicating no need for an executable stack
+.section .note.GNU-stack, "", @progbits
+.previous
diff --git a/security/nss/lib/freebl/mpi/mpi_x86_asm.c b/security/nss/lib/freebl/mpi/mpi_x86_asm.c
new file mode 100644
index 000000000..4faeef30c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_x86_asm.c
@@ -0,0 +1,531 @@
+/*
+ *  mpi_x86_asm.c - MSVC inline assembly implementation of s_mpv_ functions.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+
+static int is_sse = -1; /* -1: CPU not probed yet; 0: use the x86 path; > 0: use the SSE2 path */
+extern unsigned long s_mpi_is_sse2(void); /* probe; each s_mpv_* entry point caches its result in is_sse */
+
+/*
+ *   ebp - 36:  caller's esi
+ *   ebp - 32:  caller's edi
+ *   ebp - 28:
+ *   ebp - 24:
+ *   ebp - 20:
+ *   ebp - 16:
+ *   ebp - 12:
+ *   ebp - 8:
+ *   ebp - 4:
+ *   ebp + 0:   caller's ebp
+ *   ebp + 4:   return address
+ *   ebp + 8:   a   argument
+ *   ebp + 12:  a_len   argument
+ *   ebp + 16:  b   argument
+ *   ebp + 20:  c   argument
+ *   registers:
+ *      eax:
+ *  ebx:    carry
+ *  ecx:    a_len
+ *  edx:
+ *  esi:    a ptr
+ *  edi:    c ptr
+ */
+__declspec(naked) void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{ /* c[i] = low word of (a[i] * b + carry) for i < a_len; c[a_len] = last carry */
+    __asm {
+    mov    eax, is_sse      ; cached probe result: 0 = x86, > 0 = SSE2, < 0 = not probed
+    cmp    eax, 0
+    je     s_mpv_mul_d_x86
+    jg     s_mpv_mul_d_sse2
+    call   s_mpi_is_sse2    ; not probed yet: ask once
+    mov    is_sse, eax      ; and cache the answer
+    cmp    eax, 0
+    jg     s_mpv_mul_d_sse2 ; anything else falls through to the x86 path
+s_mpv_mul_d_x86:
+    push   ebp
+    mov    ebp,esp
+    sub    esp,28       ; 28 bytes of locals; nothing on this path addresses them
+    push   edi
+    push   esi
+    push   ebx
+    mov    ebx,0        ; carry = 0
+    mov    ecx,[ebp+12]     ; ecx = a_len
+    mov    edi,[ebp+20]     ; edi = c
+    cmp    ecx,0
+    je     L_2          ; jmp if a_len == 0
+    mov    esi,[ebp+8]      ; esi = a
+    cld                 ; lodsd/stosd below must step upward
+L_1:
+    lodsd           ; eax = [ds:esi]; esi += 4
+    mov    edx,[ebp+16]     ; edx = b
+    mul    edx          ; edx:eax = Phi:Plo = a_i * b
+
+    add    eax,ebx      ; add carry (ebx) to edx:eax
+    adc    edx,0
+    mov    ebx,edx      ; high half of product becomes next carry
+
+    stosd           ; [es:edi] = eax; edi += 4;
+    dec    ecx          ; --a_len
+    jnz    L_1          ; jmp if a_len != 0
+L_2:
+    mov    [edi],ebx        ; *c = carry
+    pop    ebx
+    pop    esi
+    pop    edi
+    leave
+    ret
+    nop
+s_mpv_mul_d_sse2:
+    push   ebp
+    mov    ebp, esp
+    push   edi
+    push   esi
+    psubq  mm2, mm2     ; carry = 0
+    mov    ecx, [ebp+12]    ; ecx = a_len
+    movd   mm1, [ebp+16]    ; mm1 = b
+    mov    edi, [ebp+20]    ; edi = c
+    cmp    ecx, 0
+    je     L_6          ; jmp if a_len == 0
+    mov    esi, [ebp+8]     ; esi = a
+    cld                 ; no string instruction follows on this path
+L_5:
+    movd   mm0, [esi]       ; mm0 = a_i
+    add    esi, 4
+    pmuludq mm0, mm1        ; mm0 = 64-bit product a_i * b
+    paddq  mm2, mm0     ; mm2 = carry + product
+    movd   [edi], mm2       ; store the low 32 bits
+    add    edi, 4
+    psrlq  mm2, 32      ; high 32 bits become the next carry
+    dec    ecx          ; --a_len
+    jnz    L_5          ; jmp if a_len != 0
+L_6:
+    movd   [edi], mm2       ; *c = carry
+    emms                ; clear MMX state before returning
+    pop    esi
+    pop    edi
+    leave
+    ret
+    nop
+    }
+}
+
+/*
+ *   ebp - 36:  caller's esi
+ *   ebp - 32:  caller's edi
+ *   ebp - 28:
+ *   ebp - 24:
+ *   ebp - 20:
+ *   ebp - 16:
+ *   ebp - 12:
+ *   ebp - 8:
+ *   ebp - 4:
+ *   ebp + 0:   caller's ebp
+ *   ebp + 4:   return address
+ *   ebp + 8:   a   argument
+ *   ebp + 12:  a_len   argument
+ *   ebp + 16:  b   argument
+ *   ebp + 20:  c   argument
+ *   registers:
+ *      eax:
+ *  ebx:    carry
+ *  ecx:    a_len
+ *  edx:
+ *  esi:    a ptr
+ *  edi:    c ptr
+ */
+__declspec(naked) void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{ /* c[i] = low word of (a[i] * b + carry + c[i]) for i < a_len; c[a_len] is overwritten with the last carry */
+    __asm {
+    mov    eax, is_sse      ; cached probe result: 0 = x86, > 0 = SSE2, < 0 = not probed
+    cmp    eax, 0
+    je     s_mpv_mul_d_add_x86
+    jg     s_mpv_mul_d_add_sse2
+    call   s_mpi_is_sse2    ; not probed yet: ask once
+    mov    is_sse, eax      ; and cache the answer
+    cmp    eax, 0
+    jg     s_mpv_mul_d_add_sse2 ; anything else falls through to the x86 path
+s_mpv_mul_d_add_x86:
+    push   ebp
+    mov    ebp,esp
+    sub    esp,28       ; 28 bytes of locals; nothing on this path addresses them
+    push   edi
+    push   esi
+    push   ebx
+    mov    ebx,0        ; carry = 0
+    mov    ecx,[ebp+12]     ; ecx = a_len
+    mov    edi,[ebp+20]     ; edi = c
+    cmp    ecx,0
+    je     L_11         ; jmp if a_len == 0
+    mov    esi,[ebp+8]      ; esi = a
+    cld                 ; lodsd/stosd below must step upward
+L_10:
+    lodsd           ; eax = [ds:esi]; esi += 4
+    mov    edx,[ebp+16]     ; edx = b
+    mul    edx          ; edx:eax = Phi:Plo = a_i * b
+
+    add    eax,ebx      ; add carry (ebx) to edx:eax
+    adc    edx,0
+    mov    ebx,[edi]        ; add in current word from *c
+    add    eax,ebx
+    adc    edx,0
+    mov    ebx,edx      ; high half of product becomes next carry
+
+    stosd           ; [es:edi] = eax; edi += 4;
+    dec    ecx          ; --a_len
+    jnz    L_10         ; jmp if a_len != 0
+L_11:
+    mov    [edi],ebx        ; *c = carry
+    pop    ebx
+    pop    esi
+    pop    edi
+    leave
+    ret
+    nop
+s_mpv_mul_d_add_sse2:
+    push   ebp
+    mov    ebp, esp
+    push   edi
+    push   esi
+    psubq  mm2, mm2     ; carry = 0
+    mov    ecx, [ebp+12]    ; ecx = a_len
+    movd   mm1, [ebp+16]    ; mm1 = b
+    mov    edi, [ebp+20]    ; edi = c
+    cmp    ecx, 0
+    je     L_16         ; jmp if a_len == 0
+    mov    esi, [ebp+8]     ; esi = a
+    cld                 ; no string instruction follows on this path
+L_15:
+    movd   mm0, [esi]       ; mm0 = a_i
+    add    esi, 4
+    pmuludq mm0, mm1        ; mm0 = 64-bit product a_i * b
+    paddq  mm2, mm0     ; mm2 = carry + product
+    movd   mm0, [edi]       ; mm0 = current word of c
+    paddq  mm2, mm0     ; add that word of c
+    movd   [edi], mm2       ; store the low 32 bits
+    add    edi, 4
+    psrlq  mm2, 32      ; high 32 bits become the next carry
+    dec    ecx          ; --a_len
+    jnz    L_15         ; jmp if a_len != 0
+L_16:
+    movd   [edi], mm2       ; *c = carry
+    emms                ; clear MMX state before returning
+    pop    esi
+    pop    edi
+    leave
+    ret
+    nop
+    }
+}
+
+/*
+ *   ebp - 36:  caller's esi
+ *   ebp - 32:  caller's edi
+ *   ebp - 28:
+ *   ebp - 24:
+ *   ebp - 20:
+ *   ebp - 16:
+ *   ebp - 12:
+ *   ebp - 8:
+ *   ebp - 4:
+ *   ebp + 0:   caller's ebp
+ *   ebp + 4:   return address
+ *   ebp + 8:   a   argument
+ *   ebp + 12:  a_len   argument
+ *   ebp + 16:  b   argument
+ *   ebp + 20:  c   argument
+ *   registers:
+ *      eax:
+ *  ebx:    carry
+ *  ecx:    a_len
+ *  edx:
+ *  esi:    a ptr
+ *  edi:    c ptr
+ */
+__declspec(naked) void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{ /* c[i] += a[i] * b with carry for i < a_len; the last carry is added into c[a_len] and rippled upward */
+    __asm {
+    mov    eax, is_sse      ; cached probe result: 0 = x86, > 0 = SSE2, < 0 = not probed
+    cmp    eax, 0
+    je     s_mpv_mul_d_add_prop_x86
+    jg     s_mpv_mul_d_add_prop_sse2
+    call   s_mpi_is_sse2    ; not probed yet: ask once
+    mov    is_sse, eax      ; and cache the answer
+    cmp    eax, 0
+    jg     s_mpv_mul_d_add_prop_sse2 ; anything else falls through to the x86 path
+s_mpv_mul_d_add_prop_x86:
+    push   ebp
+    mov    ebp,esp
+    sub    esp,28       ; 28 bytes of locals; nothing on this path addresses them
+    push   edi
+    push   esi
+    push   ebx
+    mov    ebx,0        ; carry = 0
+    mov    ecx,[ebp+12]     ; ecx = a_len
+    mov    edi,[ebp+20]     ; edi = c
+    cmp    ecx,0
+    je     L_21         ; jmp if a_len == 0
+    cld                 ; lodsd/stosd below must step upward
+    mov    esi,[ebp+8]      ; esi = a
+L_20:
+    lodsd           ; eax = [ds:esi]; esi += 4
+    mov    edx,[ebp+16]     ; edx = b
+    mul    edx          ; edx:eax = Phi:Plo = a_i * b
+
+    add    eax,ebx      ; add carry (ebx) to edx:eax
+    adc    edx,0
+    mov    ebx,[edi]        ; add in current word from *c
+    add    eax,ebx
+    adc    edx,0
+    mov    ebx,edx      ; high half of product becomes next carry
+
+    stosd           ; [es:edi] = eax; edi += 4;
+    dec    ecx          ; --a_len
+    jnz    L_20         ; jmp if a_len != 0
+L_21:
+    cmp    ebx,0        ; is carry zero?
+    jz     L_23
+    mov    eax,[edi]        ; add in current word from *c
+    add    eax,ebx
+    stosd           ; [es:edi] = eax; edi += 4; (flags untouched)
+    jnc    L_23
+L_22:
+    mov    eax,[edi]        ; add in current word from *c
+    adc    eax,0        ; CF is set on every entry to this loop
+    stosd           ; [es:edi] = eax; edi += 4;
+    jc     L_22         ; keep rippling while the add overflows
+L_23:
+    pop    ebx
+    pop    esi
+    pop    edi
+    leave
+    ret
+    nop
+s_mpv_mul_d_add_prop_sse2:
+    push   ebp
+    mov    ebp, esp
+    push   edi
+    push   esi
+    push   ebx
+    psubq  mm2, mm2     ; carry = 0
+    mov    ecx, [ebp+12]    ; ecx = a_len
+    movd   mm1, [ebp+16]    ; mm1 = b
+    mov    edi, [ebp+20]    ; edi = c
+    cmp    ecx, 0
+    je     L_26         ; jmp if a_len == 0
+    mov    esi, [ebp+8]     ; esi = a
+    cld                 ; needed by the stosd in the carry ripple below
+L_25:
+    movd   mm0, [esi]       ; mm0 = a_i
+    movd   mm3, [edi]       ; fetch the sum
+    add    esi, 4
+    pmuludq mm0, mm1        ; mm0 = 64-bit product a_i * b
+    paddq  mm2, mm0     ; mm2 = carry + product
+    paddq  mm2, mm3     ; add the current word of c
+    movd   [edi], mm2       ; store the low 32 bits
+    add    edi, 4
+    psrlq  mm2, 32      ; high 32 bits become the next carry
+    dec    ecx          ; --a_len
+    jnz    L_25         ; jmp if a_len != 0
+L_26:
+    movd   ebx, mm2     ; ebx = final carry
+    cmp    ebx, 0       ; is carry zero?
+    jz     L_28
+    mov    eax, [edi]       ; add the carry into the next word of c
+    add    eax, ebx
+    stosd               ; [es:edi] = eax; edi += 4; (flags untouched)
+    jnc    L_28
+L_27:
+    mov    eax, [edi]       ; add in current word from *c
+    adc    eax, 0       ; CF is set on every entry to this loop
+    stosd           ; [es:edi] = eax; edi += 4;
+    jc     L_27         ; keep rippling while the add overflows
+L_28:
+    emms                ; clear MMX state before returning
+    pop    ebx
+    pop    esi
+    pop    edi
+    leave
+    ret
+    nop
+    }
+}
+
+/*
+ *   ebp - 20:  caller's esi
+ *   ebp - 16:  caller's edi
+ *   ebp - 12:
+ *   ebp - 8:   carry
+ *   ebp - 4:   a_len   local
+ *   ebp + 0:   caller's ebp
+ *   ebp + 4:   return address
+ *   ebp + 8:   pa  argument
+ *   ebp + 12:  a_len   argument
+ *   ebp + 16:  ps  argument
+ *   ebp + 20:
+ *   registers:
+ *      eax:
+ *  ebx:    carry
+ *  ecx:    a_len
+ *  edx:
+ *  esi:    a ptr
+ *  edi:    c ptr
+ */
+__declspec(naked) void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs)
+{ /* adds the 64-bit square of a[i] into sqrs[2i], sqrs[2i+1] with carry; the last carry is rippled upward */
+    __asm {
+     mov    eax, is_sse     ; cached probe result: 0 = x86, > 0 = SSE2, < 0 = not probed
+     cmp    eax, 0
+     je     s_mpv_sqr_add_prop_x86
+     jg     s_mpv_sqr_add_prop_sse2
+     call   s_mpi_is_sse2   ; not probed yet: ask once
+     mov    is_sse, eax     ; and cache the answer
+     cmp    eax, 0
+     jg     s_mpv_sqr_add_prop_sse2 ; anything else falls through to the x86 path
+s_mpv_sqr_add_prop_x86:
+     push   ebp
+     mov    ebp,esp
+     sub    esp,12      ; 12 bytes of locals; nothing on this path addresses them
+     push   edi
+     push   esi
+     push   ebx
+     mov    ebx,0       ; carry = 0
+     mov    ecx,[ebp+12]    ; a_len
+     mov    edi,[ebp+16]    ; edi = ps
+     cmp    ecx,0
+     je     L_31        ; jump if a_len == 0
+     cld                ; lodsd/stosd below must step upward
+     mov    esi,[ebp+8]     ; esi = pa
+L_30:
+     lodsd          ; eax = [ds:esi]; esi += 4;
+     mul    eax         ; edx:eax = a_i * a_i
+
+     add    eax,ebx     ; add "carry"
+     adc    edx,0
+     mov    ebx,[edi]
+     add    eax,ebx     ; add low word from result
+     mov    ebx,[edi+4]     ; mov leaves CF from the add intact
+     stosd          ; [es:edi] = eax; edi += 4; (flags untouched)
+     adc    edx,ebx     ; add high word from result
+     mov    ebx,0       ; mov (unlike xor) keeps CF for the adc below
+     mov    eax,edx
+     adc    ebx,0       ; ebx = carry out of the high word
+     stosd          ; [es:edi] = eax; edi += 4;
+     dec    ecx         ; --a_len
+     jnz    L_30        ; jmp if a_len != 0
+L_31:
+    cmp    ebx,0        ; is carry zero?
+    jz     L_34
+    mov    eax,[edi]        ; add in current word from *c
+    add    eax,ebx
+    stosd           ; [es:edi] = eax; edi += 4; (flags untouched)
+    jnc    L_34
+L_32:
+    mov    eax,[edi]        ; add in current word from *c
+    adc    eax,0        ; CF is set on every entry to this loop
+    stosd           ; [es:edi] = eax; edi += 4;
+    jc     L_32         ; keep rippling while the add overflows
+L_34:
+    pop    ebx
+    pop    esi
+    pop    edi
+    leave
+    ret
+    nop
+s_mpv_sqr_add_prop_sse2:
+    push   ebp
+    mov    ebp, esp
+    push   edi
+    push   esi
+    push   ebx
+    psubq  mm2, mm2     ; carry = 0
+    mov    ecx, [ebp+12]    ; ecx = a_len
+    mov    edi, [ebp+16]    ; edi = sqrs
+    cmp    ecx, 0
+    je     L_36     ; jmp if a_len == 0
+    mov    esi, [ebp+8]     ; esi = a
+    cld                 ; needed by the stosd in the carry ripple below
+L_35:
+    movd   mm0, [esi]       ; mm0 = *a
+    movd   mm3, [edi]       ; fetch the sum
+    add    esi, 4
+    pmuludq mm0, mm0        ; mm0 = sqr(a)
+    paddq  mm2, mm0     ; add the carry
+    paddq  mm2, mm3     ; add the low word
+    movd   mm3, [edi+4]     ; mm3 = high word of the running sum
+    movd   [edi], mm2       ; store the 32bit result
+    psrlq  mm2, 32      ; bring the upper half down
+    paddq  mm2, mm3     ; add the high word
+    movd   [edi+4], mm2     ; store the 32bit result
+    psrlq  mm2, 32      ; save the carry.
+    add    edi, 8
+    dec    ecx          ; --a_len
+    jnz    L_35         ; jmp if a_len != 0
+L_36:
+    movd   ebx, mm2     ; ebx = final carry
+    cmp    ebx, 0       ; is carry zero?
+    jz     L_38
+    mov    eax, [edi]       ; add the carry into the next word
+    add    eax, ebx
+    stosd               ; [es:edi] = eax; edi += 4; (flags untouched)
+    jnc    L_38
+L_37:
+    mov    eax, [edi]       ; add in current word from *c
+    adc    eax, 0       ; CF is set on every entry to this loop
+    stosd           ; [es:edi] = eax; edi += 4;
+    jc     L_37         ; keep rippling while the add overflows
+L_38:
+    emms                ; clear MMX state before returning
+    pop    ebx
+    pop    esi
+    pop    edi
+    leave
+    ret
+    nop
+    }
+}
+
+/*
+ *  Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ *  so its high bit is 1.   This code is from NSPR.
+ *  The div instruction faults unless the quotient fits in 32 bits (Nhi < divisor).
+ *  Dump of assembler code for function s_mpv_div_2dx1d:
+ *
+ *   esp +  0:   Caller's ebx
+ *   esp +  4:  return address
+ *   esp +  8:  Nhi argument
+ *   esp + 12:  Nlo argument
+ *   esp + 16:  divisor argument
+ *   esp + 20:  qp  argument
+ *   esp + 24:   rp argument
+ *   registers:
+ *  eax:    Nlo before the div, quotient after it, then the zero return value
+ *  ebx:    divisor, then qp, then rp (caller's value is saved and restored)
+ *  ecx:    not used
+ *  edx:    Nhi before the div, remainder after it
+ *  esi:    not used
+ *  edi:    not used
+ */
+__declspec(naked) mp_err
+    s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+                    mp_digit *qp, mp_digit *rp)
+{
+    __asm {
+       push   ebx
+       mov    edx,[esp+8]   ; edx = Nhi
+       mov    eax,[esp+12]  ; eax = Nlo
+       mov    ebx,[esp+16]  ; ebx = divisor
+       div    ebx           ; eax = quotient, edx = remainder
+       mov    ebx,[esp+20]
+       mov    [ebx],eax     ; *qp = quotient
+       mov    ebx,[esp+24]
+       mov    [ebx],edx     ; *rp = remainder
+       xor    eax,eax       ; return zero
+       pop    ebx
+       ret
+       nop
+    }
+}
diff --git a/security/nss/lib/freebl/mpi/mpi_x86_os2.s b/security/nss/lib/freebl/mpi/mpi_x86_os2.s
new file mode 100644
index 000000000..b903e2564
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_x86_os2.s
@@ -0,0 +1,538 @@
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+.data
+.align 4
+ #
+ # -1 means to call _s_mpi_is_sse2 to determine if we support sse2
+ #    instructions.
+ #  0 means to use x86 instructions
+ #  1 means to use sse2 instructions
+.type	is_sse,@object
+.size	is_sse,4
+is_sse: .long	-1 
+
+#
+# GET/PUT read and write a global.  The -fPIC (@GOTOFF) variants are
+# commented out below, so only the non-PIC forms are assembled here.
+# (The original note said "default to pic"; solaris uses mpi_i86pc.s
+# and windows uses mpi_x86_asm.c.)
+#
+#.ifndef NO_PIC
+#.macro GET   var,reg
+#    movl   \var@GOTOFF(%ebx),\reg
+#.endm
+#.macro PUT   reg,var
+#    movl   \reg,\var@GOTOFF(%ebx)
+#.endm
+#.else
+.macro GET   var,reg
+    movl   \var,\reg
+.endm
+.macro PUT   reg,var
+    movl   \reg,\var
+.endm
+#.endif
+
+.text
+
+
+ #  ebp - 36:	caller's esi
+ #  ebp - 32:	caller's edi
+ #  ebp - 28:	
+ #  ebp - 24:	
+ #  ebp - 20:	
+ #  ebp - 16:	
+ #  ebp - 12:	
+ #  ebp - 8:	
+ #  ebp - 4:	
+ #  ebp + 0:	caller's ebp
+ #  ebp + 4:	return address
+ #  ebp + 8:	a	argument
+ #  ebp + 12:	a_len	argument
+ #  ebp + 16:	b	argument
+ #  ebp + 20:	c	argument
+ #  registers:
+ # 	eax:
+ #	ebx:	carry
+ #	ecx:	a_len
+ #	edx:
+ #	esi:	a ptr
+ #	edi:	c ptr
+.globl	_s_mpv_mul_d
+.type	_s_mpv_mul_d,@function
+_s_mpv_mul_d:			# c[i] = low word of a[i]*b + carry; c[a_len] = last carry
+    GET    is_sse,%eax
+    cmp    $0,%eax		# is_sse: 0 = x86, > 0 = SSE2, < 0 = not probed yet
+    je     _s_mpv_mul_d_x86
+    jg     _s_mpv_mul_d_sse2
+    call   _s_mpi_is_sse2	# not probed yet: ask once
+    PUT    %eax,is_sse
+    cmp    $0,%eax
+    jg     _s_mpv_mul_d_sse2	# anything else falls through to the x86 path
+_s_mpv_mul_d_x86:
+    push   %ebp
+    mov    %esp,%ebp
+    sub    $28,%esp		# 28 bytes of locals; nothing on this path addresses them
+    push   %edi
+    push   %esi
+    push   %ebx
+    movl   $0,%ebx		# carry = 0
+    mov    12(%ebp),%ecx	# ecx = a_len
+    mov    20(%ebp),%edi	# edi = c
+    cmp    $0,%ecx
+    je     2f			# jmp if a_len == 0
+    mov    8(%ebp),%esi		# esi = a
+    cld				# lodsl/stosl below must step upward
+1:
+    lodsl			# eax = [ds:esi]; esi += 4
+    mov    16(%ebp),%edx	# edx = b
+    mull   %edx			# edx:eax = Phi:Plo = a_i * b
+
+    add    %ebx,%eax		# add carry (%ebx) to edx:eax
+    adc    $0,%edx
+    mov    %edx,%ebx		# high half of product becomes next carry
+
+    stosl			# [es:edi] = eax; edi += 4;
+    dec    %ecx			# --a_len
+    jnz    1b			# jmp if a_len != 0
+2:
+    mov    %ebx,0(%edi)		# *c = carry
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+_s_mpv_mul_d_sse2:
+    push   %ebp
+    mov    %esp,%ebp
+    push   %edi
+    push   %esi
+    psubq  %mm2,%mm2		# carry = 0
+    mov    12(%ebp),%ecx	# ecx = a_len
+    movd   16(%ebp),%mm1	# mm1 = b
+    mov    20(%ebp),%edi	# edi = c
+    cmp    $0,%ecx
+    je     6f			# jmp if a_len == 0
+    mov    8(%ebp),%esi		# esi = a
+    cld				# no string instruction follows on this path
+5:
+    movd   0(%esi),%mm0         # mm0 = a_i
+    add    $4,%esi
+    pmuludq %mm1,%mm0           # mm0 = 64-bit product a_i * b
+    paddq  %mm0,%mm2            # mm2 = carry + product
+    movd   %mm2,0(%edi)         # store the low 32 bits
+    add    $4,%edi
+    psrlq  $32, %mm2		# high 32 bits become the next carry
+    dec    %ecx			# --a_len
+    jnz    5b			# jmp if a_len != 0
+6:
+    movd   %mm2,0(%edi)		# *c = carry
+    emms			# clear MMX state before returning
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+
+ #  ebp - 36:	caller's esi
+ #  ebp - 32:	caller's edi
+ #  ebp - 28:	
+ #  ebp - 24:	
+ #  ebp - 20:	
+ #  ebp - 16:	
+ #  ebp - 12:	
+ #  ebp - 8:	
+ #  ebp - 4:	
+ #  ebp + 0:	caller's ebp
+ #  ebp + 4:	return address
+ #  ebp + 8:	a	argument
+ #  ebp + 12:	a_len	argument
+ #  ebp + 16:	b	argument
+ #  ebp + 20:	c	argument
+ #  registers:
+ # 	eax:
+ #	ebx:	carry
+ #	ecx:	a_len
+ #	edx:
+ #	esi:	a ptr
+ #	edi:	c ptr
+.globl	_s_mpv_mul_d_add
+.type	_s_mpv_mul_d_add,@function
+_s_mpv_mul_d_add:		# c[i] = low word of a[i]*b + carry + c[i]; c[a_len] = last carry
+    GET    is_sse,%eax
+    cmp    $0,%eax		# is_sse: 0 = x86, > 0 = SSE2, < 0 = not probed yet
+    je     _s_mpv_mul_d_add_x86
+    jg     _s_mpv_mul_d_add_sse2
+    call   _s_mpi_is_sse2	# not probed yet: ask once
+    PUT    %eax,is_sse
+    cmp    $0,%eax
+    jg     _s_mpv_mul_d_add_sse2	# anything else falls through to the x86 path
+_s_mpv_mul_d_add_x86:
+    push   %ebp
+    mov    %esp,%ebp
+    sub    $28,%esp		# 28 bytes of locals; nothing on this path addresses them
+    push   %edi
+    push   %esi
+    push   %ebx
+    movl   $0,%ebx		# carry = 0
+    mov    12(%ebp),%ecx	# ecx = a_len
+    mov    20(%ebp),%edi	# edi = c
+    cmp    $0,%ecx
+    je     11f			# jmp if a_len == 0
+    mov    8(%ebp),%esi		# esi = a
+    cld				# lodsl/stosl below must step upward
+10:
+    lodsl			# eax = [ds:esi]; esi += 4
+    mov    16(%ebp),%edx	# edx = b
+    mull   %edx			# edx:eax = Phi:Plo = a_i * b
+
+    add    %ebx,%eax		# add carry (%ebx) to edx:eax
+    adc    $0,%edx
+    mov    0(%edi),%ebx		# add in current word from *c
+    add    %ebx,%eax		# eax += that word of c
+    adc    $0,%edx
+    mov    %edx,%ebx		# high half of product becomes next carry
+
+    stosl			# [es:edi] = eax; edi += 4;
+    dec    %ecx			# --a_len
+    jnz    10b			# jmp if a_len != 0
+11:
+    mov    %ebx,0(%edi)		# *c = carry
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+_s_mpv_mul_d_add_sse2:
+    push   %ebp
+    mov    %esp,%ebp
+    push   %edi
+    push   %esi
+    psubq  %mm2,%mm2		# carry = 0
+    mov    12(%ebp),%ecx	# ecx = a_len
+    movd   16(%ebp),%mm1	# mm1 = b
+    mov    20(%ebp),%edi	# edi = c
+    cmp    $0,%ecx
+    je     16f			# jmp if a_len == 0
+    mov    8(%ebp),%esi		# esi = a
+    cld				# no string instruction follows on this path
+15:
+    movd   0(%esi),%mm0         # mm0 = a_i
+    add    $4,%esi
+    pmuludq %mm1,%mm0           # mm0 = 64-bit product a_i * b
+    paddq  %mm0,%mm2            # mm2 = carry + product
+    movd   0(%edi),%mm0		# mm0 = current word of c
+    paddq  %mm0,%mm2            # add that word of c
+    movd   %mm2,0(%edi)         # store the low 32 bits
+    add    $4,%edi
+    psrlq  $32, %mm2		# high 32 bits become the next carry
+    dec    %ecx			# --a_len
+    jnz    15b			# jmp if a_len != 0
+16:
+    movd   %mm2,0(%edi)		# *c = carry
+    emms			# clear MMX state before returning
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+
+ #  ebp - 8:	caller's esi
+ #  ebp - 4:	caller's edi
+ #  ebp + 0:	caller's ebp
+ #  ebp + 4:	return address
+ #  ebp + 8:	a	argument
+ #  ebp + 12:	a_len	argument
+ #  ebp + 16:	b	argument
+ #  ebp + 20:	c	argument
+ #  registers:
+ # 	eax:
+ #	ebx:	carry
+ #	ecx:	a_len
+ #	edx:
+ #	esi:	a ptr
+ #	edi:	c ptr
+.globl	_s_mpv_mul_d_add_prop
+.type	_s_mpv_mul_d_add_prop,@function
+_s_mpv_mul_d_add_prop:		# c[i] += a[i]*b with carry; last carry is added at c[a_len] and rippled up
+    GET    is_sse,%eax
+    cmp    $0,%eax		# is_sse: 0 = x86, > 0 = SSE2, < 0 = not probed yet
+    je     _s_mpv_mul_d_add_prop_x86
+    jg     _s_mpv_mul_d_add_prop_sse2
+    call   _s_mpi_is_sse2	# not probed yet: ask once
+    PUT    %eax,is_sse
+    cmp    $0,%eax
+    jg     _s_mpv_mul_d_add_prop_sse2	# anything else falls through to the x86 path
+_s_mpv_mul_d_add_prop_x86:
+    push   %ebp
+    mov    %esp,%ebp
+    sub    $28,%esp		# 28 bytes of locals; nothing on this path addresses them
+    push   %edi
+    push   %esi
+    push   %ebx
+    movl   $0,%ebx		# carry = 0
+    mov    12(%ebp),%ecx	# ecx = a_len
+    mov    20(%ebp),%edi	# edi = c
+    cmp    $0,%ecx
+    je     21f			# jmp if a_len == 0
+    cld				# lodsl/stosl below must step upward
+    mov    8(%ebp),%esi		# esi = a
+20:
+    lodsl			# eax = [ds:esi]; esi += 4
+    mov    16(%ebp),%edx	# edx = b
+    mull   %edx			# edx:eax = Phi:Plo = a_i * b
+
+    add    %ebx,%eax		# add carry (%ebx) to edx:eax
+    adc    $0,%edx
+    mov    0(%edi),%ebx		# add in current word from *c
+    add    %ebx,%eax		# eax += that word of c
+    adc    $0,%edx
+    mov    %edx,%ebx		# high half of product becomes next carry
+
+    stosl			# [es:edi] = eax; edi += 4;
+    dec    %ecx			# --a_len
+    jnz    20b			# jmp if a_len != 0
+21:
+    cmp    $0,%ebx		# is carry zero?
+    jz     23f
+    mov    0(%edi),%eax		# add in current word from *c
+    add	   %ebx,%eax
+    stosl			# [es:edi] = eax; edi += 4; (flags untouched)
+    jnc    23f
+22:
+    mov    0(%edi),%eax		# add in current word from *c
+    adc	   $0,%eax		# CF is set on every entry to this loop
+    stosl			# [es:edi] = eax; edi += 4;
+    jc     22b			# keep rippling while the add overflows
+23:
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+_s_mpv_mul_d_add_prop_sse2:
+    push   %ebp
+    mov    %esp,%ebp
+    push   %edi
+    push   %esi
+    push   %ebx
+    psubq  %mm2,%mm2		# carry = 0
+    mov    12(%ebp),%ecx	# ecx = a_len
+    movd   16(%ebp),%mm1	# mm1 = b
+    mov    20(%ebp),%edi	# edi = c
+    cmp    $0,%ecx
+    je     26f			# jmp if a_len == 0
+    mov    8(%ebp),%esi		# esi = a
+    cld				# needed by the stosl in the carry ripple below
+25:
+    movd   0(%esi),%mm0         # mm0 = a_i
+    movd   0(%edi),%mm3		# fetch the sum
+    add    $4,%esi
+    pmuludq %mm1,%mm0           # mm0 = 64-bit product a_i * b
+    paddq  %mm0,%mm2            # mm2 = carry + product
+    paddq  %mm3,%mm2            # add the current word of c
+    movd   %mm2,0(%edi)         # store the low 32 bits
+    add    $4,%edi
+    psrlq  $32, %mm2		# high 32 bits become the next carry
+    dec    %ecx			# --a_len
+    jnz    25b			# jmp if a_len != 0
+26:
+    movd   %mm2,%ebx		# ebx = final carry
+    cmp    $0,%ebx		# is carry zero?
+    jz     28f
+    mov    0(%edi),%eax		# add the carry into the next word of c
+    add    %ebx, %eax
+    stosl			# [es:edi] = eax; edi += 4; (flags untouched)
+    jnc    28f
+27:
+    mov    0(%edi),%eax		# add in current word from *c
+    adc	   $0,%eax		# CF is set on every entry to this loop
+    stosl			# [es:edi] = eax; edi += 4;
+    jc     27b			# keep rippling while the add overflows
+28:
+    emms			# clear MMX state before returning
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+
+
+ #  ebp - 20:	caller's esi
+ #  ebp - 16:	caller's edi
+ #  ebp - 12:	
+ #  ebp - 8:	carry
+ #  ebp - 4:	a_len	local
+ #  ebp + 0:	caller's ebp
+ #  ebp + 4:	return address
+ #  ebp + 8:	pa	argument
+ #  ebp + 12:	a_len	argument
+ #  ebp + 16:	ps	argument
+ #  ebp + 20:	
+ #  registers:
+ # 	eax:
+ #	ebx:	carry
+ #	ecx:	a_len
+ #	edx:
+ #	esi:	a ptr
+ #	edi:	c ptr
+
+.globl	_s_mpv_sqr_add_prop
+.type	_s_mpv_sqr_add_prop,@function
+_s_mpv_sqr_add_prop:		# adds a[i]^2 into ps[2i], ps[2i+1] with carry; last carry is rippled up
+     GET   is_sse,%eax
+     cmp    $0,%eax		# is_sse: 0 = x86, > 0 = SSE2, < 0 = not probed yet
+     je     _s_mpv_sqr_add_prop_x86
+     jg     _s_mpv_sqr_add_prop_sse2
+     call   _s_mpi_is_sse2	# not probed yet: ask once
+     PUT    %eax,is_sse
+     cmp    $0,%eax
+     jg     _s_mpv_sqr_add_prop_sse2	# anything else falls through to the x86 path
+_s_mpv_sqr_add_prop_x86:
+     push   %ebp
+     mov    %esp,%ebp
+     sub    $12,%esp		# 12 bytes of locals; nothing on this path addresses them
+     push   %edi
+     push   %esi
+     push   %ebx
+     movl   $0,%ebx		# carry = 0
+     mov    12(%ebp),%ecx	# a_len
+     mov    16(%ebp),%edi	# edi = ps
+     cmp    $0,%ecx
+     je     31f			# jump if a_len == 0
+     cld			# lodsl/stosl below must step upward
+     mov    8(%ebp),%esi	# esi = pa
+30:
+     lodsl			# %eax = [ds:esi]; esi += 4;
+     mull   %eax		# edx:eax = a_i * a_i
+
+     add    %ebx,%eax		# add "carry"
+     adc    $0,%edx
+     mov    0(%edi),%ebx
+     add    %ebx,%eax		# add low word from result
+     mov    4(%edi),%ebx	# mov leaves CF from the add intact
+     stosl			# [es:edi] = %eax; edi += 4; (flags untouched)
+     adc    %ebx,%edx		# add high word from result
+     movl   $0,%ebx		# mov (unlike xor) keeps CF for the adc below
+     mov    %edx,%eax
+     adc    $0,%ebx		# ebx = carry out of the high word
+     stosl			# [es:edi] = %eax; edi += 4;
+     dec    %ecx		# --a_len
+     jnz    30b			# jmp if a_len != 0
+31:
+    cmp    $0,%ebx		# is carry zero?
+    jz     34f
+    mov    0(%edi),%eax		# add in current word from *c
+    add	   %ebx,%eax
+    stosl			# [es:edi] = eax; edi += 4; (flags untouched)
+    jnc    34f
+32:
+    mov    0(%edi),%eax		# add in current word from *c
+    adc	   $0,%eax		# CF is set on every entry to this loop
+    stosl			# [es:edi] = eax; edi += 4;
+    jc     32b			# keep rippling while the add overflows
+34:
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+_s_mpv_sqr_add_prop_sse2:
+    push   %ebp
+    mov    %esp,%ebp
+    push   %edi
+    push   %esi
+    push   %ebx
+    psubq  %mm2,%mm2		# carry = 0
+    mov    12(%ebp),%ecx	# ecx = a_len
+    mov    16(%ebp),%edi	# edi = ps
+    cmp    $0,%ecx
+    je     36f			# jmp if a_len == 0
+    mov    8(%ebp),%esi		# esi = a
+    cld				# needed by the stosl in the carry ripple below
+35:
+    movd   0(%esi),%mm0        # mm0 = *a
+    movd   0(%edi),%mm3	       # fetch the sum
+    add	   $4,%esi
+    pmuludq %mm0,%mm0          # mm0 = sqr(a)
+    paddq  %mm0,%mm2           # add the carry
+    paddq  %mm3,%mm2           # add the low word
+    movd   4(%edi),%mm3        # mm3 = high word of the running sum
+    movd   %mm2,0(%edi)        # store the 32bit result
+    psrlq  $32, %mm2	       # bring the upper half down
+    paddq  %mm3,%mm2           # add the high word
+    movd   %mm2,4(%edi)        # store the 32bit result
+    psrlq  $32, %mm2	       # save the carry.
+    add    $8,%edi
+    dec    %ecx			# --a_len
+    jnz    35b			# jmp if a_len != 0
+36:
+    movd   %mm2,%ebx		# ebx = final carry
+    cmp    $0,%ebx		# is carry zero?
+    jz     38f
+    mov    0(%edi),%eax		# add the carry into the next word
+    add    %ebx, %eax
+    stosl			# [es:edi] = eax; edi += 4; (flags untouched)
+    jnc    38f
+37:
+    mov    0(%edi),%eax		# add in current word from *c
+    adc	   $0,%eax		# CF is set on every entry to this loop
+    stosl			# [es:edi] = eax; edi += 4;
+    jc     37b			# keep rippling while the add overflows
+38:
+    emms			# clear MMX state before returning
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+
+ #
+ # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ # so its high bit is 1.   This code is from NSPR.
+ # The div instruction faults unless the quotient fits in 32 bits (Nhi < divisor).
+ # mp_err _s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ # 		          mp_digit *qp, mp_digit *rp)
+
+ #  esp +  0:   Caller's ebx
+ #  esp +  4:	return address
+ #  esp +  8:	Nhi	argument
+ #  esp + 12:	Nlo	argument
+ #  esp + 16:	divisor	argument
+ #  esp + 20:	qp	argument
+ #  esp + 24:   rp	argument
+ #  registers:
+ # 	eax:	Nlo before the div, quotient after it, then the zero return value
+ #	ebx:	divisor, then qp, then rp (caller's value is saved and restored)
+ #	ecx:	not used
+ #	edx:	Nhi before the div, remainder after it
+ #	esi:	not used
+ #	edi:	not used
+ # 
+
+.globl	_s_mpv_div_2dx1d
+.type	_s_mpv_div_2dx1d,@function
+_s_mpv_div_2dx1d:
+       push   %ebx
+       mov    8(%esp),%edx		# edx = Nhi
+       mov    12(%esp),%eax		# eax = Nlo
+       mov    16(%esp),%ebx		# ebx = divisor
+       div    %ebx			# eax = quotient, edx = remainder
+       mov    20(%esp),%ebx
+       mov    %eax,0(%ebx)		# *qp = quotient
+       mov    24(%esp),%ebx
+       mov    %edx,0(%ebx)		# *rp = remainder
+       xor    %eax,%eax		# return zero
+       pop    %ebx
+       ret    
+       nop
+  
diff --git a/security/nss/lib/freebl/mpi/mplogic.c b/security/nss/lib/freebl/mpi/mplogic.c
new file mode 100644
index 000000000..89fd03ae8
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mplogic.c
@@ -0,0 +1,443 @@
+/*
+ *  mplogic.c
+ *
+ *  Bitwise logical operations on MPI values
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+#include "mplogic.h"
+
+/* {{{ Lookup table for population count */
+
+static const unsigned char bitc[] = { /* bitc[v] = number of 1 bits in the byte value v (0..255) */
+    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+};
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/*
+  mpl_not(a, b)    - compute b = ~a
+  mpl_and(a, b, c) - compute c = a & b
+  mpl_or(a, b, c)  - compute c = a | b
+  mpl_xor(a, b, c) - compute c = a ^ b
+ */
+
+/* {{{ mpl_not(a, b) */
+
+mp_err
+mpl_not(mp_int *a, mp_int *b)
+{
+    /* Store the digit-wise complement of a in b. */
+    mp_err status;
+    unsigned int pos = 0;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    status = mp_copy(a, b);
+    if (status != MP_OKAY)
+        return status;
+    /* mp_digit is unsigned, so ~ flips exactly the bits of each digit */
+    while (pos < USED(b)) {
+        DIGIT(b, pos) = ~DIGIT(b, pos);
+        pos++;
+    }
+    s_mp_clamp(b);
+    return MP_OKAY;
+} /* end mpl_not() */
+
+/* }}} */
+
+/* {{{ mpl_and(a, b, c) */
+
+mp_err
+mpl_and(mp_int *a, mp_int *b, mp_int *c)
+{
+    /* c = a & b.  The result never needs more digits than the shorter
+     * operand, so that one seeds c and the other is masked into it.
+     * NOTE(review): if c is the same object as the longer operand, the
+     * copy below replaces it before it is read -- confirm callers avoid that. */
+    mp_int *shorter, *longer;
+    mp_err status;
+    unsigned int pos;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    shorter = (USED(a) <= USED(b)) ? a : b;
+    longer = (shorter == a) ? b : a;
+
+    status = mp_copy(shorter, c);
+    if (status != MP_OKAY)
+        return status;
+
+    for (pos = 0; pos < USED(shorter); pos++) {
+        DIGIT(c, pos) &= DIGIT(longer, pos);
+    }
+
+    /* high digits may now be zero; let s_mp_clamp() normalize c */
+    s_mp_clamp(c);
+    return MP_OKAY;
+} /* end mpl_and() */
+
+/* }}} */
+
+/* {{{ mpl_or(a, b, c) */
+
+mp_err
+mpl_or(mp_int *a, mp_int *b, mp_int *c)
+{
+    /* c = a | b.  The longer operand seeds c; the shorter one is OR-ed in.
+     * Returns MP_OKAY or the error reported by mp_copy(). */
+    mp_int *which, *other;
+    mp_err res;
+    unsigned int ix;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (USED(a) >= USED(b)) {
+        which = a;
+        other = b;
+    } else {
+        which = b;
+        other = a;
+    }
+    if ((res = mp_copy(which, c)) != MP_OKAY)
+        return res;
+    /* Bound by USED(other): other is the shorter operand, so indexing it
+     * up to USED(which) would run past its used digits. */
+    for (ix = 0; ix < USED(other); ix++)
+        DIGIT(c, ix) |= DIGIT(other, ix);
+    return MP_OKAY;
+} /* end mpl_or() */
+
+/* }}} */
+
+/* {{{ mpl_xor(a, b, c) */
+
+mp_err
+mpl_xor(mp_int *a, mp_int *b, mp_int *c)
+{
+    /* c = a ^ b.  The longer operand seeds c; the shorter one is XOR-ed in.
+     * Returns MP_OKAY or the error reported by mp_copy(). */
+    mp_int *which, *other;
+    mp_err res;
+    unsigned int ix;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (USED(a) >= USED(b)) {
+        which = a;
+        other = b;
+    } else {
+        which = b;
+        other = a;
+    }
+
+    if ((res = mp_copy(which, c)) != MP_OKAY)
+        return res;
+    /* Bound by USED(other): other is the shorter operand, so indexing it
+     * up to USED(which) would run past its used digits. */
+    for (ix = 0; ix < USED(other); ix++)
+        DIGIT(c, ix) ^= DIGIT(other, ix);
+    s_mp_clamp(c); /* equal high digits XOR to zero */
+    return MP_OKAY;
+} /* end mpl_xor() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/*
+  mpl_rsh(a, b, d)     - b = a >> d
+  mpl_lsh(a, b, d)     - b = a << d
+ */
+
+/* {{{ mpl_rsh(a, b, d) */
+
+mp_err
+mpl_rsh(const mp_int *a, mp_int *b, mp_digit d)
+{
+    /* b = a >> d: copy a into b, then hand b to s_mp_div_2d().
+     * Only a failed copy is reported as an error. */
+    mp_err status;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    status = mp_copy(a, b);
+    if (status == MP_OKAY)
+        s_mp_div_2d(b, d);
+
+    return status;
+} /* end mpl_rsh() */
+
+/* }}} */
+
+/* {{{ mpl_lsh(a, b, d) */
+
+mp_err
+mpl_lsh(const mp_int *a, mp_int *b, mp_digit d)
+{
+    /* b = a << d: copy a into b, then return what s_mp_mul_2d() reports */
+    mp_err status;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    status = mp_copy(a, b);
+    if (status == MP_OKAY)
+        status = s_mp_mul_2d(b, d);
+    return status;
+} /* end mpl_lsh() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/*
+  mpl_num_set(a, num)
+
+  Count the number of set bits in the binary representation of a.
+  Returns MP_OKAY and sets 'num' to be the number of such bits, if
+  possible.  If num is NULL, the result is thrown away, but it is
+  not considered an error.
+
+  mpl_num_clear() does basically the same thing for clear bits.
+ */
+
+/* {{{ mpl_num_set(a, num) */
+
+mp_err
+mpl_num_set(mp_int *a, int *num)
+{
+    /* Count the 1 bits in the used digits of a, one byte at a time
+     * through the bitc[] table.  num may be NULL to discard the count. */
+    unsigned int pos;
+    int total = 0;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    for (pos = 0; pos < USED(a); pos++) {
+        mp_digit word = DIGIT(a, pos);
+        int byteNo;
+
+        /* one table lookup per byte of the digit */
+        for (byteNo = 0; byteNo < sizeof(mp_digit); byteNo++) {
+            unsigned char octet = (unsigned char)(word >> (CHAR_BIT * byteNo));
+            total += bitc[octet];
+        }
+    }
+
+    if (num != NULL)
+        *num = total;
+
+    return MP_OKAY;
+} /* end mpl_num_set() */
+
+/* }}} */
+
+/* {{{ mpl_num_clear(a, num) */
+
+mp_err
+mpl_num_clear(mp_int *a, int *num)
+{
+    unsigned int pos;
+    int total = 0;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    /* Walk every digit in use one byte at a time; each byte is
+     * complemented (UCHAR_MAX - byte) before the bitc[] lookup. */
+    for (pos = 0; pos < USED(a); pos++) {
+        mp_digit word = DIGIT(a, pos);
+        unsigned int left = sizeof(mp_digit);
+
+        while (left-- > 0) {
+            total += bitc[UCHAR_MAX - (unsigned char)word];
+            word >>= CHAR_BIT;
+        }
+    }
+
+    /* A NULL 'num' means the caller does not want the count. */
+    if (num != NULL)
+        *num = total;
+
+    return MP_OKAY;
+} /* end mpl_num_clear() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/*
+  mpl_parity(a)
+
+  Determines the bitwise parity of the value given.  Returns MP_EVEN
+  if an even number of bits are set, MP_ODD if an odd number are
+  set.
+ */
+
+/* {{{ mpl_parity(a) */
+
+mp_err
+mpl_parity(mp_int *a)
+{
+    /*
+     * Returns MP_ODD when the number of 1 bits in the digits in use is
+     * odd and MP_EVEN otherwise; the sign of 'a' is not consulted.
+     */
+    unsigned int pos;
+    unsigned int half;
+    mp_digit folded = 0;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    /*
+     * Parity is linear over XOR, so first XOR all digits in use into a
+     * single digit; its parity equals the parity of the whole value.
+     */
+    for (pos = 0; pos < USED(a); pos++)
+        folded ^= DIGIT(a, pos);
+
+    /*
+     * Fold the upper half onto the lower half, halving the shift each
+     * time, until bit 0 holds the XOR of every bit of the digit.
+     */
+    for (half = (sizeof(mp_digit) * CHAR_BIT) / 2; half != 0; half >>= 1)
+        folded ^= (folded >> half);
+
+    if ((folded & 1) != 0)
+        return MP_ODD;
+    return MP_EVEN;
+} /* end mpl_parity() */
+
+/* }}} */
+
+/*
+  mpl_set_bit
+
+  Returns MP_OKAY or some error code.
+  Grows a if needed to set a bit to 1.
+ */
+mp_err
+mpl_set_bit(mp_int *a, mp_size bitNum, mp_size value)
+{
+    mp_size digitIdx;
+    mp_size needed;
+    mp_digit bitMask;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    /* Locate the digit holding the bit; pad 'a' if it is too short. */
+    digitIdx = bitNum / MP_DIGIT_BIT;
+    needed = digitIdx + 1;
+    if (needed > MP_USED(a)) {
+        mp_err rv = s_mp_pad(a, needed);
+        if (rv != MP_OKAY)
+            return rv;
+    }
+    bitMask = (mp_digit)1 << (bitNum % MP_DIGIT_BIT);
+    /* Clear the bit, then put it back only when 'value' is non-zero. */
+    MP_DIGIT(a, digitIdx) &= ~bitMask;
+    if (value)
+        MP_DIGIT(a, digitIdx) |= bitMask;
+    s_mp_clamp(a);
+    return MP_OKAY;
+}
+
+/*
+  mpl_get_bit
+
+  returns 0 or 1 or some (negative) error code.
+ */
+mp_err
+mpl_get_bit(const mp_int *a, mp_size bitNum)
+{
+    mp_size digitIdx;
+    mp_digit shifted;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    /* The digit that would hold the bit must be one of the used digits. */
+    digitIdx = bitNum / MP_DIGIT_BIT;
+    ARGCHK(digitIdx <= MP_USED(a) - 1, MP_RANGE);
+
+    shifted = MP_DIGIT(a, digitIdx) >> (bitNum % MP_DIGIT_BIT);
+    return (mp_err)(shifted & 1);
+}
+
+/*
+  mpl_get_bits
+  - Extracts numBits bits from a, where the least significant extracted bit
+  is bit lsbNum.  Returns a negative value if error occurs.
+  - Because sign bit is used to indicate error, maximum number of bits to
+  be returned is the lesser of (a) the number of bits in an mp_digit, or
+  (b) one less than the number of bits in an mp_err.
+  - lsbNum + numbits can be greater than the number of significant bits in
+  integer a, as long as bit lsbNum is in the high order digit of a.
+ */
+mp_err
+mpl_get_bits(const mp_int *a, mp_size lsbNum, mp_size numBits)
+{
+    mp_size rshift = (lsbNum % MP_DIGIT_BIT);
+    mp_size lsWndx = (lsbNum / MP_DIGIT_BIT);
+    mp_digit *digit, mask;
+
+    ARGCHK(a != NULL, MP_BADARG);
+    ARGCHK(numBits < CHAR_BIT * sizeof mask, MP_BADARG);
+    ARGCHK(MP_HOWMANY(lsbNum, MP_DIGIT_BIT) <= MP_USED(a), MP_RANGE);
+    /* Build the mask in mp_digit width, and only once numBits is vetted. */
+    mask = ((mp_digit)1 << numBits) - 1;
+    digit = MP_DIGITS(a) + lsWndx;
+    if ((numBits + rshift <= MP_DIGIT_BIT) || (lsWndx + 1 >= MP_USED(a)))
+        mask &= (digit[0] >> rshift); /* one digit, or no higher digit */
+    else /* window straddles two digits */
+        mask &= ((digit[0] >> rshift) | (digit[1] << (MP_DIGIT_BIT - rshift)));
+    return (mp_err)mask;
+}
+
+/*
+  mpl_significant_bits
+  returns number of significant bits in abs(a).
+  returns 1 if value is zero.
+ */
+mp_size
+mpl_significant_bits(const mp_int *a)
+{
+    mp_size count = 0;
+    int top;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    /* Scan down from the top used digit to the first non-zero one. */
+    top = MP_USED(a);
+    while (top > 0) {
+        mp_digit d = MP_DIGIT(a, --top);
+
+        if (d == 0)
+            continue;
+        /* Count the bit positions occupied in that digit. */
+        for (; d != 0; d >>= 1)
+            ++count;
+        break;
+    }
+    /* Every digit below 'top' contributes a full MP_DIGIT_BIT bits. */
+    count += top * MP_DIGIT_BIT;
+    return count ? count : 1; /* a zero value reports one bit */
+}
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS                                                  */
diff --git a/security/nss/lib/freebl/mpi/mplogic.h b/security/nss/lib/freebl/mpi/mplogic.h
new file mode 100644
index 000000000..a4a6b7735
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mplogic.h
@@ -0,0 +1,52 @@
+/*
+ *  mplogic.h
+ *
+ *  Bitwise logical operations on MPI values
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _H_MPLOGIC_
+#define _H_MPLOGIC_
+
+#include "mpi.h"
+
+/*
+  The logical operations treat an mp_int as if it were a bit vector,
+  without regard to its sign (an mp_int is represented in a signed
+  magnitude format).  Values are treated as if they had an infinite
+  string of zeros left of the most-significant bit.
+ */
+
+/* Parity results returned by mpl_parity() */
+
+#define MP_EVEN MP_YES
+#define MP_ODD MP_NO
+
+/* Bitwise functions (mpl_or and mpl_xor store their result in c) */
+
+mp_err mpl_not(mp_int *a, mp_int *b);            /* one's complement  */
+mp_err mpl_and(mp_int *a, mp_int *b, mp_int *c); /* bitwise AND       */
+mp_err mpl_or(mp_int *a, mp_int *b, mp_int *c);  /* bitwise OR        */
+mp_err mpl_xor(mp_int *a, mp_int *b, mp_int *c); /* bitwise XOR       */
+
+/* Shift functions: b receives a shifted by d */
+
+mp_err mpl_rsh(const mp_int *a, mp_int *b, mp_digit d); /* right shift    */
+mp_err mpl_lsh(const mp_int *a, mp_int *b, mp_digit d); /* left shift     */
+
+/* Bit count and parity; a NULL num discards the count */
+
+mp_err mpl_num_set(mp_int *a, int *num);   /* count set bits    */
+mp_err mpl_num_clear(mp_int *a, int *num); /* count clear bits  */
+mp_err mpl_parity(mp_int *a);              /* determine parity  */
+
+/* Get & Set the value of a bit */
+/* The getters return the bit value(s), or a negative code on error. */
+mp_err mpl_set_bit(mp_int *a, mp_size bitNum, mp_size value);
+mp_err mpl_get_bit(const mp_int *a, mp_size bitNum);
+mp_err mpl_get_bits(const mp_int *a, mp_size lsbNum, mp_size numBits);
+mp_size mpl_significant_bits(const mp_int *a);
+
+#endif /* end _H_MPLOGIC_ */
diff --git a/security/nss/lib/freebl/mpi/mpmontg.c b/security/nss/lib/freebl/mpi/mpmontg.c
new file mode 100644
index 000000000..06fd41b3a
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpmontg.c
@@ -0,0 +1,1141 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This file implements modular exponentiation using Montgomery's
+ * method for modular reduction.  This file implements the method
+ * described as "Improvement 2" in the paper "A Cryptographic Library for
+ * the Motorola DSP56000" by Stephen R. Dusse' and Burton S. Kaliski Jr.
+ * published in "Advances in Cryptology: Proceedings of EUROCRYPT '90"
+ * "Lecture Notes in Computer Science" volume 473, 1991, pg 230-244,
+ * published by Springer Verlag.
+ */
+
+#define MP_USING_CACHE_SAFE_MOD_EXP 1
+#include <string.h>
+#include "mpi-priv.h"
+#include "mplogic.h"
+#include "mpprime.h"
+#ifdef MP_USING_MONT_MULF
+#include "montmulf.h"
+#endif
+#include <stddef.h> /* ptrdiff_t */
+#include <assert.h>
+
+#define STATIC
+
+#define MAX_ODD_INTS 32 /* 2 ** (WINDOW_BITS - 1) */
+
+/*! computes T = REDC(T), 2^b == R
+    \param T < RN
+*/
+mp_err
+s_mp_redc(mp_int *T, mp_mont_modulus *mmm)
+{
+    const mp_size nUsed = MP_USED(&mmm->N); /* digits in the modulus N */
+    mp_err res;
+    mp_size k;
+
+    /* Make room for 2 * nUsed + 1 digits before accumulating into T. */
+    MP_CHECKOK(s_mp_pad(T, (nUsed << 1) + 1));
+    /* For each low digit k: T += N * m_k * (MP_RADIX ** k). */
+    for (k = 0; k < nUsed; ++k) {
+        mp_digit m_k = MP_DIGIT(T, k) * mmm->n0prime;
+        s_mp_mul_d_add_offset(&mmm->N, m_k, T, k);
+    }
+    s_mp_clamp(T);
+    s_mp_rshd(T, nUsed); /* T /= R */
+
+    res = s_mp_cmp(T, &mmm->N);
+    if (res >= 0) {
+        MP_CHECKOK(s_mp_sub(T, &mmm->N)); /* T = T - N */
+#ifdef DEBUG
+        res = mp_cmp(T, &mmm->N);
+        if (res >= 0) {
+            res = MP_UNDEF;
+            goto CLEANUP;
+        }
+#endif
+    }
+    res = MP_OKAY;
+CLEANUP:
+    return res;
+}
+
+#if !defined(MP_MONT_USE_MP_MUL)
+
+/*! c <- REDC( a * b ) mod N
+    \param a < N  i.e. "reduced"
+    \param b < N  i.e. "reduced"
+    \param mmm modulus N and n0' of N
+*/
+mp_err
+s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
+              mp_mont_modulus *mmm)
+{
+    mp_digit *pb;
+    mp_digit m_i;
+    mp_err res;
+    mp_size ib; /* "index b": index of current digit of B */
+    mp_size useda, usedb;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (MP_USED(a) < MP_USED(b)) {
+        const mp_int *xch = b; /* switch a and b, to do fewer outer loops */
+        b = a;
+        a = xch;
+    }
+    /* Reset c to a single zero digit, then pad it to 2 * USED(N) + 1 digits */
+    MP_USED(c) = 1;
+    MP_DIGIT(c, 0) = 0;
+    ib = (MP_USED(&mmm->N) << 1) + 1;
+    if ((res = s_mp_pad(c, ib)) != MP_OKAY)
+        goto CLEANUP;
+    /* First row: c = a * b[0]; clear the digits above it; reduce digit 0 */
+    useda = MP_USED(a);
+    pb = MP_DIGITS(b);
+    s_mpv_mul_d(MP_DIGITS(a), useda, *pb++, MP_DIGITS(c));
+    s_mp_setz(MP_DIGITS(c) + useda + 1, ib - (useda + 1));
+    m_i = MP_DIGIT(c, 0) * mmm->n0prime;
+    s_mp_mul_d_add_offset(&mmm->N, m_i, c, 0);
+
+    /* Outer loop:  Digits of b */
+    usedb = MP_USED(b);
+    for (ib = 1; ib < usedb; ib++) {
+        mp_digit b_i = *pb++;
+
+        /* Inner product:  Digits of a */
+        if (b_i)
+            s_mpv_mul_d_add_prop(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
+        m_i = MP_DIGIT(c, ib) * mmm->n0prime;
+        s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
+    }
+    if (usedb < MP_USED(&mmm->N)) { /* b shorter than N: finish reducing */
+        for (usedb = MP_USED(&mmm->N); ib < usedb; ++ib) {
+            m_i = MP_DIGIT(c, ib) * mmm->n0prime;
+            s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
+        }
+    }
+    s_mp_clamp(c);
+    s_mp_rshd(c, MP_USED(&mmm->N)); /* c /= R */
+    if (s_mp_cmp(c, &mmm->N) >= 0) {
+        MP_CHECKOK(s_mp_sub(c, &mmm->N));
+    }
+    res = MP_OKAY;
+
+CLEANUP:
+    return res;
+}
+#endif
+
+STATIC
+mp_err
+s_mp_to_mont(const mp_int *x, mp_mont_modulus *mmm, mp_int *xMont)
+{
+    mp_err res;
+    /* Callers in this file pass the same mp_int as both x and xMont. */
+    /* xMont = x * R mod N   where  N is modulus (mmm->N) */
+    MP_CHECKOK(mp_copy(x, xMont));
+    MP_CHECKOK(s_mp_lshd(xMont, MP_USED(&mmm->N))); /* xMont = x << b */
+    MP_CHECKOK(mp_div(xMont, &mmm->N, 0, xMont));   /*         mod N */
+CLEANUP:
+    return res;
+}
+
+#ifdef MP_USING_MONT_MULF
+
+/* the floating point multiply is already cache safe,
+ * don't turn on cache safe unless we specifically
+ * force it */
+#ifndef MP_FORCE_CACHE_SAFE
+#undef MP_USING_CACHE_SAFE_MOD_EXP
+#endif
+
+unsigned int mp_using_mont_mulf = 1;
+
+/* computes montgomery square of the integer in mResult */
+#define SQR                                              \
+    conv_i32_to_d32_and_d16(dm1, d16Tmp, mResult, nLen); \
+    mont_mulf_noconv(mResult, dm1, d16Tmp,               \
+                     dTmp, dn, MP_DIGITS(modulus), nLen, dn0)
+
+/* computes montgomery product of x and the integer in mResult */
+#define MUL(x)                                   \
+    conv_i32_to_d32(dm1, mResult, nLen);         \
+    mont_mulf_noconv(mResult, dm1, oddPowers[x], \
+                     dTmp, dn, MP_DIGITS(modulus), nLen, dn0)
+
+/* Do modular exponentiation using floating point multiply code. */
+mp_err
+mp_exptmod_f(const mp_int *montBase,
+             const mp_int *exponent,
+             const mp_int *modulus,
+             mp_int *result,
+             mp_mont_modulus *mmm,
+             int nLen,
+             mp_size bits_in_exponent,
+             mp_size window_bits,
+             mp_size odd_ints)
+{
+    mp_digit *mResult;
+    double *dBuf = 0, *dm1, *dn, *dSqr, *d16Tmp, *dTmp;
+    double dn0;
+    mp_size i;
+    mp_err res;
+    int expOff;
+    int dSize = 0, oddPowSize, dTmpSize;
+    mp_int accum1;
+    double *oddPowers[MAX_ODD_INTS];
+
+    /* function for computing n0prime only works if n0 is odd */
+    MP_DIGITS(&accum1) = 0;
+    for (i = 0; i < MAX_ODD_INTS; ++i)
+        oddPowers[i] = 0;
+    MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));
+    mp_set(&accum1, 1); /* accum1 = 1, converted to Montgomery form below */
+    MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
+    MP_CHECKOK(s_mp_pad(&accum1, nLen));
+
+    oddPowSize = 2 * nLen + 1;
+    dTmpSize = 2 * oddPowSize;
+    dSize = sizeof(double) * (nLen * 4 + 1 +
+                              ((odd_ints + 1) * oddPowSize) + dTmpSize);
+    dBuf = (double *)malloc(dSize);
+    if (!dBuf) { /* allocation failed: do not carve up a NULL buffer */
+        res = MP_MEM;
+        goto CLEANUP;
+    }
+    dm1 = dBuf;           /* array of d32 */
+    dn = dBuf + nLen;     /* array of d32 */
+    dSqr = dn + nLen;     /* array of d32 */
+    d16Tmp = dSqr + nLen; /* array of d16 */
+    dTmp = d16Tmp + oddPowSize;
+    /* give each odd power its own oddPowSize-double slice of dBuf */
+    for (i = 0; i < odd_ints; ++i) {
+        oddPowers[i] = dTmp;
+        dTmp += oddPowSize;
+    }
+    mResult = (mp_digit *)(dTmp + dTmpSize); /* size is nLen + 1 */
+
+    /* Make dn and dn0 */
+    conv_i32_to_d32(dn, MP_DIGITS(modulus), nLen);
+    dn0 = (double)(mmm->n0prime & 0xffff);
+
+    /* Make dSqr */
+    conv_i32_to_d32_and_d16(dm1, oddPowers[0], MP_DIGITS(montBase), nLen);
+    mont_mulf_noconv(mResult, dm1, oddPowers[0],
+                     dTmp, dn, MP_DIGITS(modulus), nLen, dn0);
+    conv_i32_to_d32(dSqr, mResult, nLen);
+
+    for (i = 1; i < odd_ints; ++i) {
+        mont_mulf_noconv(mResult, dSqr, oddPowers[i - 1],
+                         dTmp, dn, MP_DIGITS(modulus), nLen, dn0);
+        conv_i32_to_d16(oddPowers[i], mResult, nLen);
+    }
+
+    s_mp_copy(MP_DIGITS(&accum1), mResult, nLen); /* from, to, len */
+    /* consume the exponent one window at a time, highest window first */
+    for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) {
+        mp_size smallExp;
+        MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits));
+        smallExp = (mp_size)res;
+
+        if (window_bits == 1) {
+            if (!smallExp) {
+                SQR;
+            } else if (smallExp & 1) {
+                SQR;
+                MUL(0);
+            } else {
+                abort();
+            }
+        } else if (window_bits == 4) {
+            if (!smallExp) {
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+            } else if (smallExp & 1) {
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                MUL(smallExp / 2);
+            } else if (smallExp & 2) {
+                SQR;
+                SQR;
+                SQR;
+                MUL(smallExp / 4);
+                SQR;
+            } else if (smallExp & 4) {
+                SQR;
+                SQR;
+                MUL(smallExp / 8);
+                SQR;
+                SQR;
+            } else if (smallExp & 8) {
+                SQR;
+                MUL(smallExp / 16);
+                SQR;
+                SQR;
+                SQR;
+            } else {
+                abort();
+            }
+        } else if (window_bits == 5) {
+            if (!smallExp) {
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+            } else if (smallExp & 1) {
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                MUL(smallExp / 2);
+            } else if (smallExp & 2) {
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                MUL(smallExp / 4);
+                SQR;
+            } else if (smallExp & 4) {
+                SQR;
+                SQR;
+                SQR;
+                MUL(smallExp / 8);
+                SQR;
+                SQR;
+            } else if (smallExp & 8) {
+                SQR;
+                SQR;
+                MUL(smallExp / 16);
+                SQR;
+                SQR;
+                SQR;
+            } else if (smallExp & 0x10) {
+                SQR;
+                MUL(smallExp / 32);
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+            } else {
+                abort();
+            }
+        } else if (window_bits == 6) {
+            if (!smallExp) {
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+            } else if (smallExp & 1) {
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                MUL(smallExp / 2);
+            } else if (smallExp & 2) {
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                MUL(smallExp / 4);
+                SQR;
+            } else if (smallExp & 4) {
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                MUL(smallExp / 8);
+                SQR;
+                SQR;
+            } else if (smallExp & 8) {
+                SQR;
+                SQR;
+                SQR;
+                MUL(smallExp / 16);
+                SQR;
+                SQR;
+                SQR;
+            } else if (smallExp & 0x10) {
+                SQR;
+                SQR;
+                MUL(smallExp / 32);
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+            } else if (smallExp & 0x20) {
+                SQR;
+                MUL(smallExp / 64);
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+            } else {
+                abort();
+            }
+        } else {
+            abort();
+        }
+    }
+
+    s_mp_copy(mResult, MP_DIGITS(&accum1), nLen); /* from, to, len */
+
+    res = s_mp_redc(&accum1, mmm);
+    mp_exch(&accum1, result);
+
+CLEANUP:
+    mp_clear(&accum1);
+    if (dBuf) {
+        if (dSize)
+            memset(dBuf, 0, dSize);
+        free(dBuf);
+    }
+
+    return res;
+}
+#undef SQR
+#undef MUL
+#endif
+
+#define SQR(a, b)             \
+    MP_CHECKOK(mp_sqr(a, b)); \
+    MP_CHECKOK(s_mp_redc(b, mmm))
+
+#if defined(MP_MONT_USE_MP_MUL)
+#define MUL(x, a, b)                           \
+    MP_CHECKOK(mp_mul(a, oddPowers + (x), b)); \
+    MP_CHECKOK(s_mp_redc(b, mmm))
+#else
+#define MUL(x, a, b) \
+    MP_CHECKOK(s_mp_mul_mont(a, oddPowers + (x), b, mmm))
+#endif
+
+#define SWAPPA  \
+    ptmp = pa1; \
+    pa1 = pa2;  \
+    pa2 = ptmp
+
+/* Do modular exponentiation using integer multiply code. */
+mp_err
+mp_exptmod_i(const mp_int *montBase,
+             const mp_int *exponent,
+             const mp_int *modulus,
+             mp_int *result,
+             mp_mont_modulus *mmm,
+             int nLen,
+             mp_size bits_in_exponent,
+             mp_size window_bits,
+             mp_size odd_ints)
+{
+    mp_int *pa1, *pa2, *ptmp;
+    mp_size i;
+    mp_err res;
+    int expOff;
+    mp_int accum1, accum2, power2, oddPowers[MAX_ODD_INTS];
+
+    /* power2 = base ** 2; oddPowers[i] = base ** (2*i + 1); */
+    /* both are kept reduced: each product below goes through s_mp_redc */
+    /* pre-zero digit pointers: CLEANUP runs mp_clear() on all of these */
+    MP_DIGITS(&accum1) = 0;
+    MP_DIGITS(&accum2) = 0;
+    MP_DIGITS(&power2) = 0;
+    for (i = 0; i < MAX_ODD_INTS; ++i) {
+        MP_DIGITS(oddPowers + i) = 0;
+    }
+
+    MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));
+    MP_CHECKOK(mp_init_size(&accum2, 3 * nLen + 2));
+
+    MP_CHECKOK(mp_init_copy(&oddPowers[0], montBase));
+
+    MP_CHECKOK(mp_init_size(&power2, nLen + 2 * MP_USED(montBase) + 2));
+    MP_CHECKOK(mp_sqr(montBase, &power2)); /* power2 = montBase ** 2 */
+    MP_CHECKOK(s_mp_redc(&power2, mmm));
+
+    for (i = 1; i < odd_ints; ++i) {
+        MP_CHECKOK(mp_init_size(oddPowers + i, nLen + 2 * MP_USED(&power2) + 2));
+        MP_CHECKOK(mp_mul(oddPowers + (i - 1), &power2, oddPowers + i));
+        MP_CHECKOK(s_mp_redc(oddPowers + i, mmm));
+    }
+
+    /* set accumulator to montgomery residue of 1 */
+    mp_set(&accum1, 1);
+    MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
+    pa1 = &accum1;
+    pa2 = &accum2;
+    /* consume the exponent one window at a time, highest window first */
+    for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) {
+        mp_size smallExp;
+        MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits));
+        smallExp = (mp_size)res;
+
+        if (window_bits == 1) {
+            if (!smallExp) {
+                SQR(pa1, pa2);
+                SWAPPA;
+            } else if (smallExp & 1) {
+                SQR(pa1, pa2);
+                MUL(0, pa2, pa1);
+            } else {
+                abort();
+            }
+        } else if (window_bits == 4) {
+            if (!smallExp) {
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+            } else if (smallExp & 1) {
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                MUL(smallExp / 2, pa1, pa2);
+                SWAPPA;
+            } else if (smallExp & 2) {
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                MUL(smallExp / 4, pa2, pa1);
+                SQR(pa1, pa2);
+                SWAPPA;
+            } else if (smallExp & 4) {
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                MUL(smallExp / 8, pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SWAPPA;
+            } else if (smallExp & 8) {
+                SQR(pa1, pa2);
+                MUL(smallExp / 16, pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SWAPPA;
+            } else {
+                abort();
+            }
+        } else if (window_bits == 5) {
+            if (!smallExp) {
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SWAPPA;
+            } else if (smallExp & 1) {
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                MUL(smallExp / 2, pa2, pa1);
+            } else if (smallExp & 2) {
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                MUL(smallExp / 4, pa1, pa2);
+                SQR(pa2, pa1);
+            } else if (smallExp & 4) {
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                MUL(smallExp / 8, pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+            } else if (smallExp & 8) {
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                MUL(smallExp / 16, pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+            } else if (smallExp & 0x10) {
+                SQR(pa1, pa2);
+                MUL(smallExp / 32, pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+            } else {
+                abort();
+            }
+        } else if (window_bits == 6) {
+            if (!smallExp) {
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+            } else if (smallExp & 1) {
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                MUL(smallExp / 2, pa1, pa2);
+                SWAPPA;
+            } else if (smallExp & 2) {
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                MUL(smallExp / 4, pa2, pa1);
+                SQR(pa1, pa2);
+                SWAPPA;
+            } else if (smallExp & 4) {
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                MUL(smallExp / 8, pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SWAPPA;
+            } else if (smallExp & 8) {
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                MUL(smallExp / 16, pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SWAPPA;
+            } else if (smallExp & 0x10) {
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                MUL(smallExp / 32, pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SWAPPA;
+            } else if (smallExp & 0x20) {
+                SQR(pa1, pa2);
+                MUL(smallExp / 64, pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SWAPPA;
+            } else {
+                abort();
+            }
+        } else {
+            abort();
+        }
+    }
+    /* one more REDC on the accumulator, then hand it to the caller */
+    res = s_mp_redc(pa1, mmm);
+    mp_exch(pa1, result);
+
+CLEANUP:
+    mp_clear(&accum1);
+    mp_clear(&accum2);
+    mp_clear(&power2);
+    for (i = 0; i < odd_ints; ++i) {
+        mp_clear(oddPowers + i);
+    }
+    return res;
+}
+#undef SQR
+#undef MUL
+
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+unsigned int mp_using_cache_safe_exp = 1;
+#endif
+
+mp_err
+mp_set_safe_modexp(int value)
+{
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+    /* Cache-safe code is compiled in: record the caller's setting. */
+    mp_using_cache_safe_exp = value;
+    return MP_OKAY;
+#else
+    /* Cache-safe code is not compiled in, so only a request to leave
+     * it off (value == 0) can be honoured. */
+    return (value == 0) ? MP_OKAY : MP_BADARG;
+#endif
+}
+
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+#define WEAVE_WORD_SIZE 4
+
+/*
+ * mpi_to_weave takes an array of bignums, a matrix in which each bignum
+ * occupies all the columns of a row, and transposes it into a matrix in
+ * which each bignum occupies a column of every row.  The first row of the
+ * input matrix becomes the first column of the output matrix.  The n'th
+ * row of input becomes the n'th column of output.  The input data is said
+ * to be "interleaved" or "woven" into the output matrix.
+ *
+ * The array of bignums is left in this woven form.  Each time a single
+ * bignum value is needed, it is recreated by fetching the n'th column,
+ * forming a single row which is the new bignum.
+ *
+ * The purpose of this interleaving is to make it impossible to determine which
+ * of the bignums is being used in any one operation by examining the pattern
+ * of cache misses.
+ *
+ * The weaving function does not transpose the entire input matrix in one call.
+ * It transposes 4 rows of mp_ints into their respective columns of output.
+ *
+ * This implementation treats each mp_int bignum as an array of mp_digits.
+ * It stores those digits as a column of mp_digits in the output matrix.  It
+ * doesn't care if the machine uses big-endian or little-endian byte ordering
+ * within mp_digits.
+ *
+ * "bignums" is an array of mp_ints.
+ * It points to four rows, four mp_ints, a subset of a larger array of mp_ints.
+ *
+ * "weaved" is the weaved output matrix.
+ * The first digit of bignums[0] is stored in weaved[0].
+ *
+ * "nBignums" is the total number of bignums in the array of which "bignums"
+ * is a part.
+ *
+ * "nDigits" is the size in mp_digits of each mp_int in the "bignums" array.
+ * mp_ints that use less than nDigits digits are logically padded with zeros
+ * while being stored in the weaved array.
+ */
+mp_err mpi_to_weave(const mp_int *bignums,
+                    mp_digit *weaved,
+                    mp_size nDigits,  /* in each mp_int of input */
+                    mp_size nBignums) /* in the entire source array */
+{
+    mp_digit *const limit = weaved + (nDigits * nBignums);
+    mp_size col;
+
+    /* Each of the WEAVE_WORD_SIZE inputs becomes one column of 'weaved'. */
+    for (col = 0; col < WEAVE_WORD_SIZE; col++) {
+        const mp_int *src = &bignums[col];
+        const mp_size used = MP_USED(src);
+        mp_digit *out = weaved + col;
+        mp_size row;
+
+        ARGCHK(MP_SIGN(src) == MP_ZPOS, MP_BADARG);
+        ARGCHK(used <= nDigits, MP_BADARG);
+
+        /* Copy the digits in use, stepping one matrix row per digit. */
+        for (row = 0; row < used; row++, out += nBignums)
+            *out = MP_DIGIT(src, row);
+
+        /* Zero-fill the remaining rows of this column. */
+        for (; out < limit; out += nBignums)
+            *out = 0;
+    }
+
+    return MP_OKAY;
+}
+
+/*
+ * These functions return 0xffffffff if the output is true, and 0 otherwise.
+ */
+#define CONST_TIME_MSB(x) (0L - ((x) >> (8 * sizeof(x) - 1)))
+#define CONST_TIME_EQ_Z(x) CONST_TIME_MSB(~(x) & ((x)-1))
+#define CONST_TIME_EQ(a, b) CONST_TIME_EQ_Z((a) ^ (b))
+
+/* Reverse the operation above for one mp_int.
+ * Reconstruct one mp_int from its column in the weaved array.
+ * Every read accesses every element of the weaved array, in order to
+ * avoid timing attacks based on patterns of memory accesses.
+ */
+mp_err weave_to_mpi(mp_int *a,              /* out, result */
+                    const mp_digit *weaved, /* in, matrix of mp_digits */
+                    mp_size index,          /* which column to read */
+                    mp_size nDigits,        /* number of mp_digits in each bignum */
+                    mp_size nBignums)       /* width of the matrix */
+{
+    /* these are indices, but need to be the same size as mp_digit
+     * because of the CONST_TIME operations */
+    mp_digit i, j;
+    mp_digit d;
+    mp_digit *pDest = MP_DIGITS(a);
+    /* NOTE(review): assumes a already has room for nDigits digits -- confirm */
+    MP_SIGN(a) = MP_ZPOS;
+    MP_USED(a) = nDigits;
+
+    assert(weaved != NULL);
+
+    /* Fetch the proper column in constant time, indexing over the whole array */
+    for (i = 0; i < nDigits; ++i) {
+        d = 0;
+        for (j = 0; j < nBignums; ++j) {
+            d |= weaved[i * nBignums + j] & CONST_TIME_EQ(j, index);
+        }
+        pDest[i] = d; /* row i of column 'index' (when index < nBignums) */
+    }
+
+    s_mp_clamp(a);
+    return MP_OKAY;
+}
+
+#define SQR(a, b)             \
+    MP_CHECKOK(mp_sqr(a, b)); \
+    MP_CHECKOK(s_mp_redc(b, mmm))
+
+#if defined(MP_MONT_USE_MP_MUL)
+#define MUL_NOWEAVE(x, a, b)     \
+    MP_CHECKOK(mp_mul(a, x, b)); \
+    MP_CHECKOK(s_mp_redc(b, mmm))
+#else
+#define MUL_NOWEAVE(x, a, b) \
+    MP_CHECKOK(s_mp_mul_mont(a, x, b, mmm))
+#endif
+
+#define MUL(x, a, b)                                               \
+    MP_CHECKOK(weave_to_mpi(&tmp, powers, (x), nLen, num_powers)); \
+    MUL_NOWEAVE(&tmp, a, b)
+
+#define SWAPPA  \
+    ptmp = pa1; \
+    pa1 = pa2;  \
+    pa2 = ptmp
+#define MP_ALIGN(x, y) ((((ptrdiff_t)(x)) + ((y)-1)) & (((ptrdiff_t)0) - (y)))
+
+/* Do modular exponentiation using integer multiply code; table entries are read back through weave_to_mpi(). */
+mp_err
+mp_exptmod_safe_i(const mp_int *montBase,
+                  const mp_int *exponent,
+                  const mp_int *modulus, /* not referenced in this function */
+                  mp_int *result,
+                  mp_mont_modulus *mmm,
+                  int nLen,
+                  mp_size bits_in_exponent,
+                  mp_size window_bits,
+                  mp_size num_powers)
+{
+    mp_int *pa1, *pa2, *ptmp;
+    mp_size i;
+    mp_size first_window;
+    mp_err res;
+    int expOff;
+    mp_int accum1, accum2, accum[WEAVE_WORD_SIZE];
+    mp_int tmp;
+    mp_digit *powersArray = NULL;
+    mp_digit *powers = NULL;
+
+    MP_DIGITS(&accum1) = 0;
+    MP_DIGITS(&accum2) = 0;
+    MP_DIGITS(&accum[0]) = 0;
+    MP_DIGITS(&accum[1]) = 0;
+    MP_DIGITS(&accum[2]) = 0;
+    MP_DIGITS(&accum[3]) = 0;
+    MP_DIGITS(&tmp) = 0;
+
+    /* grab the first window value. This allows us to preload accumulator1
+   * and save a conversion, some squares and a multiply */
+    MP_CHECKOK(mpl_get_bits(exponent,
+                            bits_in_exponent - window_bits, window_bits));
+    first_window = (mp_size)res;
+
+    MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));
+    MP_CHECKOK(mp_init_size(&accum2, 3 * nLen + 2));
+
+    /* build the first WEAVE_WORD powers inline */
+    /* if WEAVE_WORD_SIZE is not 4, this code will have to change */
+    if (num_powers > 2) {
+        MP_CHECKOK(mp_init_size(&accum[0], 3 * nLen + 2));
+        MP_CHECKOK(mp_init_size(&accum[1], 3 * nLen + 2));
+        MP_CHECKOK(mp_init_size(&accum[2], 3 * nLen + 2));
+        MP_CHECKOK(mp_init_size(&accum[3], 3 * nLen + 2));
+        mp_set(&accum[0], 1);
+        MP_CHECKOK(s_mp_to_mont(&accum[0], mmm, &accum[0]));
+        MP_CHECKOK(mp_copy(montBase, &accum[1]));
+        SQR(montBase, &accum[2]);
+        MUL_NOWEAVE(montBase, &accum[2], &accum[3]);
+        powersArray = (mp_digit *)malloc(num_powers * (nLen * sizeof(mp_digit) + 1));
+        if (!powersArray) {
+            res = MP_MEM;
+            goto CLEANUP;
+        }
+        /* powers[i] = base ** (i); */
+        powers = (mp_digit *)MP_ALIGN(powersArray, num_powers);
+        MP_CHECKOK(mpi_to_weave(accum, powers, nLen, num_powers));
+        if (first_window < 4) {
+            MP_CHECKOK(mp_copy(&accum[first_window], &accum1));
+            first_window = num_powers; /* marks accum1 as already loaded */
+        }
+    } else {
+        if (first_window == 0) {
+            mp_set(&accum1, 1);
+            MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
+        } else {
+            /* assert first_window == 1? */
+            MP_CHECKOK(mp_copy(montBase, &accum1));
+        }
+    }
+
+    /*
+     * calculate all the powers in the powers array.
+     * this adds 2**(k-1)-2 square operations over just calculating the
+     * odd powers where k is the window size in the two other mp_modexpt
+     * implementations in this file. We will get some of that
+     * back by not needing the first 'k' squares and one multiply for the
+     * first window.
+     * Given the value of 4 for WEAVE_WORD_SIZE, this loop will only execute if
+     * num_powers > 2, in which case powers will have been allocated.
+     */
+    for (i = WEAVE_WORD_SIZE; i < num_powers; i++) {
+        int acc_index = i & (WEAVE_WORD_SIZE - 1); /* i % WEAVE_WORD_SIZE */
+        if (i & 1) {
+            MUL_NOWEAVE(montBase, &accum[acc_index - 1], &accum[acc_index]);
+            /* we've filled the array; do our 'per array' processing */
+            if (acc_index == (WEAVE_WORD_SIZE - 1)) {
+                MP_CHECKOK(mpi_to_weave(accum, powers + i - (WEAVE_WORD_SIZE - 1),
+                                        nLen, num_powers));
+
+                if (first_window <= i) {
+                    MP_CHECKOK(mp_copy(&accum[first_window & (WEAVE_WORD_SIZE - 1)],
+                                       &accum1));
+                    first_window = num_powers;
+                }
+            }
+        } else {
+            /* up to 8 we can find 2^i-1 in the accum array, but at 8 our source
+             * and target are the same so we need to copy. After that, the
+             * value is overwritten, so we need to fetch it from the stored
+             * weave array */
+            if (i > 2 * WEAVE_WORD_SIZE) {
+                MP_CHECKOK(weave_to_mpi(&accum2, powers, i / 2, nLen, num_powers));
+                SQR(&accum2, &accum[acc_index]);
+            } else {
+                int half_power_index = (i / 2) & (WEAVE_WORD_SIZE - 1);
+                if (half_power_index == acc_index) {
+                    /* copy is cheaper than weave_to_mpi */
+                    MP_CHECKOK(mp_copy(&accum[half_power_index], &accum2));
+                    SQR(&accum2, &accum[acc_index]);
+                } else {
+                    SQR(&accum[half_power_index], &accum[acc_index]);
+                }
+            }
+        }
+    }
+/* if accum1 isn't set, then there is something wrong with our logic
+   * above; that is an internal programming error.
+   */
+#if MP_ARGCHK == 2
+    assert(MP_USED(&accum1) != 0);
+#endif
+
+    /* accum1 was preloaded above with the power for the first window; accum2 is scratch */
+    pa1 = &accum1;
+    pa2 = &accum2;
+
+    /* tmp is not used if window_bits == 1. */
+    if (window_bits != 1) {
+        MP_CHECKOK(mp_init_size(&tmp, 3 * nLen + 2));
+    }
+
+    for (expOff = bits_in_exponent - window_bits * 2; expOff >= 0; expOff -= window_bits) {
+        mp_size smallExp;
+        MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits));
+        smallExp = (mp_size)res;
+
+        /* hand-unrolled per window size: window_bits squarings, then a multiply */
+        switch (window_bits) {
+            case 1:
+                if (!smallExp) {
+                    SQR(pa1, pa2);
+                    SWAPPA;
+                } else if (smallExp & 1) {
+                    SQR(pa1, pa2);
+                    MUL_NOWEAVE(montBase, pa2, pa1);
+                } else {
+                    abort();
+                }
+                break;
+            case 6:
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+            /* fall through */
+            case 4:
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                MUL(smallExp, pa1, pa2);
+                SWAPPA;
+                break;
+            case 5:
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                MUL(smallExp, pa2, pa1);
+                break;
+            default:
+                abort(); /* could do a loop? */
+        }
+    }
+
+    res = s_mp_redc(pa1, mmm);
+    mp_exch(pa1, result); /* NOTE(review): done even when the reduction above failed */
+
+CLEANUP:
+    mp_clear(&accum1);
+    mp_clear(&accum2);
+    mp_clear(&accum[0]);
+    mp_clear(&accum[1]);
+    mp_clear(&accum[2]);
+    mp_clear(&accum[3]);
+    mp_clear(&tmp);
+    /* PORT_Memset(powers,0,num_powers*nLen*sizeof(mp_digit)); */
+    free(powersArray);
+    return res;
+}
+#undef SQR
+#undef MUL
+#endif
+
+mp_err
+mp_exptmod(const mp_int *inBase, const mp_int *exponent,
+           const mp_int *modulus, mp_int *result)
+{
+    const mp_int *base;
+    mp_size bits_in_exponent, i, window_bits, odd_ints;
+    mp_err res;
+    int nLen;
+    mp_int montBase, goodBase;
+    mp_mont_modulus mmm;
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+    static unsigned int max_window_bits; /* 0 until derived from the cache line size below */
+#endif /* MP_USING_CACHE_SAFE_MOD_EXP */
+    /* Computes result = inBase ** exponent (mod modulus). */
+    /* function for computing n0prime only works if n0 is odd */
+    if (!mp_isodd(modulus))
+        return s_mp_exptmod(inBase, exponent, modulus, result);
+
+    MP_DIGITS(&montBase) = 0;
+    MP_DIGITS(&goodBase) = 0;
+
+    if (mp_cmp(inBase, modulus) < 0) {
+        base = inBase;
+    } else {
+        MP_CHECKOK(mp_init(&goodBase));
+        base = &goodBase;
+        MP_CHECKOK(mp_mod(inBase, modulus, &goodBase));
+    }
+
+    nLen = MP_USED(modulus);
+    MP_CHECKOK(mp_init_size(&montBase, 2 * nLen + 2));
+
+    mmm.N = *modulus; /* a copy of the mp_int struct */
+
+    /* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX
+    **        where n0 = least significant mp_digit of N, the modulus.
+    */
+    mmm.n0prime = 0 - s_mp_invmod_radix(MP_DIGIT(modulus, 0));
+
+    MP_CHECKOK(s_mp_to_mont(base, &mmm, &montBase));
+
+    bits_in_exponent = mpl_significant_bits(exponent);
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+    if (mp_using_cache_safe_exp) {
+        if (bits_in_exponent > 780)
+            window_bits = 6;
+        else if (bits_in_exponent > 256)
+            window_bits = 5;
+        else if (bits_in_exponent > 20)
+            window_bits = 4;
+        /* RSA public key exponents are typically under 20 bits (common values
+         * are: 3, 17, 65537) and a 4-bit window is inefficient
+         */
+        else
+            window_bits = 1;
+    } else
+#endif /* MP_USING_CACHE_SAFE_MOD_EXP */
+        if (bits_in_exponent > 480)
+        window_bits = 6;
+    else if (bits_in_exponent > 160)
+        window_bits = 5;
+    else if (bits_in_exponent > 20)
+        window_bits = 4;
+    /* RSA public key exponents are typically under 20 bits (common values
+     * are: 3, 17, 65537) and a 4-bit window is inefficient
+     */
+    else
+        window_bits = 1;
+
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+    /*
+     * clamp the window size based on
+     * the cache line size.
+     */
+    if (!max_window_bits) {
+        unsigned long cache_size = s_mpi_getProcessorLineSize();
+        /* processor has no cache, use 'fast' code always */
+        if (cache_size == 0) {
+            mp_using_cache_safe_exp = 0;
+        }
+        if ((cache_size == 0) || (cache_size >= 64)) {
+            max_window_bits = 6;
+        } else if (cache_size >= 32) {
+            max_window_bits = 5;
+        } else if (cache_size >= 16) {
+            max_window_bits = 4;
+        } else
+            max_window_bits = 1; /* should this be an assert? */
+    }
+
+    /* clamp the window size down before we round bits_in_exponent up to a multiple of it */
+    if (mp_using_cache_safe_exp) {
+        if (window_bits > max_window_bits) {
+            window_bits = max_window_bits;
+        }
+    }
+#endif /* MP_USING_CACHE_SAFE_MOD_EXP */
+
+    odd_ints = 1 << (window_bits - 1);
+    i = bits_in_exponent % window_bits;
+    if (i != 0) {
+        bits_in_exponent += window_bits - i;
+    }
+
+#ifdef MP_USING_MONT_MULF
+    if (mp_using_mont_mulf) {
+        MP_CHECKOK(s_mp_pad(&montBase, nLen));
+        res = mp_exptmod_f(&montBase, exponent, modulus, result, &mmm, nLen,
+                           bits_in_exponent, window_bits, odd_ints);
+    } else
+#endif /* MP_USING_MONT_MULF */
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+        if (mp_using_cache_safe_exp) {
+        res = mp_exptmod_safe_i(&montBase, exponent, modulus, result, &mmm, nLen,
+                                bits_in_exponent, window_bits, 1 << window_bits);
+    } else
+#endif /* MP_USING_CACHE_SAFE_MOD_EXP */
+        res = mp_exptmod_i(&montBase, exponent, modulus, result, &mmm, nLen,
+                           bits_in_exponent, window_bits, odd_ints);
+
+CLEANUP:
+    mp_clear(&montBase);
+    mp_clear(&goodBase);
+    /* Don't mp_clear mmm.N because it is merely a copy of modulus.
+    ** Just zap it.
+    */
+    memset(&mmm, 0, sizeof mmm);
+    return res;
+}
diff --git a/security/nss/lib/freebl/mpi/mpprime.c b/security/nss/lib/freebl/mpi/mpprime.c
new file mode 100644
index 000000000..58287192e
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpprime.c
@@ -0,0 +1,599 @@
+/*
+ *  mpprime.c
+ *
+ *  Utilities for finding and working with prime and pseudo-prime
+ *  integers
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+#include "mpprime.h"
+#include "mplogic.h"
+#include <stdlib.h>
+#include <string.h>
+
+#define SMALL_TABLE 0 /* determines size of hard-wired prime table */
+
+#define RANDOM() rand()
+
+#include "primes.c" /* pull in the prime digit table */
+
+/*
+   Test if any of a given vector of digits divides a.  If not, MP_NO
+   is returned; otherwise, MP_YES is returned and 'which' is set to
+   the index of the integer in the vector which divided a.
+ */
+mp_err s_mpp_divp(mp_int *a, const mp_digit *vec, int size, int *which);
+
+/* {{{ mpp_divis(a, b) */
+
+/*
+  mpp_divis(a, b)
+
+  Returns MP_YES if a is divisible by b, or MP_NO if it is not.
+ */
+
+mp_err
+mpp_divis(mp_int *a, mp_int *b)
+{
+    mp_int remainder;
+    mp_err status = mp_init(&remainder);
+
+    /* Nothing to release if the temporary could not be initialised. */
+    if (status != MP_OKAY)
+        return status;
+
+    /* remainder = a mod b; a zero remainder means b divides a. */
+    status = mp_mod(a, b, &remainder);
+    if (status == MP_OKAY) {
+        if (mp_cmp_z(&remainder) == 0)
+            status = MP_YES;
+        else
+            status = MP_NO;
+    }
+
+    mp_clear(&remainder);
+    return status;
+} /* end mpp_divis() */
+
+/* }}} */
+
+/* {{{ mpp_divis_d(a, d) */
+
+/*
+  mpp_divis_d(a, d)
+
+  Return MP_YES if a is divisible by d, or MP_NO if it is not.
+ */
+
+mp_err
+mpp_divis_d(mp_int *a, mp_digit d)
+{
+    mp_digit remainder;
+    mp_err status;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    /* Zero divides nothing; answer without attempting the division. */
+    if (d == 0)
+        return MP_NO;
+
+    /* remainder = a mod d; any arithmetic error is passed straight back. */
+    status = mp_mod_d(a, d, &remainder);
+    if (status != MP_OKAY)
+        return status;
+
+    return (remainder == 0) ? MP_YES : MP_NO;
+
+} /* end mpp_divis_d() */
+
+/* }}} */
+
+/* {{{ mpp_random(a) */
+
+/*
+  mpp_random(a)
+
+  Assigns a random value to a.  This value is generated using the
+  standard C library's rand() function, so it should not be used for
+  cryptographic purposes, but it should be fine for primality testing,
+  since all we really care about there is good statistical properties.
+
+  As many digits as a currently has are filled with random digits.
+ */
+
+mp_err
+mpp_random(mp_int *a)
+
+{
+    mp_digit word = 0; /* not reset per digit; the shifts below push old bytes out */
+    unsigned int pos, byte;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    /* Build each in-use digit from sizeof(mp_digit) low bytes of RANDOM(). */
+    for (pos = 0; pos < USED(a); ++pos) {
+        for (byte = 0; byte < sizeof(mp_digit); ++byte)
+            word = (word << CHAR_BIT) | (RANDOM() & UCHAR_MAX);
+        DIGIT(a, pos) = word;
+    }
+
+    return MP_OKAY;
+
+} /* end mpp_random() */
+
+/* }}} */
+
+/* {{{ mpp_random_size(a, prec) */
+
+mp_err
+mpp_random_size(mp_int *a, mp_size prec)
+{
+    mp_err status;
+
+    ARGCHK(a != NULL && prec > 0, MP_BADARG);
+
+    /* Run s_mp_pad(a, prec) first; randomise a only if that succeeded. */
+    status = s_mp_pad(a, prec);
+    if (status == MP_OKAY)
+        status = mpp_random(a);
+    return status;
+} /* end mpp_random_size() */
+
+/* }}} */
+
+/* {{{ mpp_divis_vector(a, vec, size, which) */
+
+/*
+  mpp_divis_vector(a, vec, size, which)
+
+  Determines if a is divisible by any of the 'size' digits in vec.
+  Returns MP_YES and sets 'which' to the index of the offending digit,
+  if it is; returns MP_NO if it is not.
+ */
+
+mp_err
+mpp_divis_vector(mp_int *a, const mp_digit *vec, int size, int *which)
+{
+    ARGCHK(a != NULL && vec != NULL && size > 0, MP_BADARG);
+    /* 'which' may be NULL: s_mpp_divp() only stores through it when it is set */
+    return s_mpp_divp(a, vec, size, which);
+
+} /* end mpp_divis_vector() */
+
+/* }}} */
+
+/* {{{ mpp_divis_primes(a, np) */
+
+/*
+  mpp_divis_primes(a, np)
+
+  Test whether a is divisible by any of the first 'np' primes.  If it
+  is, returns MP_YES and sets *np to the value of the digit that did
+  it.  If not, returns MP_NO.
+ */
+mp_err
+mpp_divis_primes(mp_int *a, mp_digit *np)
+{
+    int size, which;
+    mp_err res;
+
+    ARGCHK(a != NULL && np != NULL, MP_BADARG);
+    /* Clamp before narrowing: a huge *np cast to int first could go negative. */
+    if (*np > (mp_digit)prime_tab_size)
+        size = prime_tab_size;
+    else
+        size = (int)*np;
+    res = mpp_divis_vector(a, prime_tab, size, &which);
+    if (res == MP_YES)
+        *np = prime_tab[which]; /* report the prime that divided a */
+
+    return res;
+
+} /* end mpp_divis_primes() */
+
+/* }}} */
+
+/* {{{ mpp_fermat(a, w) */
+
+/*
+  Using w as a witness, try pseudo-primality testing based on Fermat's
+  little theorem.  If a is prime, and (w, a) = 1, then w^a == w (mod
+  a).  So, we compute z = w^a (mod a) and compare z to w; if they are
+  equal, the test passes and we return MP_YES.  Otherwise, we return
+  MP_NO.
+ */
+mp_err
+mpp_fermat(mp_int *a, mp_digit w)
+{
+    mp_int witness, power;
+    mp_err status;
+
+    status = mp_init(&witness);
+    if (status != MP_OKAY)
+        return status;
+
+    mp_set(&witness, w);
+
+    status = mp_init(&power);
+    if (status == MP_OKAY) {
+        /* power = witness^a (mod a) */
+        status = mp_exptmod(&witness, a, a, &power);
+        if (status == MP_OKAY) {
+            /* The test passes only when the power reproduces the witness. */
+            if (mp_cmp(&witness, &power) == 0)
+                status = MP_YES;
+            else
+                status = MP_NO;
+        }
+
+        mp_clear(&power);
+    }
+
+    mp_clear(&witness);
+    return status;
+
+} /* end mpp_fermat() */
+
+/* }}} */
+
+/*
+  Perform the fermat test on each of the primes in a list until
+  a) one of them shows a is not prime, or
+  b) the list is exhausted.
+  Returns:  MP_YES if it passes tests.
+        MP_NO  if fermat test reveals it is composite
+        Some MP error code if some other error occurs.
+ */
+mp_err
+mpp_fermat_list(mp_int *a, const mp_digit *primes, mp_size nPrimes)
+{
+    mp_err rv = MP_YES;
+    mp_size ix;
+    /* Stop at the first witness whose result is anything other than MP_YES. */
+    for (ix = 0; ix < nPrimes && rv == MP_YES; ++ix)
+        rv = mpp_fermat(a, primes[ix]);
+    return rv;
+}
+
+/* {{{ mpp_pprime(a, nt) */
+
+/*
+  mpp_pprime(a, nt)
+
+  Performs nt iterations of the Miller-Rabin probabilistic primality
+  test on a.  Returns MP_YES if the tests pass, MP_NO if one fails.
+  If MP_NO is returned, the number is definitely composite.  If MP_YES
+  is returned, it is probably prime (but that is not guaranteed).
+ */
+
+mp_err
+mpp_pprime(mp_int *a, int nt)
+{
+    mp_err res;
+    mp_int x, amo, m, z; /* "amo" = "a minus one" */
+    int iter;
+    unsigned int jx;
+    mp_size b;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    MP_DIGITS(&x) = 0;
+    MP_DIGITS(&amo) = 0;
+    MP_DIGITS(&m) = 0;
+    MP_DIGITS(&z) = 0;
+
+    /* Initialize temporaries... */
+    MP_CHECKOK(mp_init(&amo));
+    /* Compute amo = a - 1 for what follows...    */
+    MP_CHECKOK(mp_sub_d(a, 1, &amo));
+
+    b = mp_trailing_zeros(&amo);
+    if (!b) { /* a - 1 has no trailing zero bits: a was even ? */
+        res = MP_NO;
+        goto CLEANUP;
+    }
+
+    MP_CHECKOK(mp_init_size(&x, MP_USED(a)));
+    MP_CHECKOK(mp_init(&z));
+    MP_CHECKOK(mp_init(&m));
+    MP_CHECKOK(mp_div_2d(&amo, b, &m, 0)); /* m = (a - 1) / 2^b */
+
+    /* Do the test nt times... */
+    for (iter = 0; iter < nt; iter++) {
+
+        /* Choose a random value for 1 < x < a      */
+        MP_CHECKOK(s_mp_pad(&x, USED(a)));
+        mpp_random(&x); /* result ignored: it only fails on a NULL argument */
+        MP_CHECKOK(mp_mod(&x, a, &x));
+        if (mp_cmp_d(&x, 1) <= 0) {
+            iter--;   /* don't count this iteration */
+            continue; /* choose a new x */
+        }
+
+        /* Compute z = (x ** m) mod a               */
+        MP_CHECKOK(mp_exptmod(&x, &m, a, &z));
+
+        if (mp_cmp_d(&z, 1) == 0 || mp_cmp(&z, &amo) == 0) {
+            res = MP_YES;
+            continue;
+        }
+
+        res = MP_NO; /* just in case the following for loop never executes. */
+        for (jx = 1; jx < b; jx++) {
+            /* z = z^2 (mod a) */
+            MP_CHECKOK(mp_sqrmod(&z, a, &z));
+            res = MP_NO; /* previous line set res to MP_YES */
+
+            if (mp_cmp_d(&z, 1) == 0) {
+                break; /* reached 1 without passing through a - 1: res stays MP_NO */
+            }
+            if (mp_cmp(&z, &amo) == 0) {
+                res = MP_YES;
+                break;
+            }
+        } /* end testing loop */
+
+        /* If the test passes, we will continue iterating, but a failed
+           test means the candidate is definitely NOT prime, so we will
+           immediately break out of this loop
+         */
+        if (res == MP_NO)
+            break;
+
+    } /* end iterations loop */
+
+CLEANUP:
+    mp_clear(&m);
+    mp_clear(&z);
+    mp_clear(&x);
+    mp_clear(&amo);
+    return res;
+
+} /* end mpp_pprime() */
+
+/* }}} */
+
+/* Produce table of composites from list of primes and trial value.
+** trial must be odd. List of primes must not include 2.
+** sieve should have dimension >= MAXPRIME/2, where MAXPRIME is largest
+** prime in list of primes.  After this function is finished,
+** if sieve[i] is non-zero, then (trial + 2*i) is composite.
+** Each prime used in the sieve costs one division of trial, and eliminates
+** one or more values from the search space. (3 eliminates 1/3 of the values
+** alone!)  Each value left in the search space costs 1 or more modular
+** exponentiations.  So, these divisions are a bargain!
+*/
+mp_err
+mpp_sieve(mp_int *trial, const mp_digit *primes, mp_size nPrimes,
+          unsigned char *sieve, mp_size nSieve)
+{
+    mp_size which;
+
+    /* Start from a clean table: nothing is marked composite yet. */
+    memset(sieve, 0, nSieve);
+
+    for (which = 0; which < nPrimes; ++which) {
+        const mp_digit p = primes[which];
+        mp_digit residue;
+        unsigned long first; /* smallest k >= 0 such that p divides trial + k */
+        mp_size k;
+        mp_err status = mp_mod_d(trial, p, &residue);
+
+        if (status != MP_OKAY) {
+            return status;
+        }
+
+        first = (residue == 0) ? 0 : p - residue;
+
+        /* Walk every multiple of p; only even offsets correspond to a slot. */
+        for (k = first; k < nSieve * 2; k += p) {
+            if ((k & 1) == 0) {
+                sieve[k >> 1] = 1;
+            }
+        }
+    }
+
+    return MP_OKAY;
+}
+
+#define SIEVE_SIZE (32 * 1024)
+
+/* Turn *start into a probable prime of nBits bits by testing the odd
+** candidates start, start+2, ... that survive a small-prime sieve.  If
+** 'strong' is set the result is q = 2p+1 where p is also a probable prime.
+** Returns MP_YES on success, MP_NO if no candidate passed, else an error. */
+mp_err
+mpp_make_prime(mp_int *start, mp_size nBits, mp_size strong,
+               unsigned long *nTries)
+{
+    mp_digit np;
+    mp_err res;
+    unsigned int i = 0;
+    mp_int trial, q;
+    mp_size num_tests;
+    unsigned char *sieve;
+
+    ARGCHK(start != 0, MP_BADARG);
+    ARGCHK(nBits > 16, MP_RANGE);
+
+    if ((sieve = malloc(SIEVE_SIZE)) == NULL) /* checked unconditionally, not through ARGCHK */
+        return MP_MEM;
+
+    MP_DIGITS(&trial) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_CHECKOK(mp_init(&trial));
+    MP_CHECKOK(mp_init(&q));
+    /* values originally taken from table 4.4, HandBook of Applied Cryptography,
+     * augmented by FIPS-186 requirements, Table C.2 and C.3 */
+    if (nBits >= 2000) {
+        num_tests = 3;
+    } else if (nBits >= 1536) {
+        num_tests = 4;
+    } else if (nBits >= 1024) {
+        num_tests = 5;
+    } else if (nBits >= 550) {
+        num_tests = 6;
+    } else if (nBits >= 450) {
+        num_tests = 7;
+    } else if (nBits >= 400) {
+        num_tests = 8;
+    } else if (nBits >= 350) {
+        num_tests = 9;
+    } else if (nBits >= 300) {
+        num_tests = 10;
+    } else if (nBits >= 250) {
+        num_tests = 20;
+    } else if (nBits >= 200) {
+        num_tests = 41;
+    } else if (nBits >= 100) {
+        num_tests = 38; /* FIPS table anomaly: larger aux primes require more iterations */
+    } else
+        num_tests = 50;
+
+    if (strong)
+        --nBits;
+    MP_CHECKOK(mpl_set_bit(start, nBits - 1, 1));
+    MP_CHECKOK(mpl_set_bit(start, 0, 1));
+    for (i = mpl_significant_bits(start) - 1; i >= nBits; --i) {
+        MP_CHECKOK(mpl_set_bit(start, i, 0));
+    }
+    /* start sieving with prime value of 3. */
+    MP_CHECKOK(mpp_sieve(start, prime_tab + 1, prime_tab_size - 1,
+                         sieve, SIEVE_SIZE));
+
+#ifdef DEBUG_SIEVE
+    res = 0;
+    for (i = 0; i < SIEVE_SIZE; ++i) {
+        if (!sieve[i])
+            ++res;
+    }
+    fprintf(stderr, "sieve found %d potential primes.\n", res);
+#define FPUTC(x, y) fputc(x, y)
+#else
+#define FPUTC(x, y)
+#endif
+
+    res = MP_NO;
+    for (i = 0; i < SIEVE_SIZE; ++i) {
+        if (sieve[i]) /* this number is composite */
+            continue;
+        MP_CHECKOK(mp_add_d(start, 2 * i, &trial));
+        FPUTC('.', stderr);
+        /* run a Fermat test */
+        res = mpp_fermat(&trial, 2);
+        if (res != MP_OKAY) {
+            if (res == MP_NO)
+                continue; /* was composite */
+            goto CLEANUP;
+        }
+
+        FPUTC('+', stderr);
+        /* If that passed, run some Miller-Rabin tests  */
+        res = mpp_pprime(&trial, num_tests);
+        if (res != MP_OKAY) {
+            if (res == MP_NO)
+                continue; /* was composite */
+            goto CLEANUP;
+        }
+        FPUTC('!', stderr);
+
+        if (!strong)
+            break; /* success !! */
+
+        /* At this point, we have strong evidence that our candidate
+           is itself prime.  If we want a strong prime, we need now
+           to test q = 2p + 1 for primality...
+        */
+        MP_CHECKOK(mp_mul_2(&trial, &q));
+        MP_CHECKOK(mp_add_d(&q, 1, &q));
+
+        /* Test q for small prime divisors ... */
+        np = prime_tab_size;
+        res = mpp_divis_primes(&q, &np);
+        if (res == MP_YES) { /* is composite */
+            mp_clear(&q);
+            res = MP_NO; /* otherwise a sieve that ends here would report success */
+            continue;
+        }
+        if (res != MP_NO)
+            goto CLEANUP;
+
+        /* And test with Fermat, as with its parent ... */
+        res = mpp_fermat(&q, 2);
+        if (res != MP_YES) {
+            mp_clear(&q);
+            if (res == MP_NO)
+                continue; /* was composite */
+            goto CLEANUP;
+        }
+
+        /* And test with Miller-Rabin, as with its parent ... */
+        res = mpp_pprime(&q, num_tests);
+        if (res != MP_YES) {
+            mp_clear(&q);
+            if (res == MP_NO)
+                continue; /* was composite */
+            goto CLEANUP;
+        }
+
+        /* If it passed, we've got a winner */
+        mp_exch(&q, &trial);
+        mp_clear(&q);
+        break;
+
+    } /* end of loop through sieved values */
+    if (res == MP_YES)
+        mp_exch(&trial, start);
+CLEANUP:
+    mp_clear(&trial);
+    mp_clear(&q);
+    if (nTries)
+        *nTries += i;
+    memset(sieve, 0, SIEVE_SIZE); /* never NULL here: that case returned early */
+    free(sieve);
+    return res;
+}
+
+/*========================================================================*/
+/*------------------------------------------------------------------------*/
+/* Static functions visible only to the library internally                */
+
+/* {{{ s_mpp_divp(a, vec, size, which) */
+
+/*
+   Test for divisibility by members of a vector of digits.  Returns
+   MP_NO if a is not divisible by any of them; returns MP_YES and sets
+   'which' to the index of the offender, if it is.  Will stop on the
+   first digit against which a is divisible.
+ */
+
+mp_err
+s_mpp_divp(mp_int *a, const mp_digit *vec, int size, int *which)
+{
+    int pos;
+
+    for (pos = 0; pos < size; ++pos) {
+        mp_digit remainder;
+        mp_err status = mp_mod_d(a, vec[pos], &remainder);
+
+        if (status != MP_OKAY)
+            return status; /* arithmetic failure: give up immediately */
+        if (remainder != 0)
+            continue; /* vec[pos] does not divide a; try the next one */
+
+        /* First divisor found: report its index if the caller asked. */
+        if (which != NULL)
+            *which = pos;
+        return MP_YES;
+    }
+
+    return MP_NO;
+} /* end s_mpp_divp() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS                                                  */
diff --git a/security/nss/lib/freebl/mpi/mpprime.h b/security/nss/lib/freebl/mpi/mpprime.h
new file mode 100644
index 000000000..c47c61836
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpprime.h
@@ -0,0 +1,38 @@
+/*
+ *  mpprime.h
+ *
+ *  Utilities for finding and working with prime and pseudo-prime
+ *  integers
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _H_MP_PRIME_
+#define _H_MP_PRIME_
+
+#include "mpi.h"
+
+extern const int prime_tab_size; /* number of primes available */
+extern const mp_digit prime_tab[];
+
+/* Tests for divisibility    */
+mp_err mpp_divis(mp_int *a, mp_int *b);
+mp_err mpp_divis_d(mp_int *a, mp_digit d);
+
+/* Random selection          */
+mp_err mpp_random(mp_int *a);
+mp_err mpp_random_size(mp_int *a, mp_size prec);
+
+/* Pseudo-primality testing  */
+mp_err mpp_divis_vector(mp_int *a, const mp_digit *vec, int size, int *which);
+mp_err mpp_divis_primes(mp_int *a, mp_digit *np);
+mp_err mpp_fermat(mp_int *a, mp_digit w);
+mp_err mpp_fermat_list(mp_int *a, const mp_digit *primes, mp_size nPrimes);
+mp_err mpp_pprime(mp_int *a, int nt);
+mp_err mpp_sieve(mp_int *trial, const mp_digit *primes, mp_size nPrimes,
+                 unsigned char *sieve, mp_size nSieve);
+mp_err mpp_make_prime(mp_int *start, mp_size nBits, mp_size strong,
+                      unsigned long *nTries);
+
+#endif /* end _H_MP_PRIME_ */
diff --git a/security/nss/lib/freebl/mpi/mpv_sparc.c b/security/nss/lib/freebl/mpi/mpv_sparc.c
new file mode 100644
index 000000000..423311b65
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpv_sparc.c
@@ -0,0 +1,221 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "vis_proto.h"
+
+/***************************************************************/
+
+typedef int t_s32;
+typedef unsigned int t_u32;
+#if defined(__sparcv9)
+typedef long t_s64;
+typedef unsigned long t_u64;
+#else
+typedef long long t_s64;
+typedef unsigned long long t_u64;
+#endif
+typedef double t_d64;
+
+/***************************************************************/
+
+typedef union {
+    t_d64 d64;
+    struct {
+        t_s32 i0;
+        t_s32 i1;
+    } i32s;
+} d64_2_i32;
+
+/***************************************************************/
+
+#define BUFF_SIZE 256
+
+#define A_BITS 19
+#define A_MASK ((1 << A_BITS) - 1)
+
+/***************************************************************/
+
+static t_u64 mask_cnst[] = {
+    0x8000000080000000ull
+};
+
+/***************************************************************/
+
+#define DEF_VARS(N)                     \
+    t_d64 *py = (t_d64 *)y;             \
+    t_d64 mask = *((t_d64 *)mask_cnst); \
+    t_d64 ca = (1u << 31) - 1;          \
+    t_d64 da = (t_d64)a;                \
+    t_s64 buff[N], s;                   \
+    d64_2_i32 dy
+
+/***************************************************************/
+
+#define MUL_U32_S64_2(i)                           \
+    dy.d64 = vis_fxnor(mask, py[i]);               \
+    buff[2 * (i)] = (ca - (t_d64)dy.i32s.i0) * da; \
+    buff[2 * (i) + 1] = (ca - (t_d64)dy.i32s.i1) * da
+
+#define MUL_U32_S64_2_D(i)                \
+    dy.d64 = vis_fxnor(mask, py[i]);      \
+    d0 = ca - (t_d64)dy.i32s.i0;          \
+    d1 = ca - (t_d64)dy.i32s.i1;          \
+    buff[4 * (i)] = (t_s64)(d0 * da);     \
+    buff[4 * (i) + 1] = (t_s64)(d0 * db); \
+    buff[4 * (i) + 2] = (t_s64)(d1 * da); \
+    buff[4 * (i) + 3] = (t_s64)(d1 * db)
+
+/***************************************************************/
+
+#define ADD_S64_U32(i)      \
+    s = buff[i] + x[i] + c; \
+    z[i] = s;               \
+    c = (s >> 32)
+
+#define ADD_S64_U32_D(i)                                                      \
+    s = buff[2 * (i)] + (((t_s64)(buff[2 * (i) + 1])) << A_BITS) + x[i] + uc; \
+    z[i] = s;                                                                 \
+    uc = ((t_u64)s >> 32)
+
+/***************************************************************/
+
+#define MUL_U32_S64_8(i)  \
+    MUL_U32_S64_2(i);     \
+    MUL_U32_S64_2(i + 1); \
+    MUL_U32_S64_2(i + 2); \
+    MUL_U32_S64_2(i + 3)
+
+#define MUL_U32_S64_D_8(i)  \
+    MUL_U32_S64_2_D(i);     \
+    MUL_U32_S64_2_D(i + 1); \
+    MUL_U32_S64_2_D(i + 2); \
+    MUL_U32_S64_2_D(i + 3)
+
+/***************************************************************/
+
+#define ADD_S64_U32_8(i) \
+    ADD_S64_U32(i);      \
+    ADD_S64_U32(i + 1);  \
+    ADD_S64_U32(i + 2);  \
+    ADD_S64_U32(i + 3);  \
+    ADD_S64_U32(i + 4);  \
+    ADD_S64_U32(i + 5);  \
+    ADD_S64_U32(i + 6);  \
+    ADD_S64_U32(i + 7)
+
+#define ADD_S64_U32_D_8(i) \
+    ADD_S64_U32_D(i);      \
+    ADD_S64_U32_D(i + 1);  \
+    ADD_S64_U32_D(i + 2);  \
+    ADD_S64_U32_D(i + 3);  \
+    ADD_S64_U32_D(i + 4);  \
+    ADD_S64_U32_D(i + 5);  \
+    ADD_S64_U32_D(i + 6);  \
+    ADD_S64_U32_D(i + 7)
+
+/***************************************************************/
+/* NOTE(review): appears to store the low 32 bits of x[i] + a * y[i] + carry in z[i] for n words and return the final carry. */
+t_u32
+mul_add(t_u32 *z, t_u32 *x, t_u32 *y, int n, t_u32 a)
+{
+    if (a < (1 << A_BITS)) {
+        /* a is below 2^A_BITS: each word gets a single product with da = (t_d64)a */
+        if (n == 8) {
+            DEF_VARS(8);
+            t_s32 c = 0;
+
+            MUL_U32_S64_8(0);
+            ADD_S64_U32_8(0);
+
+            return c;
+
+        } else if (n == 16) {
+            DEF_VARS(16);
+            t_s32 c = 0;
+
+            MUL_U32_S64_8(0);
+            MUL_U32_S64_8(4);
+            ADD_S64_U32_8(0);
+            ADD_S64_U32_8(8);
+
+            return c;
+
+        } else {
+            DEF_VARS(BUFF_SIZE); /* NOTE(review): fixed-size stack buffer; n is not range-checked here */
+            t_s32 i, c = 0;
+
+#pragma pipeloop(0)
+            for (i = 0; i < (n + 1) / 2; i++) {
+                MUL_U32_S64_2(i);
+            }
+
+#pragma pipeloop(0)
+            for (i = 0; i < n; i++) {
+                ADD_S64_U32(i);
+            }
+
+            return c;
+        }
+    } else {
+        /* otherwise a is split: da = low A_BITS bits, db = the bits above them */
+        if (n == 8) {
+            DEF_VARS(2 * 8);
+            t_d64 d0, d1, db;
+            t_u32 uc = 0;
+
+            da = (t_d64)(a & A_MASK);
+            db = (t_d64)(a >> A_BITS);
+
+            MUL_U32_S64_D_8(0);
+            ADD_S64_U32_D_8(0);
+
+            return uc;
+
+        } else if (n == 16) {
+            DEF_VARS(2 * 16);
+            t_d64 d0, d1, db;
+            t_u32 uc = 0;
+
+            da = (t_d64)(a & A_MASK);
+            db = (t_d64)(a >> A_BITS);
+
+            MUL_U32_S64_D_8(0);
+            MUL_U32_S64_D_8(4);
+            ADD_S64_U32_D_8(0);
+            ADD_S64_U32_D_8(8);
+
+            return uc;
+
+        } else {
+            DEF_VARS(2 * BUFF_SIZE); /* NOTE(review): fixed-size stack buffer; n is not range-checked here */
+            t_d64 d0, d1, db;
+            t_u32 i, uc = 0;
+
+            da = (t_d64)(a & A_MASK);
+            db = (t_d64)(a >> A_BITS);
+
+#pragma pipeloop(0)
+            for (i = 0; i < (n + 1) / 2; i++) {
+                MUL_U32_S64_2_D(i);
+            }
+
+#pragma pipeloop(0)
+            for (i = 0; i < n; i++) {
+                ADD_S64_U32_D(i);
+            }
+
+            return uc;
+        }
+    }
+}
+
+/***************************************************************/
+
+t_u32
+mul_add_inp(t_u32 *x, t_u32 *y, int n, t_u32 a)
+{
+    return mul_add(x, x, y, n, a); /* in-place form: x is passed as both the z and x arguments */
+}
+
+/***************************************************************/
diff --git a/security/nss/lib/freebl/mpi/mpv_sparcv8.s b/security/nss/lib/freebl/mpi/mpv_sparcv8.s
new file mode 100644
index 000000000..66122a1d9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpv_sparcv8.s
@@ -0,0 +1,1607 @@
+! Inner multiply loop functions for hybrid 32/64-bit Sparc v8plus CPUs.
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+	.section	".text",#alloc,#execinstr
+/* 000000	   3 ( 0  0) */		.file	"mpv_sparc.c"
+/* 000000	  14 ( 0  0) */		.align	8
+!
+! SUBROUTINE .L_const_seg_900000106
+!
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION	(ISSUE TIME)	(COMPLETION TIME)
+
+        .L_const_seg_900000106:		/* frequency 1.0 confidence 0.0 */	! two 8-byte constants that mul_add reads with ldd at offsets 0 and 8
+/* 000000	  19 ( 0  0) */		.word	1127219200,0	! 0x43300000,0x00000000 = IEEE-754 double 2^52; mul_add copies the high word, loads a 32-bit integer as the low word, then subtracts 2^52 to obtain that integer as a double
+/* 0x0008	  20 ( 0  0) */		.word	1105199103,-4194304	! 0x41DFFFFF,0xFFC00000 = IEEE-754 double 2147483647.0 (2^31-1); used in mul_add as the minuend of the fsubd applied to each fitod result
+/* 0x0010	  21 ( 0  0) */		.align	16
+/* 0x0010	  27 ( 0  0) */		.global	mul_add
+
+!
+! ENTRY mul_add
+!
+
+        .global mul_add
+        mul_add:		/* frequency 1.0 confidence 0.0 */
+/* 0x0010	  29 ( 0  1) */		sethi	%hi(0x1800),%g1
+/* 0x0014	  30 ( 0  1) */		sethi	%hi(mask_cnst),%g2
+/* 0x0018	  31 ( 1  2) */		xor	%g1,-984,%g1
+/* 0x001c	  32 ( 1  2) */		add	%g2,%lo(mask_cnst),%g2
+/* 0x0020	  33 ( 2  4) */		save	%sp,%g1,%sp
+
+!
+! ENTRY .L900000154
+!
+
+        .L900000154:		/* frequency 1.0 confidence 0.0 */
+/* 0x0024	  35 ( 0  2) */		call	(.+0x8)	! params = 	! Result = 
+/* 0x0028	     ( 1  2) */		sethi	%hi((_GLOBAL_OFFSET_TABLE_-(.L900000154-.))),%g5
+/* 0x002c	 177 ( 2  3) */		sethi	%hi(.L_const_seg_900000106),%g3
+/* 0x0030	 178 ( 2  3) */		add	%g5,%lo((_GLOBAL_OFFSET_TABLE_-(.L900000154-.))),%g5
+/* 0x0034	 179 ( 3  4) */		or	%g0,%i4,%o1
+/* 0x0038	 180 ( 3  4) */		st	%o1,[%fp+84]
+/* 0x003c	 181 ( 3  4) */		add	%g5,%o7,%o3
+/* 0x0040	 182 ( 4  5) */		add	%g3,%lo(.L_const_seg_900000106),%g3
+/* 0x0044	 183 ( 4  6) */		ld	[%o3+%g2],%g2
+/* 0x0048	 184 ( 4  5) */		or	%g0,%i3,%o2
+/* 0x004c	 185 ( 5  6) */		sethi	%hi(0x80000),%g4
+/* 0x0050	 186 ( 5  7) */		ld	[%o3+%g3],%o0
+/* 0x0054	 187 ( 5  6) */		or	%g0,%i2,%g5
+/* 0x0058	 188 ( 6  7) */		or	%g0,%o2,%o3
+/* 0x005c	 189 ( 6 10) */		ldd	[%g2],%f0
+/* 0x0060	 190 ( 6  7) */		subcc	%o1,%g4,%g0
+/* 0x0064	 191 ( 6  7) */		bcc,pn	%icc,.L77000048	! tprob=0.50
+/* 0x0068	     ( 7  8) */		subcc	%o2,8,%g0
+/* 0x006c	 193 ( 7  8) */		bne,pn	%icc,.L77000037	! tprob=0.50
+/* 0x0070	     ( 8 12) */		ldd	[%o0],%f8
+/* 0x0074	 195 ( 9 13) */		ldd	[%g5],%f4
+/* 0x0078	 196 (10 14) */		ldd	[%g5+8],%f6
+/* 0x007c	 197 (11 15) */		ldd	[%g5+16],%f10
+/* 0x0080	 198 (11 14) */		fmovs	%f8,%f12
+/* 0x0084	 199 (12 16) */		fxnor	%f0,%f4,%f4
+/* 0x0088	 200 (12 14) */		ld	[%fp+84],%f13
+/* 0x008c	 201 (13 17) */		ldd	[%o0+8],%f14
+/* 0x0090	 202 (13 17) */		fxnor	%f0,%f6,%f6
+/* 0x0094	 203 (14 18) */		ldd	[%g5+24],%f16
+/* 0x0098	 204 (14 18) */		fxnor	%f0,%f10,%f10
+/* 0x009c	 208 (15 17) */		ld	[%i1],%g2
+/* 0x00a0	 209 (15 20) */		fsubd	%f12,%f8,%f8
+/* 0x00a4	 210 (16 21) */		fitod	%f4,%f18
+/* 0x00a8	 211 (16 18) */		ld	[%i1+4],%g3
+/* 0x00ac	 212 (17 22) */		fitod	%f5,%f4
+/* 0x00b0	 213 (17 19) */		ld	[%i1+8],%g4
+/* 0x00b4	 214 (18 23) */		fitod	%f6,%f20
+/* 0x00b8	 215 (18 20) */		ld	[%i1+12],%g5
+/* 0x00bc	 216 (19 21) */		ld	[%i1+16],%o0
+/* 0x00c0	 217 (19 24) */		fitod	%f7,%f6
+/* 0x00c4	 218 (20 22) */		ld	[%i1+20],%o1
+/* 0x00c8	 219 (20 24) */		fxnor	%f0,%f16,%f16
+/* 0x00cc	 220 (21 26) */		fsubd	%f14,%f18,%f12
+/* 0x00d0	 221 (21 23) */		ld	[%i1+24],%o2
+/* 0x00d4	 222 (22 27) */		fsubd	%f14,%f4,%f4
+/* 0x00d8	 223 (22 24) */		ld	[%i1+28],%o3
+/* 0x00dc	 224 (23 28) */		fitod	%f10,%f18
+/* 0x00e0	 225 (24 29) */		fsubd	%f14,%f20,%f20
+/* 0x00e4	 226 (25 30) */		fitod	%f11,%f10
+/* 0x00e8	 227 (26 31) */		fsubd	%f14,%f6,%f6
+/* 0x00ec	 228 (26 31) */		fmuld	%f12,%f8,%f12
+/* 0x00f0	 229 (27 32) */		fitod	%f16,%f22
+/* 0x00f4	 230 (27 32) */		fmuld	%f4,%f8,%f4
+/* 0x00f8	 231 (28 33) */		fsubd	%f14,%f18,%f18
+/* 0x00fc	 232 (29 34) */		fitod	%f17,%f16
+/* 0x0100	 233 (29 34) */		fmuld	%f20,%f8,%f20
+/* 0x0104	 234 (30 35) */		fsubd	%f14,%f10,%f10
+/* 0x0108	 235 (31 36) */		fdtox	%f12,%f12
+/* 0x010c	 236 (31 32) */		std	%f12,[%sp+152]
+/* 0x0110	 237 (31 36) */		fmuld	%f6,%f8,%f6
+/* 0x0114	 238 (32 37) */		fdtox	%f4,%f4
+/* 0x0118	 239 (32 33) */		std	%f4,[%sp+144]
+/* 0x011c	 240 (33 38) */		fsubd	%f14,%f22,%f4
+/* 0x0120	 241 (33 38) */		fmuld	%f18,%f8,%f12
+/* 0x0124	 242 (34 39) */		fdtox	%f20,%f18
+/* 0x0128	 243 (34 35) */		std	%f18,[%sp+136]
+/* 0x012c	 244 (35 37) */		ldx	[%sp+152],%o4
+/* 0x0130	 245 (35 40) */		fsubd	%f14,%f16,%f14
+/* 0x0134	 246 (35 40) */		fmuld	%f10,%f8,%f10
+/* 0x0138	 247 (36 41) */		fdtox	%f6,%f6
+/* 0x013c	 248 (36 37) */		std	%f6,[%sp+128]
+/* 0x0140	 249 (37 39) */		ldx	[%sp+144],%o5
+/* 0x0144	 250 (37 38) */		add	%o4,%g2,%o4
+/* 0x0148	 251 (38 39) */		st	%o4,[%i0]
+/* 0x014c	 252 (38 39) */		srax	%o4,32,%g2
+/* 0x0150	 253 (38 43) */		fdtox	%f12,%f6
+/* 0x0154	 254 (38 43) */		fmuld	%f4,%f8,%f4
+/* 0x0158	 255 (39 40) */		std	%f6,[%sp+120]
+/* 0x015c	 256 (39 40) */		add	%o5,%g3,%g3
+/* 0x0160	 257 (40 42) */		ldx	[%sp+136],%o7
+/* 0x0164	 258 (40 41) */		add	%g3,%g2,%g2
+/* 0x0168	 259 (40 45) */		fmuld	%f14,%f8,%f6
+/* 0x016c	 260 (40 45) */		fdtox	%f10,%f8
+/* 0x0170	 261 (41 42) */		std	%f8,[%sp+112]
+/* 0x0174	 262 (41 42) */		srax	%g2,32,%o5
+/* 0x0178	 263 (42 44) */		ldx	[%sp+128],%g3
+/* 0x017c	 264 (42 43) */		add	%o7,%g4,%g4
+/* 0x0180	 265 (43 44) */		st	%g2,[%i0+4]
+/* 0x0184	 266 (43 44) */		add	%g4,%o5,%g4
+/* 0x0188	 267 (43 48) */		fdtox	%f4,%f4
+/* 0x018c	 268 (44 46) */		ldx	[%sp+120],%o5
+/* 0x0190	 269 (44 45) */		add	%g3,%g5,%g3
+/* 0x0194	 270 (44 45) */		srax	%g4,32,%g5
+/* 0x0198	 271 (45 46) */		std	%f4,[%sp+104]
+/* 0x019c	 272 (45 46) */		add	%g3,%g5,%g3
+/* 0x01a0	 273 (45 50) */		fdtox	%f6,%f4
+/* 0x01a4	 274 (46 47) */		std	%f4,[%sp+96]
+/* 0x01a8	 275 (46 47) */		add	%o5,%o0,%o0
+/* 0x01ac	 276 (46 47) */		srax	%g3,32,%o5
+/* 0x01b0	 277 (47 49) */		ldx	[%sp+112],%g5
+/* 0x01b4	 278 (47 48) */		add	%o0,%o5,%o0
+/* 0x01b8	 279 (48 49) */		st	%g4,[%i0+8]
+/* 0x01bc	 280 (49 51) */		ldx	[%sp+104],%o5
+/* 0x01c0	 281 (49 50) */		add	%g5,%o1,%o1
+/* 0x01c4	 282 (49 50) */		srax	%o0,32,%g5
+/* 0x01c8	 283 (50 51) */		st	%o0,[%i0+16]
+/* 0x01cc	 284 (50 51) */		add	%o1,%g5,%o1
+/* 0x01d0	 285 (51 53) */		ldx	[%sp+96],%g5
+/* 0x01d4	 286 (51 52) */		add	%o5,%o2,%o2
+/* 0x01d8	 287 (51 52) */		srax	%o1,32,%o5
+/* 0x01dc	 288 (52 53) */		st	%o1,[%i0+20]
+/* 0x01e0	 289 (52 53) */		add	%o2,%o5,%o2
+/* 0x01e4	 290 (53 54) */		st	%o2,[%i0+24]
+/* 0x01e8	 291 (53 54) */		srax	%o2,32,%g4
+/* 0x01ec	 292 (53 54) */		add	%g5,%o3,%g2
+/* 0x01f0	 293 (54 55) */		st	%g3,[%i0+12]
+/* 0x01f4	 294 (54 55) */		add	%g2,%g4,%g2
+/* 0x01f8	 295 (55 56) */		st	%g2,[%i0+28]
+/* 0x01fc	 299 (55 56) */		srax	%g2,32,%o7
+/* 0x0200	 300 (56 57) */		or	%g0,%o7,%i0
+/* 0x0204	     (57 64) */		ret	! Result =  %o1 %o0 %f0 %f1
+/* 0x0208	     (59 61) */		restore	%g0,%g0,%g0
+
+!
+! ENTRY .L77000037
+!
+
+        .L77000037:		/* frequency 1.0 confidence 0.0 */
+/* 0x020c	 307 ( 0  1) */		subcc	%o2,16,%g0
+/* 0x0210	 308 ( 0  1) */		bne,pn	%icc,.L77000076	! tprob=0.50
+/* 0x0214	     ( 1  5) */		ldd	[%o0],%f8
+/* 0x0218	 310 ( 2  6) */		ldd	[%g5],%f4
+/* 0x021c	 311 ( 3  7) */		ldd	[%g5+8],%f6
+/* 0x0220	 317 ( 4  8) */		ldd	[%o0+8],%f14
+/* 0x0224	 318 ( 4  7) */		fmovs	%f8,%f12
+/* 0x0228	 319 ( 5  7) */		ld	[%fp+84],%f13
+/* 0x022c	 320 ( 5  9) */		fxnor	%f0,%f4,%f4
+/* 0x0230	 321 ( 6 10) */		ldd	[%g5+16],%f10
+/* 0x0234	 322 ( 6 10) */		fxnor	%f0,%f6,%f6
+/* 0x0238	 323 ( 7 11) */		ldd	[%g5+24],%f16
+/* 0x023c	 324 ( 8 12) */		ldd	[%g5+32],%f20
+/* 0x0240	 325 ( 8 13) */		fsubd	%f12,%f8,%f8
+/* 0x0244	 331 ( 9 11) */		ld	[%i1+40],%o7
+/* 0x0248	 332 ( 9 14) */		fitod	%f4,%f18
+/* 0x024c	 333 (10 14) */		ldd	[%g5+40],%f22
+/* 0x0250	 334 (10 15) */		fitod	%f5,%f4
+/* 0x0254	 335 (11 12) */		stx	%o7,[%sp+96]
+/* 0x0258	 336 (11 16) */		fitod	%f6,%f24
+/* 0x025c	 337 (12 14) */		ld	[%i1+44],%o7
+/* 0x0260	 338 (12 16) */		fxnor	%f0,%f10,%f10
+/* 0x0264	 339 (13 17) */		ldd	[%g5+48],%f26
+/* 0x0268	 340 (13 18) */		fitod	%f7,%f6
+/* 0x026c	 341 (14 15) */		stx	%o7,[%sp+104]
+/* 0x0270	 342 (14 19) */		fsubd	%f14,%f18,%f18
+/* 0x0274	 343 (15 17) */		ld	[%i1+48],%o7
+/* 0x0278	 344 (15 20) */		fsubd	%f14,%f4,%f4
+/* 0x027c	 345 (16 18) */		ld	[%i1+36],%o5
+/* 0x0280	 346 (16 21) */		fitod	%f10,%f28
+/* 0x0284	 347 (17 18) */		stx	%o7,[%sp+112]
+/* 0x0288	 348 (17 21) */		fxnor	%f0,%f16,%f16
+/* 0x028c	 349 (18 20) */		ld	[%i1],%g2
+/* 0x0290	 350 (18 23) */		fsubd	%f14,%f24,%f24
+/* 0x0294	 351 (19 20) */		stx	%o5,[%sp+120]
+/* 0x0298	 352 (19 24) */		fitod	%f11,%f10
+/* 0x029c	 353 (19 24) */		fmuld	%f18,%f8,%f18
+/* 0x02a0	 354 (20 22) */		ld	[%i1+52],%o5
+/* 0x02a4	 355 (20 25) */		fsubd	%f14,%f6,%f6
+/* 0x02a8	 356 (20 25) */		fmuld	%f4,%f8,%f4
+/* 0x02ac	 357 (21 26) */		fitod	%f16,%f30
+/* 0x02b0	 358 (22 26) */		fxnor	%f0,%f20,%f20
+/* 0x02b4	 359 (22 24) */		ld	[%i1+4],%g3
+/* 0x02b8	 360 (23 27) */		ldd	[%g5+56],%f2
+/* 0x02bc	 361 (23 28) */		fsubd	%f14,%f28,%f28
+/* 0x02c0	 362 (23 28) */		fmuld	%f24,%f8,%f24
+/* 0x02c4	 363 (24 25) */		stx	%o5,[%sp+128]
+/* 0x02c8	 364 (24 29) */		fdtox	%f18,%f18
+/* 0x02cc	 365 (25 26) */		std	%f18,[%sp+272]
+/* 0x02d0	 366 (25 30) */		fitod	%f17,%f16
+/* 0x02d4	 367 (25 30) */		fmuld	%f6,%f8,%f6
+/* 0x02d8	 368 (26 31) */		fsubd	%f14,%f10,%f10
+/* 0x02dc	 369 (27 32) */		fitod	%f20,%f18
+/* 0x02e0	 370 (28 33) */		fdtox	%f4,%f4
+/* 0x02e4	 371 (28 29) */		std	%f4,[%sp+264]
+/* 0x02e8	 372 (28 33) */		fmuld	%f28,%f8,%f28
+/* 0x02ec	 373 (29 31) */		ld	[%i1+8],%g4
+/* 0x02f0	 374 (29 34) */		fsubd	%f14,%f30,%f4
+/* 0x02f4	 375 (30 34) */		fxnor	%f0,%f22,%f22
+/* 0x02f8	 376 (30 32) */		ld	[%i1+12],%g5
+/* 0x02fc	 377 (31 33) */		ld	[%i1+16],%o0
+/* 0x0300	 378 (31 36) */		fitod	%f21,%f20
+/* 0x0304	 379 (31 36) */		fmuld	%f10,%f8,%f10
+/* 0x0308	 380 (32 34) */		ld	[%i1+20],%o1
+/* 0x030c	 381 (32 37) */		fdtox	%f24,%f24
+/* 0x0310	 382 (33 34) */		std	%f24,[%sp+256]
+/* 0x0314	 383 (33 38) */		fsubd	%f14,%f16,%f16
+/* 0x0318	 384 (34 36) */		ldx	[%sp+272],%o7
+/* 0x031c	 385 (34 39) */		fdtox	%f6,%f6
+/* 0x0320	 386 (34 39) */		fmuld	%f4,%f8,%f4
+/* 0x0324	 387 (35 36) */		std	%f6,[%sp+248]
+/* 0x0328	 388 (35 40) */		fitod	%f22,%f24
+/* 0x032c	 389 (36 38) */		ld	[%i1+32],%o4
+/* 0x0330	 390 (36 41) */		fsubd	%f14,%f18,%f6
+/* 0x0334	 391 (36 37) */		add	%o7,%g2,%g2
+/* 0x0338	 392 (37 39) */		ldx	[%sp+264],%o7
+/* 0x033c	 393 (37 41) */		fxnor	%f0,%f26,%f26
+/* 0x0340	 394 (37 38) */		srax	%g2,32,%o5
+/* 0x0344	 395 (38 39) */		st	%g2,[%i0]
+/* 0x0348	 396 (38 43) */		fitod	%f23,%f18
+/* 0x034c	 397 (38 43) */		fmuld	%f16,%f8,%f16
+/* 0x0350	 398 (39 41) */		ldx	[%sp+248],%g2
+/* 0x0354	 399 (39 44) */		fdtox	%f28,%f22
+/* 0x0358	 400 (39 40) */		add	%o7,%g3,%g3
+/* 0x035c	 401 (40 42) */		ldx	[%sp+256],%o7
+/* 0x0360	 402 (40 45) */		fsubd	%f14,%f20,%f20
+/* 0x0364	 403 (40 41) */		add	%g3,%o5,%g3
+/* 0x0368	 404 (41 42) */		std	%f22,[%sp+240]
+/* 0x036c	 405 (41 46) */		fitod	%f26,%f22
+/* 0x0370	 406 (41 42) */		srax	%g3,32,%o5
+/* 0x0374	 407 (41 42) */		add	%g2,%g5,%g2
+/* 0x0378	 408 (42 43) */		st	%g3,[%i0+4]
+/* 0x037c	 409 (42 47) */		fdtox	%f10,%f10
+/* 0x0380	 410 (42 43) */		add	%o7,%g4,%g4
+/* 0x0384	 411 (42 47) */		fmuld	%f6,%f8,%f6
+/* 0x0388	 412 (43 44) */		std	%f10,[%sp+232]
+/* 0x038c	 413 (43 47) */		fxnor	%f0,%f2,%f12
+/* 0x0390	 414 (43 44) */		add	%g4,%o5,%g4
+/* 0x0394	 415 (44 45) */		st	%g4,[%i0+8]
+/* 0x0398	 416 (44 45) */		srax	%g4,32,%o5
+/* 0x039c	 417 (44 49) */		fsubd	%f14,%f24,%f10
+/* 0x03a0	 418 (45 47) */		ldx	[%sp+240],%o7
+/* 0x03a4	 419 (45 50) */		fdtox	%f4,%f4
+/* 0x03a8	 420 (45 46) */		add	%g2,%o5,%g2
+/* 0x03ac	 421 (45 50) */		fmuld	%f20,%f8,%f20
+/* 0x03b0	 422 (46 47) */		std	%f4,[%sp+224]
+/* 0x03b4	 423 (46 47) */		srax	%g2,32,%g5
+/* 0x03b8	 424 (46 51) */		fsubd	%f14,%f18,%f4
+/* 0x03bc	 425 (47 48) */		st	%g2,[%i0+12]
+/* 0x03c0	 426 (47 52) */		fitod	%f27,%f24
+/* 0x03c4	 427 (47 48) */		add	%o7,%o0,%g3
+/* 0x03c8	 428 (48 50) */		ldx	[%sp+232],%o5
+/* 0x03cc	 429 (48 53) */		fdtox	%f16,%f16
+/* 0x03d0	 430 (48 49) */		add	%g3,%g5,%g2
+/* 0x03d4	 431 (49 50) */		std	%f16,[%sp+216]
+/* 0x03d8	 432 (49 50) */		srax	%g2,32,%g4
+/* 0x03dc	 433 (49 54) */		fitod	%f12,%f18
+/* 0x03e0	 434 (49 54) */		fmuld	%f10,%f8,%f10
+/* 0x03e4	 435 (50 51) */		st	%g2,[%i0+16]
+/* 0x03e8	 436 (50 55) */		fsubd	%f14,%f22,%f16
+/* 0x03ec	 437 (50 51) */		add	%o5,%o1,%g2
+/* 0x03f0	 438 (51 53) */		ld	[%i1+24],%o2
+/* 0x03f4	 439 (51 56) */		fitod	%f13,%f12
+/* 0x03f8	 440 (51 52) */		add	%g2,%g4,%g2
+/* 0x03fc	 441 (51 56) */		fmuld	%f4,%f8,%f22
+/* 0x0400	 442 (52 54) */		ldx	[%sp+224],%g3
+/* 0x0404	 443 (52 53) */		srax	%g2,32,%g4
+/* 0x0408	 444 (52 57) */		fdtox	%f6,%f6
+/* 0x040c	 445 (53 54) */		std	%f6,[%sp+208]
+/* 0x0410	 446 (53 58) */		fdtox	%f20,%f6
+/* 0x0414	 447 (54 55) */		stx	%o4,[%sp+136]
+/* 0x0418	 448 (54 59) */		fsubd	%f14,%f24,%f4
+/* 0x041c	 449 (55 56) */		std	%f6,[%sp+200]
+/* 0x0420	 450 (55 60) */		fsubd	%f14,%f18,%f6
+/* 0x0424	 451 (55 60) */		fmuld	%f16,%f8,%f16
+/* 0x0428	 452 (56 57) */		st	%g2,[%i0+20]
+/* 0x042c	 453 (56 57) */		add	%g3,%o2,%g2
+/* 0x0430	 454 (56 61) */		fdtox	%f10,%f10
+/* 0x0434	 455 (57 59) */		ld	[%i1+28],%o3
+/* 0x0438	 456 (57 58) */		add	%g2,%g4,%g2
+/* 0x043c	 457 (58 60) */		ldx	[%sp+216],%g5
+/* 0x0440	 458 (58 59) */		srax	%g2,32,%g4
+/* 0x0444	 459 (59 60) */		std	%f10,[%sp+192]
+/* 0x0448	 460 (59 64) */		fsubd	%f14,%f12,%f10
+/* 0x044c	 461 (59 64) */		fmuld	%f4,%f8,%f4
+/* 0x0450	 462 (60 61) */		st	%g2,[%i0+24]
+/* 0x0454	 463 (60 61) */		add	%g5,%o3,%g2
+/* 0x0458	 464 (60 65) */		fdtox	%f22,%f12
+/* 0x045c	 465 (60 65) */		fmuld	%f6,%f8,%f6
+/* 0x0460	 466 (61 63) */		ldx	[%sp+136],%o0
+/* 0x0464	 467 (61 62) */		add	%g2,%g4,%g2
+/* 0x0468	 468 (62 64) */		ldx	[%sp+208],%g3
+/* 0x046c	 469 (62 63) */		srax	%g2,32,%g4
+/* 0x0470	 470 (63 65) */		ldx	[%sp+120],%o1
+/* 0x0474	 471 (64 66) */		ldx	[%sp+200],%g5
+/* 0x0478	 472 (64 65) */		add	%g3,%o0,%g3
+/* 0x047c	 473 (64 69) */		fdtox	%f4,%f4
+/* 0x0480	 474 (64 69) */		fmuld	%f10,%f8,%f8
+/* 0x0484	 475 (65 66) */		std	%f12,[%sp+184]
+/* 0x0488	 476 (65 66) */		add	%g3,%g4,%g3
+/* 0x048c	 477 (65 70) */		fdtox	%f16,%f12
+/* 0x0490	 478 (66 67) */		std	%f12,[%sp+176]
+/* 0x0494	 479 (66 67) */		srax	%g3,32,%o0
+/* 0x0498	 480 (66 67) */		add	%g5,%o1,%g5
+/* 0x049c	 481 (67 69) */		ldx	[%sp+192],%o2
+/* 0x04a0	 482 (67 68) */		add	%g5,%o0,%g5
+/* 0x04a4	 483 (68 70) */		ldx	[%sp+96],%g4
+/* 0x04a8	 484 (68 69) */		srax	%g5,32,%o1
+/* 0x04ac	 485 (69 71) */		ld	[%i1+56],%o4
+/* 0x04b0	 486 (70 72) */		ldx	[%sp+104],%o0
+/* 0x04b4	 487 (70 71) */		add	%o2,%g4,%g4
+/* 0x04b8	 488 (71 72) */		std	%f4,[%sp+168]
+/* 0x04bc	 489 (71 72) */		add	%g4,%o1,%g4
+/* 0x04c0	 490 (71 76) */		fdtox	%f6,%f4
+/* 0x04c4	 491 (72 74) */		ldx	[%sp+184],%o3
+/* 0x04c8	 492 (72 73) */		srax	%g4,32,%o2
+/* 0x04cc	 493 (73 75) */		ldx	[%sp+112],%o1
+/* 0x04d0	 494 (74 75) */		std	%f4,[%sp+160]
+/* 0x04d4	 495 (74 75) */		add	%o3,%o0,%o0
+/* 0x04d8	 496 (74 79) */		fdtox	%f8,%f4
+/* 0x04dc	 497 (75 77) */		ldx	[%sp+176],%o5
+/* 0x04e0	 498 (75 76) */		add	%o0,%o2,%o0
+/* 0x04e4	 499 (76 77) */		stx	%o4,[%sp+144]
+/* 0x04e8	 500 (77 78) */		st	%g2,[%i0+28]
+/* 0x04ec	 501 (77 78) */		add	%o5,%o1,%g2
+/* 0x04f0	 502 (77 78) */		srax	%o0,32,%o1
+/* 0x04f4	 503 (78 79) */		std	%f4,[%sp+152]
+/* 0x04f8	 504 (78 79) */		add	%g2,%o1,%o1
+/* 0x04fc	 505 (79 81) */		ldx	[%sp+168],%o7
+/* 0x0500	 506 (79 80) */		srax	%o1,32,%o3
+/* 0x0504	 507 (80 82) */		ldx	[%sp+128],%o2
+/* 0x0508	 508 (81 83) */		ld	[%i1+60],%o4
+/* 0x050c	 509 (82 83) */		add	%o7,%o2,%o2
+/* 0x0510	 510 (83 84) */		add	%o2,%o3,%o2
+/* 0x0514	 511 (83 85) */		ldx	[%sp+144],%o5
+/* 0x0518	 512 (84 86) */		ldx	[%sp+160],%g2
+/* 0x051c	 513 (85 87) */		ldx	[%sp+152],%o3
+/* 0x0520	 514 (86 87) */		st	%g3,[%i0+32]
+/* 0x0524	 515 (86 87) */		add	%g2,%o5,%g2
+/* 0x0528	 516 (86 87) */		srax	%o2,32,%o5
+/* 0x052c	 517 (87 88) */		st	%g5,[%i0+36]
+/* 0x0530	 518 (87 88) */		add	%g2,%o5,%g2
+/* 0x0534	 519 (87 88) */		add	%o3,%o4,%g3
+/* 0x0538	 520 (88 89) */		st	%o0,[%i0+44]
+/* 0x053c	 521 (88 89) */		srax	%g2,32,%g5
+/* 0x0540	 522 (89 90) */		st	%o1,[%i0+48]
+/* 0x0544	 523 (89 90) */		add	%g3,%g5,%g3
+/* 0x0548	 524 (90 91) */		st	%o2,[%i0+52]
+/* 0x054c	 528 (90 91) */		srax	%g3,32,%o7
+/* 0x0550	 529 (91 92) */		st	%g4,[%i0+40]
+/* 0x0554	 530 (92 93) */		st	%g2,[%i0+56]
+/* 0x0558	 531 (93 94) */		st	%g3,[%i0+60]
+/* 0x055c	 532 (93 94) */		or	%g0,%o7,%i0
+/* 0x0560	     (94 101) */		ret	! Result =  %o1 %o0 %f0 %f1
+/* 0x0564	     (96 98) */		restore	%g0,%g0,%g0
+
+!
+! ENTRY .L77000076
+!
+
+        .L77000076:		/* frequency 1.0 confidence 0.0 */
+/* 0x0568	 540 ( 0  4) */		ldd	[%o0],%f6
+/* 0x056c	 546 ( 0  1) */		add	%o2,1,%g2
+/* 0x0570	 547 ( 0  3) */		fmovd	%f0,%f14
+/* 0x0574	 548 ( 0  1) */		or	%g0,0,%o7
+/* 0x0578	 549 ( 1  3) */		ld	[%fp+84],%f9
+/* 0x057c	 550 ( 1  2) */		srl	%g2,31,%g3
+/* 0x0580	 551 ( 1  2) */		add	%fp,-2264,%o5
+/* 0x0584	 552 ( 2  3) */		add	%g2,%g3,%g2
+/* 0x0588	 553 ( 2  6) */		ldd	[%o0+8],%f18
+/* 0x058c	 554 ( 2  3) */		add	%fp,-2256,%o4
+/* 0x0590	 555 ( 3  6) */		fmovs	%f6,%f8
+/* 0x0594	 556 ( 3  4) */		sra	%g2,1,%o1
+/* 0x0598	 557 ( 3  4) */		or	%g0,0,%g2
+/* 0x059c	 558 ( 4  5) */		subcc	%o1,0,%g0
+/* 0x05a0	 559 ( 4  5) */		sub	%o1,1,%o2
+/* 0x05a4	 563 ( 5  6) */		add	%g5,32,%o0
+/* 0x05a8	 564 ( 6 11) */		fsubd	%f8,%f6,%f16
+/* 0x05ac	 565 ( 6  7) */		ble,pt	%icc,.L900000161	! tprob=0.50
+/* 0x05b0	     ( 6  7) */		subcc	%o3,0,%g0
+/* 0x05b4	 567 ( 7  8) */		subcc	%o1,7,%g0
+/* 0x05b8	 568 ( 7  8) */		bl,pn	%icc,.L77000077	! tprob=0.50
+/* 0x05bc	     ( 7  8) */		sub	%o1,2,%o1
+/* 0x05c0	 570 ( 8 12) */		ldd	[%g5],%f2
+/* 0x05c4	 571 ( 9 13) */		ldd	[%g5+8],%f4
+/* 0x05c8	 572 ( 9 10) */		or	%g0,5,%g2
+/* 0x05cc	 573 (10 14) */		ldd	[%g5+16],%f0
+/* 0x05d0	 574 (11 15) */		fxnor	%f14,%f2,%f2
+/* 0x05d4	 575 (11 15) */		ldd	[%g5+24],%f12
+/* 0x05d8	 576 (12 16) */		fxnor	%f14,%f4,%f6
+/* 0x05dc	 577 (12 16) */		ldd	[%g5+32],%f10
+/* 0x05e0	 578 (13 17) */		fxnor	%f14,%f0,%f8
+/* 0x05e4	 579 (15 20) */		fitod	%f3,%f0
+/* 0x05e8	 580 (16 21) */		fitod	%f2,%f4
+/* 0x05ec	 581 (17 22) */		fitod	%f7,%f2
+/* 0x05f0	 582 (18 23) */		fitod	%f6,%f6
+/* 0x05f4	 583 (20 25) */		fsubd	%f18,%f0,%f0
+/* 0x05f8	 584 (21 26) */		fsubd	%f18,%f4,%f4
+
+!
+! ENTRY .L900000149
+!
+
+        .L900000149:		/* frequency 1.0 confidence 0.0 */
+/* 0x05fc	 586 ( 0  4) */		fxnor	%f14,%f12,%f22
+/* 0x0600	 587 ( 0  5) */		fmuld	%f4,%f16,%f4
+/* 0x0604	 588 ( 0  1) */		add	%g2,2,%g2
+/* 0x0608	 589 ( 0  1) */		add	%o4,32,%o4
+/* 0x060c	 590 ( 1  6) */		fitod	%f9,%f24
+/* 0x0610	 591 ( 1  6) */		fmuld	%f0,%f16,%f20
+/* 0x0614	 592 ( 1  2) */		add	%o0,8,%o0
+/* 0x0618	 593 ( 1  2) */		subcc	%g2,%o1,%g0
+/* 0x061c	 594 ( 2  6) */		ldd	[%o0],%f12
+/* 0x0620	 595 ( 2  7) */		fsubd	%f18,%f2,%f0
+/* 0x0624	 596 ( 2  3) */		add	%o5,32,%o5
+/* 0x0628	 597 ( 3  8) */		fsubd	%f18,%f6,%f2
+/* 0x062c	 598 ( 5 10) */		fdtox	%f4,%f4
+/* 0x0630	 599 ( 6 11) */		fdtox	%f20,%f6
+/* 0x0634	 600 ( 6  7) */		std	%f4,[%o5-32]
+/* 0x0638	 601 ( 7 12) */		fitod	%f8,%f4
+/* 0x063c	 602 ( 7  8) */		std	%f6,[%o4-32]
+/* 0x0640	 603 ( 8 12) */		fxnor	%f14,%f10,%f8
+/* 0x0644	 604 ( 8 13) */		fmuld	%f2,%f16,%f6
+/* 0x0648	 605 ( 9 14) */		fitod	%f23,%f2
+/* 0x064c	 606 ( 9 14) */		fmuld	%f0,%f16,%f20
+/* 0x0650	 607 ( 9 10) */		add	%o0,8,%o0
+/* 0x0654	 608 (10 14) */		ldd	[%o0],%f10
+/* 0x0658	 609 (10 15) */		fsubd	%f18,%f24,%f0
+/* 0x065c	 610 (12 17) */		fsubd	%f18,%f4,%f4
+/* 0x0660	 611 (13 18) */		fdtox	%f6,%f6
+/* 0x0664	 612 (14 19) */		fdtox	%f20,%f20
+/* 0x0668	 613 (14 15) */		std	%f6,[%o5-16]
+/* 0x066c	 614 (15 20) */		fitod	%f22,%f6
+/* 0x0670	 615 (15 16) */		ble,pt	%icc,.L900000149	! tprob=0.50
+/* 0x0674	     (15 16) */		std	%f20,[%o4-16]
+
+!
+! ENTRY .L900000152
+!
+
+        .L900000152:		/* frequency 1.0 confidence 0.0 */
+/* 0x0678	 618 ( 0  4) */		fxnor	%f14,%f12,%f12
+/* 0x067c	 619 ( 0  5) */		fmuld	%f0,%f16,%f22
+/* 0x0680	 620 ( 0  1) */		add	%o5,80,%o5
+/* 0x0684	 621 ( 0  1) */		add	%o4,80,%o4
+/* 0x0688	 622 ( 1  5) */		fxnor	%f14,%f10,%f0
+/* 0x068c	 623 ( 1  6) */		fmuld	%f4,%f16,%f24
+/* 0x0690	 624 ( 1  2) */		subcc	%g2,%o2,%g0
+/* 0x0694	 625 ( 1  2) */		add	%o0,8,%g5
+/* 0x0698	 626 ( 2  7) */		fitod	%f8,%f20
+/* 0x069c	 627 ( 3  8) */		fitod	%f9,%f8
+/* 0x06a0	 628 ( 4  9) */		fsubd	%f18,%f6,%f6
+/* 0x06a4	 629 ( 5 10) */		fitod	%f12,%f26
+/* 0x06a8	 630 ( 6 11) */		fitod	%f13,%f4
+/* 0x06ac	 631 ( 7 12) */		fsubd	%f18,%f2,%f12
+/* 0x06b0	 632 ( 8 13) */		fitod	%f0,%f2
+/* 0x06b4	 633 ( 9 14) */		fitod	%f1,%f0
+/* 0x06b8	 634 (10 15) */		fsubd	%f18,%f20,%f10
+/* 0x06bc	 635 (10 15) */		fmuld	%f6,%f16,%f20
+/* 0x06c0	 636 (11 16) */		fsubd	%f18,%f8,%f8
+/* 0x06c4	 637 (12 17) */		fsubd	%f18,%f26,%f6
+/* 0x06c8	 638 (12 17) */		fmuld	%f12,%f16,%f12
+/* 0x06cc	 639 (13 18) */		fsubd	%f18,%f4,%f4
+/* 0x06d0	 640 (14 19) */		fsubd	%f18,%f2,%f2
+/* 0x06d4	 641 (15 20) */		fsubd	%f18,%f0,%f0
+/* 0x06d8	 642 (15 20) */		fmuld	%f10,%f16,%f10
+/* 0x06dc	 643 (16 21) */		fdtox	%f24,%f24
+/* 0x06e0	 644 (16 17) */		std	%f24,[%o5-80]
+/* 0x06e4	 645 (16 21) */		fmuld	%f8,%f16,%f8
+/* 0x06e8	 646 (17 22) */		fdtox	%f22,%f22
+/* 0x06ec	 647 (17 18) */		std	%f22,[%o4-80]
+/* 0x06f0	 648 (17 22) */		fmuld	%f6,%f16,%f6
+/* 0x06f4	 649 (18 23) */		fdtox	%f20,%f20
+/* 0x06f8	 650 (18 19) */		std	%f20,[%o5-64]
+/* 0x06fc	 651 (18 23) */		fmuld	%f4,%f16,%f4
+/* 0x0700	 652 (19 24) */		fdtox	%f12,%f12
+/* 0x0704	 653 (19 20) */		std	%f12,[%o4-64]
+/* 0x0708	 654 (19 24) */		fmuld	%f2,%f16,%f2
+/* 0x070c	 655 (20 25) */		fdtox	%f10,%f10
+/* 0x0710	 656 (20 21) */		std	%f10,[%o5-48]
+/* 0x0714	 657 (20 25) */		fmuld	%f0,%f16,%f0
+/* 0x0718	 658 (21 26) */		fdtox	%f8,%f8
+/* 0x071c	 659 (21 22) */		std	%f8,[%o4-48]
+/* 0x0720	 660 (22 27) */		fdtox	%f6,%f6
+/* 0x0724	 661 (22 23) */		std	%f6,[%o5-32]
+/* 0x0728	 662 (23 28) */		fdtox	%f4,%f4
+/* 0x072c	 663 (23 24) */		std	%f4,[%o4-32]
+/* 0x0730	 664 (24 29) */		fdtox	%f2,%f2
+/* 0x0734	 665 (24 25) */		std	%f2,[%o5-16]
+/* 0x0738	 666 (25 30) */		fdtox	%f0,%f0
+/* 0x073c	 667 (25 26) */		bg,pn	%icc,.L77000043	! tprob=0.50
+/* 0x0740	     (25 26) */		std	%f0,[%o4-16]
+
+!
+! ENTRY .L77000077
+!
+
+        .L77000077:		/* frequency 1.0 confidence 0.0 */
+/* 0x0744	 670 ( 0  4) */		ldd	[%g5],%f0
+
+!
+! ENTRY .L900000160
+!
+
+        .L900000160:		/* frequency 1.0 confidence 0.0 */
+/* 0x0748	 672 ( 0  4) */		fxnor	%f14,%f0,%f0
+/* 0x074c	 673 ( 0  1) */		add	%g2,1,%g2
+/* 0x0750	 674 ( 0  1) */		add	%g5,8,%g5
+/* 0x0754	 675 ( 1  2) */		subcc	%g2,%o2,%g0
+/* 0x0758	 676 ( 4  9) */		fitod	%f0,%f2
+/* 0x075c	 677 ( 5 10) */		fitod	%f1,%f0
+/* 0x0760	 678 ( 9 14) */		fsubd	%f18,%f2,%f2
+/* 0x0764	 679 (10 15) */		fsubd	%f18,%f0,%f0
+/* 0x0768	 680 (14 19) */		fmuld	%f2,%f16,%f2
+/* 0x076c	 681 (15 20) */		fmuld	%f0,%f16,%f0
+/* 0x0770	 682 (19 24) */		fdtox	%f2,%f2
+/* 0x0774	 683 (19 20) */		std	%f2,[%o5]
+/* 0x0778	 684 (19 20) */		add	%o5,16,%o5
+/* 0x077c	 685 (20 25) */		fdtox	%f0,%f0
+/* 0x0780	 686 (20 21) */		std	%f0,[%o4]
+/* 0x0784	 687 (20 21) */		add	%o4,16,%o4
+/* 0x0788	 688 (20 21) */		ble,a,pt	%icc,.L900000160	! tprob=0.50
+/* 0x078c	     (23 27) */		ldd	[%g5],%f0
+
+!
+! ENTRY .L77000043
+!
+
+        .L77000043:		/* frequency 1.0 confidence 0.0 */
+/* 0x0790	 696 ( 0  1) */		subcc	%o3,0,%g0
+
+!
+! ENTRY .L900000161
+!
+
+        .L900000161:		/* frequency 1.0 confidence 0.0 */
+/* 0x0794	 698 ( 0  1) */		ble,a,pt	%icc,.L900000159	! tprob=0.50
+/* 0x0798	     ( 0  1) */		or	%g0,%o7,%i0
+/* 0x079c	 703 ( 0  2) */		ldx	[%fp-2256],%o2
+/* 0x07a0	 704 ( 0  1) */		or	%g0,%i1,%g3
+/* 0x07a4	 705 ( 1  2) */		sub	%o3,1,%o5
+/* 0x07a8	 706 ( 1  2) */		or	%g0,0,%g4
+/* 0x07ac	 707 ( 2  3) */		add	%fp,-2264,%g5
+/* 0x07b0	 708 ( 2  3) */		or	%g0,%i0,%g2
+/* 0x07b4	 709 ( 3  4) */		subcc	%o3,6,%g0
+/* 0x07b8	 710 ( 3  4) */		sub	%o5,2,%o4
+/* 0x07bc	 711 ( 3  4) */		bl,pn	%icc,.L77000078	! tprob=0.50
+/* 0x07c0	     ( 3  5) */		ldx	[%fp-2264],%o0
+/* 0x07c4	 713 ( 4  6) */		ld	[%g3],%o1
+/* 0x07c8	 714 ( 4  5) */		add	%g2,4,%g2
+/* 0x07cc	 715 ( 4  5) */		or	%g0,3,%g4
+/* 0x07d0	 716 ( 5  7) */		ld	[%g3+4],%o3
+/* 0x07d4	 717 ( 5  6) */		add	%g3,8,%g3
+/* 0x07d8	 718 ( 5  6) */		add	%fp,-2240,%g5
+/* 0x07dc	 719 ( 6  7) */		add	%o0,%o1,%o0
+/* 0x07e0	 720 ( 6  8) */		ldx	[%fp-2248],%o1
+/* 0x07e4	 721 ( 7  8) */		st	%o0,[%g2-4]
+/* 0x07e8	 722 ( 7  8) */		srax	%o0,32,%o0
+
+!
+! ENTRY .L900000145
+!
+
+        .L900000145:		/* frequency 1.0 confidence 0.0 */
+/* 0x07ec	 724 ( 0  2) */		ld	[%g3],%o7
+/* 0x07f0	 725 ( 0  1) */		add	%o2,%o3,%o2
+/* 0x07f4	 726 ( 0  1) */		sra	%o0,0,%o3
+/* 0x07f8	 727 ( 1  3) */		ldx	[%g5],%o0
+/* 0x07fc	 728 ( 1  2) */		add	%o2,%o3,%o2
+/* 0x0800	 729 ( 1  2) */		add	%g4,3,%g4
+/* 0x0804	 730 ( 2  3) */		st	%o2,[%g2]
+/* 0x0808	 731 ( 2  3) */		srax	%o2,32,%o3
+/* 0x080c	 732 ( 2  3) */		subcc	%g4,%o4,%g0
+/* 0x0810	 733 ( 3  5) */		ld	[%g3+4],%o2
+/* 0x0814	 734 ( 4  5) */		stx	%o2,[%sp+96]
+/* 0x0818	 735 ( 4  5) */		add	%o1,%o7,%o1
+/* 0x081c	 736 ( 5  7) */		ldx	[%g5+8],%o2
+/* 0x0820	 737 ( 5  6) */		add	%o1,%o3,%o1
+/* 0x0824	 738 ( 5  6) */		add	%g2,12,%g2
+/* 0x0828	 739 ( 6  7) */		st	%o1,[%g2-8]
+/* 0x082c	 740 ( 6  7) */		srax	%o1,32,%o7
+/* 0x0830	 741 ( 6  7) */		add	%g3,12,%g3
+/* 0x0834	 742 ( 7  9) */		ld	[%g3-4],%o3
+/* 0x0838	 743 ( 8 10) */		ldx	[%sp+96],%o1
+/* 0x083c	 744 (10 11) */		add	%o0,%o1,%o0
+/* 0x0840	 745 (10 12) */		ldx	[%g5+16],%o1
+/* 0x0844	 746 (11 12) */		add	%o0,%o7,%o0
+/* 0x0848	 747 (11 12) */		add	%g5,24,%g5
+/* 0x084c	 748 (11 12) */		st	%o0,[%g2-4]
+/* 0x0850	 749 (11 12) */		ble,pt	%icc,.L900000145	! tprob=0.50
+/* 0x0854	     (12 13) */		srax	%o0,32,%o0
+
+!
+! ENTRY .L900000148
+!
+
+        .L900000148:		/* frequency 1.0 confidence 0.0 */
+/* 0x0858	 752 ( 0  1) */		add	%o2,%o3,%o2
+/* 0x085c	 753 ( 0  1) */		sra	%o0,0,%o3
+/* 0x0860	 754 ( 0  2) */		ld	[%g3],%o0
+/* 0x0864	 755 ( 1  2) */		add	%o2,%o3,%o3
+/* 0x0868	 756 ( 1  2) */		add	%g2,8,%g2
+/* 0x086c	 757 ( 2  3) */		srax	%o3,32,%o2
+/* 0x0870	 758 ( 2  3) */		st	%o3,[%g2-8]
+/* 0x0874	 759 ( 2  3) */		add	%o1,%o0,%o0
+/* 0x0878	 760 ( 3  4) */		add	%o0,%o2,%o0
+/* 0x087c	 761 ( 3  4) */		st	%o0,[%g2-4]
+/* 0x0880	 762 ( 3  4) */		subcc	%g4,%o5,%g0
+/* 0x0884	 763 ( 3  4) */		bg,pn	%icc,.L77000061	! tprob=0.50
+/* 0x0888	     ( 4  5) */		srax	%o0,32,%o7
+/* 0x088c	 765 ( 4  5) */		add	%g3,4,%g3
+
+!
+! ENTRY .L77000078
+!
+
+        .L77000078:		/* frequency 1.0 confidence 0.0 */
+/* 0x0890	 767 ( 0  2) */		ld	[%g3],%o2
+
+!
+! ENTRY .L900000158
+!
+
+        .L900000158:		/* frequency 1.0 confidence 0.0 */
+/* 0x0894	 769 ( 0  2) */		ldx	[%g5],%o0
+/* 0x0898	 770 ( 0  1) */		sra	%o7,0,%o1
+/* 0x089c	 771 ( 0  1) */		add	%g4,1,%g4
+/* 0x08a0	 772 ( 1  2) */		add	%g3,4,%g3
+/* 0x08a4	 773 ( 1  2) */		add	%g5,8,%g5
+/* 0x08a8	 774 ( 2  3) */		add	%o0,%o2,%o0
+/* 0x08ac	 775 ( 2  3) */		subcc	%g4,%o5,%g0
+/* 0x08b0	 776 ( 3  4) */		add	%o0,%o1,%o0
+/* 0x08b4	 777 ( 3  4) */		st	%o0,[%g2]
+/* 0x08b8	 778 ( 3  4) */		add	%g2,4,%g2
+/* 0x08bc	 779 ( 4  5) */		srax	%o0,32,%o7
+/* 0x08c0	 780 ( 4  5) */		ble,a,pt	%icc,.L900000158	! tprob=0.50
+/* 0x08c4	     ( 4  6) */		ld	[%g3],%o2
+
+!
+! ENTRY .L77000047
+!
+
+        .L77000047:		/* frequency 1.0 confidence 0.0 */
+/* 0x08c8	 783 ( 0  1) */		or	%g0,%o7,%i0
+/* 0x08cc	     ( 1  8) */		ret	! Result =  %o1 %o0 %f0 %f1
+/* 0x08d0	     ( 3  5) */		restore	%g0,%g0,%g0
+
+!
+! ENTRY .L77000048
+!
+
+        .L77000048:		/* frequency 1.0 confidence 0.0 */
+/* 0x08d4	 794 ( 0  1) */		bne,pn	%icc,.L77000050	! tprob=0.50
+/* 0x08d8	     ( 0  1) */		sethi	%hi(0xfff80000),%g2
+/* 0x08dc	 796 ( 0  4) */		ldd	[%g5],%f4
+/* 0x08e0	 804 ( 0  1) */		srl	%o1,19,%g3
+/* 0x08e4	 805 ( 1  2) */		st	%g3,[%sp+240]
+/* 0x08e8	 806 ( 1  2) */		andn	%o1,%g2,%g2
+/* 0x08ec	 807 ( 2  6) */		ldd	[%o0],%f8
+/* 0x08f0	 808 ( 3  4) */		st	%g2,[%sp+244]
+/* 0x08f4	 809 ( 3  7) */		fxnor	%f0,%f4,%f4
+/* 0x08f8	 810 ( 4  8) */		ldd	[%g5+8],%f6
+/* 0x08fc	 814 ( 5  9) */		ldd	[%o0+8],%f18
+/* 0x0900	 815 ( 5  8) */		fmovs	%f8,%f12
+/* 0x0904	 816 ( 6 10) */		ldd	[%g5+16],%f10
+/* 0x0908	 817 ( 6  9) */		fmovs	%f8,%f16
+/* 0x090c	 818 ( 7 11) */		ldd	[%g5+24],%f20
+/* 0x0910	 819 ( 7 12) */		fitod	%f4,%f14
+/* 0x0914	 823 ( 8 10) */		ld	[%i1],%g2
+/* 0x0918	 824 ( 8 13) */		fitod	%f5,%f4
+/* 0x091c	 825 ( 9 11) */		ld	[%sp+240],%f13
+/* 0x0920	 826 ( 9 13) */		fxnor	%f0,%f6,%f6
+/* 0x0924	 827 (10 12) */		ld	[%sp+244],%f17
+/* 0x0928	 828 (10 14) */		fxnor	%f0,%f10,%f10
+/* 0x092c	 829 (11 13) */		ld	[%i1+28],%o3
+/* 0x0930	 830 (11 15) */		fxnor	%f0,%f20,%f20
+/* 0x0934	 831 (12 14) */		ld	[%i1+4],%g3
+/* 0x0938	 832 (12 17) */		fsubd	%f12,%f8,%f12
+/* 0x093c	 833 (13 14) */		stx	%o3,[%sp+96]
+/* 0x0940	 834 (13 18) */		fsubd	%f18,%f14,%f14
+/* 0x0944	 835 (14 16) */		ld	[%i1+8],%g4
+/* 0x0948	 836 (14 19) */		fsubd	%f16,%f8,%f8
+/* 0x094c	 837 (15 17) */		ld	[%i1+12],%g5
+/* 0x0950	 838 (15 20) */		fsubd	%f18,%f4,%f4
+/* 0x0954	 839 (16 18) */		ld	[%i1+16],%o0
+/* 0x0958	 840 (16 21) */		fitod	%f6,%f22
+/* 0x095c	 841 (17 19) */		ld	[%i1+20],%o1
+/* 0x0960	 842 (17 22) */		fitod	%f7,%f6
+/* 0x0964	 843 (18 20) */		ld	[%i1+24],%o2
+/* 0x0968	 844 (18 23) */		fitod	%f10,%f16
+/* 0x096c	 845 (18 23) */		fmuld	%f14,%f12,%f24
+/* 0x0970	 846 (19 24) */		fitod	%f20,%f28
+/* 0x0974	 847 (19 24) */		fmuld	%f14,%f8,%f14
+/* 0x0978	 848 (20 25) */		fitod	%f11,%f10
+/* 0x097c	 849 (20 25) */		fmuld	%f4,%f12,%f26
+/* 0x0980	 850 (21 26) */		fsubd	%f18,%f22,%f22
+/* 0x0984	 851 (21 26) */		fmuld	%f4,%f8,%f4
+/* 0x0988	 852 (22 27) */		fsubd	%f18,%f6,%f6
+/* 0x098c	 853 (23 28) */		fdtox	%f24,%f24
+/* 0x0990	 854 (23 24) */		std	%f24,[%sp+224]
+/* 0x0994	 855 (24 29) */		fdtox	%f14,%f14
+/* 0x0998	 856 (24 25) */		std	%f14,[%sp+232]
+/* 0x099c	 857 (25 30) */		fdtox	%f26,%f14
+/* 0x09a0	 858 (25 26) */		std	%f14,[%sp+208]
+/* 0x09a4	 859 (26 28) */		ldx	[%sp+224],%o4
+/* 0x09a8	 860 (26 31) */		fitod	%f21,%f20
+/* 0x09ac	 861 (26 31) */		fmuld	%f22,%f12,%f30
+/* 0x09b0	 862 (27 29) */		ldx	[%sp+232],%o5
+/* 0x09b4	 863 (27 32) */		fsubd	%f18,%f16,%f16
+/* 0x09b8	 864 (27 32) */		fmuld	%f22,%f8,%f22
+/* 0x09bc	 865 (28 29) */		sllx	%o4,19,%o4
+/* 0x09c0	 866 (28 33) */		fdtox	%f4,%f4
+/* 0x09c4	 867 (28 29) */		std	%f4,[%sp+216]
+/* 0x09c8	 868 (28 33) */		fmuld	%f6,%f12,%f24
+/* 0x09cc	 869 (29 34) */		fsubd	%f18,%f28,%f26
+/* 0x09d0	 870 (29 30) */		add	%o5,%o4,%o4
+/* 0x09d4	 871 (29 34) */		fmuld	%f6,%f8,%f6
+/* 0x09d8	 872 (30 35) */		fsubd	%f18,%f10,%f10
+/* 0x09dc	 873 (30 31) */		add	%o4,%g2,%g2
+/* 0x09e0	 874 (30 31) */		st	%g2,[%i0]
+/* 0x09e4	 875 (31 33) */		ldx	[%sp+208],%o7
+/* 0x09e8	 876 (31 32) */		srlx	%g2,32,%o5
+/* 0x09ec	 877 (31 36) */		fsubd	%f18,%f20,%f18
+/* 0x09f0	 878 (32 37) */		fdtox	%f30,%f28
+/* 0x09f4	 879 (32 33) */		std	%f28,[%sp+192]
+/* 0x09f8	 880 (32 37) */		fmuld	%f16,%f12,%f14
+/* 0x09fc	 881 (33 34) */		sllx	%o7,19,%o4
+/* 0x0a00	 882 (33 35) */		ldx	[%sp+216],%o7
+/* 0x0a04	 883 (33 38) */		fdtox	%f22,%f20
+/* 0x0a08	 884 (33 38) */		fmuld	%f16,%f8,%f16
+/* 0x0a0c	 885 (34 35) */		std	%f20,[%sp+200]
+/* 0x0a10	 886 (34 39) */		fdtox	%f24,%f20
+/* 0x0a14	 887 (34 39) */		fmuld	%f26,%f12,%f22
+/* 0x0a18	 888 (35 36) */		std	%f20,[%sp+176]
+/* 0x0a1c	 889 (35 36) */		add	%o7,%o4,%o4
+/* 0x0a20	 890 (35 40) */		fdtox	%f6,%f6
+/* 0x0a24	 891 (35 40) */		fmuld	%f10,%f12,%f4
+/* 0x0a28	 892 (36 38) */		ldx	[%sp+192],%o3
+/* 0x0a2c	 893 (36 37) */		add	%o4,%g3,%g3
+/* 0x0a30	 894 (36 41) */		fmuld	%f10,%f8,%f10
+/* 0x0a34	 895 (37 38) */		std	%f6,[%sp+184]
+/* 0x0a38	 896 (37 38) */		add	%g3,%o5,%g3
+/* 0x0a3c	 897 (37 42) */		fdtox	%f14,%f6
+/* 0x0a40	 898 (37 42) */		fmuld	%f26,%f8,%f20
+/* 0x0a44	 899 (38 40) */		ldx	[%sp+200],%o4
+/* 0x0a48	 900 (38 39) */		sllx	%o3,19,%o3
+/* 0x0a4c	 901 (38 39) */		srlx	%g3,32,%o5
+/* 0x0a50	 902 (38 43) */		fdtox	%f16,%f14
+/* 0x0a54	 903 (39 40) */		std	%f6,[%sp+160]
+/* 0x0a58	 904 (39 44) */		fmuld	%f18,%f12,%f12
+/* 0x0a5c	 905 (40 42) */		ldx	[%sp+176],%o7
+/* 0x0a60	 906 (40 41) */		add	%o4,%o3,%o3
+/* 0x0a64	 907 (40 45) */		fdtox	%f4,%f16
+/* 0x0a68	 908 (40 45) */		fmuld	%f18,%f8,%f18
+/* 0x0a6c	 909 (41 42) */		std	%f14,[%sp+168]
+/* 0x0a70	 910 (41 42) */		add	%o3,%g4,%g4
+/* 0x0a74	 911 (41 46) */		fdtox	%f10,%f4
+/* 0x0a78	 912 (42 44) */		ldx	[%sp+184],%o3
+/* 0x0a7c	 913 (42 43) */		sllx	%o7,19,%o4
+/* 0x0a80	 914 (42 43) */		add	%g4,%o5,%g4
+/* 0x0a84	 915 (42 47) */		fdtox	%f22,%f14
+/* 0x0a88	 916 (43 44) */		std	%f16,[%sp+144]
+/* 0x0a8c	 917 (43 44) */		srlx	%g4,32,%o5
+/* 0x0a90	 918 (43 48) */		fdtox	%f20,%f6
+/* 0x0a94	 919 (44 46) */		ldx	[%sp+160],%o7
+/* 0x0a98	 920 (44 45) */		add	%o3,%o4,%o3
+/* 0x0a9c	 921 (44 49) */		fdtox	%f12,%f16
+/* 0x0aa0	 922 (45 46) */		std	%f4,[%sp+152]
+/* 0x0aa4	 923 (45 46) */		add	%o3,%g5,%g5
+/* 0x0aa8	 924 (45 50) */		fdtox	%f18,%f8
+/* 0x0aac	 925 (46 48) */		ldx	[%sp+168],%o3
+/* 0x0ab0	 926 (46 47) */		sllx	%o7,19,%o4
+/* 0x0ab4	 927 (46 47) */		add	%g5,%o5,%g5
+/* 0x0ab8	 928 (47 48) */		std	%f14,[%sp+128]
+/* 0x0abc	 929 (47 48) */		srlx	%g5,32,%o5
+/* 0x0ac0	 930 (48 49) */		std	%f6,[%sp+136]
+/* 0x0ac4	 931 (48 49) */		add	%o3,%o4,%o3
+/* 0x0ac8	 932 (49 50) */		std	%f16,[%sp+112]
+/* 0x0acc	 933 (49 50) */		add	%o3,%o0,%o0
+/* 0x0ad0	 934 (50 52) */		ldx	[%sp+144],%o7
+/* 0x0ad4	 935 (50 51) */		add	%o0,%o5,%o0
+/* 0x0ad8	 936 (51 53) */		ldx	[%sp+152],%o3
+/* 0x0adc	 937 (52 53) */		std	%f8,[%sp+120]
+/* 0x0ae0	 938 (52 53) */		sllx	%o7,19,%o4
+/* 0x0ae4	 939 (52 53) */		srlx	%o0,32,%o7
+/* 0x0ae8	 940 (53 54) */		stx	%o0,[%sp+104]
+/* 0x0aec	 941 (53 54) */		add	%o3,%o4,%o3
+/* 0x0af0	 942 (54 56) */		ldx	[%sp+128],%o5
+/* 0x0af4	 943 (54 55) */		add	%o3,%o1,%o1
+/* 0x0af8	 944 (55 57) */		ldx	[%sp+136],%o0
+/* 0x0afc	 945 (55 56) */		add	%o1,%o7,%o1
+/* 0x0b00	 946 (56 57) */		st	%g3,[%i0+4]
+/* 0x0b04	 947 (56 57) */		sllx	%o5,19,%o3
+/* 0x0b08	 948 (57 59) */		ldx	[%sp+112],%o4
+/* 0x0b0c	 949 (57 58) */		add	%o0,%o3,%o3
+/* 0x0b10	 950 (58 60) */		ldx	[%sp+120],%o0
+/* 0x0b14	 951 (58 59) */		add	%o3,%o2,%o2
+/* 0x0b18	 952 (58 59) */		srlx	%o1,32,%o3
+/* 0x0b1c	 953 (59 60) */		st	%o1,[%i0+20]
+/* 0x0b20	 954 (59 60) */		sllx	%o4,19,%g2
+/* 0x0b24	 955 (59 60) */		add	%o2,%o3,%o2
+/* 0x0b28	 956 (60 62) */		ldx	[%sp+96],%o4
+/* 0x0b2c	 957 (60 61) */		srlx	%o2,32,%g3
+/* 0x0b30	 958 (60 61) */		add	%o0,%g2,%g2
+/* 0x0b34	 959 (61 63) */		ldx	[%sp+104],%o0
+/* 0x0b38	 960 (62 63) */		st	%o2,[%i0+24]
+/* 0x0b3c	 961 (62 63) */		add	%g2,%o4,%g2
+/* 0x0b40	 962 (63 64) */		st	%o0,[%i0+16]
+/* 0x0b44	 963 (63 64) */		add	%g2,%g3,%g2
+/* 0x0b48	 964 (64 65) */		st	%g4,[%i0+8]
+/* 0x0b4c	 968 (64 65) */		srlx	%g2,32,%o7
+/* 0x0b50	 969 (65 66) */		st	%g5,[%i0+12]
+/* 0x0b54	 970 (66 67) */		st	%g2,[%i0+28]
+/* 0x0b58	 971 (66 67) */		or	%g0,%o7,%i0
+/* 0x0b5c	     (67 74) */		ret	! Result =  %o1 %o0 %f0 %f1
+/* 0x0b60	     (69 71) */		restore	%g0,%g0,%g0
+
+!
+! ENTRY .L77000050
+!
+
+        .L77000050:		/* frequency 1.0 confidence 0.0 */
+/* 0x0b64	 978 ( 0  1) */		subcc	%o2,16,%g0
+/* 0x0b68	 979 ( 0  1) */		bne,pn	%icc,.L77000073	! tprob=0.50
+/* 0x0b6c	     ( 0  1) */		sethi	%hi(0xfff80000),%g2
+/* 0x0b70	 981 ( 1  5) */		ldd	[%g5],%f4
+/* 0x0b74	 982 ( 2  6) */		ldd	[%g5+8],%f6
+/* 0x0b78	 989 ( 2  3) */		andn	%o1,%g2,%g2
+/* 0x0b7c	 993 ( 2  3) */		srl	%o1,19,%g3
+/* 0x0b80	 994 ( 3  7) */		ldd	[%g5+16],%f8
+/* 0x0b84	 995 ( 4  8) */		fxnor	%f0,%f4,%f4
+/* 0x0b88	 996 ( 4  5) */		st	%g2,[%sp+356]
+/* 0x0b8c	 997 ( 5  9) */		ldd	[%o0],%f20
+/* 0x0b90	 998 ( 5  9) */		fxnor	%f0,%f6,%f6
+/* 0x0b94	 999 ( 6  7) */		st	%g3,[%sp+352]
+/* 0x0b98	1000 ( 6 10) */		fxnor	%f0,%f8,%f8
+/* 0x0b9c	1005 ( 7 11) */		ldd	[%o0+8],%f30
+/* 0x0ba0	1006 ( 8 13) */		fitod	%f4,%f22
+/* 0x0ba4	1007 ( 8 12) */		ldd	[%g5+24],%f10
+/* 0x0ba8	1008 ( 9 12) */		fmovs	%f20,%f24
+/* 0x0bac	1009 ( 9 13) */		ldd	[%g5+32],%f12
+/* 0x0bb0	1010 (10 15) */		fitod	%f5,%f4
+/* 0x0bb4	1011 (10 14) */		ldd	[%g5+40],%f14
+/* 0x0bb8	1012 (11 14) */		fmovs	%f20,%f26
+/* 0x0bbc	1013 (11 15) */		ldd	[%g5+48],%f16
+/* 0x0bc0	1014 (12 14) */		ld	[%sp+356],%f25
+/* 0x0bc4	1015 (12 17) */		fitod	%f6,%f28
+/* 0x0bc8	1016 (13 15) */		ld	[%sp+352],%f27
+/* 0x0bcc	1017 (13 18) */		fitod	%f8,%f32
+/* 0x0bd0	1018 (14 19) */		fsubd	%f30,%f22,%f22
+/* 0x0bd4	1019 (14 18) */		ldd	[%g5+56],%f18
+/* 0x0bd8	1020 (15 20) */		fsubd	%f24,%f20,%f24
+/* 0x0bdc	1021 (16 21) */		fsubd	%f26,%f20,%f20
+/* 0x0be0	1022 (17 22) */		fsubd	%f30,%f4,%f4
+/* 0x0be4	1023 (18 23) */		fsubd	%f30,%f28,%f26
+/* 0x0be8	1024 (19 24) */		fitod	%f7,%f6
+/* 0x0bec	1025 (20 25) */		fsubd	%f30,%f32,%f28
+/* 0x0bf0	1026 (20 25) */		fmuld	%f22,%f24,%f32
+/* 0x0bf4	1027 (21 26) */		fmuld	%f22,%f20,%f22
+/* 0x0bf8	1028 (21 25) */		fxnor	%f0,%f10,%f10
+/* 0x0bfc	1029 (22 27) */		fmuld	%f4,%f24,%f44
+/* 0x0c00	1030 (22 27) */		fitod	%f9,%f8
+/* 0x0c04	1031 (23 28) */		fmuld	%f4,%f20,%f4
+/* 0x0c08	1032 (23 27) */		fxnor	%f0,%f12,%f12
+/* 0x0c0c	1033 (24 29) */		fsubd	%f30,%f6,%f6
+/* 0x0c10	1034 (24 29) */		fmuld	%f26,%f24,%f46
+/* 0x0c14	1035 (25 30) */		fitod	%f10,%f34
+/* 0x0c18	1036 (26 31) */		fdtox	%f22,%f22
+/* 0x0c1c	1037 (26 27) */		std	%f22,[%sp+336]
+/* 0x0c20	1038 (27 32) */		fmuld	%f26,%f20,%f22
+/* 0x0c24	1039 (27 32) */		fdtox	%f44,%f26
+/* 0x0c28	1040 (27 28) */		std	%f26,[%sp+328]
+/* 0x0c2c	1041 (28 33) */		fdtox	%f4,%f4
+/* 0x0c30	1042 (28 29) */		std	%f4,[%sp+320]
+/* 0x0c34	1043 (29 34) */		fmuld	%f6,%f24,%f26
+/* 0x0c38	1044 (29 34) */		fsubd	%f30,%f8,%f8
+/* 0x0c3c	1045 (30 35) */		fdtox	%f46,%f4
+/* 0x0c40	1046 (30 31) */		std	%f4,[%sp+312]
+/* 0x0c44	1047 (31 36) */		fmuld	%f28,%f24,%f4
+/* 0x0c48	1048 (31 36) */		fdtox	%f32,%f32
+/* 0x0c4c	1049 (31 32) */		std	%f32,[%sp+344]
+/* 0x0c50	1050 (32 37) */		fitod	%f11,%f10
+/* 0x0c54	1051 (32 37) */		fmuld	%f6,%f20,%f32
+/* 0x0c58	1052 (33 38) */		fsubd	%f30,%f34,%f34
+/* 0x0c5c	1053 (34 39) */		fdtox	%f22,%f6
+/* 0x0c60	1054 (34 35) */		std	%f6,[%sp+304]
+/* 0x0c64	1058 (35 40) */		fitod	%f12,%f36
+/* 0x0c68	1059 (35 40) */		fmuld	%f28,%f20,%f6
+/* 0x0c6c	1060 (36 41) */		fdtox	%f26,%f22
+/* 0x0c70	1061 (36 37) */		std	%f22,[%sp+296]
+/* 0x0c74	1062 (37 42) */		fmuld	%f8,%f24,%f22
+/* 0x0c78	1063 (37 42) */		fdtox	%f4,%f4
+/* 0x0c7c	1064 (37 38) */		std	%f4,[%sp+280]
+/* 0x0c80	1065 (38 43) */		fmuld	%f8,%f20,%f8
+/* 0x0c84	1066 (38 43) */		fsubd	%f30,%f10,%f10
+/* 0x0c88	1067 (39 44) */		fmuld	%f34,%f24,%f4
+/* 0x0c8c	1068 (39 44) */		fitod	%f13,%f12
+/* 0x0c90	1069 (40 45) */		fsubd	%f30,%f36,%f36
+/* 0x0c94	1070 (41 46) */		fdtox	%f6,%f6
+/* 0x0c98	1071 (41 42) */		std	%f6,[%sp+272]
+/* 0x0c9c	1072 (42 46) */		fxnor	%f0,%f14,%f14
+/* 0x0ca0	1073 (42 47) */		fmuld	%f34,%f20,%f6
+/* 0x0ca4	1074 (43 48) */		fdtox	%f22,%f22
+/* 0x0ca8	1075 (43 44) */		std	%f22,[%sp+264]
+/* 0x0cac	1076 (44 49) */		fdtox	%f8,%f8
+/* 0x0cb0	1077 (44 45) */		std	%f8,[%sp+256]
+/* 0x0cb4	1078 (44 49) */		fmuld	%f10,%f24,%f22
+/* 0x0cb8	1079 (45 50) */		fdtox	%f4,%f4
+/* 0x0cbc	1080 (45 46) */		std	%f4,[%sp+248]
+/* 0x0cc0	1081 (45 50) */		fmuld	%f10,%f20,%f8
+/* 0x0cc4	1082 (46 51) */		fsubd	%f30,%f12,%f4
+/* 0x0cc8	1083 (46 51) */		fmuld	%f36,%f24,%f10
+/* 0x0ccc	1084 (47 52) */		fitod	%f14,%f38
+/* 0x0cd0	1085 (48 53) */		fdtox	%f6,%f6
+/* 0x0cd4	1086 (48 49) */		std	%f6,[%sp+240]
+/* 0x0cd8	1087 (49 54) */		fdtox	%f22,%f12
+/* 0x0cdc	1088 (49 50) */		std	%f12,[%sp+232]
+/* 0x0ce0	1089 (49 54) */		fmuld	%f36,%f20,%f6
+/* 0x0ce4	1090 (50 55) */		fdtox	%f8,%f8
+/* 0x0ce8	1091 (50 51) */		std	%f8,[%sp+224]
+/* 0x0cec	1092 (51 56) */		fdtox	%f10,%f22
+/* 0x0cf0	1093 (51 52) */		std	%f22,[%sp+216]
+/* 0x0cf4	1094 (51 56) */		fmuld	%f4,%f24,%f8
+/* 0x0cf8	1095 (52 57) */		fitod	%f15,%f14
+/* 0x0cfc	1096 (52 57) */		fmuld	%f4,%f20,%f4
+/* 0x0d00	1097 (53 58) */		fsubd	%f30,%f38,%f22
+/* 0x0d04	1098 (54 58) */		fxnor	%f0,%f16,%f16
+/* 0x0d08	1099 (55 60) */		fdtox	%f6,%f6
+/* 0x0d0c	1100 (55 56) */		std	%f6,[%sp+208]
+/* 0x0d10	1101 (56 61) */		fdtox	%f8,%f6
+/* 0x0d14	1102 (56 57) */		std	%f6,[%sp+200]
+/* 0x0d18	1103 (57 62) */		fsubd	%f30,%f14,%f10
+/* 0x0d1c	1104 (58 63) */		fitod	%f16,%f40
+/* 0x0d20	1105 (58 63) */		fmuld	%f22,%f24,%f6
+/* 0x0d24	1106 (59 64) */		fdtox	%f4,%f4
+/* 0x0d28	1107 (59 60) */		std	%f4,[%sp+192]
+/* 0x0d2c	1108 (60 65) */		fitod	%f17,%f16
+/* 0x0d30	1109 (60 65) */		fmuld	%f22,%f20,%f4
+/* 0x0d34	1110 (61 65) */		fxnor	%f0,%f18,%f18
+/* 0x0d38	1111 (62 67) */		fdtox	%f32,%f32
+/* 0x0d3c	1112 (62 63) */		std	%f32,[%sp+288]
+/* 0x0d40	1113 (62 67) */		fmuld	%f10,%f24,%f8
+/* 0x0d44	1114 (63 68) */		fdtox	%f6,%f6
+/* 0x0d48	1115 (63 64) */		std	%f6,[%sp+184]
+/* 0x0d4c	1116 (63 68) */		fmuld	%f10,%f20,%f22
+/* 0x0d50	1117 (64 69) */		fsubd	%f30,%f40,%f6
+/* 0x0d54	1118 (65 70) */		fdtox	%f4,%f4
+/* 0x0d58	1119 (65 66) */		std	%f4,[%sp+176]
+/* 0x0d5c	1120 (66 71) */		fsubd	%f30,%f16,%f10
+/* 0x0d60	1121 (67 72) */		fdtox	%f8,%f4
+/* 0x0d64	1122 (67 68) */		std	%f4,[%sp+168]
+/* 0x0d68	1123 (68 73) */		fdtox	%f22,%f4
+/* 0x0d6c	1124 (68 69) */		std	%f4,[%sp+160]
+/* 0x0d70	1125 (69 74) */		fitod	%f18,%f42
+/* 0x0d74	1126 (69 74) */		fmuld	%f6,%f24,%f4
+/* 0x0d78	1127 (70 75) */		fmuld	%f6,%f20,%f22
+/* 0x0d7c	1128 (71 76) */		fmuld	%f10,%f24,%f6
+/* 0x0d80	1129 (72 77) */		fmuld	%f10,%f20,%f8
+/* 0x0d84	1130 (74 79) */		fdtox	%f4,%f4
+/* 0x0d88	1131 (74 75) */		std	%f4,[%sp+152]
+/* 0x0d8c	1132 (75 80) */		fsubd	%f30,%f42,%f4
+/* 0x0d90	1133 (76 81) */		fdtox	%f6,%f6
+/* 0x0d94	1134 (76 77) */		std	%f6,[%sp+136]
+/* 0x0d98	1135 (77 82) */		fdtox	%f22,%f22
+/* 0x0d9c	1136 (77 78) */		std	%f22,[%sp+144]
+/* 0x0da0	1137 (78 83) */		fdtox	%f8,%f22
+/* 0x0da4	1138 (78 79) */		std	%f22,[%sp+128]
+/* 0x0da8	1139 (79 84) */		fitod	%f19,%f22
+/* 0x0dac	1140 (80 85) */		fmuld	%f4,%f24,%f6
+/* 0x0db0	1141 (81 86) */		fmuld	%f4,%f20,%f4
+/* 0x0db4	1142 (84 89) */		fsubd	%f30,%f22,%f22
+/* 0x0db8	1143 (85 90) */		fdtox	%f6,%f6
+/* 0x0dbc	1144 (85 86) */		std	%f6,[%sp+120]
+/* 0x0dc0	1145 (86 91) */		fdtox	%f4,%f4
+/* 0x0dc4	1146 (86 87) */		std	%f4,[%sp+112]
+/* 0x0dc8	1150 (87 89) */		ldx	[%sp+336],%g2
+/* 0x0dcc	1151 (88 90) */		ldx	[%sp+344],%g3
+/* 0x0dd0	1152 (89 91) */		ld	[%i1],%g4
+/* 0x0dd4	1153 (89 90) */		sllx	%g2,19,%g2
+/* 0x0dd8	1154 (89 94) */		fmuld	%f22,%f20,%f4
+/* 0x0ddc	1155 (90 92) */		ldx	[%sp+328],%g5
+/* 0x0de0	1156 (90 91) */		add	%g3,%g2,%g2
+/* 0x0de4	1157 (90 95) */		fmuld	%f22,%f24,%f6
+/* 0x0de8	1158 (91 93) */		ldx	[%sp+320],%g3
+/* 0x0dec	1159 (91 92) */		add	%g2,%g4,%g4
+/* 0x0df0	1160 (92 94) */		ldx	[%sp+304],%o0
+/* 0x0df4	1161 (93 94) */		st	%g4,[%i0]
+/* 0x0df8	1162 (93 94) */		sllx	%g3,19,%g2
+/* 0x0dfc	1163 (93 94) */		srlx	%g4,32,%g4
+/* 0x0e00	1164 (94 96) */		ld	[%i1+4],%g3
+/* 0x0e04	1165 (94 95) */		add	%g5,%g2,%g2
+/* 0x0e08	1166 (94 99) */		fdtox	%f4,%f4
+/* 0x0e0c	1167 (95 97) */		ldx	[%sp+312],%g5
+/* 0x0e10	1168 (95 100) */		fdtox	%f6,%f6
+/* 0x0e14	1169 (96 98) */		ldx	[%sp+288],%o1
+/* 0x0e18	1170 (96 97) */		add	%g2,%g3,%g2
+/* 0x0e1c	1171 (96 97) */		sllx	%o0,19,%g3
+/* 0x0e20	1172 (97 99) */		ldx	[%sp+272],%o2
+/* 0x0e24	1173 (97 98) */		add	%g2,%g4,%g2
+/* 0x0e28	1174 (97 98) */		add	%g5,%g3,%g3
+/* 0x0e2c	1175 (98 100) */		ld	[%i1+8],%g4
+/* 0x0e30	1176 (98 99) */		srlx	%g2,32,%o0
+/* 0x0e34	1177 (99 101) */		ldx	[%sp+296],%g5
+/* 0x0e38	1178 (100 101) */		st	%g2,[%i0+4]
+/* 0x0e3c	1179 (100 101) */		sllx	%o2,19,%g2
+/* 0x0e40	1180 (100 101) */		add	%g3,%g4,%g3
+/* 0x0e44	1181 (101 103) */		ldx	[%sp+256],%o2
+/* 0x0e48	1182 (101 102) */		sllx	%o1,19,%g4
+/* 0x0e4c	1183 (101 102) */		add	%g3,%o0,%g3
+/* 0x0e50	1184 (102 104) */		ld	[%i1+12],%o0
+/* 0x0e54	1185 (102 103) */		srlx	%g3,32,%o1
+/* 0x0e58	1186 (102 103) */		add	%g5,%g4,%g4
+/* 0x0e5c	1187 (103 105) */		ldx	[%sp+280],%g5
+/* 0x0e60	1188 (104 105) */		st	%g3,[%i0+8]
+/* 0x0e64	1189 (104 105) */		sllx	%o2,19,%g3
+/* 0x0e68	1190 (104 105) */		add	%g4,%o0,%g4
+/* 0x0e6c	1191 (105 107) */		ld	[%i1+16],%o0
+/* 0x0e70	1192 (105 106) */		add	%g5,%g2,%g2
+/* 0x0e74	1193 (105 106) */		add	%g4,%o1,%g4
+/* 0x0e78	1194 (106 108) */		ldx	[%sp+264],%g5
+/* 0x0e7c	1195 (106 107) */		srlx	%g4,32,%o1
+/* 0x0e80	1196 (107 109) */		ldx	[%sp+240],%o2
+/* 0x0e84	1197 (107 108) */		add	%g2,%o0,%g2
+/* 0x0e88	1198 (108 110) */		ld	[%i1+20],%o0
+/* 0x0e8c	1199 (108 109) */		add	%g5,%g3,%g3
+/* 0x0e90	1200 (108 109) */		add	%g2,%o1,%g2
+/* 0x0e94	1201 (109 111) */		ldx	[%sp+248],%g5
+/* 0x0e98	1202 (109 110) */		srlx	%g2,32,%o1
+/* 0x0e9c	1203 (110 111) */		st	%g4,[%i0+12]
+/* 0x0ea0	1204 (110 111) */		sllx	%o2,19,%g4
+/* 0x0ea4	1205 (110 111) */		add	%g3,%o0,%g3
+/* 0x0ea8	1206 (111 113) */		ld	[%i1+24],%o0
+/* 0x0eac	1207 (111 112) */		add	%g5,%g4,%g4
+/* 0x0eb0	1208 (111 112) */		add	%g3,%o1,%g3
+/* 0x0eb4	1209 (112 114) */		ldx	[%sp+224],%o2
+/* 0x0eb8	1210 (112 113) */		srlx	%g3,32,%o1
+/* 0x0ebc	1211 (113 115) */		ldx	[%sp+232],%g5
+/* 0x0ec0	1212 (113 114) */		add	%g4,%o0,%g4
+/* 0x0ec4	1213 (114 115) */		st	%g2,[%i0+16]
+/* 0x0ec8	1214 (114 115) */		sllx	%o2,19,%g2
+/* 0x0ecc	1215 (114 115) */		add	%g4,%o1,%g4
+/* 0x0ed0	1216 (115 117) */		ld	[%i1+28],%o0
+/* 0x0ed4	1217 (115 116) */		srlx	%g4,32,%o1
+/* 0x0ed8	1218 (115 116) */		add	%g5,%g2,%g2
+/* 0x0edc	1222 (116 118) */		ldx	[%sp+208],%o2
+/* 0x0ee0	1223 (117 119) */		ldx	[%sp+216],%g5
+/* 0x0ee4	1224 (117 118) */		add	%g2,%o0,%g2
+/* 0x0ee8	1225 (118 119) */		st	%g3,[%i0+20]
+/* 0x0eec	1226 (118 119) */		sllx	%o2,19,%g3
+/* 0x0ef0	1227 (118 119) */		add	%g2,%o1,%g2
+/* 0x0ef4	1228 (119 121) */		ld	[%i1+32],%o0
+/* 0x0ef8	1229 (119 120) */		srlx	%g2,32,%o1
+/* 0x0efc	1230 (119 120) */		add	%g5,%g3,%g3
+/* 0x0f00	1231 (120 122) */		ldx	[%sp+192],%o2
+/* 0x0f04	1232 (121 123) */		ldx	[%sp+200],%g5
+/* 0x0f08	1233 (121 122) */		add	%g3,%o0,%g3
+/* 0x0f0c	1234 (122 123) */		st	%g4,[%i0+24]
+/* 0x0f10	1235 (122 123) */		sllx	%o2,19,%g4
+/* 0x0f14	1236 (122 123) */		add	%g3,%o1,%g3
+/* 0x0f18	1237 (123 125) */		ld	[%i1+36],%o0
+/* 0x0f1c	1238 (123 124) */		srlx	%g3,32,%o1
+/* 0x0f20	1239 (123 124) */		add	%g5,%g4,%g4
+/* 0x0f24	1240 (124 126) */		ldx	[%sp+176],%o2
+/* 0x0f28	1241 (125 127) */		ldx	[%sp+184],%g5
+/* 0x0f2c	1242 (125 126) */		add	%g4,%o0,%g4
+/* 0x0f30	1243 (126 127) */		st	%g2,[%i0+28]
+/* 0x0f34	1244 (126 127) */		sllx	%o2,19,%g2
+/* 0x0f38	1245 (126 127) */		add	%g4,%o1,%g4
+/* 0x0f3c	1246 (127 129) */		ld	[%i1+40],%o0
+/* 0x0f40	1247 (127 128) */		srlx	%g4,32,%o1
+/* 0x0f44	1248 (127 128) */		add	%g5,%g2,%g2
+/* 0x0f48	1249 (128 130) */		ldx	[%sp+160],%o2
+/* 0x0f4c	1250 (129 131) */		ldx	[%sp+168],%g5
+/* 0x0f50	1251 (129 130) */		add	%g2,%o0,%g2
+/* 0x0f54	1252 (130 131) */		st	%g3,[%i0+32]
+/* 0x0f58	1253 (130 131) */		sllx	%o2,19,%g3
+/* 0x0f5c	1254 (130 131) */		add	%g2,%o1,%g2
+/* 0x0f60	1255 (131 133) */		ld	[%i1+44],%o0
+/* 0x0f64	1256 (131 132) */		srlx	%g2,32,%o1
+/* 0x0f68	1257 (131 132) */		add	%g5,%g3,%g3
+/* 0x0f6c	1258 (132 134) */		ldx	[%sp+144],%o2
+/* 0x0f70	1259 (133 135) */		ldx	[%sp+152],%g5
+/* 0x0f74	1260 (133 134) */		add	%g3,%o0,%g3
+/* 0x0f78	1261 (134 135) */		st	%g4,[%i0+36]
+/* 0x0f7c	1262 (134 135) */		sllx	%o2,19,%g4
+/* 0x0f80	1263 (134 135) */		add	%g3,%o1,%g3
+/* 0x0f84	1264 (135 137) */		ld	[%i1+48],%o0
+/* 0x0f88	1265 (135 136) */		srlx	%g3,32,%o1
+/* 0x0f8c	1266 (135 136) */		add	%g5,%g4,%g4
+/* 0x0f90	1267 (136 138) */		ldx	[%sp+128],%o2
+/* 0x0f94	1268 (137 139) */		ldx	[%sp+136],%g5
+/* 0x0f98	1269 (137 138) */		add	%g4,%o0,%g4
+/* 0x0f9c	1270 (138 139) */		std	%f4,[%sp+96]
+/* 0x0fa0	1271 (138 139) */		add	%g4,%o1,%g4
+/* 0x0fa4	1272 (139 140) */		st	%g2,[%i0+40]
+/* 0x0fa8	1273 (139 140) */		sllx	%o2,19,%g2
+/* 0x0fac	1274 (139 140) */		srlx	%g4,32,%o1
+/* 0x0fb0	1275 (140 142) */		ld	[%i1+52],%o0
+/* 0x0fb4	1276 (140 141) */		add	%g5,%g2,%g2
+/* 0x0fb8	1277 (141 142) */		std	%f6,[%sp+104]
+/* 0x0fbc	1278 (142 144) */		ldx	[%sp+120],%g5
+/* 0x0fc0	1279 (142 143) */		add	%g2,%o0,%g2
+/* 0x0fc4	1280 (143 144) */		st	%g3,[%i0+44]
+/* 0x0fc8	1281 (143 144) */		add	%g2,%o1,%g2
+/* 0x0fcc	1282 (144 146) */		ldx	[%sp+112],%o2
+/* 0x0fd0	1283 (144 145) */		srlx	%g2,32,%o1
+/* 0x0fd4	1284 (145 147) */		ld	[%i1+56],%o0
+/* 0x0fd8	1285 (146 147) */		st	%g4,[%i0+48]
+/* 0x0fdc	1286 (146 147) */		sllx	%o2,19,%g3
+/* 0x0fe0	1287 (147 149) */		ldx	[%sp+96],%o2
+/* 0x0fe4	1288 (147 148) */		add	%g5,%g3,%g3
+/* 0x0fe8	1289 (148 150) */		ldx	[%sp+104],%g5
+/* 0x0fec	1290 (148 149) */		add	%g3,%o0,%g3
+/* 0x0ff0	1291 (149 151) */		ld	[%i1+60],%o0
+/* 0x0ff4	1292 (149 150) */		sllx	%o2,19,%g4
+/* 0x0ff8	1293 (149 150) */		add	%g3,%o1,%g3
+/* 0x0ffc	1294 (150 151) */		st	%g2,[%i0+52]
+/* 0x1000	1295 (150 151) */		srlx	%g3,32,%o1
+/* 0x1004	1296 (150 151) */		add	%g5,%g4,%g4
+/* 0x1008	1297 (151 152) */		st	%g3,[%i0+56]
+/* 0x100c	1298 (151 152) */		add	%g4,%o0,%g2
+/* 0x1010	1299 (152 153) */		add	%g2,%o1,%g2
+/* 0x1014	1300 (152 153) */		st	%g2,[%i0+60]
+/* 0x1018	1304 (153 154) */		srlx	%g2,32,%o7
+
+!
+! ENTRY .L77000061
+!
+
+        .L77000061:		/* frequency 1.0 confidence 0.0 */
+/* 0x119c	1437 ( 0  1) */		or	%g0,%o7,%i0
+
+!
+! ENTRY .L900000159
+!
+
+        .L900000159:		/* frequency 1.0 confidence 0.0 */
+/* 0x11a0	     ( 0  7) */		ret	! Result =  %o1 %o0 %f0 %f1
+/* 0x11a4	     ( 2  4) */		restore	%g0,%g0,%g0
+
+!
+! ENTRY .L77000073
+!
+
+        .L77000073:		/* frequency 1.0 confidence 0.0 */
+	or	%g0, %i4, %o2
+	or	%g0, %o0, %o1
+	or	%g0, %i3, %o0
+
+!
+! ENTRY .L77000052
+!
+
+        .L77000052:		/* frequency 1.0 confidence 0.0 */
+/* 0x1028	1318 ( 0  1) */		andn	%o2,%g2,%g2
+/* 0x102c	1319 ( 0  1) */		st	%g2,[%sp+96]
+/* 0x1030	1325 ( 0  1) */		add	%o0,1,%g3
+/* 0x1034	1326 ( 0  1) */		fmovd	%f0,%f14
+/* 0x1038	1327 ( 1  2) */		srl	%o2,19,%g2
+/* 0x103c	1328 ( 1  2) */		st	%g2,[%sp+92]
+/* 0x1040	1329 ( 1  2) */		or	%g0,0,%o5
+/* 0x1044	1330 ( 2  3) */		srl	%g3,31,%g2
+/* 0x1048	1331 ( 2  5) */		ldd	[%o1],%f6
+/* 0x104c	1335 ( 2  3) */		sethi	%hi(0x1800),%g1
+/* 0x1050	1336 ( 3  4) */		add	%g3,%g2,%g2
+/* 0x1054	1337 ( 3  4) */		xor	%g1,-304,%g1
+/* 0x1058	1338 ( 3  6) */		ldd	[%o1+8],%f20
+/* 0x105c	1339 ( 4  5) */		sra	%g2,1,%o3
+/* 0x1060	1340 ( 4  5) */		fmovs	%f6,%f8
+/* 0x1064	1341 ( 4  5) */		add	%g1,%fp,%g3
+/* 0x1068	1342 ( 5  6) */		fmovs	%f6,%f10
+/* 0x106c	1343 ( 5  7) */		ld	[%sp+96],%f9
+/* 0x1070	1344 ( 5  6) */		subcc	%o3,0,%g0
+/* 0x1074	1345 ( 6  8) */		ld	[%sp+92],%f11
+/* 0x1078	1346 ( 6  7) */		sethi	%hi(0x1800),%g1
+/* 0x107c	1347 ( 6  7) */		or	%g0,%i2,%o1
+/* 0x1080	1348 ( 7 10) */		fsubd	%f8,%f6,%f18
+/* 0x1084	1349 ( 7  8) */		xor	%g1,-296,%g1
+/* 0x1088	1350 ( 7  8) */		or	%g0,0,%g4
+/* 0x108c	1351 ( 8 11) */		fsubd	%f10,%f6,%f16
+/* 0x1090	1352 ( 8  9) */		bleu,pt	%icc,.L990000162	! tprob=0.50
+/* 0x1094	     ( 8  9) */		subcc	%o0,0,%g0
+/* 0x1098	1354 ( 9 10) */		add	%g1,%fp,%g2
+/* 0x109c	1355 ( 9 10) */		sethi	%hi(0x1800),%g1
+/* 0x10a0	1356 (10 11) */		xor	%g1,-288,%g1
+/* 0x10a4	1357 (10 11) */		subcc	%o3,7,%g0
+/* 0x10a8	1358 (11 12) */		add	%g1,%fp,%o7
+/* 0x10ac	1359 (11 12) */		sethi	%hi(0x1800),%g1
+/* 0x10b0	1360 (12 13) */		xor	%g1,-280,%g1
+/* 0x10b4	1361 (13 14) */		add	%g1,%fp,%o4
+/* 0x10b8	1362 (13 14) */		bl,pn	%icc,.L77000054	! tprob=0.50
+/* 0x10bc	     (13 14) */		sub	%o3,2,%o2
+/* 0x10c0	1364 (14 17) */		ldd	[%o1],%f2
+/* 0x10c4	1365 (14 15) */		add	%o1,16,%g5
+/* 0x10c8	1366 (14 15) */		or	%g0,4,%g4
+/* 0x10cc	1367 (15 18) */		ldd	[%o1+8],%f0
+/* 0x10d0	1368 (15 16) */		add	%o1,8,%o1
+/* 0x10d4	1369 (16 18) */		fxnor	%f14,%f2,%f6
+/* 0x10d8	1370 (16 19) */		ldd	[%g5],%f4
+/* 0x10dc	1371 (16 17) */		add	%o1,16,%o1
+/* 0x10e0	1372 (17 19) */		fxnor	%f14,%f0,%f12
+/* 0x10e4	1373 (17 20) */		ldd	[%o1],%f0
+/* 0x10e8	1374 (17 18) */		add	%o1,8,%o1
+/* 0x10ec	1375 (18 21) */		fitod	%f7,%f2
+/* 0x10f0	1376 (19 22) */		fitod	%f6,%f6
+/* 0x10f4	1377 (20 22) */		fxnor	%f14,%f4,%f10
+/* 0x10f8	1378 (21 24) */		fsubd	%f20,%f2,%f2
+/* 0x10fc	1379 (22 24) */		fxnor	%f14,%f0,%f8
+/* 0x1100	1380 (23 26) */		fitod	%f13,%f4
+/* 0x1104	1381 (24 27) */		fsubd	%f20,%f6,%f6
+/* 0x1108	1382 (24 27) */		fmuld	%f2,%f16,%f0
+
+!
+! ENTRY .L990000154
+!
+
+        .L990000154:		/* frequency 1.0 confidence 0.0 */
+/* 0x110c	1384 ( 0  3) */		ldd	[%o1],%f24
+/* 0x1110	1385 ( 0  1) */		add	%g4,3,%g4
+/* 0x1114	1386 ( 0  1) */		add	%o4,96,%o4
+/* 0x1118	1387 ( 1  4) */		fitod	%f11,%f22
+/* 0x111c	1388 ( 2  5) */		fsubd	%f20,%f4,%f26
+/* 0x1120	1389 ( 2  3) */		subcc	%g4,%o2,%g0
+/* 0x1124	1390 ( 2  3) */		add	%o7,96,%o7
+/* 0x1128	1391 ( 2  5) */		fmuld	%f6,%f18,%f28
+/* 0x112c	1392 ( 3  6) */		fmuld	%f6,%f16,%f6
+/* 0x1130	1393 ( 3  4) */		add	%g2,96,%g2
+/* 0x1134	1394 ( 3  4) */		add	%g3,96,%g3
+/* 0x1138	1395 ( 4  7) */		fdtox	%f0,%f0
+/* 0x113c	1396 ( 5  8) */		fitod	%f12,%f4
+/* 0x1140	1397 ( 5  8) */		fmuld	%f2,%f18,%f2
+/* 0x1144	1398 ( 6  9) */		fdtox	%f28,%f12
+/* 0x1148	1399 ( 7 10) */		fdtox	%f6,%f6
+/* 0x114c	1400 ( 7  8) */		std	%f12,[%g3-96]
+/* 0x1150	1401 ( 8  9) */		std	%f6,[%g2-96]
+/* 0x1154	1402 ( 8 11) */		fdtox	%f2,%f2
+/* 0x1158	1403 ( 9 12) */		fsubd	%f20,%f4,%f6
+/* 0x115c	1404 ( 9 10) */		std	%f2,[%o7-96]
+/* 0x1160	1405 ( 9 10) */		add	%o1,8,%o1
+/* 0x1164	1406 (10 12) */		fxnor	%f14,%f24,%f12
+/* 0x1168	1407 (10 13) */		fmuld	%f26,%f16,%f4
+/* 0x116c	1408 (10 11) */		std	%f0,[%o4-96]
+/* 0x1170	1409 (11 14) */		ldd	[%o1],%f0
+/* 0x1174	1410 (11 14) */		fitod	%f9,%f2
+/* 0x1178	1411 (12 15) */		fsubd	%f20,%f22,%f28
+/* 0x117c	1412 (12 15) */		fmuld	%f6,%f18,%f24
+/* 0x1180	1413 (13 16) */		fmuld	%f6,%f16,%f22
+/* 0x1184	1414 (13 16) */		fdtox	%f4,%f4
+/* 0x1188	1415 (14 17) */		fitod	%f10,%f6
+/* 0x118c	1416 (14 17) */		fmuld	%f26,%f18,%f10
+/* 0x1190	1417 (15 18) */		fdtox	%f24,%f24
+/* 0x1194	1418 (16 19) */		fdtox	%f22,%f22
+/* 0x1198	1419 (16 17) */		std	%f24,[%g3-64]
+/* 0x119c	1420 (17 18) */		std	%f22,[%g2-64]
+/* 0x11a0	1421 (17 20) */		fdtox	%f10,%f10
+/* 0x11a4	1422 (18 21) */		fsubd	%f20,%f6,%f6
+/* 0x11a8	1423 (18 19) */		std	%f10,[%o7-64]
+/* 0x11ac	1424 (18 19) */		add	%o1,8,%o1
+/* 0x11b0	1425 (19 21) */		fxnor	%f14,%f0,%f10
+/* 0x11b4	1426 (19 22) */		fmuld	%f28,%f16,%f0
+/* 0x11b8	1427 (19 20) */		std	%f4,[%o4-64]
+/* 0x11bc	1428 (20 23) */		ldd	[%o1],%f22
+/* 0x11c0	1429 (20 23) */		fitod	%f13,%f4
+/* 0x11c4	1430 (21 24) */		fsubd	%f20,%f2,%f2
+/* 0x11c8	1431 (21 24) */		fmuld	%f6,%f18,%f26
+/* 0x11cc	1432 (22 25) */		fmuld	%f6,%f16,%f24
+/* 0x11d0	1433 (22 25) */		fdtox	%f0,%f0
+/* 0x11d4	1434 (23 26) */		fitod	%f8,%f6
+/* 0x11d8	1435 (23 26) */		fmuld	%f28,%f18,%f8
+/* 0x11dc	1436 (24 27) */		fdtox	%f26,%f26
+/* 0x11e0	1437 (25 28) */		fdtox	%f24,%f24
+/* 0x11e4	1438 (25 26) */		std	%f26,[%g3-32]
+/* 0x11e8	1439 (26 27) */		std	%f24,[%g2-32]
+/* 0x11ec	1440 (26 29) */		fdtox	%f8,%f8
+/* 0x11f0	1441 (27 30) */		fsubd	%f20,%f6,%f6
+/* 0x11f4	1442 (27 28) */		std	%f8,[%o7-32]
+/* 0x11f8	1443 (27 28) */		add	%o1,8,%o1
+/* 0x11fc	1444 (28 30) */		fxnor	%f14,%f22,%f8
+/* 0x1200	1445 (28 29) */		std	%f0,[%o4-32]
+/* 0x1204	1446 (28 29) */		bcs,pt	%icc,.L990000154	! tprob=0.50
+/* 0x1208	     (28 31) */		fmuld	%f2,%f16,%f0
+
+!
+! ENTRY .L990000157
+!
+
+        .L990000157:		/* frequency 1.0 confidence 0.0 */
+/* 0x120c	1449 ( 0  3) */		fitod	%f12,%f28
+/* 0x1210	1450 ( 0  3) */		fmuld	%f6,%f18,%f24
+/* 0x1214	1451 ( 0  1) */		add	%g3,128,%g3
+/* 0x1218	1452 ( 1  4) */		fitod	%f10,%f12
+/* 0x121c	1453 ( 1  4) */		fmuld	%f6,%f16,%f26
+/* 0x1220	1454 ( 1  2) */		add	%g2,128,%g2
+/* 0x1224	1455 ( 2  5) */		fsubd	%f20,%f4,%f4
+/* 0x1228	1456 ( 2  5) */		fmuld	%f2,%f18,%f22
+/* 0x122c	1457 ( 2  3) */		add	%o7,128,%o7
+/* 0x1230	1458 ( 3  6) */		fdtox	%f24,%f6
+/* 0x1234	1459 ( 3  4) */		std	%f6,[%g3-128]
+/* 0x1238	1460 ( 3  4) */		add	%o4,128,%o4
+/* 0x123c	1461 ( 4  7) */		fsubd	%f20,%f28,%f2
+/* 0x1240	1462 ( 4  5) */		subcc	%g4,%o3,%g0
+/* 0x1244	1463 ( 5  8) */		fitod	%f11,%f6
+/* 0x1248	1464 ( 5  8) */		fmuld	%f4,%f18,%f24
+/* 0x124c	1465 ( 6  9) */		fdtox	%f26,%f10
+/* 0x1250	1466 ( 6  7) */		std	%f10,[%g2-128]
+/* 0x1254	1467 ( 7 10) */		fdtox	%f22,%f10
+/* 0x1258	1468 ( 7  8) */		std	%f10,[%o7-128]
+/* 0x125c	1469 ( 7 10) */		fmuld	%f2,%f18,%f26
+/* 0x1260	1470 ( 8 11) */		fsubd	%f20,%f12,%f10
+/* 0x1264	1471 ( 8 11) */		fmuld	%f2,%f16,%f2
+/* 0x1268	1472 ( 9 12) */		fsubd	%f20,%f6,%f22
+/* 0x126c	1473 ( 9 12) */		fmuld	%f4,%f16,%f12
+/* 0x1270	1474 (10 13) */		fdtox	%f0,%f0
+/* 0x1274	1475 (10 11) */		std	%f0,[%o4-128]
+/* 0x1278	1476 (11 14) */		fitod	%f8,%f4
+/* 0x127c	1477 (11 14) */		fmuld	%f10,%f18,%f6
+/* 0x1280	1478 (12 15) */		fdtox	%f26,%f0
+/* 0x1284	1479 (12 13) */		std	%f0,[%g3-96]
+/* 0x1288	1480 (12 15) */		fmuld	%f10,%f16,%f10
+/* 0x128c	1481 (13 16) */		fdtox	%f2,%f2
+/* 0x1290	1482 (13 14) */		std	%f2,[%g2-96]
+/* 0x1294	1483 (14 17) */		fitod	%f9,%f0
+/* 0x1298	1484 (14 17) */		fmuld	%f22,%f18,%f2
+/* 0x129c	1485 (15 18) */		fdtox	%f24,%f8
+/* 0x12a0	1486 (15 16) */		std	%f8,[%o7-96]
+/* 0x12a4	1487 (16 19) */		fsubd	%f20,%f4,%f4
+/* 0x12a8	1488 (16 19) */		fmuld	%f22,%f16,%f8
+/* 0x12ac	1489 (17 20) */		fdtox	%f12,%f12
+/* 0x12b0	1490 (17 18) */		std	%f12,[%o4-96]
+/* 0x12b4	1491 (18 21) */		fsubd	%f20,%f0,%f0
+/* 0x12b8	1492 (19 22) */		fdtox	%f6,%f6
+/* 0x12bc	1493 (19 20) */		std	%f6,[%g3-64]
+/* 0x12c0	1494 (20 23) */		fdtox	%f10,%f10
+/* 0x12c4	1495 (20 21) */		std	%f10,[%g2-64]
+/* 0x12c8	1496 (20 23) */		fmuld	%f4,%f18,%f6
+/* 0x12cc	1497 (21 24) */		fdtox	%f2,%f2
+/* 0x12d0	1498 (21 22) */		std	%f2,[%o7-64]
+/* 0x12d4	1499 (21 24) */		fmuld	%f4,%f16,%f4
+/* 0x12d8	1500 (22 25) */		fmuld	%f0,%f18,%f2
+/* 0x12dc	1501 (22 25) */		fdtox	%f8,%f8
+/* 0x12e0	1502 (22 23) */		std	%f8,[%o4-64]
+/* 0x12e4	1503 (23 26) */		fdtox	%f6,%f6
+/* 0x12e8	1504 (23 24) */		std	%f6,[%g3-32]
+/* 0x12ec	1505 (23 26) */		fmuld	%f0,%f16,%f0
+/* 0x12f0	1506 (24 27) */		fdtox	%f4,%f4
+/* 0x12f4	1507 (24 25) */		std	%f4,[%g2-32]
+/* 0x12f8	1508 (25 28) */		fdtox	%f2,%f2
+/* 0x12fc	1509 (25 26) */		std	%f2,[%o7-32]
+/* 0x1300	1510 (26 29) */		fdtox	%f0,%f0
+/* 0x1304	1511 (26 27) */		bcc,pn	%icc,.L77000056	! tprob=0.50
+/* 0x1308	     (26 27) */		std	%f0,[%o4-32]
+
+!
+! ENTRY .L77000054
+!
+
+        .L77000054:		/* frequency 1.0 confidence 0.0 */
+/* 0x130c	1514 ( 0  3) */		ldd	[%o1],%f0
+
+!
+! ENTRY .L990000161
+!
+
+        .L990000161:		/* frequency 1.0 confidence 0.0 */
+/* 0x1310	1516 ( 0  2) */		fxnor	%f14,%f0,%f0
+/* 0x1314	1517 ( 0  1) */		add	%g4,1,%g4
+/* 0x1318	1518 ( 0  1) */		add	%o1,8,%o1
+/* 0x131c	1519 ( 1  2) */		subcc	%g4,%o3,%g0
+/* 0x1320	1520 ( 2  5) */		fitod	%f0,%f2
+/* 0x1324	1521 ( 3  6) */		fitod	%f1,%f0
+/* 0x1328	1522 ( 5  8) */		fsubd	%f20,%f2,%f2
+/* 0x132c	1523 ( 6  9) */		fsubd	%f20,%f0,%f0
+/* 0x1330	1524 ( 8 11) */		fmuld	%f2,%f18,%f6
+/* 0x1334	1525 ( 9 12) */		fmuld	%f2,%f16,%f4
+/* 0x1338	1526 (10 13) */		fmuld	%f0,%f18,%f2
+/* 0x133c	1527 (11 14) */		fdtox	%f6,%f6
+/* 0x1340	1528 (11 12) */		std	%f6,[%g3]
+/* 0x1344	1529 (11 14) */		fmuld	%f0,%f16,%f0
+/* 0x1348	1530 (12 15) */		fdtox	%f4,%f4
+/* 0x134c	1531 (12 13) */		std	%f4,[%g2]
+/* 0x1350	1532 (12 13) */		add	%g2,32,%g2
+/* 0x1354	1533 (13 16) */		fdtox	%f2,%f2
+/* 0x1358	1534 (13 14) */		std	%f2,[%o7]
+/* 0x135c	1535 (13 14) */		add	%o7,32,%o7
+/* 0x1360	1536 (14 17) */		fdtox	%f0,%f0
+/* 0x1364	1537 (14 15) */		std	%f0,[%o4]
+/* 0x1368	1538 (14 15) */		add	%o4,32,%o4
+/* 0x136c	1539 (15 16) */		add	%g3,32,%g3
+/* 0x1370	1540 (15 16) */		bcs,a,pt	%icc,.L990000161	! tprob=0.50
+/* 0x1374	     (16 19) */		ldd	[%o1],%f0
+
+!
+! ENTRY .L77000056
+!
+
+         .L77000056:		/* frequency 1.0 confidence 0.0 */
+/* 0x1378	1548 ( 0  1) */		subcc	%o0,0,%g0
+
+!
+! ENTRY .L990000162
+!
+
+         .L990000162:		/* frequency 1.0 confidence 0.0 */
+/* 0x137c	1550 ( 0  1) */		bleu,pt	%icc,.L77770061	! tprob=0.50
+/* 0x1380	     ( 0  1) */		nop
+/* 0x1384	1555 ( 0  1) */		sethi	%hi(0x1800),%g1
+/* 0x1388	1556 ( 1  2) */		xor	%g1,-304,%g1
+/* 0x138c	1557 ( 1  2) */		or	%g0,%i1,%g4
+/* 0x1390	1558 ( 2  3) */		add	%g1,%fp,%g5
+/* 0x1394	1559 ( 2  3) */		sethi	%hi(0x1800),%g1
+/* 0x1398	1560 ( 3  4) */		xor	%g1,-296,%g1
+/* 0x139c	1561 ( 3  4) */		or	%g0,%o0,%o7
+/* 0x13a0	1562 ( 4  5) */		add	%g1,%fp,%g2
+/* 0x13a4	1563 ( 4  5) */		or	%g0,0,%i2
+/* 0x13a8	1564 ( 5  6) */		or	%g0,%i0,%g3
+/* 0x13ac	1565 ( 5  6) */		subcc	%o0,6,%g0
+/* 0x13b0	1566 ( 5  6) */		bl,pn	%icc,.L77000058	! tprob=0.50
+/* 0x13b4	     ( 6  7) */		sethi	%hi(0x1800),%g1
+/* 0x13b8	1568 ( 6  8) */		ld	[%g4],%o2
+/* 0x13bc	1569 ( 6  7) */		add	%g3,4,%g3
+/* 0x13c0	1570 ( 7  8) */		xor	%g1,-264,%g1
+/* 0x13c4	1571 ( 7  8) */		sub	%o7,3,%o4
+/* 0x13c8	1572 ( 8  9) */		add	%g1,%fp,%g2
+/* 0x13cc	1573 ( 8  9) */		sethi	%hi(0x1800),%g1
+/* 0x13d0	1574 ( 9 10) */		xor	%g1,-272,%g1
+/* 0x13d4	1575 ( 9 10) */		or	%g0,2,%i2
+/* 0x13d8	1576 (10 11) */		add	%g1,%fp,%g5
+/* 0x13dc	1577 (10 11) */		sethi	%hi(0x1800),%g1
+/* 0x13e0	1578 (11 12) */		xor	%g1,-296,%g1
+/* 0x13e4	1579 (12 13) */		add	%g1,%fp,%g1
+/* 0x13e8	1580 (13 15) */		ldx	[%g1],%o1
+/* 0x13ec	1581 (14 16) */		ldx	[%g1-8],%o0
+/* 0x13f0	1582 (15 16) */		sllx	%o1,19,%o1
+/* 0x13f4	1583 (15 17) */		ldx	[%g1+16],%o3
+/* 0x13f8	1584 (16 17) */		add	%o0,%o1,%o0
+/* 0x13fc	1585 (16 18) */		ld	[%g4+4],%o1
+/* 0x1400	1586 (16 17) */		add	%g4,8,%g4
+/* 0x1404	1587 (17 18) */		sllx	%o3,19,%o3
+/* 0x1408	1588 (17 18) */		add	%o0,%o2,%o0
+/* 0x140c	1589 (17 19) */		ldx	[%g1+8],%o2
+/* 0x1410	1590 (18 19) */		st	%o0,[%g3-4]
+/* 0x1414	1591 (18 19) */		srlx	%o0,32,%o0
+
+!
+! ENTRY .L990000142
+!
+
+        .L990000142:		/* frequency 1.0 confidence 0.0 */
+/* 0x1418	1593 ( 0  1) */		add	%o2,%o3,%o2
+/* 0x141c	1594 ( 0  1) */		add	%i2,4,%i2
+/* 0x1420	1595 ( 0  2) */		ld	[%g4],%o3
+/* 0x1424	1596 ( 1  2) */		srl	%o0,0,%o5
+/* 0x1428	1597 ( 1  2) */		add	%o2,%o1,%o1
+/* 0x142c	1598 ( 1  3) */		ldx	[%g2],%o0
+/* 0x1430	1599 ( 3  4) */		sllx	%o0,19,%o2
+/* 0x1434	1600 ( 3  5) */		ldx	[%g5],%o0
+/* 0x1438	1601 ( 3  4) */		add	%o1,%o5,%o1
+/* 0x143c	1602 ( 4  5) */		st	%o1,[%g3]
+/* 0x1440	1603 ( 4  5) */		srlx	%o1,32,%o5
+/* 0x1444	1604 ( 4  5) */		subcc	%i2,%o4,%g0
+/* 0x1448	1605 ( 5  7) */		ldx	[%g2+16],%o1
+/* 0x144c	1606 ( 5  6) */		add	%o0,%o2,%o0
+/* 0x1450	1607 ( 5  6) */		add	%g3,16,%g3
+/* 0x1454	1608 ( 6  8) */		ld	[%g4+4],%o2
+/* 0x1458	1609 ( 6  7) */		add	%o0,%o3,%o0
+/* 0x145c	1610 ( 7  8) */		sllx	%o1,19,%o3
+/* 0x1460	1611 ( 7  9) */		ldx	[%g5+16],%o1
+/* 0x1464	1612 ( 7  8) */		add	%o0,%o5,%o0
+/* 0x1468	1613 ( 8  9) */		st	%o0,[%g3-12]
+/* 0x146c	1614 ( 8  9) */		srlx	%o0,32,%o5
+/* 0x1470	1615 ( 8  9) */		add	%g4,16,%g4
+/* 0x1474	1616 ( 9 11) */		ldx	[%g2+32],%o0
+/* 0x1478	1617 ( 9 10) */		add	%o1,%o3,%o1
+/* 0x147c	1618 ( 9 10) */		add	%g2,64,%g2
+/* 0x1480	1619 (10 12) */		ld	[%g4-8],%o3
+/* 0x1484	1620 (10 11) */		add	%o1,%o2,%o2
+/* 0x1488	1621 (11 12) */		sllx	%o0,19,%o1
+/* 0x148c	1622 (11 13) */		ldx	[%g5+32],%o0
+/* 0x1490	1623 (11 12) */		add	%o2,%o5,%o2
+/* 0x1494	1624 (12 13) */		st	%o2,[%g3-8]
+/* 0x1498	1625 (12 13) */		srlx	%o2,32,%o5
+/* 0x149c	1626 (12 13) */		add	%g5,64,%g5
+/* 0x14a0	1627 (13 15) */		ldx	[%g2-16],%o2
+/* 0x14a4	1628 (13 14) */		add	%o0,%o1,%o0
+/* 0x14a8	1629 (14 16) */		ld	[%g4-4],%o1
+/* 0x14ac	1630 (14 15) */		add	%o0,%o3,%o0
+/* 0x14b0	1631 (15 16) */		sllx	%o2,19,%o3
+/* 0x14b4	1632 (15 17) */		ldx	[%g5-16],%o2
+/* 0x14b8	1633 (15 16) */		add	%o0,%o5,%o0
+/* 0x14bc	1634 (16 17) */		st	%o0,[%g3-4]
+/* 0x14c0	1635 (16 17) */		bcs,pt	%icc,.L990000142	! tprob=0.50
+/* 0x14c4	     (16 17) */		srlx	%o0,32,%o0
+
+!
+! ENTRY .L990000145
+!
+
+        .L990000145:		/* frequency 1.0 confidence 0.0 */
+/* 0x14c8	1638 ( 0  1) */		add	%o2,%o3,%o3
+/* 0x14cc	1639 ( 0  1) */		add	%g3,4,%g3
+/* 0x14d0	1640 ( 1  2) */		srl	%o0,0,%o2
+/* 0x14d4	1641 ( 1  2) */		add	%o3,%o1,%o0
+/* 0x14d8	1642 ( 2  3) */		add	%o0,%o2,%o0
+/* 0x14dc	1643 ( 2  3) */		st	%o0,[%g3-4]
+/* 0x14e0	1644 ( 2  3) */		subcc	%i2,%o7,%g0
+/* 0x14e4	1645 ( 2  3) */		bcc,pn	%icc,.L77770061	! tprob=0.50
+/* 0x14e8	     ( 3  4) */		srlx	%o0,32,%o5
+
+!
+! ENTRY .L77000058
+!
+
+        .L77000058:		/* frequency 1.0 confidence 0.0 */
+/* 0x14ec	1648 ( 0  2) */		ldx	[%g2],%o2
+
+!
+! ENTRY .L990000160
+!
+
+        .L990000160:		/* frequency 1.0 confidence 0.0 */
+/* 0x14f0	1650 ( 0  1) */		sllx	%o2,19,%o3
+/* 0x14f4	1651 ( 0  2) */		ldx	[%g5],%o0
+/* 0x14f8	1652 ( 0  1) */		add	%i2,1,%i2
+/* 0x14fc	1653 ( 1  2) */		srl	%o5,0,%o1
+/* 0x1500	1654 ( 1  3) */		ld	[%g4],%o2
+/* 0x1504	1655 ( 1  2) */		add	%g2,16,%g2
+/* 0x1508	1656 ( 2  3) */		add	%o0,%o3,%o0
+/* 0x150c	1657 ( 2  3) */		add	%g5,16,%g5
+/* 0x1510	1658 ( 3  4) */		add	%o0,%o2,%o0
+/* 0x1514	1659 ( 3  4) */		add	%g4,4,%g4
+/* 0x1518	1660 ( 4  5) */		add	%o0,%o1,%o0
+/* 0x151c	1661 ( 4  5) */		st	%o0,[%g3]
+/* 0x1520	1662 ( 4  5) */		subcc	%i2,%o7,%g0
+/* 0x1524	1663 ( 5  6) */		srlx	%o0,32,%o5
+/* 0x1528	1664 ( 5  6) */		add	%g3,4,%g3
+/* 0x152c	1665 ( 5  6) */		bcs,a,pt	%icc,.L990000160	! tprob=0.50
+/* 0x1530	     ( 6  8) */		ldx	[%g2],%o2
+
+!
+! ENTRY .L77770061
+!
+
+        .L77770061:		/* frequency 1.0 confidence 0.0 */
+/* 0x1534	     ( 0  2) */		ret	! Result =  %o1 %o0 %f0 %f1
+/* 0x1538	     ( 2  3) */		restore	%g0,%o5,%o0
+
+/* 0x11a8	1441 ( 0  0) */		.type	mul_add,2
+/* 0x11a8	1442 ( 0  0) */		.size	mul_add,(.-mul_add)
+/* 0x11a8	1445 ( 0  0) */		.align	16
+/* 0x11b0	1451 ( 0  0) */		.global	mul_add_inp
+
+!
+! ENTRY mul_add_inp
+! Shim: shifts incoming %o0..%o3 up into %o1..%o4 (leaving %o0 as is), then tail-calls mul_add.
+
+        .global mul_add_inp
+        mul_add_inp:		/* frequency 1.0 confidence 0.0 */
+/* 0x11b0	1453 ( 0  1) */		or	%g0,%o2,%g1	! %g1 = incoming %o2 (parked until %o3 has been copied out)
+/* 0x11b4	1454 ( 0  1) */		or	%g0,%o3,%o4	! %o4 = incoming %o3
+/* 0x11b8	1455 ( 1  2) */		or	%g0,%o0,%g3	! %g3 = incoming %o0
+/* 0x11bc	1456 ( 1  2) */		or	%g0,%o1,%g2	! %g2 = incoming %o1
+/* 0x11c0	1466 ( 2  3) */		or	%g0,%g1,%o3	! %o3 = incoming %o2
+/* 0x11c4	1467 ( 2  3) */		or	%g0,%g3,%o1	! %o1 = incoming %o0, so %o0 and %o1 now carry the same value
+/* 0x11c8	1468 ( 3  4) */		or	%g0,%g2,%o2	! %o2 = incoming %o1
+/* 0x11cc	1469 ( 3  4) */		or	%g0,%o7,%g1	! keep our caller's return address; the call below replaces %o7
+/* 0x11d0	1470 ( 4  6) */		call	mul_add	! params = 	! Result = 
+/* 0x11d4	     ( 5  6) */		or	%g0,%g1,%o7	! delay slot: put the saved %o7 back so mul_add returns straight to our caller
+/* 0x11d8	1472 ( 0  0) */		.type	mul_add_inp,2
+/* 0x11d8	1473 ( 0  0) */		.size	mul_add_inp,(.-mul_add_inp)
+
+	.section	".data",#alloc,#write
+/* 0x11d8	   6 ( 0  0) */		.align	8
+
+!
+! ENTRY mask_cnst
+! 8-byte data object: two identical 32-bit words, each with only bit 31 set.
+
+        mask_cnst:		/* frequency 1.0 confidence 0.0 */
+/* 0x11d8	   8 ( 0  0) */		.word	-2147483648	! 0x80000000
+/* 0x11dc	   9 ( 0  0) */		.word	-2147483648	! 0x80000000
+/* 0x11e0	  10 ( 0  0) */		.type	mask_cnst,#object
+/* 0x11e0	  11 ( 0  0) */		.size	mask_cnst,8	! size covers exactly the two words above
+
diff --git a/security/nss/lib/freebl/mpi/mpv_sparcv9.s b/security/nss/lib/freebl/mpi/mpv_sparcv9.s
new file mode 100644
index 000000000..e2fbe0bd0
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpv_sparcv9.s
@@ -0,0 +1,1645 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+	.section	".text",#alloc,#execinstr
+/* 000000	   0 ( 0  0) */		.register	%g2,#scratch
+/* 000000	     ( 0  0) */		.register	%g3,#scratch
+/* 000000	   3 ( 0  0) */		.file	"mpv_sparc.c"
+/* 000000	  15 ( 0  0) */		.align	8
+!
+! SUBROUTINE .L_const_seg_900000101
+! Not executable code: 16 bytes of constants in .text, read by mul_add as two doubles via ldd at offsets 0 and 8 (each pair is high word, low word).
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION	(ISSUE TIME)	(COMPLETION TIME)
+
+                                   .L_const_seg_900000101:		/* frequency 1.0 confidence 0.0 */
+/* 000000	  20 ( 0  0) */		.word	1127219200,0	! 0x43300000:00000000 = 2^52 as an IEEE-754 double
+/* 0x0008	  21 ( 0  0) */		.word	1105199103,-4194304	! 0x41DFFFFF:FFC00000 = 2147483647.0 (2^31 - 1) as an IEEE-754 double
+/* 0x0010	  22 ( 0  0) */		.align	8
+/* 0x0010	  28 ( 0  0) */		.global	mul_add
+
+!
+! ENTRY mul_add
+!
+
+                                   	.global mul_add
+                                   mul_add:		/* frequency 1.0 confidence 0.0 */
+/* 0x0010	  30 ( 0  1) */		sethi	%hi(0x1c00),%g1
+/* 0x0014	  31 ( 0  1) */		sethi	%hi(mask_cnst),%g2
+/* 0x0018	  32 ( 1  2) */		xor	%g1,-48,%g1
+/* 0x001c	  33 ( 1  2) */		add	%g2,%lo(mask_cnst),%g2
+/* 0x0020	  34 ( 2  3) */		save	%sp,%g1,%sp
+
+!
+! ENTRY .L900000149
+!
+
+                                   .L900000149:		/* frequency 1.0 confidence 0.0 */
+/* 0x0024	  36 ( 0  2) */		call	(.+0x8)	! params = 	! Result = 
+/* 0x0028	     ( 1  2) */		sethi	%hi((_GLOBAL_OFFSET_TABLE_-(.L900000149-.))),%g5
+/* 0x002c	 178 ( 2  3) */		sethi	%hi(.L_const_seg_900000101),%g3
+/* 0x0030	 179 ( 2  3) */		add	%g5,%lo((_GLOBAL_OFFSET_TABLE_-(.L900000149-.))),%g5
+/* 0x0034	 180 ( 3  4) */		add	%g3,%lo(.L_const_seg_900000101),%g3
+/* 0x0038	 181 ( 3  4) */		add	%g5,%o7,%o1
+/* 0x003c	 182 ( 4  5) */		sethi	%hi(0x80000),%g4
+/* 0x0040	 183 ( 4  6) */		ldx	[%o1+%g2],%g2
+/* 0x0044	 184 ( 4  5) */		or	%g0,%i2,%o2
+/* 0x0048	 185 ( 5  6) */		subcc	%i4,%g4,%g0
+/* 0x004c	 186 ( 5  7) */		ldx	[%o1+%g3],%o0
+/* 0x0050	 187 ( 6  7) */		or	%g0,%i0,%o7
+/* 0x0054	 188 ( 6  7) */		or	%g0,%i1,%o5
+/* 0x0058	 189 ( 6  9) */		ldd	[%g2],%f0
+/* 0x005c	 190 ( 6  7) */		bcc,pn	%icc,.L77000048	! tprob=0.50
+/* 0x0060	     ( 7  8) */		subcc	%i3,8,%g0
+/* 0x0064	 192 ( 7  8) */		bne,pn	%icc,.L900000158	! tprob=0.50
+/* 0x0068	     ( 8  9) */		subcc	%i3,16,%g0
+/* 0x006c	 194 ( 9 12) */		ldd	[%o2],%f4
+/* 0x0070	 195 (10 11) */		st	%i4,[%sp+2287]
+/* 0x0074	 196 (11 14) */		ldd	[%o0],%f8
+/* 0x0078	 197 (11 13) */		fxnor	%f0,%f4,%f4
+/* 0x007c	 198 (12 15) */		ldd	[%o2+8],%f10
+/* 0x0080	 199 (13 16) */		fitod	%f4,%f12
+/* 0x0084	 200 (13 16) */		ldd	[%o0+8],%f14
+/* 0x0088	 201 (14 17) */		ld	[%sp+2287],%f7
+/* 0x008c	 202 (14 17) */		fitod	%f5,%f4
+/* 0x0090	 203 (15 17) */		fxnor	%f0,%f10,%f10
+/* 0x0094	 204 (15 18) */		ldd	[%o2+16],%f16
+/* 0x0098	 205 (16 19) */		ldd	[%o2+24],%f18
+/* 0x009c	 206 (17 20) */		fsubd	%f14,%f4,%f4
+/* 0x00a0	 210 (17 20) */		ld	[%i1],%g2
+/* 0x00a4	 211 (18 20) */		fxnor	%f0,%f16,%f16
+/* 0x00a8	 212 (18 21) */		ld	[%i1+4],%g3
+/* 0x00ac	 213 (19 22) */		ld	[%i1+8],%g4
+/* 0x00b0	 214 (20 23) */		fitod	%f16,%f20
+/* 0x00b4	 215 (20 23) */		ld	[%i1+16],%o0
+/* 0x00b8	 216 (21 24) */		ld	[%i1+12],%g5
+/* 0x00bc	 217 (22 25) */		ld	[%i1+20],%o1
+/* 0x00c0	 218 (23 26) */		ld	[%i1+24],%o2
+/* 0x00c4	 219 (24 25) */		fmovs	%f8,%f6
+/* 0x00c8	 220 (24 27) */		ld	[%i1+28],%o3
+/* 0x00cc	 221 (26 29) */		fsubd	%f6,%f8,%f6
+/* 0x00d0	 222 (27 30) */		fsubd	%f14,%f12,%f8
+/* 0x00d4	 223 (28 31) */		fitod	%f10,%f12
+/* 0x00d8	 224 (29 32) */		fmuld	%f4,%f6,%f4
+/* 0x00dc	 225 (29 32) */		fitod	%f11,%f10
+/* 0x00e0	 226 (30 33) */		fmuld	%f8,%f6,%f8
+/* 0x00e4	 227 (31 34) */		fsubd	%f14,%f12,%f12
+/* 0x00e8	 228 (32 35) */		fdtox	%f4,%f4
+/* 0x00ec	 229 (32 33) */		std	%f4,[%sp+2271]
+/* 0x00f0	 230 (33 36) */		fdtox	%f8,%f8
+/* 0x00f4	 231 (33 34) */		std	%f8,[%sp+2279]
+/* 0x00f8	 232 (34 37) */		fmuld	%f12,%f6,%f12
+/* 0x00fc	 233 (34 37) */		fsubd	%f14,%f10,%f10
+/* 0x0100	 234 (35 38) */		fsubd	%f14,%f20,%f4
+/* 0x0104	 235 (36 39) */		fitod	%f17,%f8
+/* 0x0108	 236 (37 39) */		fxnor	%f0,%f18,%f16
+/* 0x010c	 237 (37 39) */		ldx	[%sp+2279],%o4
+/* 0x0110	 238 (37 40) */		fmuld	%f10,%f6,%f10
+/* 0x0114	 239 (38 41) */		fdtox	%f12,%f12
+/* 0x0118	 240 (38 39) */		std	%f12,[%sp+2263]
+/* 0x011c	 241 (38 41) */		fmuld	%f4,%f6,%f4
+/* 0x0120	 242 (39 42) */		fitod	%f16,%f18
+/* 0x0124	 243 (39 40) */		add	%o4,%g2,%g2
+/* 0x0128	 244 (39 40) */		st	%g2,[%i0]
+/* 0x012c	 245 (40 42) */		ldx	[%sp+2271],%o4
+/* 0x0130	 246 (40 43) */		fsubd	%f14,%f8,%f8
+/* 0x0134	 247 (40 41) */		srax	%g2,32,%o5
+/* 0x0138	 248 (41 44) */		fdtox	%f10,%f10
+/* 0x013c	 249 (41 42) */		std	%f10,[%sp+2255]
+/* 0x0140	 250 (42 45) */		fdtox	%f4,%f4
+/* 0x0144	 251 (42 43) */		std	%f4,[%sp+2247]
+/* 0x0148	 252 (42 43) */		add	%o4,%g3,%o4
+/* 0x014c	 253 (43 46) */		fitod	%f17,%f12
+/* 0x0150	 254 (43 45) */		ldx	[%sp+2263],%g2
+/* 0x0154	 255 (43 44) */		add	%o4,%o5,%g3
+/* 0x0158	 256 (43 46) */		fmuld	%f8,%f6,%f8
+/* 0x015c	 257 (44 47) */		fsubd	%f14,%f18,%f10
+/* 0x0160	 258 (44 45) */		st	%g3,[%i0+4]
+/* 0x0164	 259 (44 45) */		srax	%g3,32,%g3
+/* 0x0168	 260 (45 46) */		add	%g2,%g4,%g4
+/* 0x016c	 261 (45 47) */		ldx	[%sp+2255],%g2
+/* 0x0170	 262 (46 49) */		fsubd	%f14,%f12,%f4
+/* 0x0174	 263 (46 47) */		add	%g4,%g3,%g3
+/* 0x0178	 264 (46 48) */		ldx	[%sp+2247],%g4
+/* 0x017c	 265 (47 50) */		fmuld	%f10,%f6,%f10
+/* 0x0180	 266 (47 50) */		fdtox	%f8,%f8
+/* 0x0184	 267 (47 48) */		std	%f8,[%sp+2239]
+/* 0x0188	 268 (48 49) */		add	%g4,%o0,%g4
+/* 0x018c	 269 (48 49) */		add	%g2,%g5,%g2
+/* 0x0190	 270 (48 49) */		st	%g3,[%i0+8]
+/* 0x0194	 271 (49 52) */		fmuld	%f4,%f6,%f4
+/* 0x0198	 272 (49 50) */		srax	%g3,32,%o0
+/* 0x019c	 273 (49 51) */		ldx	[%sp+2239],%g5
+/* 0x01a0	 274 (50 53) */		fdtox	%f10,%f6
+/* 0x01a4	 275 (50 51) */		std	%f6,[%sp+2231]
+/* 0x01a8	 276 (50 51) */		add	%g2,%o0,%g2
+/* 0x01ac	 277 (51 52) */		srax	%g2,32,%g3
+/* 0x01b0	 278 (51 52) */		add	%g5,%o1,%o1
+/* 0x01b4	 279 (51 52) */		st	%g2,[%i0+12]
+/* 0x01b8	 280 (52 55) */		fdtox	%f4,%f4
+/* 0x01bc	 281 (52 53) */		std	%f4,[%sp+2223]
+/* 0x01c0	 282 (52 53) */		add	%g4,%g3,%g3
+/* 0x01c4	 283 (53 54) */		srax	%g3,32,%g4
+/* 0x01c8	 284 (53 54) */		st	%g3,[%i0+16]
+/* 0x01cc	 285 (54 56) */		ldx	[%sp+2231],%o0
+/* 0x01d0	 286 (54 55) */		add	%o1,%g4,%g4
+/* 0x01d4	 287 (55 56) */		srax	%g4,32,%g2
+/* 0x01d8	 288 (55 57) */		ldx	[%sp+2223],%g5
+/* 0x01dc	 289 (56 57) */		add	%o0,%o2,%o2
+/* 0x01e0	 290 (56 57) */		st	%g4,[%i0+20]
+/* 0x01e4	 291 (57 58) */		add	%o2,%g2,%g2
+/* 0x01e8	 292 (57 58) */		add	%g5,%o3,%g5
+/* 0x01ec	 293 (57 58) */		st	%g2,[%i0+24]
+/* 0x01f0	 294 (58 59) */		srax	%g2,32,%g3
+/* 0x01f4	 295 (59 60) */		add	%g5,%g3,%g2
+/* 0x01f8	 296 (59 60) */		st	%g2,[%i0+28]
+/* 0x01fc	 300 (60 61) */		srax	%g2,32,%o3
+/* 0x0200	 301 (61 62) */		srl	%o3,0,%i0
+/* 0x0204	     (62 64) */		ret	! Result =  %o1 %o0 %f0 %f1
+/* 0x0208	     (64 65) */		restore	%g0,%g0,%g0
+
+!
+! ENTRY .L900000158
+!
+
+                                   .L900000158:		/* frequency 1.0 confidence 0.0 */
+/* 0x020c	 308 ( 0  1) */		bne,a,pn	%icc,.L900000157	! tprob=0.50
+/* 0x0210	     ( 0  1) */		st	%i4,[%sp+2223]
+/* 0x0214	 315 ( 1  4) */		ldd	[%o2],%f4
+/* 0x0218	 316 ( 2  3) */		st	%i4,[%sp+2351]
+/* 0x021c	 317 ( 3  6) */		ldd	[%o0],%f8
+/* 0x0220	 318 ( 3  5) */		fxnor	%f0,%f4,%f4
+/* 0x0224	 319 ( 4  7) */		ldd	[%o2+8],%f10
+/* 0x0228	 320 ( 5  8) */		ldd	[%o0+8],%f14
+/* 0x022c	 321 ( 5  8) */		fitod	%f4,%f12
+/* 0x0230	 322 ( 6  9) */		ld	[%sp+2351],%f7
+/* 0x0234	 323 ( 6  8) */		fxnor	%f0,%f10,%f10
+/* 0x0238	 324 ( 7 10) */		ldd	[%o2+16],%f16
+/* 0x023c	 325 ( 7 10) */		fitod	%f5,%f4
+/* 0x0240	 326 ( 8 11) */		ldd	[%o2+24],%f18
+/* 0x0244	 330 ( 9 12) */		ldd	[%o2+32],%f20
+/* 0x0248	 331 ( 9 11) */		fxnor	%f0,%f16,%f16
+/* 0x024c	 335 (10 13) */		ld	[%i1],%g2
+/* 0x0250	 336 (10 13) */		fsubd	%f14,%f4,%f4
+/* 0x0254	 337 (11 14) */		ldd	[%o2+40],%f22
+/* 0x0258	 338 (11 14) */		fitod	%f16,%f28
+/* 0x025c	 339 (12 15) */		ld	[%i1+4],%g3
+/* 0x0260	 340 (13 16) */		ld	[%i1+8],%g4
+/* 0x0264	 341 (13 15) */		fxnor	%f0,%f22,%f22
+/* 0x0268	 342 (14 17) */		ld	[%i1+12],%g5
+/* 0x026c	 343 (15 18) */		ld	[%i1+16],%o0
+/* 0x0270	 344 (16 19) */		ldd	[%o2+48],%f24
+/* 0x0274	 345 (17 20) */		ld	[%i1+20],%o1
+/* 0x0278	 346 (17 18) */		fmovs	%f8,%f6
+/* 0x027c	 347 (18 21) */		ldd	[%o2+56],%f26
+/* 0x0280	 348 (19 22) */		ld	[%i1+24],%o2
+/* 0x0284	 349 (19 22) */		fsubd	%f6,%f8,%f6
+/* 0x0288	 350 (20 23) */		ld	[%i1+28],%o3
+/* 0x028c	 351 (20 23) */		fsubd	%f14,%f12,%f8
+/* 0x0290	 355 (21 24) */		ld	[%i1+32],%o4
+/* 0x0294	 356 (21 24) */		fitod	%f10,%f12
+/* 0x0298	 357 (22 25) */		ld	[%i1+36],%o7
+/* 0x029c	 358 (22 25) */		fitod	%f11,%f10
+/* 0x02a0	 359 (22 25) */		fmuld	%f4,%f6,%f4
+/* 0x02a4	 360 (23 26) */		ld	[%i1+40],%l1
+/* 0x02a8	 361 (23 26) */		fmuld	%f8,%f6,%f8
+/* 0x02ac	 362 (24 27) */		ld	[%i1+56],%l5
+/* 0x02b0	 363 (24 27) */		fsubd	%f14,%f12,%f12
+/* 0x02b4	 364 (25 28) */		fsubd	%f14,%f10,%f10
+/* 0x02b8	 365 (26 29) */		fdtox	%f8,%f8
+/* 0x02bc	 366 (26 27) */		std	%f8,[%sp+2343]
+/* 0x02c0	 367 (27 30) */		fitod	%f17,%f8
+/* 0x02c4	 368 (27 30) */		fmuld	%f12,%f6,%f12
+/* 0x02c8	 369 (28 31) */		fdtox	%f4,%f4
+/* 0x02cc	 370 (28 29) */		std	%f4,[%sp+2335]
+/* 0x02d0	 371 (28 31) */		fmuld	%f10,%f6,%f10
+/* 0x02d4	 372 (29 31) */		fxnor	%f0,%f18,%f16
+/* 0x02d8	 373 (30 33) */		fdtox	%f12,%f12
+/* 0x02dc	 374 (30 31) */		std	%f12,[%sp+2327]
+/* 0x02e0	 375 (31 33) */		ldx	[%sp+2343],%o5
+/* 0x02e4	 376 (31 34) */		fsubd	%f14,%f8,%f8
+/* 0x02e8	 377 (32 35) */		fsubd	%f14,%f28,%f4
+/* 0x02ec	 378 (33 36) */		fitod	%f17,%f12
+/* 0x02f0	 379 (33 34) */		add	%o5,%g2,%g2
+/* 0x02f4	 380 (33 34) */		st	%g2,[%i0]
+/* 0x02f8	 381 (34 36) */		ldx	[%sp+2335],%o5
+/* 0x02fc	 382 (34 37) */		fitod	%f16,%f18
+/* 0x0300	 383 (34 35) */		srax	%g2,32,%l0
+/* 0x0304	 384 (35 37) */		fxnor	%f0,%f20,%f16
+/* 0x0308	 385 (35 38) */		fmuld	%f8,%f6,%f20
+/* 0x030c	 386 (36 39) */		fdtox	%f10,%f10
+/* 0x0310	 387 (36 37) */		std	%f10,[%sp+2319]
+/* 0x0314	 388 (36 37) */		add	%o5,%g3,%g3
+/* 0x0318	 389 (36 39) */		fmuld	%f4,%f6,%f4
+/* 0x031c	 390 (37 40) */		fitod	%f16,%f8
+/* 0x0320	 391 (37 38) */		add	%g3,%l0,%g3
+/* 0x0324	 392 (37 38) */		st	%g3,[%i0+4]
+/* 0x0328	 393 (38 40) */		ldx	[%sp+2327],%o5
+/* 0x032c	 394 (38 41) */		fsubd	%f14,%f18,%f18
+/* 0x0330	 395 (38 39) */		srax	%g3,32,%l3
+/* 0x0334	 396 (39 41) */		ldx	[%sp+2319],%l2
+/* 0x0338	 397 (39 42) */		fdtox	%f4,%f4
+/* 0x033c	 398 (40 41) */		std	%f4,[%sp+2311]
+/* 0x0340	 399 (40 43) */		fdtox	%f20,%f20
+/* 0x0344	 400 (40 41) */		add	%o5,%g4,%g4
+/* 0x0348	 401 (41 42) */		std	%f20,[%sp+2303]
+/* 0x034c	 402 (41 44) */		fsubd	%f14,%f12,%f4
+/* 0x0350	 403 (41 42) */		add	%g4,%l3,%g4
+/* 0x0354	 404 (41 44) */		fmuld	%f18,%f6,%f18
+/* 0x0358	 405 (42 43) */		st	%g4,[%i0+8]
+/* 0x035c	 406 (42 45) */		fitod	%f17,%f16
+/* 0x0360	 407 (42 43) */		srax	%g4,32,%l4
+/* 0x0364	 408 (43 46) */		ld	[%i1+44],%l0
+/* 0x0368	 409 (43 46) */		fsubd	%f14,%f8,%f20
+/* 0x036c	 410 (43 44) */		add	%l2,%g5,%l2
+/* 0x0370	 411 (44 46) */		ldx	[%sp+2311],%g5
+/* 0x0374	 412 (44 47) */		fitod	%f22,%f8
+/* 0x0378	 413 (44 45) */		add	%l2,%l4,%l2
+/* 0x037c	 414 (44 47) */		fmuld	%f4,%f6,%f4
+/* 0x0380	 415 (45 46) */		st	%l2,[%i0+12]
+/* 0x0384	 416 (45 48) */		fsubd	%f14,%f16,%f10
+/* 0x0388	 417 (46 49) */		ld	[%i1+52],%l3
+/* 0x038c	 418 (46 49) */		fdtox	%f18,%f18
+/* 0x0390	 419 (46 47) */		add	%g5,%o0,%l4
+/* 0x0394	 420 (46 49) */		fmuld	%f20,%f6,%f12
+/* 0x0398	 421 (47 48) */		std	%f18,[%sp+2295]
+/* 0x039c	 422 (47 48) */		srax	%l2,32,%o0
+/* 0x03a0	 423 (47 50) */		fitod	%f23,%f16
+/* 0x03a4	 424 (48 51) */		ld	[%i1+48],%o5
+/* 0x03a8	 425 (48 51) */		fsubd	%f14,%f8,%f8
+/* 0x03ac	 426 (48 49) */		add	%l4,%o0,%l4
+/* 0x03b0	 427 (49 50) */		st	%l4,[%i0+16]
+/* 0x03b4	 428 (49 50) */		srax	%l4,32,%o0
+/* 0x03b8	 429 (49 51) */		fxnor	%f0,%f24,%f18
+/* 0x03bc	 430 (50 52) */		ldx	[%sp+2303],%g5
+/* 0x03c0	 431 (50 53) */		fdtox	%f4,%f4
+/* 0x03c4	 432 (51 52) */		std	%f4,[%sp+2287]
+/* 0x03c8	 433 (51 54) */		fdtox	%f12,%f12
+/* 0x03cc	 434 (51 54) */		fmuld	%f10,%f6,%f4
+/* 0x03d0	 435 (52 53) */		std	%f12,[%sp+2279]
+/* 0x03d4	 436 (52 55) */		fsubd	%f14,%f16,%f12
+/* 0x03d8	 437 (52 53) */		add	%g5,%o1,%g2
+/* 0x03dc	 438 (52 55) */		fmuld	%f8,%f6,%f8
+/* 0x03e0	 439 (53 55) */		ldx	[%sp+2295],%g5
+/* 0x03e4	 440 (53 56) */		fitod	%f18,%f10
+/* 0x03e8	 441 (53 54) */		add	%g2,%o0,%g2
+/* 0x03ec	 442 (54 55) */		st	%g2,[%i0+20]
+/* 0x03f0	 443 (54 57) */		fitod	%f19,%f16
+/* 0x03f4	 444 (54 55) */		srax	%g2,32,%o0
+/* 0x03f8	 445 (55 58) */		fdtox	%f8,%f8
+/* 0x03fc	 446 (55 56) */		std	%f8,[%sp+2263]
+/* 0x0400	 447 (55 56) */		add	%g5,%o2,%g3
+/* 0x0404	 448 (56 58) */		ldx	[%sp+2287],%g5
+/* 0x0408	 449 (56 59) */		fsubd	%f14,%f10,%f10
+/* 0x040c	 450 (56 57) */		add	%g3,%o0,%g3
+/* 0x0410	 451 (57 58) */		st	%g3,[%i0+24]
+/* 0x0414	 452 (57 60) */		fsubd	%f14,%f16,%f8
+/* 0x0418	 453 (57 58) */		srax	%g3,32,%o0
+/* 0x041c	 454 (58 61) */		fdtox	%f4,%f4
+/* 0x0420	 455 (58 59) */		std	%f4,[%sp+2271]
+/* 0x0424	 456 (58 59) */		add	%g5,%o3,%g4
+/* 0x0428	 457 (59 61) */		fxnor	%f0,%f26,%f18
+/* 0x042c	 458 (59 62) */		fmuld	%f12,%f6,%f4
+/* 0x0430	 459 (59 60) */		add	%g4,%o0,%g4
+/* 0x0434	 460 (60 61) */		st	%g4,[%i0+28]
+/* 0x0438	 461 (60 63) */		fmuld	%f10,%f6,%f10
+/* 0x043c	 462 (60 61) */		srax	%g4,32,%o0
+/* 0x0440	 463 (61 63) */		ldx	[%sp+2279],%g5
+/* 0x0444	 464 (61 64) */		fitod	%f18,%f12
+/* 0x0448	 465 (61 64) */		fmuld	%f8,%f6,%f8
+/* 0x044c	 466 (62 65) */		fdtox	%f4,%f4
+/* 0x0450	 467 (62 63) */		std	%f4,[%sp+2255]
+/* 0x0454	 468 (63 64) */		add	%g5,%o4,%l2
+/* 0x0458	 469 (63 65) */		ldx	[%sp+2271],%g5
+/* 0x045c	 470 (63 66) */		fdtox	%f10,%f16
+/* 0x0460	 471 (64 67) */		fsubd	%f14,%f12,%f4
+/* 0x0464	 472 (64 65) */		std	%f16,[%sp+2247]
+/* 0x0468	 473 (64 65) */		add	%l2,%o0,%l2
+/* 0x046c	 474 (65 68) */		fdtox	%f8,%f8
+/* 0x0470	 475 (65 66) */		std	%f8,[%sp+2239]
+/* 0x0474	 476 (65 66) */		add	%g5,%o7,%l4
+/* 0x0478	 477 (66 69) */		fitod	%f19,%f10
+/* 0x047c	 478 (66 68) */		ldx	[%sp+2263],%g5
+/* 0x0480	 479 (66 67) */		srax	%l2,32,%o0
+/* 0x0484	 480 (67 68) */		add	%l4,%o0,%l4
+/* 0x0488	 481 (67 70) */		fmuld	%f4,%f6,%f4
+/* 0x048c	 482 (67 69) */		ldx	[%sp+2255],%o0
+/* 0x0490	 483 (68 69) */		srax	%l4,32,%o1
+/* 0x0494	 484 (68 69) */		add	%g5,%l1,%l1
+/* 0x0498	 485 (68 69) */		st	%l2,[%i0+32]
+/* 0x049c	 486 (69 72) */		fsubd	%f14,%f10,%f8
+/* 0x04a0	 487 (69 71) */		ldx	[%sp+2239],%o3
+/* 0x04a4	 488 (69 70) */		add	%l1,%o1,%o1
+/* 0x04a8	 489 (70 72) */		ldx	[%sp+2247],%g5
+/* 0x04ac	 490 (70 71) */		srax	%o1,32,%o2
+/* 0x04b0	 491 (70 71) */		add	%o0,%l0,%o0
+/* 0x04b4	 492 (71 74) */		fdtox	%f4,%f4
+/* 0x04b8	 493 (71 72) */		std	%f4,[%sp+2231]
+/* 0x04bc	 494 (71 72) */		add	%o0,%o2,%o2
+/* 0x04c0	 495 (72 73) */		add	%o3,%l3,%l3
+/* 0x04c4	 496 (72 75) */		fmuld	%f8,%f6,%f4
+/* 0x04c8	 497 (72 73) */		add	%g5,%o5,%g5
+/* 0x04cc	 498 (73 74) */		srax	%o2,32,%o3
+/* 0x04d0	 499 (73 74) */		st	%l4,[%i0+36]
+/* 0x04d4	 500 (74 75) */		add	%g5,%o3,%g2
+/* 0x04d8	 501 (74 76) */		ldx	[%sp+2231],%o0
+/* 0x04dc	 502 (75 76) */		srax	%g2,32,%g3
+/* 0x04e0	 503 (75 78) */		fdtox	%f4,%f4
+/* 0x04e4	 504 (75 76) */		std	%f4,[%sp+2223]
+/* 0x04e8	 505 (76 77) */		st	%o1,[%i0+40]
+/* 0x04ec	 506 (76 77) */		add	%l3,%g3,%g3
+/* 0x04f0	 507 (76 77) */		add	%o0,%l5,%g5
+/* 0x04f4	 508 (77 78) */		st	%o2,[%i0+44]
+/* 0x04f8	 509 (77 78) */		srax	%g3,32,%g4
+/* 0x04fc	 510 (78 79) */		st	%g2,[%i0+48]
+/* 0x0500	 511 (78 79) */		add	%g5,%g4,%g4
+/* 0x0504	 512 (79 80) */		st	%g3,[%i0+52]
+/* 0x0508	 513 (79 80) */		srax	%g4,32,%g5
+/* 0x050c	 514 (80 83) */		ld	[%i1+60],%g3
+/* 0x0510	 515 (81 83) */		ldx	[%sp+2223],%g2
+/* 0x0514	 516 (82 83) */		st	%g4,[%i0+56]
+/* 0x0518	 517 (83 84) */		add	%g2,%g3,%g2
+/* 0x051c	 518 (84 85) */		add	%g2,%g5,%g2
+/* 0x0520	 519 (84 85) */		st	%g2,[%i0+60]
+/* 0x0524	 523 (85 86) */		srax	%g2,32,%o3
+/* 0x0528	 524 (86 87) */		srl	%o3,0,%i0
+/* 0x052c	     (87 89) */		ret	! Result =  %o1 %o0 %f0 %f1
+/* 0x0530	     (89 90) */		restore	%g0,%g0,%g0
+
+!
+! ENTRY .L900000157
+!
+
+                                   .L900000157:		/* frequency 1.0 confidence 0.0 */
+/* 0x0534	 532 ( 0  1) */		fmovd	%f0,%f14
+/* 0x0538	 533 ( 0  3) */		ldd	[%o0],%f8
+/* 0x053c	 539 ( 0  1) */		add	%i3,1,%g2
+/* 0x0540	 540 ( 1  4) */		ld	[%sp+2223],%f7
+/* 0x0544	 541 ( 1  2) */		srl	%g2,31,%g3
+/* 0x0548	 545 ( 1  2) */		add	%fp,-217,%g4
+/* 0x054c	 546 ( 2  3) */		add	%g2,%g3,%g2
+/* 0x0550	 547 ( 2  3) */		or	%g0,0,%g5
+/* 0x0554	 548 ( 2  5) */		ldd	[%o0+8],%f18
+/* 0x0558	 549 ( 3  4) */		fmovs	%f8,%f6
+/* 0x055c	 550 ( 3  4) */		sra	%g2,1,%o1
+/* 0x0560	 551 ( 3  4) */		or	%g0,0,%o0
+/* 0x0564	 552 ( 4  5) */		subcc	%o1,0,%g0
+/* 0x0568	 553 ( 5  6) */		or	%g0,%o1,%o3
+/* 0x056c	 554 ( 5  8) */		fsubd	%f6,%f8,%f16
+/* 0x0570	 555 ( 5  6) */		ble,pt	%icc,.L900000156	! tprob=0.50
+/* 0x0574	     ( 6  7) */		subcc	%i3,0,%g0
+/* 0x0578	 557 ( 6  7) */		sub	%o1,1,%g2
+/* 0x057c	 558 ( 7  8) */		or	%g0,0,%i0
+/* 0x0580	 559 ( 7  8) */		or	%g0,1,%g3
+/* 0x0584	 560 ( 8  9) */		subcc	%o3,10,%g0
+/* 0x0588	 561 ( 8  9) */		bl,pn	%icc,.L77000077	! tprob=0.50
+/* 0x058c	     ( 9 10) */		or	%g0,0,%o1
+/* 0x0590	 563 ( 9 12) */		ldd	[%i2+8],%f0
+/* 0x0594	 564 ( 9 10) */		sub	%o3,3,%o3
+/* 0x0598	 565 (10 13) */		ldd	[%i2],%f2
+/* 0x059c	 566 (10 11) */		or	%g0,7,%o0
+/* 0x05a0	 567 (10 11) */		or	%g0,2,%i0
+/* 0x05a4	 568 (11 13) */		fxnor	%f14,%f0,%f8
+/* 0x05a8	 569 (11 14) */		ldd	[%i2+16],%f4
+/* 0x05ac	 570 (11 12) */		or	%g0,16,%o2
+/* 0x05b0	 571 (12 14) */		fxnor	%f14,%f2,%f2
+/* 0x05b4	 572 (12 15) */		ldd	[%i2+24],%f6
+/* 0x05b8	 573 (12 13) */		or	%g0,48,%o4
+/* 0x05bc	 574 (13 16) */		fitod	%f8,%f12
+/* 0x05c0	 575 (13 14) */		or	%g0,24,%o1
+/* 0x05c4	 576 (13 14) */		or	%g0,3,%g3
+/* 0x05c8	 577 (14 17) */		fitod	%f2,%f0
+/* 0x05cc	 578 (15 18) */		fitod	%f3,%f20
+/* 0x05d0	 579 (15 18) */		ldd	[%i2+32],%f2
+/* 0x05d4	 580 (16 19) */		fitod	%f9,%f10
+/* 0x05d8	 581 (16 19) */		ldd	[%i2+40],%f8
+/* 0x05dc	 582 (17 20) */		fsubd	%f18,%f0,%f0
+/* 0x05e0	 583 (18 21) */		fsubd	%f18,%f20,%f22
+/* 0x05e4	 584 (19 22) */		fsubd	%f18,%f12,%f20
+/* 0x05e8	 585 (19 22) */		ldd	[%i2+48],%f12
+/* 0x05ec	 586 (20 23) */		fsubd	%f18,%f10,%f10
+/* 0x05f0	 587 (20 23) */		fmuld	%f0,%f16,%f0
+/* 0x05f4	 588 (21 23) */		fxnor	%f14,%f4,%f4
+/* 0x05f8	 589 (21 24) */		fmuld	%f22,%f16,%f22
+/* 0x05fc	 590 (22 24) */		fxnor	%f14,%f6,%f6
+/* 0x0600	 591 (22 25) */		fmuld	%f20,%f16,%f20
+/* 0x0604	 592 (23 26) */		fdtox	%f0,%f0
+/* 0x0608	 593 (23 24) */		std	%f0,[%fp-217]
+/* 0x060c	 594 (23 26) */		fmuld	%f10,%f16,%f10
+/* 0x0610	 595 (24 27) */		fdtox	%f22,%f22
+/* 0x0614	 596 (24 25) */		std	%f22,[%fp-209]
+/* 0x0618	 597 (25 28) */		fitod	%f5,%f0
+/* 0x061c	 598 (26 29) */		fdtox	%f10,%f10
+/* 0x0620	 599 (27 30) */		fdtox	%f20,%f20
+/* 0x0624	 600 (27 28) */		std	%f20,[%fp-201]
+/* 0x0628	 601 (28 31) */		fitod	%f4,%f4
+/* 0x062c	 602 (28 29) */		std	%f10,[%fp-193]
+/* 0x0630	 603 (29 31) */		fxnor	%f14,%f2,%f10
+/* 0x0634	 604 (30 33) */		fitod	%f7,%f2
+/* 0x0638	 605 (31 34) */		fsubd	%f18,%f0,%f0
+/* 0x063c	 606 (32 35) */		fsubd	%f18,%f4,%f4
+/* 0x0640	 607 (33 35) */		fxnor	%f14,%f8,%f8
+
+!
+! ENTRY .L900000144
+!
+
+                                   .L900000144:		/* frequency 1.0 confidence 0.0 */
+/* 0x0644	 609 ( 0  3) */		fitod	%f11,%f22
+/* 0x0648	 610 ( 0  1) */		add	%o0,3,%o0
+/* 0x064c	 611 ( 0  1) */		add	%g3,6,%g3
+/* 0x0650	 612 ( 0  3) */		fmuld	%f0,%f16,%f0
+/* 0x0654	 613 ( 1  4) */		fmuld	%f4,%f16,%f24
+/* 0x0658	 614 ( 1  2) */		subcc	%o0,%o3,%g0
+/* 0x065c	 615 ( 1  2) */		add	%i0,6,%i0
+/* 0x0660	 616 ( 1  4) */		fsubd	%f18,%f2,%f2
+/* 0x0664	 617 ( 2  5) */		fitod	%f6,%f4
+/* 0x0668	 618 ( 3  6) */		fdtox	%f0,%f0
+/* 0x066c	 619 ( 3  4) */		add	%o4,8,%i1
+/* 0x0670	 620 ( 4  7) */		ldd	[%i2+%i1],%f20
+/* 0x0674	 621 ( 4  7) */		fdtox	%f24,%f6
+/* 0x0678	 622 ( 4  5) */		add	%o2,16,%o4
+/* 0x067c	 623 ( 5  8) */		fsubd	%f18,%f4,%f4
+/* 0x0680	 624 ( 5  6) */		std	%f6,[%o4+%g4]
+/* 0x0684	 625 ( 5  6) */		add	%o1,16,%o2
+/* 0x0688	 626 ( 6  8) */		fxnor	%f14,%f12,%f6
+/* 0x068c	 627 ( 6  7) */		std	%f0,[%o2+%g4]
+/* 0x0690	 628 ( 7 10) */		fitod	%f9,%f0
+/* 0x0694	 629 ( 7 10) */		fmuld	%f2,%f16,%f2
+/* 0x0698	 630 ( 8 11) */		fmuld	%f4,%f16,%f24
+/* 0x069c	 631 ( 8 11) */		fsubd	%f18,%f22,%f12
+/* 0x06a0	 632 ( 9 12) */		fitod	%f10,%f4
+/* 0x06a4	 633 (10 13) */		fdtox	%f2,%f2
+/* 0x06a8	 634 (10 11) */		add	%i1,8,%o1
+/* 0x06ac	 635 (11 14) */		ldd	[%i2+%o1],%f22
+/* 0x06b0	 636 (11 14) */		fdtox	%f24,%f10
+/* 0x06b4	 637 (11 12) */		add	%o4,16,%i4
+/* 0x06b8	 638 (12 15) */		fsubd	%f18,%f4,%f4
+/* 0x06bc	 639 (12 13) */		std	%f10,[%i4+%g4]
+/* 0x06c0	 640 (12 13) */		add	%o2,16,%i1
+/* 0x06c4	 641 (13 15) */		fxnor	%f14,%f20,%f10
+/* 0x06c8	 642 (13 14) */		std	%f2,[%i1+%g4]
+/* 0x06cc	 643 (14 17) */		fitod	%f7,%f2
+/* 0x06d0	 644 (14 17) */		fmuld	%f12,%f16,%f12
+/* 0x06d4	 645 (15 18) */		fmuld	%f4,%f16,%f24
+/* 0x06d8	 646 (15 18) */		fsubd	%f18,%f0,%f0
+/* 0x06dc	 647 (16 19) */		fitod	%f8,%f4
+/* 0x06e0	 648 (17 20) */		fdtox	%f12,%f20
+/* 0x06e4	 649 (17 18) */		add	%o1,8,%o4
+/* 0x06e8	 650 (18 21) */		ldd	[%i2+%o4],%f12
+/* 0x06ec	 651 (18 21) */		fdtox	%f24,%f8
+/* 0x06f0	 652 (18 19) */		add	%i4,16,%o2
+/* 0x06f4	 653 (19 22) */		fsubd	%f18,%f4,%f4
+/* 0x06f8	 654 (19 20) */		std	%f8,[%o2+%g4]
+/* 0x06fc	 655 (19 20) */		add	%i1,16,%o1
+/* 0x0700	 656 (20 22) */		fxnor	%f14,%f22,%f8
+/* 0x0704	 657 (20 21) */		ble,pt	%icc,.L900000144	! tprob=0.50
+/* 0x0708	     (20 21) */		std	%f20,[%o1+%g4]
+
+!
+! ENTRY .L900000147
+!
+
+                                   .L900000147:		/* frequency 1.0 confidence 0.0 */
+/* 0x070c	 660 ( 0  3) */		fitod	%f6,%f6
+/* 0x0710	 661 ( 0  3) */		fmuld	%f4,%f16,%f24
+/* 0x0714	 662 ( 0  1) */		add	%i4,32,%l4
+/* 0x0718	 663 ( 1  4) */		fsubd	%f18,%f2,%f2
+/* 0x071c	 664 ( 1  4) */		fmuld	%f0,%f16,%f22
+/* 0x0720	 665 ( 1  2) */		add	%i1,32,%l3
+/* 0x0724	 666 ( 2  5) */		fitod	%f10,%f28
+/* 0x0728	 667 ( 2  3) */		sra	%o0,0,%o2
+/* 0x072c	 668 ( 2  3) */		add	%i4,48,%l2
+/* 0x0730	 669 ( 3  6) */		fsubd	%f18,%f6,%f4
+/* 0x0734	 670 ( 3  4) */		add	%i1,48,%l1
+/* 0x0738	 671 ( 3  4) */		add	%i4,64,%l0
+/* 0x073c	 672 ( 4  7) */		fitod	%f11,%f26
+/* 0x0740	 673 ( 4  5) */		sllx	%o2,3,%o1
+/* 0x0744	 674 ( 4  5) */		add	%i1,64,%i5
+/* 0x0748	 675 ( 5  8) */		fitod	%f8,%f6
+/* 0x074c	 676 ( 5  6) */		add	%i4,80,%i4
+/* 0x0750	 677 ( 5  6) */		add	%i1,80,%i1
+/* 0x0754	 678 ( 6  8) */		fxnor	%f14,%f12,%f0
+/* 0x0758	 679 ( 6  9) */		fmuld	%f4,%f16,%f20
+/* 0x075c	 680 ( 6  7) */		add	%i4,16,%o4
+/* 0x0760	 681 ( 7 10) */		fitod	%f9,%f4
+/* 0x0764	 682 ( 7 10) */		fmuld	%f2,%f16,%f12
+/* 0x0768	 683 ( 7  8) */		add	%i1,16,%o3
+/* 0x076c	 684 ( 8 11) */		fsubd	%f18,%f28,%f10
+/* 0x0770	 685 ( 8  9) */		subcc	%o0,%g2,%g0
+/* 0x0774	 686 ( 8  9) */		add	%g3,12,%g3
+/* 0x0778	 687 ( 9 12) */		fitod	%f0,%f2
+/* 0x077c	 688 (10 13) */		fsubd	%f18,%f26,%f8
+/* 0x0780	 689 (11 14) */		fitod	%f1,%f0
+/* 0x0784	 690 (11 14) */		fmuld	%f10,%f16,%f10
+/* 0x0788	 691 (12 15) */		fdtox	%f24,%f24
+/* 0x078c	 692 (12 13) */		std	%f24,[%l4+%g4]
+/* 0x0790	 693 (12 13) */		add	%i0,12,%i0
+/* 0x0794	 694 (13 16) */		fsubd	%f18,%f6,%f6
+/* 0x0798	 695 (13 16) */		fmuld	%f8,%f16,%f8
+/* 0x079c	 696 (14 17) */		fdtox	%f22,%f22
+/* 0x07a0	 697 (14 15) */		std	%f22,[%l3+%g4]
+/* 0x07a4	 698 (15 18) */		fsubd	%f18,%f4,%f4
+/* 0x07a8	 699 (16 19) */		fdtox	%f20,%f20
+/* 0x07ac	 700 (16 17) */		std	%f20,[%l2+%g4]
+/* 0x07b0	 701 (16 19) */		fmuld	%f6,%f16,%f6
+/* 0x07b4	 702 (17 20) */		fsubd	%f18,%f2,%f2
+/* 0x07b8	 703 (18 21) */		fsubd	%f18,%f0,%f0
+/* 0x07bc	 704 (18 21) */		fmuld	%f4,%f16,%f4
+/* 0x07c0	 705 (19 22) */		fdtox	%f12,%f12
+/* 0x07c4	 706 (19 20) */		std	%f12,[%l1+%g4]
+/* 0x07c8	 707 (20 23) */		fdtox	%f10,%f10
+/* 0x07cc	 708 (20 21) */		std	%f10,[%l0+%g4]
+/* 0x07d0	 709 (20 23) */		fmuld	%f2,%f16,%f2
+/* 0x07d4	 710 (21 24) */		fdtox	%f8,%f8
+/* 0x07d8	 711 (21 22) */		std	%f8,[%i5+%g4]
+/* 0x07dc	 712 (21 24) */		fmuld	%f0,%f16,%f0
+/* 0x07e0	 713 (22 25) */		fdtox	%f6,%f6
+/* 0x07e4	 714 (22 23) */		std	%f6,[%i4+%g4]
+/* 0x07e8	 715 (23 26) */		fdtox	%f4,%f4
+/* 0x07ec	 716 (23 24) */		std	%f4,[%i1+%g4]
+/* 0x07f0	 717 (24 27) */		fdtox	%f2,%f2
+/* 0x07f4	 718 (24 25) */		std	%f2,[%o4+%g4]
+/* 0x07f8	 719 (25 28) */		fdtox	%f0,%f0
+/* 0x07fc	 720 (25 26) */		bg,pn	%icc,.L77000043	! tprob=0.50
+/* 0x0800	     (25 26) */		std	%f0,[%o3+%g4]
+
+!
+! ENTRY .L77000077
+!
+
+                                   .L77000077:		/* frequency 1.0 confidence 0.0 */
+/* 0x0804	 723 ( 0  3) */		ldd	[%i2+%o1],%f0
+
+!
+! ENTRY .L900000155
+!
+
+                                   .L900000155:		/* frequency 1.0 confidence 0.0 */
+/* 0x0808	 725 ( 0  2) */		fxnor	%f14,%f0,%f0
+/* 0x080c	 726 ( 0  1) */		sra	%i0,0,%o1
+/* 0x0810	 727 ( 0  1) */		add	%o0,1,%o0
+/* 0x0814	 728 ( 1  2) */		sllx	%o1,3,%i4
+/* 0x0818	 729 ( 1  2) */		add	%i0,2,%i0
+/* 0x081c	 730 ( 2  5) */		fitod	%f0,%f2
+/* 0x0820	 731 ( 2  3) */		sra	%g3,0,%o1
+/* 0x0824	 732 ( 2  3) */		add	%g3,2,%g3
+/* 0x0828	 733 ( 3  6) */		fitod	%f1,%f0
+/* 0x082c	 734 ( 3  4) */		sllx	%o1,3,%i1
+/* 0x0830	 735 ( 3  4) */		subcc	%o0,%g2,%g0
+/* 0x0834	 736 ( 4  5) */		sra	%o0,0,%o2
+/* 0x0838	 737 ( 5  8) */		fsubd	%f18,%f2,%f2
+/* 0x083c	 738 ( 5  6) */		sllx	%o2,3,%o1
+/* 0x0840	 739 ( 6  9) */		fsubd	%f18,%f0,%f0
+/* 0x0844	 740 ( 8 11) */		fmuld	%f2,%f16,%f2
+/* 0x0848	 741 ( 9 12) */		fmuld	%f0,%f16,%f0
+/* 0x084c	 742 (11 14) */		fdtox	%f2,%f2
+/* 0x0850	 743 (11 12) */		std	%f2,[%i4+%g4]
+/* 0x0854	 744 (12 15) */		fdtox	%f0,%f0
+/* 0x0858	 745 (12 13) */		std	%f0,[%i1+%g4]
+/* 0x085c	 746 (12 13) */		ble,a,pt	%icc,.L900000155	! tprob=0.50
+/* 0x0860	     (14 17) */		ldd	[%i2+%o1],%f0
+
+!
+! ENTRY .L77000043
+!
+
+                                   .L77000043:		/* frequency 1.0 confidence 0.0 */
+/* 0x0864	 754 ( 0  1) */		subcc	%i3,0,%g0
+
+!
+! ENTRY .L900000156
+!
+
+                                   .L900000156:		/* frequency 1.0 confidence 0.0 */
+/* 0x0868	 756 ( 0  1) */		ble,a,pt	%icc,.L77000061	! tprob=0.50
+/* 0x086c	     ( 0  1) */		or	%g0,%g5,%o3
+/* 0x0870	 761 ( 0  2) */		ldx	[%fp-209],%i1
+/* 0x0874	 762 ( 1  2) */		sub	%i3,1,%g3
+/* 0x0878	 763 ( 1  2) */		or	%g0,0,%i0
+/* 0x087c	 764 ( 2  3) */		subcc	%i3,5,%g0
+/* 0x0880	 765 ( 2  3) */		bl,pn	%icc,.L77000078	! tprob=0.50
+/* 0x0884	     ( 2  4) */		ldx	[%fp-217],%i2
+/* 0x0888	 767 ( 3  6) */		ld	[%o5],%i3
+/* 0x088c	 768 ( 3  4) */		or	%g0,8,%g2
+/* 0x0890	 769 ( 3  4) */		or	%g0,16,%o4
+/* 0x0894	 770 ( 4  5) */		sub	%g3,1,%o3
+/* 0x0898	 771 ( 4  5) */		or	%g0,3,%i0
+/* 0x089c	 772 ( 5  6) */		add	%i2,%i3,%o1
+/* 0x08a0	 773 ( 5  8) */		ld	[%o5+4],%i2
+/* 0x08a4	 774 ( 6  7) */		st	%o1,[%o7]
+/* 0x08a8	 775 ( 6  7) */		srax	%o1,32,%o1
+/* 0x08ac	 776 ( 7  9) */		ldx	[%fp-201],%o2
+/* 0x08b0	 777 ( 7  8) */		add	%i1,%i2,%o0
+/* 0x08b4	 778 ( 7  8) */		or	%g0,%o1,%i1
+/* 0x08b8	 779 ( 8 11) */		ld	[%o5+8],%o1
+/* 0x08bc	 780 ( 8  9) */		add	%o0,%i1,%o0
+/* 0x08c0	 781 ( 9 10) */		st	%o0,[%o7+4]
+/* 0x08c4	 782 ( 9 10) */		srax	%o0,32,%o0
+
+!
+! ENTRY .L900000140
+!
+
+                                   .L900000140:		/* frequency 1.0 confidence 0.0 */
+/* 0x08c8	 784 ( 0  1) */		add	%g2,4,%i1
+/* 0x08cc	 785 ( 0  1) */		add	%o4,8,%o4
+/* 0x08d0	 786 ( 1  3) */		ldx	[%o4+%g4],%i2
+/* 0x08d4	 787 ( 1  2) */		sra	%o0,0,%g5
+/* 0x08d8	 788 ( 1  2) */		add	%o2,%o1,%o1
+/* 0x08dc	 789 ( 2  5) */		ld	[%o5+%i1],%o0
+/* 0x08e0	 790 ( 2  3) */		add	%o1,%g5,%o1
+/* 0x08e4	 791 ( 2  3) */		add	%i0,2,%i0
+/* 0x08e8	 792 ( 3  4) */		st	%o1,[%o7+%g2]
+/* 0x08ec	 793 ( 3  4) */		srax	%o1,32,%g5
+/* 0x08f0	 794 ( 3  4) */		subcc	%i0,%o3,%g0
+/* 0x08f4	 795 ( 4  5) */		add	%g2,8,%g2
+/* 0x08f8	 796 ( 4  5) */		add	%o4,8,%o4
+/* 0x08fc	 797 ( 5  7) */		ldx	[%o4+%g4],%o2
+/* 0x0900	 798 ( 5  6) */		add	%i2,%o0,%o0
+/* 0x0904	 799 ( 6  9) */		ld	[%o5+%g2],%o1
+/* 0x0908	 800 ( 6  7) */		add	%o0,%g5,%o0
+/* 0x090c	 801 ( 7  8) */		st	%o0,[%o7+%i1]
+/* 0x0910	 802 ( 7  8) */		ble,pt	%icc,.L900000140	! tprob=0.50
+/* 0x0914	     ( 7  8) */		srax	%o0,32,%o0
+
+!
+! ENTRY .L900000143
+!
+
+                                   .L900000143:		/* frequency 1.0 confidence 0.0 */
+/* 0x0918	 805 ( 0  1) */		sra	%o0,0,%o3
+/* 0x091c	 806 ( 0  1) */		add	%o2,%o1,%o0
+/* 0x0920	 807 ( 1  2) */		add	%o0,%o3,%o0
+/* 0x0924	 808 ( 1  2) */		st	%o0,[%o7+%g2]
+/* 0x0928	 809 ( 1  2) */		subcc	%i0,%g3,%g0
+/* 0x092c	 810 ( 2  3) */		srax	%o0,32,%g5
+/* 0x0930	 811 ( 2  3) */		bg,a,pn	%icc,.L77000061	! tprob=0.50
+/* 0x0934	     ( 3  4) */		or	%g0,%g5,%o3
+
+!
+! ENTRY .L77000078
+!
+
+                                   .L77000078:		/* frequency 1.0 confidence 0.0 */
+/* 0x0938	 814 ( 0  1) */		sra	%i0,0,%o0
+
+!
+! ENTRY .L900000154
+!
+
+                                   .L900000154:		/* frequency 1.0 confidence 0.0 */
+/* 0x093c	 816 ( 0  1) */		sllx	%o0,2,%g2
+/* 0x0940	 817 ( 0  1) */		add	%i0,1,%i0
+/* 0x0944	 818 ( 1  2) */		sllx	%o0,3,%o4
+/* 0x0948	 819 ( 1  4) */		ld	[%o5+%g2],%o2
+/* 0x094c	 820 ( 1  2) */		subcc	%i0,%g3,%g0
+/* 0x0950	 821 ( 2  4) */		ldx	[%o4+%g4],%o0
+/* 0x0954	 822 ( 2  3) */		sra	%g5,0,%o1
+/* 0x0958	 823 ( 4  5) */		add	%o0,%o2,%o0
+/* 0x095c	 824 ( 5  6) */		add	%o0,%o1,%o0
+/* 0x0960	 825 ( 5  6) */		st	%o0,[%o7+%g2]
+/* 0x0964	 826 ( 6  7) */		srax	%o0,32,%g5
+/* 0x0968	 827 ( 6  7) */		ble,pt	%icc,.L900000154	! tprob=0.50
+/* 0x096c	     ( 7  8) */		sra	%i0,0,%o0
+
+!
+! ENTRY .L77000047
+!
+
+                                   .L77000047:		/* frequency 1.0 confidence 0.0 */
+/* 0x0970	 834 ( 0  1) */		or	%g0,%g5,%o3
+
+!
+! ENTRY .L77000061
+!
+
+                                  .L77000061:		/* frequency 1.0 confidence 0.0 */
+
+/* 0x0974	 835 ( 1  2) */		srl	%o3,0,%i0
+/* 0x0978	     ( 2  4) */		ret	! Result =  %o1 %o0 %f0 %f1
+/* 0x097c	     ( 4  5) */		restore	%g0,%g0,%g0
+
+!
+! ENTRY .L77000048
+! Fully unrolled 8-word pass. %i4 is split into hi = %i4 >> 19 and lo = %i4 & 0x7ffff; each 32-bit input taken from [%o2..%o2+31] is turned into a double, multiplied by hi and by lo, converted with fdtox, and [%i0+4k] = low 32 bits of (lo_prod + (hi_prod << 19) + [%i1+4k] + carry). The last carry is returned in %i0.
+! NOTE(review): %icc, %f0, %o0 and %o2 are set up before this chunk; [%o0] and [%o0+8] look like int-to-double conversion constants -- confirm against the routine prologue.
+                                   .L77000048:		/* frequency 1.0 confidence 0.0 */
+/* 0x0980	 844 ( 0  1) */		bne,pn	%icc,.L77000050	! tprob=0.50
+/* 0x0984	     ( 0  1) */		sethi	%hi(0xfff80000),%g2	! delay slot (not annulled): mask of bits 19..31
+/* 0x0988	 854 ( 0  3) */		ldd	[%o2],%f4
+/* 0x098c	 855 ( 1  4) */		ldd	[%o0],%f6
+/* 0x0990	 856 ( 1  2) */		srl	%i4,19,%g3	! hi = %i4 >> 19
+/* 0x0994	 857 ( 1  2) */		andn	%i4,%g2,%g2	! lo = %i4 & 0x7ffff
+/* 0x0998	 858 ( 2  3) */		st	%g3,[%sp+2351]	! spill hi so it can be reloaded into %f9
+/* 0x099c	 859 ( 2  4) */		fxnor	%f0,%f4,%f4
+/* 0x09a0	 860 ( 3  4) */		st	%g2,[%sp+2355]	! spill lo so it can be reloaded into %f15
+/* 0x09a4	 861 ( 4  7) */		ldd	[%o2+8],%f12
+/* 0x09a8	 862 ( 4  7) */		fitod	%f4,%f10
+/* 0x09ac	 863 ( 5  8) */		ldd	[%o0+8],%f16
+/* 0x09b0	 864 ( 5  8) */		fitod	%f5,%f4
+/* 0x09b4	 865 ( 6  9) */		ldd	[%o2+16],%f18
+/* 0x09b8	 866 ( 6  8) */		fxnor	%f0,%f12,%f12
+/* 0x09bc	 867 ( 7 10) */		ld	[%sp+2351],%f9
+/* 0x09c0	 868 ( 7 10) */		fsubd	%f16,%f10,%f10
+/* 0x09c4	 869 ( 8 11) */		ld	[%sp+2355],%f15
+/* 0x09c8	 870 ( 8 11) */		fitod	%f12,%f22
+/* 0x09cc	 871 ( 9 12) */		ldd	[%o2+24],%f20
+/* 0x09d0	 872 ( 9 12) */		fitod	%f13,%f12
+/* 0x09d4	 876 (10 13) */		ld	[%i1],%g2
+/* 0x09d8	 877 (10 13) */		fsubd	%f16,%f4,%f4
+/* 0x09dc	 878 (11 14) */		ld	[%i1+4],%g3
+/* 0x09e0	 879 (11 14) */		fsubd	%f16,%f22,%f22
+/* 0x09e4	 880 (12 15) */		ld	[%i1+8],%g4
+/* 0x09e8	 881 (12 14) */		fxnor	%f0,%f18,%f18
+/* 0x09ec	 882 (13 16) */		ld	[%i1+12],%g5
+/* 0x09f0	 883 (13 16) */		fsubd	%f16,%f12,%f12
+/* 0x09f4	 884 (14 17) */		ld	[%i1+16],%o0
+/* 0x09f8	 885 (14 17) */		fitod	%f18,%f26
+/* 0x09fc	 886 (15 18) */		ld	[%i1+20],%o1
+/* 0x0a00	 887 (15 17) */		fxnor	%f0,%f20,%f20
+/* 0x0a04	 888 (16 19) */		ld	[%i1+24],%o2
+/* 0x0a08	 889 (17 20) */		ld	[%i1+28],%o3
+/* 0x0a0c	 890 (19 20) */		fmovs	%f6,%f8	! upper word of the [%o0] constant over the spilled hi
+/* 0x0a10	 891 (20 21) */		fmovs	%f6,%f14	! upper word of the [%o0] constant over the spilled lo
+/* 0x0a14	 892 (22 25) */		fsubd	%f8,%f6,%f8	! %f8 = hi multiplier as a double (presumably; depends on [%o0])
+/* 0x0a18	 893 (23 26) */		fsubd	%f14,%f6,%f6	! %f6 = lo multiplier as a double (presumably; depends on [%o0])
+/* 0x0a1c	 894 (25 28) */		fmuld	%f10,%f8,%f14
+/* 0x0a20	 895 (26 29) */		fmuld	%f10,%f6,%f10
+/* 0x0a24	 896 (27 30) */		fmuld	%f4,%f8,%f24
+/* 0x0a28	 897 (28 31) */		fdtox	%f14,%f14
+/* 0x0a2c	 898 (28 29) */		std	%f14,[%sp+2335]
+/* 0x0a30	 899 (28 31) */		fmuld	%f22,%f8,%f28
+/* 0x0a34	 900 (29 32) */		fitod	%f19,%f14
+/* 0x0a38	 901 (29 32) */		fmuld	%f22,%f6,%f18
+/* 0x0a3c	 902 (30 33) */		fdtox	%f10,%f10
+/* 0x0a40	 903 (30 31) */		std	%f10,[%sp+2343]
+/* 0x0a44	 904 (30 33) */		fmuld	%f4,%f6,%f4
+/* 0x0a48	 905 (31 34) */		fmuld	%f12,%f8,%f22
+/* 0x0a4c	 906 (32 35) */		fdtox	%f18,%f18
+/* 0x0a50	 907 (32 33) */		std	%f18,[%sp+2311]
+/* 0x0a54	 908 (32 35) */		fmuld	%f12,%f6,%f10
+/* 0x0a58	 909 (33 35) */		ldx	[%sp+2335],%o4
+/* 0x0a5c	 910 (33 36) */		fdtox	%f24,%f12
+/* 0x0a60	 911 (34 35) */		std	%f12,[%sp+2319]
+/* 0x0a64	 912 (34 37) */		fsubd	%f16,%f26,%f12
+/* 0x0a68	 913 (35 37) */		ldx	[%sp+2343],%o5
+/* 0x0a6c	 914 (35 36) */		sllx	%o4,19,%o4	! hi product << 19
+/* 0x0a70	 915 (35 38) */		fdtox	%f4,%f4
+/* 0x0a74	 916 (36 37) */		std	%f4,[%sp+2327]
+/* 0x0a78	 917 (36 39) */		fdtox	%f28,%f24
+/* 0x0a7c	 918 (37 38) */		std	%f24,[%sp+2303]
+/* 0x0a80	 919 (37 40) */		fitod	%f20,%f4
+/* 0x0a84	 920 (37 38) */		add	%o5,%o4,%o4	! lo product + shifted hi product
+/* 0x0a88	 921 (37 40) */		fmuld	%f12,%f8,%f24
+/* 0x0a8c	 922 (38 40) */		ldx	[%sp+2319],%o7
+/* 0x0a90	 923 (38 41) */		fsubd	%f16,%f14,%f14
+/* 0x0a94	 924 (38 39) */		add	%o4,%g2,%o4	! + word loaded from [%i1]
+/* 0x0a98	 925 (38 41) */		fmuld	%f12,%f6,%f12
+/* 0x0a9c	 926 (39 41) */		ldx	[%sp+2327],%o5
+/* 0x0aa0	 927 (39 42) */		fitod	%f21,%f18
+/* 0x0aa4	 928 (40 41) */		st	%o4,[%i0]	! low 32 bits are output word 0
+/* 0x0aa8	 929 (40 41) */		sllx	%o7,19,%o7
+/* 0x0aac	 930 (40 43) */		fdtox	%f22,%f20
+/* 0x0ab0	 931 (41 42) */		std	%f20,[%sp+2287]
+/* 0x0ab4	 932 (41 44) */		fdtox	%f10,%f10
+/* 0x0ab8	 933 (41 42) */		add	%o5,%o7,%o5
+/* 0x0abc	 934 (41 44) */		fmuld	%f14,%f8,%f20
+/* 0x0ac0	 935 (42 43) */		std	%f10,[%sp+2295]
+/* 0x0ac4	 936 (42 43) */		srlx	%o4,32,%o7	! carry out of word 0
+/* 0x0ac8	 937 (42 45) */		fsubd	%f16,%f4,%f4
+/* 0x0acc	 938 (42 45) */		fmuld	%f14,%f6,%f14
+/* 0x0ad0	 939 (43 45) */		ldx	[%sp+2311],%g2
+/* 0x0ad4	 940 (43 46) */		fdtox	%f24,%f10
+/* 0x0ad8	 941 (43 44) */		add	%o5,%g3,%g3
+/* 0x0adc	 942 (44 45) */		std	%f10,[%sp+2271]
+/* 0x0ae0	 943 (44 45) */		add	%g3,%o7,%g3
+/* 0x0ae4	 944 (44 47) */		fdtox	%f12,%f12
+/* 0x0ae8	 945 (45 47) */		ldx	[%sp+2303],%l0
+/* 0x0aec	 946 (45 48) */		fsubd	%f16,%f18,%f10
+/* 0x0af0	 947 (45 48) */		fmuld	%f4,%f8,%f16
+/* 0x0af4	 948 (46 47) */		std	%f12,[%sp+2279]
+/* 0x0af8	 949 (46 49) */		fdtox	%f20,%f12
+/* 0x0afc	 950 (46 49) */		fmuld	%f4,%f6,%f4
+/* 0x0b00	 951 (47 48) */		std	%f12,[%sp+2255]
+/* 0x0b04	 952 (47 48) */		sllx	%l0,19,%l0
+/* 0x0b08	 953 (47 50) */		fdtox	%f14,%f12
+/* 0x0b0c	 954 (48 50) */		ldx	[%sp+2287],%o5
+/* 0x0b10	 955 (48 49) */		add	%g2,%l0,%g2
+/* 0x0b14	 956 (48 51) */		fmuld	%f10,%f8,%f8
+/* 0x0b18	 957 (49 51) */		ldx	[%sp+2295],%l1
+/* 0x0b1c	 958 (49 50) */		srlx	%g3,32,%l0
+/* 0x0b20	 959 (49 50) */		add	%g2,%g4,%g4
+/* 0x0b24	 960 (49 52) */		fmuld	%f10,%f6,%f6
+/* 0x0b28	 961 (50 51) */		std	%f12,[%sp+2263]
+/* 0x0b2c	 962 (50 51) */		sllx	%o5,19,%g2
+/* 0x0b30	 963 (50 51) */		add	%g4,%l0,%g4
+/* 0x0b34	 964 (51 53) */		ldx	[%sp+2279],%l0
+/* 0x0b38	 965 (51 52) */		srlx	%g4,32,%o5
+/* 0x0b3c	 966 (51 52) */		add	%l1,%g2,%g2
+/* 0x0b40	 967 (52 53) */		st	%g3,[%i0+4]
+/* 0x0b44	 968 (52 53) */		add	%g2,%g5,%g2
+/* 0x0b48	 969 (52 55) */		fdtox	%f16,%f10
+/* 0x0b4c	 970 (53 55) */		ldx	[%sp+2271],%o7
+/* 0x0b50	 971 (53 54) */		add	%g2,%o5,%g2
+/* 0x0b54	 972 (53 56) */		fdtox	%f4,%f4
+/* 0x0b58	 973 (54 55) */		std	%f10,[%sp+2239]
+/* 0x0b5c	 974 (55 56) */		sllx	%o7,19,%o7
+/* 0x0b60	 975 (55 56) */		std	%f4,[%sp+2247]
+/* 0x0b64	 976 (55 58) */		fdtox	%f8,%f4
+/* 0x0b68	 977 (56 57) */		add	%l0,%o7,%o7
+/* 0x0b6c	 978 (56 58) */		ldx	[%sp+2263],%o5
+/* 0x0b70	 979 (57 58) */		add	%o7,%o0,%o0
+/* 0x0b74	 980 (57 58) */		std	%f4,[%sp+2223]
+/* 0x0b78	 981 (57 60) */		fdtox	%f6,%f4
+/* 0x0b7c	 982 (58 60) */		ldx	[%sp+2255],%g5
+/* 0x0b80	 983 (58 59) */		srlx	%g2,32,%o7
+/* 0x0b84	 984 (59 60) */		std	%f4,[%sp+2231]
+/* 0x0b88	 985 (59 60) */		add	%o0,%o7,%o0
+/* 0x0b8c	 986 (60 61) */		sllx	%g5,19,%g5
+/* 0x0b90	 987 (60 62) */		ldx	[%sp+2247],%l1
+/* 0x0b94	 988 (61 62) */		add	%o5,%g5,%g5
+/* 0x0b98	 989 (61 62) */		st	%g2,[%i0+12]
+/* 0x0b9c	 990 (62 64) */		ldx	[%sp+2239],%l0
+/* 0x0ba0	 991 (62 63) */		srlx	%o0,32,%o4
+/* 0x0ba4	 992 (62 63) */		add	%g5,%o1,%o1
+/* 0x0ba8	 993 (63 64) */		add	%o1,%o4,%o1
+/* 0x0bac	 994 (63 65) */		ldx	[%sp+2223],%o7
+/* 0x0bb0	 995 (64 65) */		sllx	%l0,19,%g3
+/* 0x0bb4	 996 (64 66) */		ldx	[%sp+2231],%o5
+/* 0x0bb8	 997 (65 66) */		add	%l1,%g3,%o4
+/* 0x0bbc	 998 (65 66) */		st	%o0,[%i0+16]
+/* 0x0bc0	 999 (66 67) */		add	%o4,%o2,%o2
+/* 0x0bc4	1000 (66 67) */		st	%o1,[%i0+20]
+/* 0x0bc8	1001 (67 68) */		srlx	%o1,32,%o4
+/* 0x0bcc	1002 (67 68) */		st	%g4,[%i0+8]
+/* 0x0bd0	1003 (68 69) */		sllx	%o7,19,%g2
+/* 0x0bd4	1004 (68 69) */		add	%o2,%o4,%o4
+/* 0x0bd8	1005 (68 69) */		st	%o4,[%i0+24]
+/* 0x0bdc	1006 (69 70) */		add	%o5,%g2,%g2
+/* 0x0be0	1007 (70 71) */		srlx	%o4,32,%g3
+/* 0x0be4	1008 (70 71) */		add	%g2,%o3,%g2
+/* 0x0be8	1009 (71 72) */		add	%g2,%g3,%g2
+/* 0x0bec	1010 (71 72) */		st	%g2,[%i0+28]
+/* 0x0bf0	1014 (72 73) */		srlx	%g2,32,%o3	! carry out of the last word
+/* 0x0bf4	1015 (73 74) */		srl	%o3,0,%i0	! zero-extended to 32 bits as the result
+/* 0x0bf8	     (74 76) */		ret	! Result =  %o1 %o0 %f0 %f1
+/* 0x0bfc	     (76 77) */		restore	%g0,%g0,%g0
+
+!
+! ENTRY .L77000050
+! If %i3 != 16 continue at .L77000073; otherwise run a fully unrolled 16-word pass with the same scheme as the 8-word case: hi = %i4 >> 19 (in %f6), lo = %i4 & 0x7ffff (in %f12), and [%i0+4k] = low 32 bits of (lo_prod + (hi_prod << 19) + [%i1+4k] + carry) for k = 0..15. The last carry is returned in %i0.
+! NOTE(review): %f0, %o0 and %o2 come from code before this chunk; [%o0] and [%o0+8] look like int-to-double conversion constants -- confirm.
+                                   .L77000050:		/* frequency 1.0 confidence 0.0 */
+/* 0x0c00	1022 ( 0  1) */		subcc	%i3,16,%g0
+/* 0x0c04	1023 ( 0  1) */		bne,pn	%icc,.L77000073	! tprob=0.50
+/* 0x0c08	     ( 0  1) */		sethi	%hi(0xfff80000),%g2	! delay slot (not annulled): %g2 is also live on the .L77000073 path
+/* 0x0c0c	1034 ( 1  4) */		ldd	[%o2],%f4
+/* 0x0c10	1035 ( 1  2) */		andn	%i4,%g2,%g2	! lo = %i4 & 0x7ffff
+/* 0x0c14	1036 ( 2  3) */		st	%g2,[%sp+2483]
+/* 0x0c18	1037 ( 2  3) */		srl	%i4,19,%g2	! hi = %i4 >> 19
+/* 0x0c1c	1038 ( 3  4) */		st	%g2,[%sp+2479]
+/* 0x0c20	1039 ( 3  5) */		fxnor	%f0,%f4,%f4
+/* 0x0c24	1040 ( 4  7) */		ldd	[%o0],%f8
+/* 0x0c28	1041 ( 5  8) */		fitod	%f4,%f10
+/* 0x0c2c	1042 ( 5  8) */		ldd	[%o0+8],%f16
+/* 0x0c30	1043 ( 6  9) */		ldd	[%o2+8],%f14
+/* 0x0c34	1044 ( 6  9) */		fitod	%f5,%f4
+/* 0x0c38	1045 ( 7 10) */		ld	[%sp+2483],%f13	! lo into the low word of %f12
+/* 0x0c3c	1046 ( 8 11) */		ld	[%sp+2479],%f7	! hi into the low word of %f6
+/* 0x0c40	1047 ( 8 11) */		fsubd	%f16,%f10,%f10
+/* 0x0c44	1048 ( 9 11) */		fxnor	%f0,%f14,%f14
+/* 0x0c48	1049 (10 13) */		fsubd	%f16,%f4,%f4
+/* 0x0c4c	1050 (14 15) */		fmovs	%f8,%f12
+/* 0x0c50	1051 (15 16) */		fmovs	%f8,%f6
+/* 0x0c54	1052 (17 20) */		fsubd	%f12,%f8,%f12	! %f12 = lo multiplier as a double (presumably; depends on [%o0])
+/* 0x0c58	1053 (18 21) */		fsubd	%f6,%f8,%f6	! %f6 = hi multiplier as a double (presumably; depends on [%o0])
+/* 0x0c5c	1054 (19 22) */		fitod	%f14,%f8
+/* 0x0c60	1055 (20 23) */		fmuld	%f10,%f12,%f18
+/* 0x0c64	1056 (20 23) */		fitod	%f15,%f14
+/* 0x0c68	1057 (21 24) */		fmuld	%f10,%f6,%f10
+/* 0x0c6c	1058 (22 25) */		fsubd	%f16,%f8,%f8
+/* 0x0c70	1059 (22 25) */		fmuld	%f4,%f12,%f20
+/* 0x0c74	1060 (23 26) */		fmuld	%f4,%f6,%f4
+/* 0x0c78	1061 (23 26) */		fsubd	%f16,%f14,%f14
+/* 0x0c7c	1062 (24 27) */		fdtox	%f10,%f10
+/* 0x0c80	1063 (24 25) */		std	%f10,[%sp+2463]
+/* 0x0c84	1064 (25 28) */		fmuld	%f8,%f12,%f10
+/* 0x0c88	1065 (25 28) */		fdtox	%f18,%f18
+/* 0x0c8c	1066 (25 26) */		std	%f18,[%sp+2471]
+/* 0x0c90	1067 (26 29) */		fmuld	%f8,%f6,%f8
+/* 0x0c94	1068 (26 29) */		fdtox	%f4,%f4
+/* 0x0c98	1069 (26 27) */		std	%f4,[%sp+2447]
+/* 0x0c9c	1070 (27 30) */		fmuld	%f14,%f12,%f4
+/* 0x0ca0	1071 (27 30) */		fdtox	%f20,%f18
+/* 0x0ca4	1072 (27 28) */		std	%f18,[%sp+2455]
+/* 0x0ca8	1073 (28 31) */		fdtox	%f10,%f10
+/* 0x0cac	1074 (28 29) */		std	%f10,[%sp+2439]
+/* 0x0cb0	1075 (28 31) */		fmuld	%f14,%f6,%f14
+/* 0x0cb4	1076 (29 32) */		fdtox	%f8,%f8
+/* 0x0cb8	1077 (29 30) */		std	%f8,[%sp+2431]
+/* 0x0cbc	1078 (30 33) */		ldd	[%o2+16],%f10
+/* 0x0cc0	1079 (30 33) */		fdtox	%f4,%f4
+/* 0x0cc4	1080 (31 34) */		ldd	[%o2+24],%f8
+/* 0x0cc8	1081 (31 34) */		fdtox	%f14,%f14
+/* 0x0ccc	1082 (32 33) */		std	%f4,[%sp+2423]
+/* 0x0cd0	1083 (32 34) */		fxnor	%f0,%f10,%f10
+/* 0x0cd4	1084 (33 35) */		fxnor	%f0,%f8,%f4
+/* 0x0cd8	1085 (33 34) */		std	%f14,[%sp+2415]
+/* 0x0cdc	1086 (34 37) */		fitod	%f10,%f8
+/* 0x0ce0	1087 (35 38) */		fitod	%f11,%f10
+/* 0x0ce4	1088 (36 39) */		fitod	%f4,%f14
+/* 0x0ce8	1089 (37 40) */		fsubd	%f16,%f8,%f8
+/* 0x0cec	1090 (38 41) */		fsubd	%f16,%f10,%f10
+/* 0x0cf0	1091 (39 42) */		fsubd	%f16,%f14,%f14
+/* 0x0cf4	1092 (40 43) */		fmuld	%f8,%f12,%f18
+/* 0x0cf8	1093 (40 43) */		fitod	%f5,%f4
+/* 0x0cfc	1094 (41 44) */		fmuld	%f8,%f6,%f8
+/* 0x0d00	1095 (42 45) */		fmuld	%f10,%f12,%f20
+/* 0x0d04	1096 (43 46) */		fmuld	%f10,%f6,%f10
+/* 0x0d08	1097 (43 46) */		fsubd	%f16,%f4,%f4
+/* 0x0d0c	1098 (44 47) */		fdtox	%f8,%f8
+/* 0x0d10	1099 (44 45) */		std	%f8,[%sp+2399]
+/* 0x0d14	1100 (45 48) */		fmuld	%f14,%f12,%f8
+/* 0x0d18	1101 (45 48) */		fdtox	%f18,%f18
+/* 0x0d1c	1102 (45 46) */		std	%f18,[%sp+2407]
+/* 0x0d20	1103 (46 49) */		fdtox	%f10,%f10
+/* 0x0d24	1104 (46 47) */		std	%f10,[%sp+2383]
+/* 0x0d28	1105 (46 49) */		fmuld	%f14,%f6,%f14
+/* 0x0d2c	1106 (47 50) */		fmuld	%f4,%f12,%f10
+/* 0x0d30	1107 (47 50) */		fdtox	%f20,%f18
+/* 0x0d34	1108 (47 48) */		std	%f18,[%sp+2391]
+/* 0x0d38	1109 (48 51) */		fdtox	%f8,%f8
+/* 0x0d3c	1110 (48 49) */		std	%f8,[%sp+2375]
+/* 0x0d40	1111 (48 51) */		fmuld	%f4,%f6,%f4
+/* 0x0d44	1112 (49 52) */		fdtox	%f14,%f14
+/* 0x0d48	1113 (49 50) */		std	%f14,[%sp+2367]
+/* 0x0d4c	1117 (50 53) */		ldd	[%o2+32],%f8
+/* 0x0d50	1118 (50 53) */		fdtox	%f10,%f10
+/* 0x0d54	1119 (51 54) */		fdtox	%f4,%f4
+/* 0x0d58	1120 (51 52) */		std	%f4,[%sp+2351]
+/* 0x0d5c	1121 (52 54) */		fxnor	%f0,%f8,%f8
+/* 0x0d60	1122 (52 55) */		ldd	[%o2+40],%f14
+/* 0x0d64	1123 (53 54) */		std	%f10,[%sp+2359]
+/* 0x0d68	1124 (54 57) */		fitod	%f8,%f4
+/* 0x0d6c	1125 (55 57) */		fxnor	%f0,%f14,%f10
+/* 0x0d70	1126 (56 59) */		fitod	%f9,%f8
+/* 0x0d74	1127 (57 60) */		fsubd	%f16,%f4,%f4
+/* 0x0d78	1128 (58 61) */		fitod	%f10,%f14
+/* 0x0d7c	1129 (59 62) */		fsubd	%f16,%f8,%f8
+/* 0x0d80	1130 (60 63) */		fmuld	%f4,%f12,%f18
+/* 0x0d84	1131 (60 63) */		fitod	%f11,%f10
+/* 0x0d88	1132 (61 64) */		fmuld	%f4,%f6,%f4
+/* 0x0d8c	1133 (61 64) */		fsubd	%f16,%f14,%f14
+/* 0x0d90	1134 (62 65) */		fmuld	%f8,%f12,%f20
+/* 0x0d94	1135 (63 66) */		fmuld	%f8,%f6,%f8
+/* 0x0d98	1136 (63 66) */		fsubd	%f16,%f10,%f10
+/* 0x0d9c	1137 (64 67) */		fdtox	%f4,%f4
+/* 0x0da0	1138 (64 65) */		std	%f4,[%sp+2335]
+/* 0x0da4	1139 (65 68) */		fmuld	%f14,%f12,%f4
+/* 0x0da8	1140 (65 68) */		fdtox	%f18,%f18
+/* 0x0dac	1141 (65 66) */		std	%f18,[%sp+2343]
+/* 0x0db0	1142 (66 69) */		fdtox	%f8,%f8
+/* 0x0db4	1143 (66 67) */		std	%f8,[%sp+2319]
+/* 0x0db8	1144 (66 69) */		fmuld	%f14,%f6,%f14
+/* 0x0dbc	1145 (67 70) */		fmuld	%f10,%f12,%f8
+/* 0x0dc0	1146 (67 70) */		fdtox	%f20,%f18
+/* 0x0dc4	1147 (67 68) */		std	%f18,[%sp+2327]
+/* 0x0dc8	1148 (68 71) */		fdtox	%f4,%f4
+/* 0x0dcc	1149 (68 69) */		std	%f4,[%sp+2311]
+/* 0x0dd0	1150 (68 71) */		fmuld	%f10,%f6,%f10
+/* 0x0dd4	1151 (69 72) */		fdtox	%f14,%f14
+/* 0x0dd8	1152 (69 70) */		std	%f14,[%sp+2303]
+/* 0x0ddc	1153 (70 73) */		ldd	[%o2+48],%f4
+/* 0x0de0	1154 (70 73) */		fdtox	%f8,%f8
+/* 0x0de4	1155 (71 74) */		fdtox	%f10,%f10
+/* 0x0de8	1156 (71 72) */		std	%f10,[%sp+2287]
+/* 0x0dec	1157 (72 74) */		fxnor	%f0,%f4,%f4
+/* 0x0df0	1158 (72 75) */		ldd	[%o2+56],%f14
+/* 0x0df4	1159 (73 74) */		std	%f8,[%sp+2295]
+/* 0x0df8	1160 (74 77) */		fitod	%f4,%f10
+/* 0x0dfc	1161 (75 78) */		fitod	%f5,%f4
+/* 0x0e00	1162 (76 78) */		fxnor	%f0,%f14,%f8
+/* 0x0e04	1163 (77 80) */		fsubd	%f16,%f10,%f10
+/* 0x0e08	1164 (78 81) */		fsubd	%f16,%f4,%f4
+/* 0x0e0c	1165 (79 82) */		fitod	%f8,%f14
+/* 0x0e10	1166 (80 83) */		fmuld	%f10,%f12,%f18
+/* 0x0e14	1167 (80 83) */		fitod	%f9,%f8
+/* 0x0e18	1168 (81 84) */		fmuld	%f10,%f6,%f10
+/* 0x0e1c	1169 (82 85) */		fmuld	%f4,%f12,%f20
+/* 0x0e20	1170 (82 85) */		fsubd	%f16,%f14,%f14
+/* 0x0e24	1171 (83 86) */		fdtox	%f18,%f18
+/* 0x0e28	1172 (83 84) */		std	%f18,[%sp+2279]
+/* 0x0e2c	1173 (83 86) */		fmuld	%f4,%f6,%f4
+/* 0x0e30	1174 (84 87) */		fdtox	%f10,%f10
+/* 0x0e34	1175 (84 85) */		std	%f10,[%sp+2271]
+/* 0x0e38	1176 (85 88) */		fdtox	%f20,%f10
+/* 0x0e3c	1177 (85 86) */		std	%f10,[%sp+2263]
+/* 0x0e40	1178 (86 89) */		fdtox	%f4,%f4
+/* 0x0e44	1179 (86 87) */		std	%f4,[%sp+2255]
+/* 0x0e48	1180 (86 89) */		fmuld	%f14,%f12,%f10
+/* 0x0e4c	1181 (87 90) */		fmuld	%f14,%f6,%f4
+/* 0x0e50	1182 (89 92) */		fdtox	%f10,%f10
+/* 0x0e54	1183 (89 90) */		std	%f10,[%sp+2247]
+/* 0x0e58	1184 (90 93) */		fdtox	%f4,%f4
+/* 0x0e5c	1185 (90 91) */		std	%f4,[%sp+2239]
+/* 0x0e60	1189 (91 93) */		ldx	[%sp+2463],%g2	! integer phase starts: hi product of word 0
+/* 0x0e64	1190 (91 94) */		fsubd	%f16,%f8,%f4
+/* 0x0e68	1191 (92 94) */		ldx	[%sp+2471],%g3	! lo product of word 0
+/* 0x0e6c	1192 (93 96) */		ld	[%i1],%g4
+/* 0x0e70	1193 (93 94) */		sllx	%g2,19,%g2
+/* 0x0e74	1194 (94 96) */		ldx	[%sp+2455],%g5
+/* 0x0e78	1195 (94 95) */		add	%g3,%g2,%g2
+/* 0x0e7c	1196 (94 97) */		fmuld	%f4,%f6,%f6
+/* 0x0e80	1197 (95 97) */		ldx	[%sp+2447],%g3
+/* 0x0e84	1198 (95 96) */		add	%g2,%g4,%g4
+/* 0x0e88	1199 (95 98) */		fmuld	%f4,%f12,%f4
+/* 0x0e8c	1200 (96 97) */		st	%g4,[%i0]
+/* 0x0e90	1201 (96 97) */		srlx	%g4,32,%g4	! carry out of word 0
+/* 0x0e94	1202 (97 100) */		ld	[%i1+8],%o0
+/* 0x0e98	1203 (97 98) */		sllx	%g3,19,%g2
+/* 0x0e9c	1204 (97 100) */		fdtox	%f6,%f6
+/* 0x0ea0	1205 (98 101) */		ld	[%i1+4],%g3
+/* 0x0ea4	1206 (98 99) */		add	%g5,%g2,%g2
+/* 0x0ea8	1207 (98 101) */		fdtox	%f4,%f4
+/* 0x0eac	1208 (99 101) */		ldx	[%sp+2439],%g5
+/* 0x0eb0	1209 (100 103) */		ld	[%i1+12],%o1
+/* 0x0eb4	1210 (100 101) */		add	%g2,%g3,%g2
+/* 0x0eb8	1211 (101 103) */		ldx	[%sp+2431],%g3
+/* 0x0ebc	1212 (101 102) */		add	%g2,%g4,%g4
+/* 0x0ec0	1213 (102 103) */		st	%g4,[%i0+4]
+/* 0x0ec4	1214 (103 104) */		std	%f6,[%sp+2223]
+/* 0x0ec8	1215 (103 104) */		sllx	%g3,19,%g2
+/* 0x0ecc	1216 (104 106) */		ldx	[%sp+2423],%g3
+/* 0x0ed0	1217 (104 105) */		add	%g5,%g2,%g2
+/* 0x0ed4	1218 (105 107) */		ldx	[%sp+2415],%g5
+/* 0x0ed8	1219 (105 106) */		add	%g2,%o0,%g2
+/* 0x0edc	1220 (106 107) */		std	%f4,[%sp+2231]
+/* 0x0ee0	1221 (106 107) */		srlx	%g4,32,%o0
+/* 0x0ee4	1222 (107 109) */		ldx	[%sp+2407],%g4
+/* 0x0ee8	1223 (107 108) */		sllx	%g5,19,%g5
+/* 0x0eec	1224 (107 108) */		add	%g2,%o0,%g2
+/* 0x0ef0	1225 (108 109) */		st	%g2,[%i0+8]
+/* 0x0ef4	1226 (108 109) */		srlx	%g2,32,%o0
+/* 0x0ef8	1227 (108 109) */		add	%g3,%g5,%g3
+/* 0x0efc	1228 (109 111) */		ldx	[%sp+2399],%g5
+/* 0x0f00	1229 (109 110) */		add	%g3,%o1,%g3
+/* 0x0f04	1230 (110 113) */		ld	[%i1+16],%o1
+/* 0x0f08	1231 (110 111) */		add	%g3,%o0,%g3
+/* 0x0f0c	1232 (111 112) */		st	%g3,[%i0+12]
+/* 0x0f10	1233 (111 112) */		sllx	%g5,19,%g5
+/* 0x0f14	1234 (112 113) */		srlx	%g3,32,%o0
+/* 0x0f18	1235 (112 113) */		add	%g4,%g5,%g2
+/* 0x0f1c	1236 (112 114) */		ldx	[%sp+2383],%g5
+/* 0x0f20	1237 (113 115) */		ldx	[%sp+2391],%g4
+/* 0x0f24	1238 (113 114) */		add	%g2,%o1,%g2
+/* 0x0f28	1239 (114 117) */		ld	[%i1+20],%o1
+/* 0x0f2c	1240 (114 115) */		sllx	%g5,19,%g5
+/* 0x0f30	1241 (114 115) */		add	%g2,%o0,%g2
+/* 0x0f34	1242 (115 116) */		st	%g2,[%i0+16]
+/* 0x0f38	1243 (115 116) */		srlx	%g2,32,%o0
+/* 0x0f3c	1244 (115 116) */		add	%g4,%g5,%g3
+/* 0x0f40	1245 (116 118) */		ldx	[%sp+2367],%g5
+/* 0x0f44	1246 (116 117) */		add	%g3,%o1,%g3
+/* 0x0f48	1247 (117 119) */		ldx	[%sp+2375],%g4
+/* 0x0f4c	1248 (117 118) */		add	%g3,%o0,%g3
+/* 0x0f50	1249 (118 121) */		ld	[%i1+24],%o1
+/* 0x0f54	1250 (118 119) */		sllx	%g5,19,%g5
+/* 0x0f58	1251 (119 120) */		st	%g3,[%i0+20]
+/* 0x0f5c	1252 (119 120) */		add	%g4,%g5,%g2
+/* 0x0f60	1253 (120 122) */		ldx	[%sp+2351],%g5
+/* 0x0f64	1254 (120 121) */		srlx	%g3,32,%o0
+/* 0x0f68	1255 (120 121) */		add	%g2,%o1,%g2
+/* 0x0f6c	1256 (121 123) */		ldx	[%sp+2359],%g4
+/* 0x0f70	1257 (121 122) */		add	%g2,%o0,%g2
+/* 0x0f74	1258 (122 125) */		ld	[%i1+28],%o1
+/* 0x0f78	1259 (122 123) */		sllx	%g5,19,%g5
+/* 0x0f7c	1260 (123 124) */		st	%g2,[%i0+24]
+/* 0x0f80	1261 (123 124) */		add	%g4,%g5,%g3
+/* 0x0f84	1265 (124 126) */		ldx	[%sp+2335],%g5
+/* 0x0f88	1266 (124 125) */		srlx	%g2,32,%o0
+/* 0x0f8c	1267 (124 125) */		add	%g3,%o1,%g3
+/* 0x0f90	1268 (125 127) */		ldx	[%sp+2343],%g4
+/* 0x0f94	1269 (125 126) */		add	%g3,%o0,%g3
+/* 0x0f98	1270 (126 127) */		sllx	%g5,19,%g5
+/* 0x0f9c	1271 (126 129) */		ld	[%i1+32],%o1
+/* 0x0fa0	1272 (127 128) */		add	%g4,%g5,%g2
+/* 0x0fa4	1273 (127 129) */		ldx	[%sp+2319],%g5
+/* 0x0fa8	1274 (128 130) */		ldx	[%sp+2327],%g4
+/* 0x0fac	1275 (128 129) */		srlx	%g3,32,%o0
+/* 0x0fb0	1276 (128 129) */		add	%g2,%o1,%g2
+/* 0x0fb4	1277 (129 130) */		st	%g3,[%i0+28]
+/* 0x0fb8	1278 (129 130) */		sllx	%g5,19,%g5
+/* 0x0fbc	1279 (129 130) */		add	%g2,%o0,%g2
+/* 0x0fc0	1280 (130 133) */		ld	[%i1+36],%o1
+/* 0x0fc4	1281 (130 131) */		add	%g4,%g5,%g3
+/* 0x0fc8	1282 (131 133) */		ldx	[%sp+2303],%g5
+/* 0x0fcc	1283 (131 132) */		srlx	%g2,32,%o0
+/* 0x0fd0	1284 (132 134) */		ldx	[%sp+2311],%g4
+/* 0x0fd4	1285 (132 133) */		add	%g3,%o1,%g3
+/* 0x0fd8	1286 (133 134) */		sllx	%g5,19,%g5
+/* 0x0fdc	1287 (133 134) */		st	%g2,[%i0+32]
+/* 0x0fe0	1288 (133 134) */		add	%g3,%o0,%g3
+/* 0x0fe4	1289 (134 135) */		add	%g4,%g5,%g2
+/* 0x0fe8	1290 (134 136) */		ldx	[%sp+2287],%g5
+/* 0x0fec	1291 (135 137) */		ldx	[%sp+2295],%g4
+/* 0x0ff0	1292 (135 136) */		srlx	%g3,32,%o0
+/* 0x0ff4	1293 (136 139) */		ld	[%i1+40],%o1
+/* 0x0ff8	1294 (136 137) */		sllx	%g5,19,%g5
+/* 0x0ffc	1295 (137 138) */		st	%g3,[%i0+36]
+/* 0x1000	1296 (137 138) */		add	%g4,%g5,%g3
+/* 0x1004	1297 (138 140) */		ldx	[%sp+2271],%g5
+/* 0x1008	1298 (138 139) */		add	%g2,%o1,%g2
+/* 0x100c	1299 (139 141) */		ldx	[%sp+2279],%g4
+/* 0x1010	1300 (139 140) */		add	%g2,%o0,%g2
+/* 0x1014	1301 (140 143) */		ld	[%i1+44],%o1
+/* 0x1018	1302 (140 141) */		sllx	%g5,19,%g5
+/* 0x101c	1303 (141 142) */		st	%g2,[%i0+40]
+/* 0x1020	1304 (141 142) */		srlx	%g2,32,%o0
+/* 0x1024	1305 (141 142) */		add	%g4,%g5,%g2
+/* 0x1028	1306 (142 144) */		ldx	[%sp+2255],%g5
+/* 0x102c	1307 (142 143) */		add	%g3,%o1,%g3
+/* 0x1030	1308 (143 145) */		ldx	[%sp+2263],%g4
+/* 0x1034	1309 (143 144) */		add	%g3,%o0,%g3
+/* 0x1038	1310 (144 147) */		ld	[%i1+48],%o1
+/* 0x103c	1311 (144 145) */		sllx	%g5,19,%g5
+/* 0x1040	1312 (145 146) */		srlx	%g3,32,%o0
+/* 0x1044	1313 (145 146) */		st	%g3,[%i0+44]
+/* 0x1048	1314 (145 146) */		add	%g4,%g5,%g3
+/* 0x104c	1315 (146 148) */		ldx	[%sp+2239],%g5
+/* 0x1050	1316 (146 147) */		add	%g2,%o1,%g2
+/* 0x1054	1317 (147 150) */		ld	[%i1+52],%o1
+/* 0x1058	1318 (147 148) */		add	%g2,%o0,%g2
+/* 0x105c	1319 (148 150) */		ldx	[%sp+2247],%g4
+/* 0x1060	1320 (148 149) */		sllx	%g5,19,%g5
+/* 0x1064	1321 (149 150) */		srlx	%g2,32,%o0
+/* 0x1068	1322 (149 150) */		st	%g2,[%i0+48]
+/* 0x106c	1323 (149 150) */		add	%g3,%o1,%g3
+/* 0x1070	1324 (150 153) */		ld	[%i1+56],%o1
+/* 0x1074	1325 (150 151) */		add	%g4,%g5,%g2
+/* 0x1078	1326 (150 151) */		add	%g3,%o0,%g3
+/* 0x107c	1327 (151 153) */		ldx	[%sp+2223],%g5
+/* 0x1080	1328 (151 152) */		srlx	%g3,32,%o0
+/* 0x1084	1329 (152 154) */		ldx	[%sp+2231],%g4
+/* 0x1088	1330 (152 153) */		add	%g2,%o1,%g2
+/* 0x108c	1331 (153 154) */		sllx	%g5,19,%g5
+/* 0x1090	1332 (153 156) */		ld	[%i1+60],%o1
+/* 0x1094	1333 (153 154) */		add	%g2,%o0,%g2
+/* 0x1098	1334 (154 155) */		st	%g3,[%i0+52]
+/* 0x109c	1335 (154 155) */		add	%g4,%g5,%g3
+/* 0x10a0	1336 (155 156) */		st	%g2,[%i0+56]
+/* 0x10a4	1337 (155 156) */		srlx	%g2,32,%g2
+/* 0x10a8	1338 (155 156) */		add	%g3,%o1,%g3
+/* 0x10ac	1339 (156 157) */		add	%g3,%g2,%g2
+/* 0x10b0	1340 (156 157) */		st	%g2,[%i0+60]
+/* 0x10b4	1344 (157 158) */		srlx	%g2,32,%o3	! carry out of the last word
+/* 0x10b8	1345 (158 159) */		srl	%o3,0,%i0	! zero-extended to 32 bits as the result
+/* 0x10bc	     (159 161) */		ret	! Result =  %o1 %o0 %f0 %f1
+/* 0x10c0	     (161 162) */		restore	%g0,%g0,%g0
+
+!
+! ENTRY .L77000073
+! Register shuffle ahead of the generic path; execution falls through into .L77000052.
+
+                                   .L77000073:		/* frequency 1.0 confidence 0.0 */
+
+! NOTE(review): these three moves carry no compiler annotation, so they look hand-inserted -- confirm.
+	or	%g0, %i4, %o2	! %o2 = %i4
+	or	%g0, %o0, %o1	! %o1 = %o0
+	or	%g0, %i3, %o0	! %o0 = %i3
+
+!
+! ENTRY .L77000052
+! Generic-path set-up: splits %o2 into lo = %o2 & ~%g2 and hi = %o2 >> 19, turns both into doubles (%f18 = lo, %f16 = hi), computes %o3 = (%o0 + 1) / 2 as the number of 8-byte inputs at %i2, and points %g3/%g2/%o7/%o4 at %fp-4721/-4713/-4705/-4697. Skips to .L990000162 if %o3 == 0 and to .L77000054 if %o3 < 7; otherwise preloads the first inputs for the unrolled loop.
+! NOTE(review): relies on %g2 == 0xfff80000 on entry (set in the delay slot before .L77000073) and on %f0 from outside this chunk; the address comments restart at 0x1028 here, which is cosmetic only.
+                                   .L77000052:		/* frequency 1.0 confidence 0.0 */
+/* 0x1028	1318 ( 0  1) */		andn	%o2,%g2,%g2	! lo part of the multiplier
+/* 0x102c	1319 ( 0  1) */		st	%g2,[%sp+2227]
+/* 0x1030	1325 ( 0  1) */		add	%o0,1,%g3
+/* 0x1034	1326 ( 0  1) */		fmovd	%f0,%f14	! keep the fxnor operand in %f14; %f0 is reused below
+/* 0x1038	1327 ( 1  2) */		srl	%o2,19,%g2	! hi part of the multiplier
+/* 0x103c	1328 ( 1  2) */		st	%g2,[%sp+2223]
+/* 0x1040	1329 ( 1  2) */		or	%g0,0,%o5
+/* 0x1044	1330 ( 2  3) */		srl	%g3,31,%g2	! sign bit, so the divide below rounds toward zero
+/* 0x1048	1331 ( 2  5) */		ldd	[%o1],%f6
+/* 0x104c	1335 ( 2  3) */		sethi	%hi(0x1000),%g1
+/* 0x1050	1336 ( 3  4) */		add	%g3,%g2,%g2
+/* 0x1054	1337 ( 3  4) */		xor	%g1,-625,%g1	! %g1 = -4721
+/* 0x1058	1338 ( 3  6) */		ldd	[%o1+8],%f20
+/* 0x105c	1339 ( 4  5) */		sra	%g2,1,%o3	! %o3 = (%o0 + 1) / 2
+/* 0x1060	1340 ( 4  5) */		fmovs	%f6,%f8
+/* 0x1064	1341 ( 4  5) */		add	%g1,%fp,%g3	! %g3 = %fp - 4721
+/* 0x1068	1342 ( 5  6) */		fmovs	%f6,%f10
+/* 0x106c	1343 ( 5  7) */		ld	[%sp+2227],%f9
+/* 0x1070	1344 ( 5  6) */		subcc	%o3,0,%g0
+/* 0x1074	1345 ( 6  8) */		ld	[%sp+2223],%f11
+/* 0x1078	1346 ( 6  7) */		sethi	%hi(0x1000),%g1
+/* 0x107c	1347 ( 6  7) */		or	%g0,%i2,%o1	! %o1 now walks the input at %i2
+/* 0x1080	1348 ( 7 10) */		fsubd	%f8,%f6,%f18	! %f18 = lo multiplier as a double (presumably; depends on the loaded constant)
+/* 0x1084	1349 ( 7  8) */		xor	%g1,-617,%g1	! %g1 = -4713
+/* 0x1088	1350 ( 7  8) */		or	%g0,0,%g4
+/* 0x108c	1351 ( 8 11) */		fsubd	%f10,%f6,%f16	! %f16 = hi multiplier as a double (presumably; depends on the loaded constant)
+/* 0x1090	1352 ( 8  9) */		bleu,pt	%icc,.L990000162	! tprob=0.50; taken when %o3 == 0
+/* 0x1094	     ( 8  9) */		subcc	%o0,0,%g0	! delay slot: sets %icc for the test at .L990000162
+/* 0x1098	1354 ( 9 10) */		add	%g1,%fp,%g2	! %g2 = %fp - 4713
+/* 0x109c	1355 ( 9 10) */		sethi	%hi(0x1000),%g1
+/* 0x10a0	1356 (10 11) */		xor	%g1,-609,%g1
+/* 0x10a4	1357 (10 11) */		subcc	%o3,7,%g0
+/* 0x10a8	1358 (11 12) */		add	%g1,%fp,%o7	! %o7 = %fp - 4705
+/* 0x10ac	1359 (11 12) */		sethi	%hi(0x1000),%g1
+/* 0x10b0	1360 (12 13) */		xor	%g1,-601,%g1
+/* 0x10b4	1361 (13 14) */		add	%g1,%fp,%o4	! %o4 = %fp - 4697
+/* 0x10b8	1362 (13 14) */		bl,pn	%icc,.L77000054	! tprob=0.50; taken when %o3 < 7
+/* 0x10bc	     (13 14) */		sub	%o3,2,%o2	! delay slot: loop bound for the unrolled loop
+/* 0x10c0	1364 (14 17) */		ldd	[%o1],%f2
+/* 0x10c4	1365 (14 15) */		add	%o1,16,%g5
+/* 0x10c8	1366 (14 15) */		or	%g0,4,%g4	! four inputs are preloaded here
+/* 0x10cc	1367 (15 18) */		ldd	[%o1+8],%f0
+/* 0x10d0	1368 (15 16) */		add	%o1,8,%o1
+/* 0x10d4	1369 (16 18) */		fxnor	%f14,%f2,%f6
+/* 0x10d8	1370 (16 19) */		ldd	[%g5],%f4
+/* 0x10dc	1371 (16 17) */		add	%o1,16,%o1
+/* 0x10e0	1372 (17 19) */		fxnor	%f14,%f0,%f12
+/* 0x10e4	1373 (17 20) */		ldd	[%o1],%f0
+/* 0x10e8	1374 (17 18) */		add	%o1,8,%o1
+/* 0x10ec	1375 (18 21) */		fitod	%f7,%f2
+/* 0x10f0	1376 (19 22) */		fitod	%f6,%f6
+/* 0x10f4	1377 (20 22) */		fxnor	%f14,%f4,%f10
+/* 0x10f8	1378 (21 24) */		fsubd	%f20,%f2,%f2
+/* 0x10fc	1379 (22 24) */		fxnor	%f14,%f0,%f8
+/* 0x1100	1380 (23 26) */		fitod	%f13,%f4
+/* 0x1104	1381 (24 27) */		fsubd	%f20,%f6,%f6
+/* 0x1108	1382 (24 27) */		fmuld	%f2,%f16,%f0
+
+!
+! ENTRY .L990000154
+! Unrolled conversion/multiply loop: each pass loads three more 8-byte inputs from [%o1], advances %g4 by 3 and the four output pointers by 96, and stores twelve fdtox results (products with %f18 via %g3/%o7, with %f16 via %g2/%o4). Repeats while %g4 < %o2 (unsigned).
+! NOTE(review): values are carried across passes in FP registers, so this appears to be software-pipelined; instruction order matters.
+                                   .L990000154:		/* frequency 1.0 confidence 0.0 */
+/* 0x110c	1384 ( 0  3) */		ldd	[%o1],%f24
+/* 0x1110	1385 ( 0  1) */		add	%g4,3,%g4
+/* 0x1114	1386 ( 0  1) */		add	%o4,96,%o4
+/* 0x1118	1387 ( 1  4) */		fitod	%f11,%f22
+/* 0x111c	1388 ( 2  5) */		fsubd	%f20,%f4,%f26
+/* 0x1120	1389 ( 2  3) */		subcc	%g4,%o2,%g0	! sets %icc for the bcs at the bottom of the loop
+/* 0x1124	1390 ( 2  3) */		add	%o7,96,%o7
+/* 0x1128	1391 ( 2  5) */		fmuld	%f6,%f18,%f28
+/* 0x112c	1392 ( 3  6) */		fmuld	%f6,%f16,%f6
+/* 0x1130	1393 ( 3  4) */		add	%g2,96,%g2
+/* 0x1134	1394 ( 3  4) */		add	%g3,96,%g3
+/* 0x1138	1395 ( 4  7) */		fdtox	%f0,%f0
+/* 0x113c	1396 ( 5  8) */		fitod	%f12,%f4
+/* 0x1140	1397 ( 5  8) */		fmuld	%f2,%f18,%f2
+/* 0x1144	1398 ( 6  9) */		fdtox	%f28,%f12
+/* 0x1148	1399 ( 7 10) */		fdtox	%f6,%f6
+/* 0x114c	1400 ( 7  8) */		std	%f12,[%g3-96]
+/* 0x1150	1401 ( 8  9) */		std	%f6,[%g2-96]
+/* 0x1154	1402 ( 8 11) */		fdtox	%f2,%f2
+/* 0x1158	1403 ( 9 12) */		fsubd	%f20,%f4,%f6
+/* 0x115c	1404 ( 9 10) */		std	%f2,[%o7-96]
+/* 0x1160	1405 ( 9 10) */		add	%o1,8,%o1
+/* 0x1164	1406 (10 12) */		fxnor	%f14,%f24,%f12
+/* 0x1168	1407 (10 13) */		fmuld	%f26,%f16,%f4
+/* 0x116c	1408 (10 11) */		std	%f0,[%o4-96]
+/* 0x1170	1409 (11 14) */		ldd	[%o1],%f0
+/* 0x1174	1410 (11 14) */		fitod	%f9,%f2
+/* 0x1178	1411 (12 15) */		fsubd	%f20,%f22,%f28
+/* 0x117c	1412 (12 15) */		fmuld	%f6,%f18,%f24
+/* 0x1180	1413 (13 16) */		fmuld	%f6,%f16,%f22
+/* 0x1184	1414 (13 16) */		fdtox	%f4,%f4
+/* 0x1188	1415 (14 17) */		fitod	%f10,%f6
+/* 0x118c	1416 (14 17) */		fmuld	%f26,%f18,%f10
+/* 0x1190	1417 (15 18) */		fdtox	%f24,%f24
+/* 0x1194	1418 (16 19) */		fdtox	%f22,%f22
+/* 0x1198	1419 (16 17) */		std	%f24,[%g3-64]
+/* 0x119c	1420 (17 18) */		std	%f22,[%g2-64]
+/* 0x11a0	1421 (17 20) */		fdtox	%f10,%f10
+/* 0x11a4	1422 (18 21) */		fsubd	%f20,%f6,%f6
+/* 0x11a8	1423 (18 19) */		std	%f10,[%o7-64]
+/* 0x11ac	1424 (18 19) */		add	%o1,8,%o1
+/* 0x11b0	1425 (19 21) */		fxnor	%f14,%f0,%f10
+/* 0x11b4	1426 (19 22) */		fmuld	%f28,%f16,%f0
+/* 0x11b8	1427 (19 20) */		std	%f4,[%o4-64]
+/* 0x11bc	1428 (20 23) */		ldd	[%o1],%f22
+/* 0x11c0	1429 (20 23) */		fitod	%f13,%f4
+/* 0x11c4	1430 (21 24) */		fsubd	%f20,%f2,%f2
+/* 0x11c8	1431 (21 24) */		fmuld	%f6,%f18,%f26
+/* 0x11cc	1432 (22 25) */		fmuld	%f6,%f16,%f24
+/* 0x11d0	1433 (22 25) */		fdtox	%f0,%f0
+/* 0x11d4	1434 (23 26) */		fitod	%f8,%f6
+/* 0x11d8	1435 (23 26) */		fmuld	%f28,%f18,%f8
+/* 0x11dc	1436 (24 27) */		fdtox	%f26,%f26
+/* 0x11e0	1437 (25 28) */		fdtox	%f24,%f24
+/* 0x11e4	1438 (25 26) */		std	%f26,[%g3-32]
+/* 0x11e8	1439 (26 27) */		std	%f24,[%g2-32]
+/* 0x11ec	1440 (26 29) */		fdtox	%f8,%f8
+/* 0x11f0	1441 (27 30) */		fsubd	%f20,%f6,%f6
+/* 0x11f4	1442 (27 28) */		std	%f8,[%o7-32]
+/* 0x11f8	1443 (27 28) */		add	%o1,8,%o1
+/* 0x11fc	1444 (28 30) */		fxnor	%f14,%f22,%f8
+/* 0x1200	1445 (28 29) */		std	%f0,[%o4-32]
+/* 0x1204	1446 (28 29) */		bcs,pt	%icc,.L990000154	! tprob=0.50; loop while %g4 < %o2 (unsigned)
+/* 0x1208	     (28 31) */		fmuld	%f2,%f16,%f0	! delay slot (not annulled): runs on loop exit too
+
+!
+! ENTRY .L990000157
+! Drain of the unrolled loop: finishes the inputs still held in FP registers, advances the four output pointers by 128 and stores sixteen fdtox results at offsets -128..-32. Then branches to .L77000056 when %g4 >= %o3 (unsigned), otherwise falls through into the remainder loop at .L77000054.
+
+                                   .L990000157:		/* frequency 1.0 confidence 0.0 */
+/* 0x120c	1449 ( 0  3) */		fitod	%f12,%f28
+/* 0x1210	1450 ( 0  3) */		fmuld	%f6,%f18,%f24
+/* 0x1214	1451 ( 0  1) */		add	%g3,128,%g3
+/* 0x1218	1452 ( 1  4) */		fitod	%f10,%f12
+/* 0x121c	1453 ( 1  4) */		fmuld	%f6,%f16,%f26
+/* 0x1220	1454 ( 1  2) */		add	%g2,128,%g2
+/* 0x1224	1455 ( 2  5) */		fsubd	%f20,%f4,%f4
+/* 0x1228	1456 ( 2  5) */		fmuld	%f2,%f18,%f22
+/* 0x122c	1457 ( 2  3) */		add	%o7,128,%o7
+/* 0x1230	1458 ( 3  6) */		fdtox	%f24,%f6
+/* 0x1234	1459 ( 3  4) */		std	%f6,[%g3-128]
+/* 0x1238	1460 ( 3  4) */		add	%o4,128,%o4
+/* 0x123c	1461 ( 4  7) */		fsubd	%f20,%f28,%f2
+/* 0x1240	1462 ( 4  5) */		subcc	%g4,%o3,%g0	! sets %icc for the bcc at the end of this block
+/* 0x1244	1463 ( 5  8) */		fitod	%f11,%f6
+/* 0x1248	1464 ( 5  8) */		fmuld	%f4,%f18,%f24
+/* 0x124c	1465 ( 6  9) */		fdtox	%f26,%f10
+/* 0x1250	1466 ( 6  7) */		std	%f10,[%g2-128]
+/* 0x1254	1467 ( 7 10) */		fdtox	%f22,%f10
+/* 0x1258	1468 ( 7  8) */		std	%f10,[%o7-128]
+/* 0x125c	1469 ( 7 10) */		fmuld	%f2,%f18,%f26
+/* 0x1260	1470 ( 8 11) */		fsubd	%f20,%f12,%f10
+/* 0x1264	1471 ( 8 11) */		fmuld	%f2,%f16,%f2
+/* 0x1268	1472 ( 9 12) */		fsubd	%f20,%f6,%f22
+/* 0x126c	1473 ( 9 12) */		fmuld	%f4,%f16,%f12
+/* 0x1270	1474 (10 13) */		fdtox	%f0,%f0
+/* 0x1274	1475 (10 11) */		std	%f0,[%o4-128]
+/* 0x1278	1476 (11 14) */		fitod	%f8,%f4
+/* 0x127c	1477 (11 14) */		fmuld	%f10,%f18,%f6
+/* 0x1280	1478 (12 15) */		fdtox	%f26,%f0
+/* 0x1284	1479 (12 13) */		std	%f0,[%g3-96]
+/* 0x1288	1480 (12 15) */		fmuld	%f10,%f16,%f10
+/* 0x128c	1481 (13 16) */		fdtox	%f2,%f2
+/* 0x1290	1482 (13 14) */		std	%f2,[%g2-96]
+/* 0x1294	1483 (14 17) */		fitod	%f9,%f0
+/* 0x1298	1484 (14 17) */		fmuld	%f22,%f18,%f2
+/* 0x129c	1485 (15 18) */		fdtox	%f24,%f8
+/* 0x12a0	1486 (15 16) */		std	%f8,[%o7-96]
+/* 0x12a4	1487 (16 19) */		fsubd	%f20,%f4,%f4
+/* 0x12a8	1488 (16 19) */		fmuld	%f22,%f16,%f8
+/* 0x12ac	1489 (17 20) */		fdtox	%f12,%f12
+/* 0x12b0	1490 (17 18) */		std	%f12,[%o4-96]
+/* 0x12b4	1491 (18 21) */		fsubd	%f20,%f0,%f0
+/* 0x12b8	1492 (19 22) */		fdtox	%f6,%f6
+/* 0x12bc	1493 (19 20) */		std	%f6,[%g3-64]
+/* 0x12c0	1494 (20 23) */		fdtox	%f10,%f10
+/* 0x12c4	1495 (20 21) */		std	%f10,[%g2-64]
+/* 0x12c8	1496 (20 23) */		fmuld	%f4,%f18,%f6
+/* 0x12cc	1497 (21 24) */		fdtox	%f2,%f2
+/* 0x12d0	1498 (21 22) */		std	%f2,[%o7-64]
+/* 0x12d4	1499 (21 24) */		fmuld	%f4,%f16,%f4
+/* 0x12d8	1500 (22 25) */		fmuld	%f0,%f18,%f2
+/* 0x12dc	1501 (22 25) */		fdtox	%f8,%f8
+/* 0x12e0	1502 (22 23) */		std	%f8,[%o4-64]
+/* 0x12e4	1503 (23 26) */		fdtox	%f6,%f6
+/* 0x12e8	1504 (23 24) */		std	%f6,[%g3-32]
+/* 0x12ec	1505 (23 26) */		fmuld	%f0,%f16,%f0
+/* 0x12f0	1506 (24 27) */		fdtox	%f4,%f4
+/* 0x12f4	1507 (24 25) */		std	%f4,[%g2-32]
+/* 0x12f8	1508 (25 28) */		fdtox	%f2,%f2
+/* 0x12fc	1509 (25 26) */		std	%f2,[%o7-32]
+/* 0x1300	1510 (26 29) */		fdtox	%f0,%f0
+/* 0x1304	1511 (26 27) */		bcc,pn	%icc,.L77000056	! tprob=0.50; taken when %g4 >= %o3 (unsigned)
+/* 0x1308	     (26 27) */		std	%f0,[%o4-32]	! delay slot (not annulled): last store of the drain
+
+!
+! ENTRY .L77000054
+! Remainder loop, one 8-byte input per pass: both 32-bit halves are converted (fxnor with %f14, fitod, subtract from %f20), multiplied by %f18 and %f16, and the four fdtox results go to [%g3], [%g2], [%o7], [%o4]; each pointer then advances by 32.
+
+                                   .L77000054:		/* frequency 1.0 confidence 0.0 */
+/* 0x130c	1514 ( 0  3) */		ldd	[%o1],%f0
+
+!
+! ENTRY .L990000161
+! Loop head proper; the next input is loaded by the annulled delay slot at the bottom.
+
+                                   .L990000161:		/* frequency 1.0 confidence 0.0 */
+/* 0x1310	1516 ( 0  2) */		fxnor	%f14,%f0,%f0
+/* 0x1314	1517 ( 0  1) */		add	%g4,1,%g4
+/* 0x1318	1518 ( 0  1) */		add	%o1,8,%o1
+/* 0x131c	1519 ( 1  2) */		subcc	%g4,%o3,%g0	! sets %icc for the bcs at the bottom of the loop
+/* 0x1320	1520 ( 2  5) */		fitod	%f0,%f2
+/* 0x1324	1521 ( 3  6) */		fitod	%f1,%f0
+/* 0x1328	1522 ( 5  8) */		fsubd	%f20,%f2,%f2
+/* 0x132c	1523 ( 6  9) */		fsubd	%f20,%f0,%f0
+/* 0x1330	1524 ( 8 11) */		fmuld	%f2,%f18,%f6
+/* 0x1334	1525 ( 9 12) */		fmuld	%f2,%f16,%f4
+/* 0x1338	1526 (10 13) */		fmuld	%f0,%f18,%f2
+/* 0x133c	1527 (11 14) */		fdtox	%f6,%f6
+/* 0x1340	1528 (11 12) */		std	%f6,[%g3]
+/* 0x1344	1529 (11 14) */		fmuld	%f0,%f16,%f0
+/* 0x1348	1530 (12 15) */		fdtox	%f4,%f4
+/* 0x134c	1531 (12 13) */		std	%f4,[%g2]
+/* 0x1350	1532 (12 13) */		add	%g2,32,%g2
+/* 0x1354	1533 (13 16) */		fdtox	%f2,%f2
+/* 0x1358	1534 (13 14) */		std	%f2,[%o7]
+/* 0x135c	1535 (13 14) */		add	%o7,32,%o7
+/* 0x1360	1536 (14 17) */		fdtox	%f0,%f0
+/* 0x1364	1537 (14 15) */		std	%f0,[%o4]
+/* 0x1368	1538 (14 15) */		add	%o4,32,%o4
+/* 0x136c	1539 (15 16) */		add	%g3,32,%g3
+/* 0x1370	1540 (15 16) */		bcs,a,pt	%icc,.L990000161	! tprob=0.50; loop while %g4 < %o3 (unsigned)
+/* 0x1374	     (16 19) */		ldd	[%o1],%f0	! annulled delay slot: executes only when the branch is taken
+
+!
+! ENTRY .L77000056
+! Compares %o0 with 0 to set %icc for the branch at .L990000162, then falls through into it.
+
+                                   .L77000056:		/* frequency 1.0 confidence 0.0 */
+/* 0x1378	1548 ( 0  1) */		subcc	%o0,0,%g0
+
+!
+! ENTRY .L990000162
+! Start of the integer accumulate phase. Leaves for .L77770061 when %o0 == 0 (both visible entries arrive with %icc set by subcc %o0,0). Otherwise sets %g4 = %i1, %g3 = %i0, %o7 = %o0, %i2 = 0, %g5 = %fp-4721 and %g2 = %fp-4713, and goes to .L77000058 if %o0 < 6. For %o0 >= 6 it produces output word 0 and primes the registers for the unrolled loop at .L990000142.
+
+                                   .L990000162:		/* frequency 1.0 confidence 0.0 */
+/* 0x137c	1550 ( 0  1) */		bleu,pt	%icc,.L77770061	! tprob=0.50
+/* 0x1380	     ( 0  1) */		nop
+/* 0x1384	1555 ( 0  1) */		sethi	%hi(0x1000),%g1
+/* 0x1388	1556 ( 1  2) */		xor	%g1,-625,%g1	! %g1 = -4721
+/* 0x138c	1557 ( 1  2) */		or	%g0,%i1,%g4
+/* 0x1390	1558 ( 2  3) */		add	%g1,%fp,%g5	! %g5 = %fp - 4721, where the %f18 products were stored
+/* 0x1394	1559 ( 2  3) */		sethi	%hi(0x1000),%g1
+/* 0x1398	1560 ( 3  4) */		xor	%g1,-617,%g1	! %g1 = -4713
+/* 0x139c	1561 ( 3  4) */		or	%g0,%o0,%o7
+/* 0x13a0	1562 ( 4  5) */		add	%g1,%fp,%g2	! %g2 = %fp - 4713, where the %f16 products were stored
+/* 0x13a4	1563 ( 4  5) */		or	%g0,0,%i2
+/* 0x13a8	1564 ( 5  6) */		or	%g0,%i0,%g3
+/* 0x13ac	1565 ( 5  6) */		subcc	%o0,6,%g0
+/* 0x13b0	1566 ( 5  6) */		bl,pn	%icc,.L77000058	! tprob=0.50; taken when %o0 < 6
+/* 0x13b4	     ( 6  7) */		sethi	%hi(0x1000),%g1	! delay slot (not annulled): runs on both paths
+/* 0x13b8	1568 ( 6  8) */		ld	[%g4],%o2
+/* 0x13bc	1569 ( 6  7) */		add	%g3,4,%g3
+/* 0x13c0	1570 ( 7  8) */		xor	%g1,-585,%g1
+/* 0x13c4	1571 ( 7  8) */		sub	%o7,3,%o4	! loop bound compared against %i2 in .L990000142
+/* 0x13c8	1572 ( 8  9) */		add	%g1,%fp,%g2	! %g2 = %fp - 4681
+/* 0x13cc	1573 ( 8  9) */		sethi	%hi(0x1000),%g1
+/* 0x13d0	1574 ( 9 10) */		xor	%g1,-593,%g1
+/* 0x13d4	1575 ( 9 10) */		or	%g0,2,%i2
+/* 0x13d8	1576 (10 11) */		add	%g1,%fp,%g5	! %g5 = %fp - 4689
+/* 0x13dc	1577 (10 11) */		sethi	%hi(0x1000),%g1
+/* 0x13e0	1578 (11 12) */		xor	%g1,-617,%g1
+/* 0x13e4	1579 (12 13) */		add	%g1,%fp,%g1	! %g1 = %fp - 4713
+/* 0x13e8	1580 (13 15) */		ldx	[%g1],%o1
+/* 0x13ec	1581 (14 16) */		ldx	[%g1-8],%o0
+/* 0x13f0	1582 (15 16) */		sllx	%o1,19,%o1
+/* 0x13f4	1583 (15 17) */		ldx	[%g1+16],%o3
+/* 0x13f8	1584 (16 17) */		add	%o0,%o1,%o0
+/* 0x13fc	1585 (16 18) */		ld	[%g4+4],%o1
+/* 0x1400	1586 (16 17) */		add	%g4,8,%g4
+/* 0x1404	1587 (17 18) */		sllx	%o3,19,%o3
+/* 0x1408	1588 (17 18) */		add	%o0,%o2,%o0
+/* 0x140c	1589 (17 19) */		ldx	[%g1+8],%o2
+/* 0x1410	1590 (18 19) */		st	%o0,[%g3-4]	! output word 0 (%g3 was advanced by 4 above)
+/* 0x1414	1591 (18 19) */		srlx	%o0,32,%o0	! carry into the next word
+
+!
+! ENTRY .L990000142
+!
+
+                                   .L990000142:		/* frequency 1.0 confidence 0.0 */
+/* 0x1418	1593 ( 0  1) */		add	%o2,%o3,%o2
+/* 0x141c	1594 ( 0  1) */		add	%i2,4,%i2
+/* 0x1420	1595 ( 0  2) */		ld	[%g4],%o3
+/* 0x1424	1596 ( 1  2) */		srl	%o0,0,%o5
+/* 0x1428	1597 ( 1  2) */		add	%o2,%o1,%o1
+/* 0x142c	1598 ( 1  3) */		ldx	[%g2],%o0
+/* 0x1430	1599 ( 3  4) */		sllx	%o0,19,%o2
+/* 0x1434	1600 ( 3  5) */		ldx	[%g5],%o0
+/* 0x1438	1601 ( 3  4) */		add	%o1,%o5,%o1
+/* 0x143c	1602 ( 4  5) */		st	%o1,[%g3]
+/* 0x1440	1603 ( 4  5) */		srlx	%o1,32,%o5
+/* 0x1444	1604 ( 4  5) */		subcc	%i2,%o4,%g0
+/* 0x1448	1605 ( 5  7) */		ldx	[%g2+16],%o1
+/* 0x144c	1606 ( 5  6) */		add	%o0,%o2,%o0
+/* 0x1450	1607 ( 5  6) */		add	%g3,16,%g3
+/* 0x1454	1608 ( 6  8) */		ld	[%g4+4],%o2
+/* 0x1458	1609 ( 6  7) */		add	%o0,%o3,%o0
+/* 0x145c	1610 ( 7  8) */		sllx	%o1,19,%o3
+/* 0x1460	1611 ( 7  9) */		ldx	[%g5+16],%o1
+/* 0x1464	1612 ( 7  8) */		add	%o0,%o5,%o0
+/* 0x1468	1613 ( 8  9) */		st	%o0,[%g3-12]
+/* 0x146c	1614 ( 8  9) */		srlx	%o0,32,%o5
+/* 0x1470	1615 ( 8  9) */		add	%g4,16,%g4
+/* 0x1474	1616 ( 9 11) */		ldx	[%g2+32],%o0
+/* 0x1478	1617 ( 9 10) */		add	%o1,%o3,%o1
+/* 0x147c	1618 ( 9 10) */		add	%g2,64,%g2
+/* 0x1480	1619 (10 12) */		ld	[%g4-8],%o3
+/* 0x1484	1620 (10 11) */		add	%o1,%o2,%o2
+/* 0x1488	1621 (11 12) */		sllx	%o0,19,%o1
+/* 0x148c	1622 (11 13) */		ldx	[%g5+32],%o0
+/* 0x1490	1623 (11 12) */		add	%o2,%o5,%o2
+/* 0x1494	1624 (12 13) */		st	%o2,[%g3-8]
+/* 0x1498	1625 (12 13) */		srlx	%o2,32,%o5
+/* 0x149c	1626 (12 13) */		add	%g5,64,%g5
+/* 0x14a0	1627 (13 15) */		ldx	[%g2-16],%o2
+/* 0x14a4	1628 (13 14) */		add	%o0,%o1,%o0
+/* 0x14a8	1629 (14 16) */		ld	[%g4-4],%o1
+/* 0x14ac	1630 (14 15) */		add	%o0,%o3,%o0
+/* 0x14b0	1631 (15 16) */		sllx	%o2,19,%o3
+/* 0x14b4	1632 (15 17) */		ldx	[%g5-16],%o2
+/* 0x14b8	1633 (15 16) */		add	%o0,%o5,%o0
+/* 0x14bc	1634 (16 17) */		st	%o0,[%g3-4]
+/* 0x14c0	1635 (16 17) */		bcs,pt	%icc,.L990000142	! tprob=0.50
+/* 0x14c4	     (16 17) */		srlx	%o0,32,%o0
+
+!
+! ENTRY .L990000145
+!
+
+                                   .L990000145:		/* frequency 1.0 confidence 0.0 */
+/* 0x14c8	1638 ( 0  1) */		add	%o2,%o3,%o3
+/* 0x14cc	1639 ( 0  1) */		add	%g3,4,%g3
+/* 0x14d0	1640 ( 1  2) */		srl	%o0,0,%o2
+/* 0x14d4	1641 ( 1  2) */		add	%o3,%o1,%o0
+/* 0x14d8	1642 ( 2  3) */		add	%o0,%o2,%o0
+/* 0x14dc	1643 ( 2  3) */		st	%o0,[%g3-4]
+/* 0x14e0	1644 ( 2  3) */		subcc	%i2,%o7,%g0
+/* 0x14e4	1645 ( 2  3) */		bcc,pn	%icc,.L77770061	! tprob=0.50
+/* 0x14e8	     ( 3  4) */		srlx	%o0,32,%o5
+
+!
+! ENTRY .L77000058
+!
+
+                                   .L77000058:		/* frequency 1.0 confidence 0.0 */
+/* 0x14ec	1648 ( 0  2) */		ldx	[%g2],%o2
+
+!
+! ENTRY .L990000160
+!
+
+                                   .L990000160:		/* frequency 1.0 confidence 0.0 */
+/* 0x14f0	1650 ( 0  1) */		sllx	%o2,19,%o3
+/* 0x14f4	1651 ( 0  2) */		ldx	[%g5],%o0
+/* 0x14f8	1652 ( 0  1) */		add	%i2,1,%i2
+/* 0x14fc	1653 ( 1  2) */		srl	%o5,0,%o1
+/* 0x1500	1654 ( 1  3) */		ld	[%g4],%o2
+/* 0x1504	1655 ( 1  2) */		add	%g2,16,%g2
+/* 0x1508	1656 ( 2  3) */		add	%o0,%o3,%o0
+/* 0x150c	1657 ( 2  3) */		add	%g5,16,%g5
+/* 0x1510	1658 ( 3  4) */		add	%o0,%o2,%o0
+/* 0x1514	1659 ( 3  4) */		add	%g4,4,%g4
+/* 0x1518	1660 ( 4  5) */		add	%o0,%o1,%o0
+/* 0x151c	1661 ( 4  5) */		st	%o0,[%g3]
+/* 0x1520	1662 ( 4  5) */		subcc	%i2,%o7,%g0
+/* 0x1524	1663 ( 5  6) */		srlx	%o0,32,%o5
+/* 0x1528	1664 ( 5  6) */		add	%g3,4,%g3
+/* 0x152c	1665 ( 5  6) */		bcs,a,pt	%icc,.L990000160	! tprob=0.50
+/* 0x1530	     ( 6  8) */		ldx	[%g2],%o2
+
+!
+! ENTRY .L77770061
+!
+
+                                   .L77770061:		/* frequency 1.0 confidence 0.0 */
+/* 0x1534	     ( 0  2) */		ret	! Result =  %o1 %o0 %f0 %f1
+/* 0x1538	     ( 2  3) */		restore	%g0,%o5,%o0
+
+
+/* 0x124c	1476 ( 0  0) */		.type	mul_add,2
+/* 0x124c	1477 ( 0  0) */		.size	mul_add,(.-mul_add)
+/* 0x124c	1480 ( 0  0) */		.align	8
+/* 0x1250	1486 ( 0  0) */		.global	mul_add_inp
+
+!
+! ENTRY mul_add_inp
+!
+
+                                   	.global mul_add_inp
+                                   mul_add_inp:		/* frequency 1.0 confidence 0.0 */	/* forwards to mul_add, passing its first argument twice */
+/* 0x1250	1488 ( 0  1) */		save	%sp,-176,%sp	/* new register window: incoming arguments are now in %i0-%i3 */
+/* 0x1254	1500 ( 1  2) */		sra	%i2,0,%o3	/* outgoing %o3 = low 32 bits of %i2, sign-extended */
+/* 0x1258	1501 ( 1  2) */		or	%g0,%i1,%o2	/* outgoing %o2 = %i1 */
+/* 0x125c	1502 ( 2  3) */		or	%g0,%i0,%o0	/* outgoing %o0 = %i0 */
+/* 0x1260	1503 ( 2  3) */		or	%g0,%i0,%o1	/* outgoing %o1 = %i0 as well */
+/* 0x1264	1504 ( 3  5) */		call	mul_add	! params = 	! Result = 
+/* 0x1268	     ( 4  5) */		srl	%i3,0,%o4	/* delay slot: outgoing %o4 = low 32 bits of %i3, zero-extended */
+/* 0x126c	1506 ( 5  6) */		srl	%o0,0,%i0	/* return value = low 32 bits of mul_add's result, zero-extended */
+/* 0x1270	     ( 6  8) */		ret	! Result =  %o1 %o0 %f0 %f1
+/* 0x1274	     ( 8  9) */		restore	%g0,%g0,%g0
+/* 0x1278	1509 ( 0  0) */		.type	mul_add_inp,2
+/* 0x1278	1510 ( 0  0) */		.size	mul_add_inp,(.-mul_add_inp)
+
+	.section	".data",#alloc,#write
+/* 0x1278	   6 ( 0  0) */		.align	8
+
+!
+! ENTRY mask_cnst
+!
+
+                                   mask_cnst:		/* frequency 1.0 confidence 0.0 */
+/* 0x1278	   8 ( 0  0) */		.xword	-9223372034707292160	/* = 0x8000000080000000: bits 63 and 31 set */
+/* 0x1280	   9 ( 0  0) */		.type	mask_cnst,#object
+/* 0x1280	  10 ( 0  0) */		.size	mask_cnst,8
+
diff --git a/security/nss/lib/freebl/mpi/mpvalpha.c b/security/nss/lib/freebl/mpi/mpvalpha.c
new file mode 100644
index 000000000..94e86eedb
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpvalpha.c
@@ -0,0 +1,183 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+#include <c_asm.h>
+/* Full product of a and b: Plo = low word (mulq), Phi = high word (umulh). */
+#define MP_MUL_DxD(a, b, Phi, Plo)              \
+    {                                           \
+        Plo = asm("mulq %a0, %a1, %v0", a, b);  \
+        Phi = asm("umulh %a0, %a1, %v0", a, b); \
+    }
+
+/* Hook ONE_MUL runs just before storing a digit; empty for s_mpv_mul_d. */
+#define CARRY_ADD
+/* One digit step: a1b1:a0b0 = *a++ * b + carry; store a0b0, carry = a1b1. */
+#define ONE_MUL                     \
+    a_i = *a++;                     \
+    MP_MUL_DxD(a_i, b, a1b1, a0b0); \
+    a0b0 += carry;                  \
+    if (a0b0 < carry)               \
+        ++a1b1;                     \
+    CARRY_ADD                       \
+    *c++ = a0b0;                    \
+    carry = a1b1;
+/* Straight-line repetitions of ONE_MUL: 4, 16, 32 and 128 digit steps. */
+#define FOUR_MUL \
+    ONE_MUL      \
+    ONE_MUL      \
+    ONE_MUL      \
+    ONE_MUL
+
+#define SIXTEEN_MUL \
+    FOUR_MUL        \
+    FOUR_MUL        \
+    FOUR_MUL        \
+    FOUR_MUL
+
+#define THIRTYTWO_MUL \
+    SIXTEEN_MUL       \
+    SIXTEEN_MUL
+
+#define ONETWENTYEIGHT_MUL \
+    THIRTYTWO_MUL          \
+    THIRTYTWO_MUL          \
+    THIRTYTWO_MUL          \
+    THIRTYTWO_MUL
+/* Shared body: a_len % 256 digits inline, the rest via CALL; ends with carry. */
+#define EXPAND_256(CALL)                     \
+    mp_digit carry = 0;                      \
+    mp_digit a_i;                            \
+    mp_digit a0b0, a1b1;                     \
+    if (a_len & 255) {                       \
+        if (a_len & 1) {                     \
+            ONE_MUL                          \
+        }                                    \
+        if (a_len & 2) {                     \
+            ONE_MUL                          \
+            ONE_MUL                          \
+        }                                    \
+        if (a_len & 4) {                     \
+            FOUR_MUL                         \
+        }                                    \
+        if (a_len & 8) {                     \
+            FOUR_MUL                         \
+            FOUR_MUL                         \
+        }                                    \
+        if (a_len & 16) {                    \
+            SIXTEEN_MUL                      \
+        }                                    \
+        if (a_len & 32) {                    \
+            THIRTYTWO_MUL                    \
+        }                                    \
+        if (a_len & 64) {                    \
+            THIRTYTWO_MUL                    \
+            THIRTYTWO_MUL                    \
+        }                                    \
+        if (a_len & 128) {                   \
+            ONETWENTYEIGHT_MUL               \
+        }                                    \
+        a_len = a_len & (-256);              \
+    }                                        \
+    if (a_len >= 256) {                      \
+        carry = CALL(a, a_len, b, c, carry); \
+        c += a_len;                          \
+    }
+/* Signature shared by the out-of-line helpers that EXPAND_256 calls. */
+#define FUNC_NAME(NAME)                    \
+    mp_digit NAME(const mp_digit *a,       \
+                  mp_size a_len,           \
+                  mp_digit b, mp_digit *c, \
+                  mp_digit carry)
+/* Defines FNAME: 256 ONE_MUL steps per pass; expects a_len % 256 == 0. */
+#define DECLARE_MUL_256(FNAME) \
+    FUNC_NAME(FNAME)           \
+    {                          \
+        mp_digit a_i;          \
+        mp_digit a0b0, a1b1;   \
+        while (a_len) {        \
+            ONETWENTYEIGHT_MUL \
+            ONETWENTYEIGHT_MUL \
+            a_len -= 256;      \
+        }                      \
+        return carry;          \
+    }
+
+/* Expanding the loop in s_mpv_mul_d appeared to slow down the
+   (admittedly) small number of tests (i.e., timetest) used to
+   measure performance, so this define disables that optimization. */
+#define DO_NOT_EXPAND 1
+
+/* Need forward declaration so it can be instantiated after
+   the routine that uses it; this helps locality somewhat  */
+#if !defined(DO_NOT_EXPAND)
+FUNC_NAME(s_mpv_mul_d_MUL256);
+#endif
+
+/* c = a * b: a_len product digits, then the final carry in c[a_len]. */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len,
+            mp_digit b, mp_digit *c)
+{
+#ifdef DO_NOT_EXPAND
+    mp_digit carry = 0;
+    for (; a_len != 0; --a_len) {
+        mp_digit lo, hi;
+        const mp_digit digit = *a++;
+        /* hi:lo = digit * b, the full double-width product */
+        MP_MUL_DxD(digit, b, hi, lo);
+        /* fold in the incoming carry; a wrapped sum means a carry-out */
+        lo += carry;
+        if (lo < carry)
+            hi += 1;
+        *c++ = lo;
+        carry = hi;
+    }
+#else
+    EXPAND_256(s_mpv_mul_d_MUL256)
+#endif
+    *c = carry;
+}
+
+#if !defined(DO_NOT_EXPAND)
+DECLARE_MUL_256(s_mpv_mul_d_MUL256)
+#endif
+
+#undef CARRY_ADD
+/* This is redefined for the loop in s_mpv_mul_d_add */
+#define CARRY_ADD     \
+    a0b0 += a_i = *c; \
+    if (a0b0 < a_i)   \
+        ++a1b1;
+
+/* Need forward declaration so it can be instantiated between the
+   two routines that use it; this helps locality somewhat  */
+FUNC_NAME(s_mpv_mul_d_add_MUL256);
+
+/* c[0..a_len-1] += a * b; the final carry is stored into c[a_len]. */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len,
+                mp_digit b, mp_digit *c)
+{
+    EXPAND_256(s_mpv_mul_d_add_MUL256)
+    *c = carry;
+}
+
+/* Instantiate multiply 256 routine here */
+DECLARE_MUL_256(s_mpv_mul_d_add_MUL256)
+
+/* c += a * b, then ripple the final carry upward through c until it dies. */
+/* Presently, this is only used by the Montgomery arithmetic code. */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len,
+                     mp_digit b, mp_digit *c)
+{
+    EXPAND_256(s_mpv_mul_d_add_MUL256)
+    for (; carry != 0; ++c) {
+        const mp_digit prev = *c;
+        const mp_digit sum = prev + carry;
+        *c = sum;
+        carry = (sum < prev);
+    }
+}
diff --git a/security/nss/lib/freebl/mpi/mulsqr.c b/security/nss/lib/freebl/mpi/mulsqr.c
new file mode 100644
index 000000000..461d40ab3
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mulsqr.c
@@ -0,0 +1,84 @@
+/*
+ * Test whether to include squaring code given the current settings
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <time.h>
+
+#define MP_SQUARE 1 /* make sure squaring code is included */
+
+#include "mpi.h"
+#include "mpprime.h"
+/* Usage: mulsqr <ntests> <nbits>.  Times mp_mul(a, a) against mp_sqr(a). */
+int
+main(int argc, char *argv[])
+{
+    int ntests, prec, ix;
+    unsigned int seed = (unsigned int)time(NULL);
+    clock_t start, stop;
+    double multime, sqrtime, slower, faster;
+    mp_int a, c;
+
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s <ntests> <nbits>\n", argv[0]);
+        return 1;
+    }
+    if ((ntests = abs(atoi(argv[1]))) == 0) {
+        fprintf(stderr, "%s: must request at least 1 test.\n", argv[0]);
+        return 1;
+    }
+    if ((prec = abs(atoi(argv[2]))) < CHAR_BIT) {
+        fprintf(stderr, "%s: must request at least %d bits.\n", argv[0],
+                CHAR_BIT);
+        return 1;
+    }
+    /* Round the requested bit count up to a whole number of digits. */
+    prec = (prec + (DIGIT_BIT - 1)) / DIGIT_BIT;
+    /* Never time operations on operands that failed to allocate. */
+    if (mp_init_size(&a, prec) != MP_OKAY) {
+        fprintf(stderr, "%s: out of memory.\n", argv[0]);
+        return 1;
+    }
+    if (mp_init_size(&c, 2 * prec) != MP_OKAY) {
+        fprintf(stderr, "%s: out of memory.\n", argv[0]);
+        mp_clear(&a);
+        return 1;
+    }
+
+    /* Test multiplication by self */
+    srand(seed);
+    start = clock();
+    for (ix = 0; ix < ntests; ix++) {
+        mpp_random_size(&a, prec);
+        mp_mul(&a, &a, &c);
+    }
+    stop = clock();
+    multime = (double)(stop - start) / CLOCKS_PER_SEC;
+
+    /* Test squaring, after the same srand(seed) as the loop above */
+    srand(seed);
+    start = clock();
+    for (ix = 0; ix < ntests; ix++) {
+        mpp_random_size(&a, prec);
+        mp_sqr(&a, &c);
+    }
+    stop = clock();
+    sqrtime = (double)(stop - start) / CLOCKS_PER_SEC;
+
+    /* Report 0.0% when neither run took measurable time (avoids 0/0). */
+    slower = (multime < sqrtime) ? sqrtime : multime;
+    faster = (multime < sqrtime) ? multime : sqrtime;
+    printf("Multiply: %.4f\n", multime);
+    printf("Square:   %.4f\n", sqrtime);
+    printf("Speedup:  %.1f%%\n",
+           (slower > 0.0) ? 100.0 * (1.0 - faster / slower) : 0.0);
+    printf("Prefer:   %s\n", (multime < sqrtime) ? "multiply" : "square");
+    mp_clear(&a);
+    mp_clear(&c);
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/multest b/security/nss/lib/freebl/mpi/multest
new file mode 100755
index 000000000..24752e019
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/multest
@@ -0,0 +1,76 @@
+#!/bin/sh
+#
+# multest
+#
+# Run multiply and square timing tests, to compute a chart for the
+# current processor and compiler combination.
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ECHO=/bin/echo
+MAKE=gmake
+
+$ECHO "\n** Running multiply and square timing tests\n"
+# Rebuild the timing program and confirm the binary exists before using it.
+$ECHO "Bringing 'mulsqr' up to date ... "
+if $MAKE mulsqr ; then
+    :
+else
+    $ECHO "\nMake failed to build mulsqr.\n"
+    exit 1
+fi
+
+if [ ! -x ./mulsqr ] ; then
+    $ECHO "\nCannot find 'mulsqr' program, testing cannot continue.\n"
+    exit 1
+fi
+
+sizes='64 128 192 256 320 384 448 512 640 768 896 1024 1536 2048'
+ntests=500000
+
+$ECHO "Running timing tests, please wait ... "
+# Discard partial result files if the run is interrupted.
+trap 'echo "oop!";rm -f tt*.tmp;exit 0' INT HUP
+# The three figures from mulsqr go into $1..$3 (plain sh has no 'set -A').
+touch tt$$.tmp
+$ECHO $ntests tests >> tt$$.tmp
+for size in $sizes ; do
+    $ECHO "$size bits ... \c"
+    set -- `./mulsqr $ntests $size|head -3|tr -d '%'|awk '{print $2}'`
+    $ECHO $size"\t"$1"\t"$2"\t"$3 >> tt$$.tmp
+    $ECHO "(done)"
+done
+mv tt$$.tmp mulsqr-results.txt
+rm -f tt$$.tmp
+
+$ECHO "\n** Running Karatsuba-Ofman multiplication tests\n"
+# Same build-and-verify step for the karatsuba timing program.
+$ECHO "Bringing 'karatsuba' up to date ... "
+if $MAKE karatsuba ; then
+    :
+else
+    $ECHO "\nMake failed to build karatsuba.\n"
+    exit 1
+fi
+
+if [ ! -x ./karatsuba ] ; then
+    $ECHO "\nCannot find 'karatsuba' program, testing cannot continue.\n"
+    exit 1
+fi
+
+ntests=100000
+
+trap 'echo "oop!";rm -f tt*.tmp;exit 0' INT HUP
+
+touch tt$$.tmp
+for size in $sizes ; do
+    $ECHO "$size bits ... "
+    ./karatsuba $ntests $size >> tt$$.tmp
+    tail -2 tt$$.tmp
+done
+mv tt$$.tmp karatsuba-results.txt
+rm -f tt$$.tmp
+
+exit 0
diff --git a/security/nss/lib/freebl/mpi/primes.c b/security/nss/lib/freebl/mpi/primes.c
new file mode 100644
index 000000000..c8bd93ff9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/primes.c
@@ -0,0 +1,841 @@
+/*
+ * These tables of primes were generated using the 'sieve' program
+ * (sieve.c) and converted to this format with 'ptab.pl'.
+ *
+ * The 'small' table is just the first 128 primes.  The 'large' table
+ * is a table of all the prime values that will fit into a single
+ * mp_digit (given the current size of an mp_digit, which is two bytes).
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#if SMALL_TABLE
+#define MP_PRIME_TAB_SIZE 128
+#else
+#define MP_PRIME_TAB_SIZE 6542
+#endif
+
+const int prime_tab_size = MP_PRIME_TAB_SIZE;
+const mp_digit prime_tab[] = {
+    0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
+    0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
+    0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
+    0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
+    0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
+    0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
+    0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
+    0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
+    0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
+    0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
+    0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
+    0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
+    0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
+    0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
+    0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
+    0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
+#if !SMALL_TABLE
+    0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
+    0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
+    0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
+    0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
+    0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
+    0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
+    0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
+    0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
+    0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
+    0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
+    0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
+    0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
+    0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
+    0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
+    0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
+    0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653,
+    0x0655, 0x065B, 0x0665, 0x0679, 0x067F, 0x0683, 0x0685, 0x069D,
+    0x06A1, 0x06A3, 0x06AD, 0x06B9, 0x06BB, 0x06C5, 0x06CD, 0x06D3,
+    0x06D9, 0x06DF, 0x06F1, 0x06F7, 0x06FB, 0x06FD, 0x0709, 0x0713,
+    0x071F, 0x0727, 0x0737, 0x0745, 0x074B, 0x074F, 0x0751, 0x0755,
+    0x0757, 0x0761, 0x076D, 0x0773, 0x0779, 0x078B, 0x078D, 0x079D,
+    0x079F, 0x07B5, 0x07BB, 0x07C3, 0x07C9, 0x07CD, 0x07CF, 0x07D3,
+    0x07DB, 0x07E1, 0x07EB, 0x07ED, 0x07F7, 0x0805, 0x080F, 0x0815,
+    0x0821, 0x0823, 0x0827, 0x0829, 0x0833, 0x083F, 0x0841, 0x0851,
+    0x0853, 0x0859, 0x085D, 0x085F, 0x0869, 0x0871, 0x0883, 0x089B,
+    0x089F, 0x08A5, 0x08AD, 0x08BD, 0x08BF, 0x08C3, 0x08CB, 0x08DB,
+    0x08DD, 0x08E1, 0x08E9, 0x08EF, 0x08F5, 0x08F9, 0x0905, 0x0907,
+    0x091D, 0x0923, 0x0925, 0x092B, 0x092F, 0x0935, 0x0943, 0x0949,
+    0x094D, 0x094F, 0x0955, 0x0959, 0x095F, 0x096B, 0x0971, 0x0977,
+    0x0985, 0x0989, 0x098F, 0x099B, 0x09A3, 0x09A9, 0x09AD, 0x09C7,
+    0x09D9, 0x09E3, 0x09EB, 0x09EF, 0x09F5, 0x09F7, 0x09FD, 0x0A13,
+    0x0A1F, 0x0A21, 0x0A31, 0x0A39, 0x0A3D, 0x0A49, 0x0A57, 0x0A61,
+    0x0A63, 0x0A67, 0x0A6F, 0x0A75, 0x0A7B, 0x0A7F, 0x0A81, 0x0A85,
+    0x0A8B, 0x0A93, 0x0A97, 0x0A99, 0x0A9F, 0x0AA9, 0x0AAB, 0x0AB5,
+    0x0ABD, 0x0AC1, 0x0ACF, 0x0AD9, 0x0AE5, 0x0AE7, 0x0AED, 0x0AF1,
+    0x0AF3, 0x0B03, 0x0B11, 0x0B15, 0x0B1B, 0x0B23, 0x0B29, 0x0B2D,
+    0x0B3F, 0x0B47, 0x0B51, 0x0B57, 0x0B5D, 0x0B65, 0x0B6F, 0x0B7B,
+    0x0B89, 0x0B8D, 0x0B93, 0x0B99, 0x0B9B, 0x0BB7, 0x0BB9, 0x0BC3,
+    0x0BCB, 0x0BCF, 0x0BDD, 0x0BE1, 0x0BE9, 0x0BF5, 0x0BFB, 0x0C07,
+    0x0C0B, 0x0C11, 0x0C25, 0x0C2F, 0x0C31, 0x0C41, 0x0C5B, 0x0C5F,
+    0x0C61, 0x0C6D, 0x0C73, 0x0C77, 0x0C83, 0x0C89, 0x0C91, 0x0C95,
+    0x0C9D, 0x0CB3, 0x0CB5, 0x0CB9, 0x0CBB, 0x0CC7, 0x0CE3, 0x0CE5,
+    0x0CEB, 0x0CF1, 0x0CF7, 0x0CFB, 0x0D01, 0x0D03, 0x0D0F, 0x0D13,
+    0x0D1F, 0x0D21, 0x0D2B, 0x0D2D, 0x0D3D, 0x0D3F, 0x0D4F, 0x0D55,
+    0x0D69, 0x0D79, 0x0D81, 0x0D85, 0x0D87, 0x0D8B, 0x0D8D, 0x0DA3,
+    0x0DAB, 0x0DB7, 0x0DBD, 0x0DC7, 0x0DC9, 0x0DCD, 0x0DD3, 0x0DD5,
+    0x0DDB, 0x0DE5, 0x0DE7, 0x0DF3, 0x0DFD, 0x0DFF, 0x0E09, 0x0E17,
+    0x0E1D, 0x0E21, 0x0E27, 0x0E2F, 0x0E35, 0x0E3B, 0x0E4B, 0x0E57,
+    0x0E59, 0x0E5D, 0x0E6B, 0x0E71, 0x0E75, 0x0E7D, 0x0E87, 0x0E8F,
+    0x0E95, 0x0E9B, 0x0EB1, 0x0EB7, 0x0EB9, 0x0EC3, 0x0ED1, 0x0ED5,
+    0x0EDB, 0x0EED, 0x0EEF, 0x0EF9, 0x0F07, 0x0F0B, 0x0F0D, 0x0F17,
+    0x0F25, 0x0F29, 0x0F31, 0x0F43, 0x0F47, 0x0F4D, 0x0F4F, 0x0F53,
+    0x0F59, 0x0F5B, 0x0F67, 0x0F6B, 0x0F7F, 0x0F95, 0x0FA1, 0x0FA3,
+    0x0FA7, 0x0FAD, 0x0FB3, 0x0FB5, 0x0FBB, 0x0FD1, 0x0FD3, 0x0FD9,
+    0x0FE9, 0x0FEF, 0x0FFB, 0x0FFD, 0x1003, 0x100F, 0x101F, 0x1021,
+    0x1025, 0x102B, 0x1039, 0x103D, 0x103F, 0x1051, 0x1069, 0x1073,
+    0x1079, 0x107B, 0x1085, 0x1087, 0x1091, 0x1093, 0x109D, 0x10A3,
+    0x10A5, 0x10AF, 0x10B1, 0x10BB, 0x10C1, 0x10C9, 0x10E7, 0x10F1,
+    0x10F3, 0x10FD, 0x1105, 0x110B, 0x1115, 0x1127, 0x112D, 0x1139,
+    0x1145, 0x1147, 0x1159, 0x115F, 0x1163, 0x1169, 0x116F, 0x1181,
+    0x1183, 0x118D, 0x119B, 0x11A1, 0x11A5, 0x11A7, 0x11AB, 0x11C3,
+    0x11C5, 0x11D1, 0x11D7, 0x11E7, 0x11EF, 0x11F5, 0x11FB, 0x120D,
+    0x121D, 0x121F, 0x1223, 0x1229, 0x122B, 0x1231, 0x1237, 0x1241,
+    0x1247, 0x1253, 0x125F, 0x1271, 0x1273, 0x1279, 0x127D, 0x128F,
+    0x1297, 0x12AF, 0x12B3, 0x12B5, 0x12B9, 0x12BF, 0x12C1, 0x12CD,
+    0x12D1, 0x12DF, 0x12FD, 0x1307, 0x130D, 0x1319, 0x1327, 0x132D,
+    0x1337, 0x1343, 0x1345, 0x1349, 0x134F, 0x1357, 0x135D, 0x1367,
+    0x1369, 0x136D, 0x137B, 0x1381, 0x1387, 0x138B, 0x1391, 0x1393,
+    0x139D, 0x139F, 0x13AF, 0x13BB, 0x13C3, 0x13D5, 0x13D9, 0x13DF,
+    0x13EB, 0x13ED, 0x13F3, 0x13F9, 0x13FF, 0x141B, 0x1421, 0x142F,
+    0x1433, 0x143B, 0x1445, 0x144D, 0x1459, 0x146B, 0x146F, 0x1471,
+    0x1475, 0x148D, 0x1499, 0x149F, 0x14A1, 0x14B1, 0x14B7, 0x14BD,
+    0x14CB, 0x14D5, 0x14E3, 0x14E7, 0x1505, 0x150B, 0x1511, 0x1517,
+    0x151F, 0x1525, 0x1529, 0x152B, 0x1537, 0x153D, 0x1541, 0x1543,
+    0x1549, 0x155F, 0x1565, 0x1567, 0x156B, 0x157D, 0x157F, 0x1583,
+    0x158F, 0x1591, 0x1597, 0x159B, 0x15B5, 0x15BB, 0x15C1, 0x15C5,
+    0x15CD, 0x15D7, 0x15F7, 0x1607, 0x1609, 0x160F, 0x1613, 0x1615,
+    0x1619, 0x161B, 0x1625, 0x1633, 0x1639, 0x163D, 0x1645, 0x164F,
+    0x1655, 0x1669, 0x166D, 0x166F, 0x1675, 0x1693, 0x1697, 0x169F,
+    0x16A9, 0x16AF, 0x16B5, 0x16BD, 0x16C3, 0x16CF, 0x16D3, 0x16D9,
+    0x16DB, 0x16E1, 0x16E5, 0x16EB, 0x16ED, 0x16F7, 0x16F9, 0x1709,
+    0x170F, 0x1723, 0x1727, 0x1733, 0x1741, 0x175D, 0x1763, 0x1777,
+    0x177B, 0x178D, 0x1795, 0x179B, 0x179F, 0x17A5, 0x17B3, 0x17B9,
+    0x17BF, 0x17C9, 0x17CB, 0x17D5, 0x17E1, 0x17E9, 0x17F3, 0x17F5,
+    0x17FF, 0x1807, 0x1813, 0x181D, 0x1835, 0x1837, 0x183B, 0x1843,
+    0x1849, 0x184D, 0x1855, 0x1867, 0x1871, 0x1877, 0x187D, 0x187F,
+    0x1885, 0x188F, 0x189B, 0x189D, 0x18A7, 0x18AD, 0x18B3, 0x18B9,
+    0x18C1, 0x18C7, 0x18D1, 0x18D7, 0x18D9, 0x18DF, 0x18E5, 0x18EB,
+    0x18F5, 0x18FD, 0x1915, 0x191B, 0x1931, 0x1933, 0x1945, 0x1949,
+    0x1951, 0x195B, 0x1979, 0x1981, 0x1993, 0x1997, 0x1999, 0x19A3,
+    0x19A9, 0x19AB, 0x19B1, 0x19B5, 0x19C7, 0x19CF, 0x19DB, 0x19ED,
+    0x19FD, 0x1A03, 0x1A05, 0x1A11, 0x1A17, 0x1A21, 0x1A23, 0x1A2D,
+    0x1A2F, 0x1A35, 0x1A3F, 0x1A4D, 0x1A51, 0x1A69, 0x1A6B, 0x1A7B,
+    0x1A7D, 0x1A87, 0x1A89, 0x1A93, 0x1AA7, 0x1AAB, 0x1AAD, 0x1AB1,
+    0x1AB9, 0x1AC9, 0x1ACF, 0x1AD5, 0x1AD7, 0x1AE3, 0x1AF3, 0x1AFB,
+    0x1AFF, 0x1B05, 0x1B23, 0x1B25, 0x1B2F, 0x1B31, 0x1B37, 0x1B3B,
+    0x1B41, 0x1B47, 0x1B4F, 0x1B55, 0x1B59, 0x1B65, 0x1B6B, 0x1B73,
+    0x1B7F, 0x1B83, 0x1B91, 0x1B9D, 0x1BA7, 0x1BBF, 0x1BC5, 0x1BD1,
+    0x1BD7, 0x1BD9, 0x1BEF, 0x1BF7, 0x1C09, 0x1C13, 0x1C19, 0x1C27,
+    0x1C2B, 0x1C2D, 0x1C33, 0x1C3D, 0x1C45, 0x1C4B, 0x1C4F, 0x1C55,
+    0x1C73, 0x1C81, 0x1C8B, 0x1C8D, 0x1C99, 0x1CA3, 0x1CA5, 0x1CB5,
+    0x1CB7, 0x1CC9, 0x1CE1, 0x1CF3, 0x1CF9, 0x1D09, 0x1D1B, 0x1D21,
+    0x1D23, 0x1D35, 0x1D39, 0x1D3F, 0x1D41, 0x1D4B, 0x1D53, 0x1D5D,
+    0x1D63, 0x1D69, 0x1D71, 0x1D75, 0x1D7B, 0x1D7D, 0x1D87, 0x1D89,
+    0x1D95, 0x1D99, 0x1D9F, 0x1DA5, 0x1DA7, 0x1DB3, 0x1DB7, 0x1DC5,
+    0x1DD7, 0x1DDB, 0x1DE1, 0x1DF5, 0x1DF9, 0x1E01, 0x1E07, 0x1E0B,
+    0x1E13, 0x1E17, 0x1E25, 0x1E2B, 0x1E2F, 0x1E3D, 0x1E49, 0x1E4D,
+    0x1E4F, 0x1E6D, 0x1E71, 0x1E89, 0x1E8F, 0x1E95, 0x1EA1, 0x1EAD,
+    0x1EBB, 0x1EC1, 0x1EC5, 0x1EC7, 0x1ECB, 0x1EDD, 0x1EE3, 0x1EEF,
+    0x1EF7, 0x1EFD, 0x1F01, 0x1F0D, 0x1F0F, 0x1F1B, 0x1F39, 0x1F49,
+    0x1F4B, 0x1F51, 0x1F67, 0x1F75, 0x1F7B, 0x1F85, 0x1F91, 0x1F97,
+    0x1F99, 0x1F9D, 0x1FA5, 0x1FAF, 0x1FB5, 0x1FBB, 0x1FD3, 0x1FE1,
+    0x1FE7, 0x1FEB, 0x1FF3, 0x1FFF, 0x2011, 0x201B, 0x201D, 0x2027,
+    0x2029, 0x202D, 0x2033, 0x2047, 0x204D, 0x2051, 0x205F, 0x2063,
+    0x2065, 0x2069, 0x2077, 0x207D, 0x2089, 0x20A1, 0x20AB, 0x20B1,
+    0x20B9, 0x20C3, 0x20C5, 0x20E3, 0x20E7, 0x20ED, 0x20EF, 0x20FB,
+    0x20FF, 0x210D, 0x2113, 0x2135, 0x2141, 0x2149, 0x214F, 0x2159,
+    0x215B, 0x215F, 0x2173, 0x217D, 0x2185, 0x2195, 0x2197, 0x21A1,
+    0x21AF, 0x21B3, 0x21B5, 0x21C1, 0x21C7, 0x21D7, 0x21DD, 0x21E5,
+    0x21E9, 0x21F1, 0x21F5, 0x21FB, 0x2203, 0x2209, 0x220F, 0x221B,
+    0x2221, 0x2225, 0x222B, 0x2231, 0x2239, 0x224B, 0x224F, 0x2263,
+    0x2267, 0x2273, 0x2275, 0x227F, 0x2285, 0x2287, 0x2291, 0x229D,
+    0x229F, 0x22A3, 0x22B7, 0x22BD, 0x22DB, 0x22E1, 0x22E5, 0x22ED,
+    0x22F7, 0x2303, 0x2309, 0x230B, 0x2327, 0x2329, 0x232F, 0x2333,
+    0x2335, 0x2345, 0x2351, 0x2353, 0x2359, 0x2363, 0x236B, 0x2383,
+    0x238F, 0x2395, 0x23A7, 0x23AD, 0x23B1, 0x23BF, 0x23C5, 0x23C9,
+    0x23D5, 0x23DD, 0x23E3, 0x23EF, 0x23F3, 0x23F9, 0x2405, 0x240B,
+    0x2417, 0x2419, 0x2429, 0x243D, 0x2441, 0x2443, 0x244D, 0x245F,
+    0x2467, 0x246B, 0x2479, 0x247D, 0x247F, 0x2485, 0x249B, 0x24A1,
+    0x24AF, 0x24B5, 0x24BB, 0x24C5, 0x24CB, 0x24CD, 0x24D7, 0x24D9,
+    0x24DD, 0x24DF, 0x24F5, 0x24F7, 0x24FB, 0x2501, 0x2507, 0x2513,
+    0x2519, 0x2527, 0x2531, 0x253D, 0x2543, 0x254B, 0x254F, 0x2573,
+    0x2581, 0x258D, 0x2593, 0x2597, 0x259D, 0x259F, 0x25AB, 0x25B1,
+    0x25BD, 0x25CD, 0x25CF, 0x25D9, 0x25E1, 0x25F7, 0x25F9, 0x2605,
+    0x260B, 0x260F, 0x2615, 0x2627, 0x2629, 0x2635, 0x263B, 0x263F,
+    0x264B, 0x2653, 0x2659, 0x2665, 0x2669, 0x266F, 0x267B, 0x2681,
+    0x2683, 0x268F, 0x269B, 0x269F, 0x26AD, 0x26B3, 0x26C3, 0x26C9,
+    0x26CB, 0x26D5, 0x26DD, 0x26EF, 0x26F5, 0x2717, 0x2719, 0x2735,
+    0x2737, 0x274D, 0x2753, 0x2755, 0x275F, 0x276B, 0x276D, 0x2773,
+    0x2777, 0x277F, 0x2795, 0x279B, 0x279D, 0x27A7, 0x27AF, 0x27B3,
+    0x27B9, 0x27C1, 0x27C5, 0x27D1, 0x27E3, 0x27EF, 0x2803, 0x2807,
+    0x280D, 0x2813, 0x281B, 0x281F, 0x2821, 0x2831, 0x283D, 0x283F,
+    0x2849, 0x2851, 0x285B, 0x285D, 0x2861, 0x2867, 0x2875, 0x2881,
+    0x2897, 0x289F, 0x28BB, 0x28BD, 0x28C1, 0x28D5, 0x28D9, 0x28DB,
+    0x28DF, 0x28ED, 0x28F7, 0x2903, 0x2905, 0x2911, 0x2921, 0x2923,
+    0x293F, 0x2947, 0x295D, 0x2965, 0x2969, 0x296F, 0x2975, 0x2983,
+    0x2987, 0x298F, 0x299B, 0x29A1, 0x29A7, 0x29AB, 0x29BF, 0x29C3,
+    0x29D5, 0x29D7, 0x29E3, 0x29E9, 0x29ED, 0x29F3, 0x2A01, 0x2A13,
+    0x2A1D, 0x2A25, 0x2A2F, 0x2A4F, 0x2A55, 0x2A5F, 0x2A65, 0x2A6B,
+    0x2A6D, 0x2A73, 0x2A83, 0x2A89, 0x2A8B, 0x2A97, 0x2A9D, 0x2AB9,
+    0x2ABB, 0x2AC5, 0x2ACD, 0x2ADD, 0x2AE3, 0x2AEB, 0x2AF1, 0x2AFB,
+    0x2B13, 0x2B27, 0x2B31, 0x2B33, 0x2B3D, 0x2B3F, 0x2B4B, 0x2B4F,
+    0x2B55, 0x2B69, 0x2B6D, 0x2B6F, 0x2B7B, 0x2B8D, 0x2B97, 0x2B99,
+    0x2BA3, 0x2BA5, 0x2BA9, 0x2BBD, 0x2BCD, 0x2BE7, 0x2BEB, 0x2BF3,
+    0x2BF9, 0x2BFD, 0x2C09, 0x2C0F, 0x2C17, 0x2C23, 0x2C2F, 0x2C35,
+    0x2C39, 0x2C41, 0x2C57, 0x2C59, 0x2C69, 0x2C77, 0x2C81, 0x2C87,
+    0x2C93, 0x2C9F, 0x2CAD, 0x2CB3, 0x2CB7, 0x2CCB, 0x2CCF, 0x2CDB,
+    0x2CE1, 0x2CE3, 0x2CE9, 0x2CEF, 0x2CFF, 0x2D07, 0x2D1D, 0x2D1F,
+    0x2D3B, 0x2D43, 0x2D49, 0x2D4D, 0x2D61, 0x2D65, 0x2D71, 0x2D89,
+    0x2D9D, 0x2DA1, 0x2DA9, 0x2DB3, 0x2DB5, 0x2DC5, 0x2DC7, 0x2DD3,
+    0x2DDF, 0x2E01, 0x2E03, 0x2E07, 0x2E0D, 0x2E19, 0x2E1F, 0x2E25,
+    0x2E2D, 0x2E33, 0x2E37, 0x2E39, 0x2E3F, 0x2E57, 0x2E5B, 0x2E6F,
+    0x2E79, 0x2E7F, 0x2E85, 0x2E93, 0x2E97, 0x2E9D, 0x2EA3, 0x2EA5,
+    0x2EB1, 0x2EB7, 0x2EC1, 0x2EC3, 0x2ECD, 0x2ED3, 0x2EE7, 0x2EEB,
+    0x2F05, 0x2F09, 0x2F0B, 0x2F11, 0x2F27, 0x2F29, 0x2F41, 0x2F45,
+    0x2F4B, 0x2F4D, 0x2F51, 0x2F57, 0x2F6F, 0x2F75, 0x2F7D, 0x2F81,
+    0x2F83, 0x2FA5, 0x2FAB, 0x2FB3, 0x2FC3, 0x2FCF, 0x2FD1, 0x2FDB,
+    0x2FDD, 0x2FE7, 0x2FED, 0x2FF5, 0x2FF9, 0x3001, 0x300D, 0x3023,
+    0x3029, 0x3037, 0x303B, 0x3055, 0x3059, 0x305B, 0x3067, 0x3071,
+    0x3079, 0x307D, 0x3085, 0x3091, 0x3095, 0x30A3, 0x30A9, 0x30B9,
+    0x30BF, 0x30C7, 0x30CB, 0x30D1, 0x30D7, 0x30DF, 0x30E5, 0x30EF,
+    0x30FB, 0x30FD, 0x3103, 0x3109, 0x3119, 0x3121, 0x3127, 0x312D,
+    0x3139, 0x3143, 0x3145, 0x314B, 0x315D, 0x3161, 0x3167, 0x316D,
+    0x3173, 0x317F, 0x3191, 0x3199, 0x319F, 0x31A9, 0x31B1, 0x31C3,
+    0x31C7, 0x31D5, 0x31DB, 0x31ED, 0x31F7, 0x31FF, 0x3209, 0x3215,
+    0x3217, 0x321D, 0x3229, 0x3235, 0x3259, 0x325D, 0x3263, 0x326B,
+    0x326F, 0x3275, 0x3277, 0x327B, 0x328D, 0x3299, 0x329F, 0x32A7,
+    0x32AD, 0x32B3, 0x32B7, 0x32C9, 0x32CB, 0x32CF, 0x32D1, 0x32E9,
+    0x32ED, 0x32F3, 0x32F9, 0x3307, 0x3325, 0x332B, 0x332F, 0x3335,
+    0x3341, 0x3347, 0x335B, 0x335F, 0x3367, 0x336B, 0x3373, 0x3379,
+    0x337F, 0x3383, 0x33A1, 0x33A3, 0x33AD, 0x33B9, 0x33C1, 0x33CB,
+    0x33D3, 0x33EB, 0x33F1, 0x33FD, 0x3401, 0x340F, 0x3413, 0x3419,
+    0x341B, 0x3437, 0x3445, 0x3455, 0x3457, 0x3463, 0x3469, 0x346D,
+    0x3481, 0x348B, 0x3491, 0x3497, 0x349D, 0x34A5, 0x34AF, 0x34BB,
+    0x34C9, 0x34D3, 0x34E1, 0x34F1, 0x34FF, 0x3509, 0x3517, 0x351D,
+    0x352D, 0x3533, 0x353B, 0x3541, 0x3551, 0x3565, 0x356F, 0x3571,
+    0x3577, 0x357B, 0x357D, 0x3581, 0x358D, 0x358F, 0x3599, 0x359B,
+    0x35A1, 0x35B7, 0x35BD, 0x35BF, 0x35C3, 0x35D5, 0x35DD, 0x35E7,
+    0x35EF, 0x3605, 0x3607, 0x3611, 0x3623, 0x3631, 0x3635, 0x3637,
+    0x363B, 0x364D, 0x364F, 0x3653, 0x3659, 0x3661, 0x366B, 0x366D,
+    0x368B, 0x368F, 0x36AD, 0x36AF, 0x36B9, 0x36BB, 0x36CD, 0x36D1,
+    0x36E3, 0x36E9, 0x36F7, 0x3701, 0x3703, 0x3707, 0x371B, 0x373F,
+    0x3745, 0x3749, 0x374F, 0x375D, 0x3761, 0x3775, 0x377F, 0x378D,
+    0x37A3, 0x37A9, 0x37AB, 0x37C9, 0x37D5, 0x37DF, 0x37F1, 0x37F3,
+    0x37F7, 0x3805, 0x380B, 0x3821, 0x3833, 0x3835, 0x3841, 0x3847,
+    0x384B, 0x3853, 0x3857, 0x385F, 0x3865, 0x386F, 0x3871, 0x387D,
+    0x388F, 0x3899, 0x38A7, 0x38B7, 0x38C5, 0x38C9, 0x38CF, 0x38D5,
+    0x38D7, 0x38DD, 0x38E1, 0x38E3, 0x38FF, 0x3901, 0x391D, 0x3923,
+    0x3925, 0x3929, 0x392F, 0x393D, 0x3941, 0x394D, 0x395B, 0x396B,
+    0x3979, 0x397D, 0x3983, 0x398B, 0x3991, 0x3995, 0x399B, 0x39A1,
+    0x39A7, 0x39AF, 0x39B3, 0x39BB, 0x39BF, 0x39CD, 0x39DD, 0x39E5,
+    0x39EB, 0x39EF, 0x39FB, 0x3A03, 0x3A13, 0x3A15, 0x3A1F, 0x3A27,
+    0x3A2B, 0x3A31, 0x3A4B, 0x3A51, 0x3A5B, 0x3A63, 0x3A67, 0x3A6D,
+    0x3A79, 0x3A87, 0x3AA5, 0x3AA9, 0x3AB7, 0x3ACD, 0x3AD5, 0x3AE1,
+    0x3AE5, 0x3AEB, 0x3AF3, 0x3AFD, 0x3B03, 0x3B11, 0x3B1B, 0x3B21,
+    0x3B23, 0x3B2D, 0x3B39, 0x3B45, 0x3B53, 0x3B59, 0x3B5F, 0x3B71,
+    0x3B7B, 0x3B81, 0x3B89, 0x3B9B, 0x3B9F, 0x3BA5, 0x3BA7, 0x3BAD,
+    0x3BB7, 0x3BB9, 0x3BC3, 0x3BCB, 0x3BD1, 0x3BD7, 0x3BE1, 0x3BE3,
+    0x3BF5, 0x3BFF, 0x3C01, 0x3C0D, 0x3C11, 0x3C17, 0x3C1F, 0x3C29,
+    0x3C35, 0x3C43, 0x3C4F, 0x3C53, 0x3C5B, 0x3C65, 0x3C6B, 0x3C71,
+    0x3C85, 0x3C89, 0x3C97, 0x3CA7, 0x3CB5, 0x3CBF, 0x3CC7, 0x3CD1,
+    0x3CDD, 0x3CDF, 0x3CF1, 0x3CF7, 0x3D03, 0x3D0D, 0x3D19, 0x3D1B,
+    0x3D1F, 0x3D21, 0x3D2D, 0x3D33, 0x3D37, 0x3D3F, 0x3D43, 0x3D6F,
+    0x3D73, 0x3D75, 0x3D79, 0x3D7B, 0x3D85, 0x3D91, 0x3D97, 0x3D9D,
+    0x3DAB, 0x3DAF, 0x3DB5, 0x3DBB, 0x3DC1, 0x3DC9, 0x3DCF, 0x3DF3,
+    0x3E05, 0x3E09, 0x3E0F, 0x3E11, 0x3E1D, 0x3E23, 0x3E29, 0x3E2F,
+    0x3E33, 0x3E41, 0x3E57, 0x3E63, 0x3E65, 0x3E77, 0x3E81, 0x3E87,
+    0x3EA1, 0x3EB9, 0x3EBD, 0x3EBF, 0x3EC3, 0x3EC5, 0x3EC9, 0x3ED7,
+    0x3EDB, 0x3EE1, 0x3EE7, 0x3EEF, 0x3EFF, 0x3F0B, 0x3F0D, 0x3F37,
+    0x3F3B, 0x3F3D, 0x3F41, 0x3F59, 0x3F5F, 0x3F65, 0x3F67, 0x3F79,
+    0x3F7D, 0x3F8B, 0x3F91, 0x3FAD, 0x3FBF, 0x3FCD, 0x3FD3, 0x3FDD,
+    0x3FE9, 0x3FEB, 0x3FF1, 0x3FFD, 0x401B, 0x4021, 0x4025, 0x402B,
+    0x4031, 0x403F, 0x4043, 0x4045, 0x405D, 0x4061, 0x4067, 0x406D,
+    0x4087, 0x4091, 0x40A3, 0x40A9, 0x40B1, 0x40B7, 0x40BD, 0x40DB,
+    0x40DF, 0x40EB, 0x40F7, 0x40F9, 0x4109, 0x410B, 0x4111, 0x4115,
+    0x4121, 0x4133, 0x4135, 0x413B, 0x413F, 0x4159, 0x4165, 0x416B,
+    0x4177, 0x417B, 0x4193, 0x41AB, 0x41B7, 0x41BD, 0x41BF, 0x41CB,
+    0x41E7, 0x41EF, 0x41F3, 0x41F9, 0x4205, 0x4207, 0x4219, 0x421F,
+    0x4223, 0x4229, 0x422F, 0x4243, 0x4253, 0x4255, 0x425B, 0x4261,
+    0x4273, 0x427D, 0x4283, 0x4285, 0x4289, 0x4291, 0x4297, 0x429D,
+    0x42B5, 0x42C5, 0x42CB, 0x42D3, 0x42DD, 0x42E3, 0x42F1, 0x4307,
+    0x430F, 0x431F, 0x4325, 0x4327, 0x4333, 0x4337, 0x4339, 0x434F,
+    0x4357, 0x4369, 0x438B, 0x438D, 0x4393, 0x43A5, 0x43A9, 0x43AF,
+    0x43B5, 0x43BD, 0x43C7, 0x43CF, 0x43E1, 0x43E7, 0x43EB, 0x43ED,
+    0x43F1, 0x43F9, 0x4409, 0x440B, 0x4417, 0x4423, 0x4429, 0x443B,
+    0x443F, 0x4445, 0x444B, 0x4451, 0x4453, 0x4459, 0x4465, 0x446F,
+    0x4483, 0x448F, 0x44A1, 0x44A5, 0x44AB, 0x44AD, 0x44BD, 0x44BF,
+    0x44C9, 0x44D7, 0x44DB, 0x44F9, 0x44FB, 0x4505, 0x4511, 0x4513,
+    0x452B, 0x4531, 0x4541, 0x4549, 0x4553, 0x4555, 0x4561, 0x4577,
+    0x457D, 0x457F, 0x458F, 0x45A3, 0x45AD, 0x45AF, 0x45BB, 0x45C7,
+    0x45D9, 0x45E3, 0x45EF, 0x45F5, 0x45F7, 0x4601, 0x4603, 0x4609,
+    0x4613, 0x4625, 0x4627, 0x4633, 0x4639, 0x463D, 0x4643, 0x4645,
+    0x465D, 0x4679, 0x467B, 0x467F, 0x4681, 0x468B, 0x468D, 0x469D,
+    0x46A9, 0x46B1, 0x46C7, 0x46C9, 0x46CF, 0x46D3, 0x46D5, 0x46DF,
+    0x46E5, 0x46F9, 0x4705, 0x470F, 0x4717, 0x4723, 0x4729, 0x472F,
+    0x4735, 0x4739, 0x474B, 0x474D, 0x4751, 0x475D, 0x476F, 0x4771,
+    0x477D, 0x4783, 0x4787, 0x4789, 0x4799, 0x47A5, 0x47B1, 0x47BF,
+    0x47C3, 0x47CB, 0x47DD, 0x47E1, 0x47ED, 0x47FB, 0x4801, 0x4807,
+    0x480B, 0x4813, 0x4819, 0x481D, 0x4831, 0x483D, 0x4847, 0x4855,
+    0x4859, 0x485B, 0x486B, 0x486D, 0x4879, 0x4897, 0x489B, 0x48A1,
+    0x48B9, 0x48CD, 0x48E5, 0x48EF, 0x48F7, 0x4903, 0x490D, 0x4919,
+    0x491F, 0x492B, 0x4937, 0x493D, 0x4945, 0x4955, 0x4963, 0x4969,
+    0x496D, 0x4973, 0x4997, 0x49AB, 0x49B5, 0x49D3, 0x49DF, 0x49E1,
+    0x49E5, 0x49E7, 0x4A03, 0x4A0F, 0x4A1D, 0x4A23, 0x4A39, 0x4A41,
+    0x4A45, 0x4A57, 0x4A5D, 0x4A6B, 0x4A7D, 0x4A81, 0x4A87, 0x4A89,
+    0x4A8F, 0x4AB1, 0x4AC3, 0x4AC5, 0x4AD5, 0x4ADB, 0x4AED, 0x4AEF,
+    0x4B07, 0x4B0B, 0x4B0D, 0x4B13, 0x4B1F, 0x4B25, 0x4B31, 0x4B3B,
+    0x4B43, 0x4B49, 0x4B59, 0x4B65, 0x4B6D, 0x4B77, 0x4B85, 0x4BAD,
+    0x4BB3, 0x4BB5, 0x4BBB, 0x4BBF, 0x4BCB, 0x4BD9, 0x4BDD, 0x4BDF,
+    0x4BE3, 0x4BE5, 0x4BE9, 0x4BF1, 0x4BF7, 0x4C01, 0x4C07, 0x4C0D,
+    0x4C0F, 0x4C15, 0x4C1B, 0x4C21, 0x4C2D, 0x4C33, 0x4C4B, 0x4C55,
+    0x4C57, 0x4C61, 0x4C67, 0x4C73, 0x4C79, 0x4C7F, 0x4C8D, 0x4C93,
+    0x4C99, 0x4CCD, 0x4CE1, 0x4CE7, 0x4CF1, 0x4CF3, 0x4CFD, 0x4D05,
+    0x4D0F, 0x4D1B, 0x4D27, 0x4D29, 0x4D2F, 0x4D33, 0x4D41, 0x4D51,
+    0x4D59, 0x4D65, 0x4D6B, 0x4D81, 0x4D83, 0x4D8D, 0x4D95, 0x4D9B,
+    0x4DB1, 0x4DB3, 0x4DC9, 0x4DCF, 0x4DD7, 0x4DE1, 0x4DED, 0x4DF9,
+    0x4DFB, 0x4E05, 0x4E0B, 0x4E17, 0x4E19, 0x4E1D, 0x4E2B, 0x4E35,
+    0x4E37, 0x4E3D, 0x4E4F, 0x4E53, 0x4E5F, 0x4E67, 0x4E79, 0x4E85,
+    0x4E8B, 0x4E91, 0x4E95, 0x4E9B, 0x4EA1, 0x4EAF, 0x4EB3, 0x4EB5,
+    0x4EC1, 0x4ECD, 0x4ED1, 0x4ED7, 0x4EE9, 0x4EFB, 0x4F07, 0x4F09,
+    0x4F19, 0x4F25, 0x4F2D, 0x4F3F, 0x4F49, 0x4F63, 0x4F67, 0x4F6D,
+    0x4F75, 0x4F7B, 0x4F81, 0x4F85, 0x4F87, 0x4F91, 0x4FA5, 0x4FA9,
+    0x4FAF, 0x4FB7, 0x4FBB, 0x4FCF, 0x4FD9, 0x4FDB, 0x4FFD, 0x4FFF,
+    0x5003, 0x501B, 0x501D, 0x5029, 0x5035, 0x503F, 0x5045, 0x5047,
+    0x5053, 0x5071, 0x5077, 0x5083, 0x5093, 0x509F, 0x50A1, 0x50B7,
+    0x50C9, 0x50D5, 0x50E3, 0x50ED, 0x50EF, 0x50FB, 0x5107, 0x510B,
+    0x510D, 0x5111, 0x5117, 0x5123, 0x5125, 0x5135, 0x5147, 0x5149,
+    0x5171, 0x5179, 0x5189, 0x518F, 0x5197, 0x51A1, 0x51A3, 0x51A7,
+    0x51B9, 0x51C1, 0x51CB, 0x51D3, 0x51DF, 0x51E3, 0x51F5, 0x51F7,
+    0x5209, 0x5213, 0x5215, 0x5219, 0x521B, 0x521F, 0x5227, 0x5243,
+    0x5245, 0x524B, 0x5261, 0x526D, 0x5273, 0x5281, 0x5293, 0x5297,
+    0x529D, 0x52A5, 0x52AB, 0x52B1, 0x52BB, 0x52C3, 0x52C7, 0x52C9,
+    0x52DB, 0x52E5, 0x52EB, 0x52FF, 0x5315, 0x531D, 0x5323, 0x5341,
+    0x5345, 0x5347, 0x534B, 0x535D, 0x5363, 0x5381, 0x5383, 0x5387,
+    0x538F, 0x5395, 0x5399, 0x539F, 0x53AB, 0x53B9, 0x53DB, 0x53E9,
+    0x53EF, 0x53F3, 0x53F5, 0x53FB, 0x53FF, 0x540D, 0x5411, 0x5413,
+    0x5419, 0x5435, 0x5437, 0x543B, 0x5441, 0x5449, 0x5453, 0x5455,
+    0x545F, 0x5461, 0x546B, 0x546D, 0x5471, 0x548F, 0x5491, 0x549D,
+    0x54A9, 0x54B3, 0x54C5, 0x54D1, 0x54DF, 0x54E9, 0x54EB, 0x54F7,
+    0x54FD, 0x5507, 0x550D, 0x551B, 0x5527, 0x552B, 0x5539, 0x553D,
+    0x554F, 0x5551, 0x555B, 0x5563, 0x5567, 0x556F, 0x5579, 0x5585,
+    0x5597, 0x55A9, 0x55B1, 0x55B7, 0x55C9, 0x55D9, 0x55E7, 0x55ED,
+    0x55F3, 0x55FD, 0x560B, 0x560F, 0x5615, 0x5617, 0x5623, 0x562F,
+    0x5633, 0x5639, 0x563F, 0x564B, 0x564D, 0x565D, 0x565F, 0x566B,
+    0x5671, 0x5675, 0x5683, 0x5689, 0x568D, 0x568F, 0x569B, 0x56AD,
+    0x56B1, 0x56D5, 0x56E7, 0x56F3, 0x56FF, 0x5701, 0x5705, 0x5707,
+    0x570B, 0x5713, 0x571F, 0x5723, 0x5747, 0x574D, 0x575F, 0x5761,
+    0x576D, 0x5777, 0x577D, 0x5789, 0x57A1, 0x57A9, 0x57AF, 0x57B5,
+    0x57C5, 0x57D1, 0x57D3, 0x57E5, 0x57EF, 0x5803, 0x580D, 0x580F,
+    0x5815, 0x5827, 0x582B, 0x582D, 0x5855, 0x585B, 0x585D, 0x586D,
+    0x586F, 0x5873, 0x587B, 0x588D, 0x5897, 0x58A3, 0x58A9, 0x58AB,
+    0x58B5, 0x58BD, 0x58C1, 0x58C7, 0x58D3, 0x58D5, 0x58DF, 0x58F1,
+    0x58F9, 0x58FF, 0x5903, 0x5917, 0x591B, 0x5921, 0x5945, 0x594B,
+    0x594D, 0x5957, 0x595D, 0x5975, 0x597B, 0x5989, 0x5999, 0x599F,
+    0x59B1, 0x59B3, 0x59BD, 0x59D1, 0x59DB, 0x59E3, 0x59E9, 0x59ED,
+    0x59F3, 0x59F5, 0x59FF, 0x5A01, 0x5A0D, 0x5A11, 0x5A13, 0x5A17,
+    0x5A1F, 0x5A29, 0x5A2F, 0x5A3B, 0x5A4D, 0x5A5B, 0x5A67, 0x5A77,
+    0x5A7F, 0x5A85, 0x5A95, 0x5A9D, 0x5AA1, 0x5AA3, 0x5AA9, 0x5ABB,
+    0x5AD3, 0x5AE5, 0x5AEF, 0x5AFB, 0x5AFD, 0x5B01, 0x5B0F, 0x5B19,
+    0x5B1F, 0x5B25, 0x5B2B, 0x5B3D, 0x5B49, 0x5B4B, 0x5B67, 0x5B79,
+    0x5B87, 0x5B97, 0x5BA3, 0x5BB1, 0x5BC9, 0x5BD5, 0x5BEB, 0x5BF1,
+    0x5BF3, 0x5BFD, 0x5C05, 0x5C09, 0x5C0B, 0x5C0F, 0x5C1D, 0x5C29,
+    0x5C2F, 0x5C33, 0x5C39, 0x5C47, 0x5C4B, 0x5C4D, 0x5C51, 0x5C6F,
+    0x5C75, 0x5C77, 0x5C7D, 0x5C87, 0x5C89, 0x5CA7, 0x5CBD, 0x5CBF,
+    0x5CC3, 0x5CC9, 0x5CD1, 0x5CD7, 0x5CDD, 0x5CED, 0x5CF9, 0x5D05,
+    0x5D0B, 0x5D13, 0x5D17, 0x5D19, 0x5D31, 0x5D3D, 0x5D41, 0x5D47,
+    0x5D4F, 0x5D55, 0x5D5B, 0x5D65, 0x5D67, 0x5D6D, 0x5D79, 0x5D95,
+    0x5DA3, 0x5DA9, 0x5DAD, 0x5DB9, 0x5DC1, 0x5DC7, 0x5DD3, 0x5DD7,
+    0x5DDD, 0x5DEB, 0x5DF1, 0x5DFD, 0x5E07, 0x5E0D, 0x5E13, 0x5E1B,
+    0x5E21, 0x5E27, 0x5E2B, 0x5E2D, 0x5E31, 0x5E39, 0x5E45, 0x5E49,
+    0x5E57, 0x5E69, 0x5E73, 0x5E75, 0x5E85, 0x5E8B, 0x5E9F, 0x5EA5,
+    0x5EAF, 0x5EB7, 0x5EBB, 0x5ED9, 0x5EFD, 0x5F09, 0x5F11, 0x5F27,
+    0x5F33, 0x5F35, 0x5F3B, 0x5F47, 0x5F57, 0x5F5D, 0x5F63, 0x5F65,
+    0x5F77, 0x5F7B, 0x5F95, 0x5F99, 0x5FA1, 0x5FB3, 0x5FBD, 0x5FC5,
+    0x5FCF, 0x5FD5, 0x5FE3, 0x5FE7, 0x5FFB, 0x6011, 0x6023, 0x602F,
+    0x6037, 0x6053, 0x605F, 0x6065, 0x606B, 0x6073, 0x6079, 0x6085,
+    0x609D, 0x60AD, 0x60BB, 0x60BF, 0x60CD, 0x60D9, 0x60DF, 0x60E9,
+    0x60F5, 0x6109, 0x610F, 0x6113, 0x611B, 0x612D, 0x6139, 0x614B,
+    0x6155, 0x6157, 0x615B, 0x616F, 0x6179, 0x6187, 0x618B, 0x6191,
+    0x6193, 0x619D, 0x61B5, 0x61C7, 0x61C9, 0x61CD, 0x61E1, 0x61F1,
+    0x61FF, 0x6209, 0x6217, 0x621D, 0x6221, 0x6227, 0x623B, 0x6241,
+    0x624B, 0x6251, 0x6253, 0x625F, 0x6265, 0x6283, 0x628D, 0x6295,
+    0x629B, 0x629F, 0x62A5, 0x62AD, 0x62D5, 0x62D7, 0x62DB, 0x62DD,
+    0x62E9, 0x62FB, 0x62FF, 0x6305, 0x630D, 0x6317, 0x631D, 0x632F,
+    0x6341, 0x6343, 0x634F, 0x635F, 0x6367, 0x636D, 0x6371, 0x6377,
+    0x637D, 0x637F, 0x63B3, 0x63C1, 0x63C5, 0x63D9, 0x63E9, 0x63EB,
+    0x63EF, 0x63F5, 0x6401, 0x6403, 0x6409, 0x6415, 0x6421, 0x6427,
+    0x642B, 0x6439, 0x6443, 0x6449, 0x644F, 0x645D, 0x6467, 0x6475,
+    0x6485, 0x648D, 0x6493, 0x649F, 0x64A3, 0x64AB, 0x64C1, 0x64C7,
+    0x64C9, 0x64DB, 0x64F1, 0x64F7, 0x64F9, 0x650B, 0x6511, 0x6521,
+    0x652F, 0x6539, 0x653F, 0x654B, 0x654D, 0x6553, 0x6557, 0x655F,
+    0x6571, 0x657D, 0x658D, 0x658F, 0x6593, 0x65A1, 0x65A5, 0x65AD,
+    0x65B9, 0x65C5, 0x65E3, 0x65F3, 0x65FB, 0x65FF, 0x6601, 0x6607,
+    0x661D, 0x6629, 0x6631, 0x663B, 0x6641, 0x6647, 0x664D, 0x665B,
+    0x6661, 0x6673, 0x667D, 0x6689, 0x668B, 0x6695, 0x6697, 0x669B,
+    0x66B5, 0x66B9, 0x66C5, 0x66CD, 0x66D1, 0x66E3, 0x66EB, 0x66F5,
+    0x6703, 0x6713, 0x6719, 0x671F, 0x6727, 0x6731, 0x6737, 0x673F,
+    0x6745, 0x6751, 0x675B, 0x676F, 0x6779, 0x6781, 0x6785, 0x6791,
+    0x67AB, 0x67BD, 0x67C1, 0x67CD, 0x67DF, 0x67E5, 0x6803, 0x6809,
+    0x6811, 0x6817, 0x682D, 0x6839, 0x683B, 0x683F, 0x6845, 0x684B,
+    0x684D, 0x6857, 0x6859, 0x685D, 0x6863, 0x6869, 0x686B, 0x6871,
+    0x6887, 0x6899, 0x689F, 0x68B1, 0x68BD, 0x68C5, 0x68D1, 0x68D7,
+    0x68E1, 0x68ED, 0x68EF, 0x68FF, 0x6901, 0x690B, 0x690D, 0x6917,
+    0x6929, 0x692F, 0x6943, 0x6947, 0x6949, 0x694F, 0x6965, 0x696B,
+    0x6971, 0x6983, 0x6989, 0x6997, 0x69A3, 0x69B3, 0x69B5, 0x69BB,
+    0x69C1, 0x69C5, 0x69D3, 0x69DF, 0x69E3, 0x69E5, 0x69F7, 0x6A07,
+    0x6A2B, 0x6A37, 0x6A3D, 0x6A4B, 0x6A67, 0x6A69, 0x6A75, 0x6A7B,
+    0x6A87, 0x6A8D, 0x6A91, 0x6A93, 0x6AA3, 0x6AC1, 0x6AC9, 0x6AE1,
+    0x6AE7, 0x6B05, 0x6B0F, 0x6B11, 0x6B23, 0x6B27, 0x6B2D, 0x6B39,
+    0x6B41, 0x6B57, 0x6B59, 0x6B5F, 0x6B75, 0x6B87, 0x6B89, 0x6B93,
+    0x6B95, 0x6B9F, 0x6BBD, 0x6BBF, 0x6BDB, 0x6BE1, 0x6BEF, 0x6BFF,
+    0x6C05, 0x6C19, 0x6C29, 0x6C2B, 0x6C31, 0x6C35, 0x6C55, 0x6C59,
+    0x6C5B, 0x6C5F, 0x6C65, 0x6C67, 0x6C73, 0x6C77, 0x6C7D, 0x6C83,
+    0x6C8F, 0x6C91, 0x6C97, 0x6C9B, 0x6CA1, 0x6CA9, 0x6CAF, 0x6CB3,
+    0x6CC7, 0x6CCB, 0x6CEB, 0x6CF5, 0x6CFD, 0x6D0D, 0x6D0F, 0x6D25,
+    0x6D27, 0x6D2B, 0x6D31, 0x6D39, 0x6D3F, 0x6D4F, 0x6D5D, 0x6D61,
+    0x6D73, 0x6D7B, 0x6D7F, 0x6D93, 0x6D99, 0x6DA5, 0x6DB1, 0x6DB7,
+    0x6DC1, 0x6DC3, 0x6DCD, 0x6DCF, 0x6DDB, 0x6DF7, 0x6E03, 0x6E15,
+    0x6E17, 0x6E29, 0x6E33, 0x6E3B, 0x6E45, 0x6E75, 0x6E77, 0x6E7B,
+    0x6E81, 0x6E89, 0x6E93, 0x6E95, 0x6E9F, 0x6EBD, 0x6EBF, 0x6EE3,
+    0x6EE9, 0x6EF3, 0x6EF9, 0x6EFB, 0x6F0D, 0x6F11, 0x6F17, 0x6F1F,
+    0x6F2F, 0x6F3D, 0x6F4D, 0x6F53, 0x6F61, 0x6F65, 0x6F79, 0x6F7D,
+    0x6F83, 0x6F85, 0x6F8F, 0x6F9B, 0x6F9D, 0x6FA3, 0x6FAF, 0x6FB5,
+    0x6FBB, 0x6FBF, 0x6FCB, 0x6FCD, 0x6FD3, 0x6FD7, 0x6FE3, 0x6FE9,
+    0x6FF1, 0x6FF5, 0x6FF7, 0x6FFD, 0x700F, 0x7019, 0x701F, 0x7027,
+    0x7033, 0x7039, 0x704F, 0x7051, 0x7057, 0x7063, 0x7075, 0x7079,
+    0x7087, 0x708D, 0x7091, 0x70A5, 0x70AB, 0x70BB, 0x70C3, 0x70C7,
+    0x70CF, 0x70E5, 0x70ED, 0x70F9, 0x70FF, 0x7105, 0x7115, 0x7121,
+    0x7133, 0x7151, 0x7159, 0x715D, 0x715F, 0x7163, 0x7169, 0x7183,
+    0x7187, 0x7195, 0x71AD, 0x71C3, 0x71C9, 0x71CB, 0x71D1, 0x71DB,
+    0x71E1, 0x71EF, 0x71F5, 0x71FB, 0x7207, 0x7211, 0x7217, 0x7219,
+    0x7225, 0x722F, 0x723B, 0x7243, 0x7255, 0x7267, 0x7271, 0x7277,
+    0x727F, 0x728F, 0x7295, 0x729B, 0x72A3, 0x72B3, 0x72C7, 0x72CB,
+    0x72CD, 0x72D7, 0x72D9, 0x72E3, 0x72EF, 0x72F5, 0x72FD, 0x7303,
+    0x730D, 0x7321, 0x732B, 0x733D, 0x7357, 0x735B, 0x7361, 0x737F,
+    0x7381, 0x7385, 0x738D, 0x7393, 0x739F, 0x73AB, 0x73BD, 0x73C1,
+    0x73C9, 0x73DF, 0x73E5, 0x73E7, 0x73F3, 0x7415, 0x741B, 0x742D,
+    0x7439, 0x743F, 0x7441, 0x745D, 0x746B, 0x747B, 0x7489, 0x748D,
+    0x749B, 0x74A7, 0x74AB, 0x74B1, 0x74B7, 0x74B9, 0x74DD, 0x74E1,
+    0x74E7, 0x74FB, 0x7507, 0x751F, 0x7525, 0x753B, 0x753D, 0x754D,
+    0x755F, 0x756B, 0x7577, 0x7589, 0x758B, 0x7591, 0x7597, 0x759D,
+    0x75A1, 0x75A7, 0x75B5, 0x75B9, 0x75BB, 0x75D1, 0x75D9, 0x75E5,
+    0x75EB, 0x75F5, 0x75FB, 0x7603, 0x760F, 0x7621, 0x762D, 0x7633,
+    0x763D, 0x763F, 0x7655, 0x7663, 0x7669, 0x766F, 0x7673, 0x7685,
+    0x768B, 0x769F, 0x76B5, 0x76B7, 0x76C3, 0x76DB, 0x76DF, 0x76F1,
+    0x7703, 0x7705, 0x771B, 0x771D, 0x7721, 0x772D, 0x7735, 0x7741,
+    0x774B, 0x7759, 0x775D, 0x775F, 0x7771, 0x7781, 0x77A7, 0x77AD,
+    0x77B3, 0x77B9, 0x77C5, 0x77CF, 0x77D5, 0x77E1, 0x77E9, 0x77EF,
+    0x77F3, 0x77F9, 0x7807, 0x7825, 0x782B, 0x7835, 0x783D, 0x7853,
+    0x7859, 0x7861, 0x786D, 0x7877, 0x7879, 0x7883, 0x7885, 0x788B,
+    0x7895, 0x7897, 0x78A1, 0x78AD, 0x78BF, 0x78D3, 0x78D9, 0x78DD,
+    0x78E5, 0x78FB, 0x7901, 0x7907, 0x7925, 0x792B, 0x7939, 0x793F,
+    0x794B, 0x7957, 0x795D, 0x7967, 0x7969, 0x7973, 0x7991, 0x7993,
+    0x79A3, 0x79AB, 0x79AF, 0x79B1, 0x79B7, 0x79C9, 0x79CD, 0x79CF,
+    0x79D5, 0x79D9, 0x79F3, 0x79F7, 0x79FF, 0x7A05, 0x7A0F, 0x7A11,
+    0x7A15, 0x7A1B, 0x7A23, 0x7A27, 0x7A2D, 0x7A4B, 0x7A57, 0x7A59,
+    0x7A5F, 0x7A65, 0x7A69, 0x7A7D, 0x7A93, 0x7A9B, 0x7A9F, 0x7AA1,
+    0x7AA5, 0x7AED, 0x7AF5, 0x7AF9, 0x7B01, 0x7B17, 0x7B19, 0x7B1D,
+    0x7B2B, 0x7B35, 0x7B37, 0x7B3B, 0x7B4F, 0x7B55, 0x7B5F, 0x7B71,
+    0x7B77, 0x7B8B, 0x7B9B, 0x7BA1, 0x7BA9, 0x7BAF, 0x7BB3, 0x7BC7,
+    0x7BD3, 0x7BE9, 0x7BEB, 0x7BEF, 0x7BF1, 0x7BFD, 0x7C07, 0x7C19,
+    0x7C1B, 0x7C31, 0x7C37, 0x7C49, 0x7C67, 0x7C69, 0x7C73, 0x7C81,
+    0x7C8B, 0x7C93, 0x7CA3, 0x7CD5, 0x7CDB, 0x7CE5, 0x7CED, 0x7CF7,
+    0x7D03, 0x7D09, 0x7D1B, 0x7D1D, 0x7D33, 0x7D39, 0x7D3B, 0x7D3F,
+    0x7D45, 0x7D4D, 0x7D53, 0x7D59, 0x7D63, 0x7D75, 0x7D77, 0x7D8D,
+    0x7D8F, 0x7D9F, 0x7DAD, 0x7DB7, 0x7DBD, 0x7DBF, 0x7DCB, 0x7DD5,
+    0x7DE9, 0x7DED, 0x7DFB, 0x7E01, 0x7E05, 0x7E29, 0x7E2B, 0x7E2F,
+    0x7E35, 0x7E41, 0x7E43, 0x7E47, 0x7E55, 0x7E61, 0x7E67, 0x7E6B,
+    0x7E71, 0x7E73, 0x7E79, 0x7E7D, 0x7E91, 0x7E9B, 0x7E9D, 0x7EA7,
+    0x7EAD, 0x7EB9, 0x7EBB, 0x7ED3, 0x7EDF, 0x7EEB, 0x7EF1, 0x7EF7,
+    0x7EFB, 0x7F13, 0x7F15, 0x7F19, 0x7F31, 0x7F33, 0x7F39, 0x7F3D,
+    0x7F43, 0x7F4B, 0x7F5B, 0x7F61, 0x7F63, 0x7F6D, 0x7F79, 0x7F87,
+    0x7F8D, 0x7FAF, 0x7FB5, 0x7FC3, 0x7FC9, 0x7FCD, 0x7FCF, 0x7FED,
+    0x8003, 0x800B, 0x800F, 0x8015, 0x801D, 0x8021, 0x8023, 0x803F,
+    0x8041, 0x8047, 0x804B, 0x8065, 0x8077, 0x808D, 0x808F, 0x8095,
+    0x80A5, 0x80AB, 0x80AD, 0x80BD, 0x80C9, 0x80CB, 0x80D7, 0x80DB,
+    0x80E1, 0x80E7, 0x80F5, 0x80FF, 0x8105, 0x810D, 0x8119, 0x811D,
+    0x812F, 0x8131, 0x813B, 0x8143, 0x8153, 0x8159, 0x815F, 0x817D,
+    0x817F, 0x8189, 0x819B, 0x819D, 0x81A7, 0x81AF, 0x81B3, 0x81BB,
+    0x81C7, 0x81DF, 0x8207, 0x8209, 0x8215, 0x821F, 0x8225, 0x8231,
+    0x8233, 0x823F, 0x8243, 0x8245, 0x8249, 0x824F, 0x8261, 0x826F,
+    0x827B, 0x8281, 0x8285, 0x8293, 0x82B1, 0x82B5, 0x82BD, 0x82C7,
+    0x82CF, 0x82D5, 0x82DF, 0x82F1, 0x82F9, 0x82FD, 0x830B, 0x831B,
+    0x8321, 0x8329, 0x832D, 0x8333, 0x8335, 0x833F, 0x8341, 0x834D,
+    0x8351, 0x8353, 0x8357, 0x835D, 0x8365, 0x8369, 0x836F, 0x838F,
+    0x83A7, 0x83B1, 0x83B9, 0x83CB, 0x83D5, 0x83D7, 0x83DD, 0x83E7,
+    0x83E9, 0x83ED, 0x83FF, 0x8405, 0x8411, 0x8413, 0x8423, 0x8425,
+    0x843B, 0x8441, 0x8447, 0x844F, 0x8461, 0x8465, 0x8477, 0x8483,
+    0x848B, 0x8491, 0x8495, 0x84A9, 0x84AF, 0x84CD, 0x84E3, 0x84EF,
+    0x84F1, 0x84F7, 0x8509, 0x850D, 0x854B, 0x854F, 0x8551, 0x855D,
+    0x8563, 0x856D, 0x856F, 0x857B, 0x8587, 0x85A3, 0x85A5, 0x85A9,
+    0x85B7, 0x85CD, 0x85D3, 0x85D5, 0x85DB, 0x85E1, 0x85EB, 0x85F9,
+    0x85FD, 0x85FF, 0x8609, 0x860F, 0x8617, 0x8621, 0x862F, 0x8639,
+    0x863F, 0x8641, 0x864D, 0x8663, 0x8675, 0x867D, 0x8687, 0x8699,
+    0x86A5, 0x86A7, 0x86B3, 0x86B7, 0x86C3, 0x86C5, 0x86CF, 0x86D1,
+    0x86D7, 0x86E9, 0x86EF, 0x86F5, 0x8717, 0x871D, 0x871F, 0x872B,
+    0x872F, 0x8735, 0x8747, 0x8759, 0x875B, 0x876B, 0x8771, 0x8777,
+    0x877F, 0x8785, 0x878F, 0x87A1, 0x87A9, 0x87B3, 0x87BB, 0x87C5,
+    0x87C7, 0x87CB, 0x87DD, 0x87F7, 0x8803, 0x8819, 0x881B, 0x881F,
+    0x8821, 0x8837, 0x883D, 0x8843, 0x8851, 0x8861, 0x8867, 0x887B,
+    0x8885, 0x8891, 0x8893, 0x88A5, 0x88CF, 0x88D3, 0x88EB, 0x88ED,
+    0x88F3, 0x88FD, 0x8909, 0x890B, 0x8911, 0x891B, 0x8923, 0x8927,
+    0x892D, 0x8939, 0x8945, 0x894D, 0x8951, 0x8957, 0x8963, 0x8981,
+    0x8995, 0x899B, 0x89B3, 0x89B9, 0x89C3, 0x89CF, 0x89D1, 0x89DB,
+    0x89EF, 0x89F5, 0x89FB, 0x89FF, 0x8A0B, 0x8A19, 0x8A23, 0x8A35,
+    0x8A41, 0x8A49, 0x8A4F, 0x8A5B, 0x8A5F, 0x8A6D, 0x8A77, 0x8A79,
+    0x8A85, 0x8AA3, 0x8AB3, 0x8AB5, 0x8AC1, 0x8AC7, 0x8ACB, 0x8ACD,
+    0x8AD1, 0x8AD7, 0x8AF1, 0x8AF5, 0x8B07, 0x8B09, 0x8B0D, 0x8B13,
+    0x8B21, 0x8B57, 0x8B5D, 0x8B91, 0x8B93, 0x8BA3, 0x8BA9, 0x8BAF,
+    0x8BBB, 0x8BD5, 0x8BD9, 0x8BDB, 0x8BE1, 0x8BF7, 0x8BFD, 0x8BFF,
+    0x8C0B, 0x8C17, 0x8C1D, 0x8C27, 0x8C39, 0x8C3B, 0x8C47, 0x8C53,
+    0x8C5D, 0x8C6F, 0x8C7B, 0x8C81, 0x8C89, 0x8C8F, 0x8C99, 0x8C9F,
+    0x8CA7, 0x8CAB, 0x8CAD, 0x8CB1, 0x8CC5, 0x8CDD, 0x8CE3, 0x8CE9,
+    0x8CF3, 0x8D01, 0x8D0B, 0x8D0D, 0x8D23, 0x8D29, 0x8D37, 0x8D41,
+    0x8D5B, 0x8D5F, 0x8D71, 0x8D79, 0x8D85, 0x8D91, 0x8D9B, 0x8DA7,
+    0x8DAD, 0x8DB5, 0x8DC5, 0x8DCB, 0x8DD3, 0x8DD9, 0x8DDF, 0x8DF5,
+    0x8DF7, 0x8E01, 0x8E15, 0x8E1F, 0x8E25, 0x8E51, 0x8E63, 0x8E69,
+    0x8E73, 0x8E75, 0x8E79, 0x8E7F, 0x8E8D, 0x8E91, 0x8EAB, 0x8EAF,
+    0x8EB1, 0x8EBD, 0x8EC7, 0x8ECF, 0x8ED3, 0x8EDB, 0x8EE7, 0x8EEB,
+    0x8EF7, 0x8EFF, 0x8F15, 0x8F1D, 0x8F23, 0x8F2D, 0x8F3F, 0x8F45,
+    0x8F4B, 0x8F53, 0x8F59, 0x8F65, 0x8F69, 0x8F71, 0x8F83, 0x8F8D,
+    0x8F99, 0x8F9F, 0x8FAB, 0x8FAD, 0x8FB3, 0x8FB7, 0x8FB9, 0x8FC9,
+    0x8FD5, 0x8FE1, 0x8FEF, 0x8FF9, 0x9007, 0x900D, 0x9017, 0x9023,
+    0x9025, 0x9031, 0x9037, 0x903B, 0x9041, 0x9043, 0x904F, 0x9053,
+    0x906D, 0x9073, 0x9085, 0x908B, 0x9095, 0x909B, 0x909D, 0x90AF,
+    0x90B9, 0x90C1, 0x90C5, 0x90DF, 0x90E9, 0x90FD, 0x9103, 0x9113,
+    0x9127, 0x9133, 0x913D, 0x9145, 0x914F, 0x9151, 0x9161, 0x9167,
+    0x917B, 0x9185, 0x9199, 0x919D, 0x91BB, 0x91BD, 0x91C1, 0x91C9,
+    0x91D9, 0x91DB, 0x91ED, 0x91F1, 0x91F3, 0x91F9, 0x9203, 0x9215,
+    0x9221, 0x922F, 0x9241, 0x9247, 0x9257, 0x926B, 0x9271, 0x9275,
+    0x927D, 0x9283, 0x9287, 0x928D, 0x9299, 0x92A1, 0x92AB, 0x92AD,
+    0x92B9, 0x92BF, 0x92C3, 0x92C5, 0x92CB, 0x92D5, 0x92D7, 0x92E7,
+    0x92F3, 0x9301, 0x930B, 0x9311, 0x9319, 0x931F, 0x933B, 0x933D,
+    0x9343, 0x9355, 0x9373, 0x9395, 0x9397, 0x93A7, 0x93B3, 0x93B5,
+    0x93C7, 0x93D7, 0x93DD, 0x93E5, 0x93EF, 0x93F7, 0x9401, 0x9409,
+    0x9413, 0x943F, 0x9445, 0x944B, 0x944F, 0x9463, 0x9467, 0x9469,
+    0x946D, 0x947B, 0x9497, 0x949F, 0x94A5, 0x94B5, 0x94C3, 0x94E1,
+    0x94E7, 0x9505, 0x9509, 0x9517, 0x9521, 0x9527, 0x952D, 0x9535,
+    0x9539, 0x954B, 0x9557, 0x955D, 0x955F, 0x9575, 0x9581, 0x9589,
+    0x958F, 0x959B, 0x959F, 0x95AD, 0x95B1, 0x95B7, 0x95B9, 0x95BD,
+    0x95CF, 0x95E3, 0x95E9, 0x95F9, 0x961F, 0x962F, 0x9631, 0x9635,
+    0x963B, 0x963D, 0x9665, 0x968F, 0x969D, 0x96A1, 0x96A7, 0x96A9,
+    0x96C1, 0x96CB, 0x96D1, 0x96D3, 0x96E5, 0x96EF, 0x96FB, 0x96FD,
+    0x970D, 0x970F, 0x9715, 0x9725, 0x972B, 0x9733, 0x9737, 0x9739,
+    0x9743, 0x9749, 0x9751, 0x975B, 0x975D, 0x976F, 0x977F, 0x9787,
+    0x9793, 0x97A5, 0x97B1, 0x97B7, 0x97C3, 0x97CD, 0x97D3, 0x97D9,
+    0x97EB, 0x97F7, 0x9805, 0x9809, 0x980B, 0x9815, 0x9829, 0x982F,
+    0x983B, 0x9841, 0x9851, 0x986B, 0x986F, 0x9881, 0x9883, 0x9887,
+    0x98A7, 0x98B1, 0x98B9, 0x98BF, 0x98C3, 0x98C9, 0x98CF, 0x98DD,
+    0x98E3, 0x98F5, 0x98F9, 0x98FB, 0x990D, 0x9917, 0x991F, 0x9929,
+    0x9931, 0x993B, 0x993D, 0x9941, 0x9947, 0x9949, 0x9953, 0x997D,
+    0x9985, 0x9991, 0x9995, 0x999B, 0x99AD, 0x99AF, 0x99BF, 0x99C7,
+    0x99CB, 0x99CD, 0x99D7, 0x99E5, 0x99F1, 0x99FB, 0x9A0F, 0x9A13,
+    0x9A1B, 0x9A25, 0x9A4B, 0x9A4F, 0x9A55, 0x9A57, 0x9A61, 0x9A75,
+    0x9A7F, 0x9A8B, 0x9A91, 0x9A9D, 0x9AB7, 0x9AC3, 0x9AC7, 0x9ACF,
+    0x9AEB, 0x9AF3, 0x9AF7, 0x9AFF, 0x9B17, 0x9B1D, 0x9B27, 0x9B2F,
+    0x9B35, 0x9B45, 0x9B51, 0x9B59, 0x9B63, 0x9B6F, 0x9B77, 0x9B8D,
+    0x9B93, 0x9B95, 0x9B9F, 0x9BA1, 0x9BA7, 0x9BB1, 0x9BB7, 0x9BBD,
+    0x9BC5, 0x9BCB, 0x9BCF, 0x9BDD, 0x9BF9, 0x9C01, 0x9C11, 0x9C23,
+    0x9C2B, 0x9C2F, 0x9C35, 0x9C49, 0x9C4D, 0x9C5F, 0x9C65, 0x9C67,
+    0x9C7F, 0x9C97, 0x9C9D, 0x9CA3, 0x9CAF, 0x9CBB, 0x9CBF, 0x9CC1,
+    0x9CD7, 0x9CD9, 0x9CE3, 0x9CE9, 0x9CF1, 0x9CFD, 0x9D01, 0x9D15,
+    0x9D27, 0x9D2D, 0x9D31, 0x9D3D, 0x9D55, 0x9D5B, 0x9D61, 0x9D97,
+    0x9D9F, 0x9DA5, 0x9DA9, 0x9DC3, 0x9DE7, 0x9DEB, 0x9DED, 0x9DF1,
+    0x9E0B, 0x9E17, 0x9E23, 0x9E27, 0x9E2D, 0x9E33, 0x9E3B, 0x9E47,
+    0x9E51, 0x9E53, 0x9E5F, 0x9E6F, 0x9E81, 0x9E87, 0x9E8F, 0x9E95,
+    0x9EA1, 0x9EB3, 0x9EBD, 0x9EBF, 0x9EF5, 0x9EF9, 0x9EFB, 0x9F05,
+    0x9F23, 0x9F2F, 0x9F37, 0x9F3B, 0x9F43, 0x9F53, 0x9F61, 0x9F6D,
+    0x9F73, 0x9F77, 0x9F7D, 0x9F89, 0x9F8F, 0x9F91, 0x9F95, 0x9FA3,
+    0x9FAF, 0x9FB3, 0x9FC1, 0x9FC7, 0x9FDF, 0x9FE5, 0x9FEB, 0x9FF5,
+    0xA001, 0xA00D, 0xA021, 0xA033, 0xA039, 0xA03F, 0xA04F, 0xA057,
+    0xA05B, 0xA061, 0xA075, 0xA079, 0xA099, 0xA09D, 0xA0AB, 0xA0B5,
+    0xA0B7, 0xA0BD, 0xA0C9, 0xA0D9, 0xA0DB, 0xA0DF, 0xA0E5, 0xA0F1,
+    0xA0F3, 0xA0FD, 0xA105, 0xA10B, 0xA10F, 0xA111, 0xA11B, 0xA129,
+    0xA12F, 0xA135, 0xA141, 0xA153, 0xA175, 0xA17D, 0xA187, 0xA18D,
+    0xA1A5, 0xA1AB, 0xA1AD, 0xA1B7, 0xA1C3, 0xA1C5, 0xA1E3, 0xA1ED,
+    0xA1FB, 0xA207, 0xA213, 0xA223, 0xA229, 0xA22F, 0xA231, 0xA243,
+    0xA247, 0xA24D, 0xA26B, 0xA279, 0xA27D, 0xA283, 0xA289, 0xA28B,
+    0xA291, 0xA295, 0xA29B, 0xA2A9, 0xA2AF, 0xA2B3, 0xA2BB, 0xA2C5,
+    0xA2D1, 0xA2D7, 0xA2F7, 0xA301, 0xA309, 0xA31F, 0xA321, 0xA32B,
+    0xA331, 0xA349, 0xA351, 0xA355, 0xA373, 0xA379, 0xA37B, 0xA387,
+    0xA397, 0xA39F, 0xA3A5, 0xA3A9, 0xA3AF, 0xA3B7, 0xA3C7, 0xA3D5,
+    0xA3DB, 0xA3E1, 0xA3E5, 0xA3E7, 0xA3F1, 0xA3FD, 0xA3FF, 0xA40F,
+    0xA41D, 0xA421, 0xA423, 0xA427, 0xA43B, 0xA44D, 0xA457, 0xA459,
+    0xA463, 0xA469, 0xA475, 0xA493, 0xA49B, 0xA4AD, 0xA4B9, 0xA4C3,
+    0xA4C5, 0xA4CB, 0xA4D1, 0xA4D5, 0xA4E1, 0xA4ED, 0xA4EF, 0xA4F3,
+    0xA4FF, 0xA511, 0xA529, 0xA52B, 0xA535, 0xA53B, 0xA543, 0xA553,
+    0xA55B, 0xA561, 0xA56D, 0xA577, 0xA585, 0xA58B, 0xA597, 0xA59D,
+    0xA5A3, 0xA5A7, 0xA5A9, 0xA5C1, 0xA5C5, 0xA5CB, 0xA5D3, 0xA5D9,
+    0xA5DD, 0xA5DF, 0xA5E3, 0xA5E9, 0xA5F7, 0xA5FB, 0xA603, 0xA60D,
+    0xA625, 0xA63D, 0xA649, 0xA64B, 0xA651, 0xA65D, 0xA673, 0xA691,
+    0xA693, 0xA699, 0xA6AB, 0xA6B5, 0xA6BB, 0xA6C1, 0xA6C9, 0xA6CD,
+    0xA6CF, 0xA6D5, 0xA6DF, 0xA6E7, 0xA6F1, 0xA6F7, 0xA6FF, 0xA70F,
+    0xA715, 0xA723, 0xA729, 0xA72D, 0xA745, 0xA74D, 0xA757, 0xA759,
+    0xA765, 0xA76B, 0xA76F, 0xA793, 0xA795, 0xA7AB, 0xA7B1, 0xA7B9,
+    0xA7BF, 0xA7C9, 0xA7D1, 0xA7D7, 0xA7E3, 0xA7ED, 0xA7FB, 0xA805,
+    0xA80B, 0xA81D, 0xA829, 0xA82B, 0xA837, 0xA83B, 0xA855, 0xA85F,
+    0xA86D, 0xA87D, 0xA88F, 0xA897, 0xA8A9, 0xA8B5, 0xA8C1, 0xA8C7,
+    0xA8D7, 0xA8E5, 0xA8FD, 0xA907, 0xA913, 0xA91B, 0xA931, 0xA937,
+    0xA939, 0xA943, 0xA97F, 0xA985, 0xA987, 0xA98B, 0xA993, 0xA9A3,
+    0xA9B1, 0xA9BB, 0xA9C1, 0xA9D9, 0xA9DF, 0xA9EB, 0xA9FD, 0xAA15,
+    0xAA17, 0xAA35, 0xAA39, 0xAA3B, 0xAA47, 0xAA4D, 0xAA57, 0xAA59,
+    0xAA5D, 0xAA6B, 0xAA71, 0xAA81, 0xAA83, 0xAA8D, 0xAA95, 0xAAAB,
+    0xAABF, 0xAAC5, 0xAAC9, 0xAAE9, 0xAAEF, 0xAB01, 0xAB05, 0xAB07,
+    0xAB0B, 0xAB0D, 0xAB11, 0xAB19, 0xAB4D, 0xAB5B, 0xAB71, 0xAB73,
+    0xAB89, 0xAB9D, 0xABA7, 0xABAF, 0xABB9, 0xABBB, 0xABC1, 0xABC5,
+    0xABD3, 0xABD7, 0xABDD, 0xABF1, 0xABF5, 0xABFB, 0xABFD, 0xAC09,
+    0xAC15, 0xAC1B, 0xAC27, 0xAC37, 0xAC39, 0xAC45, 0xAC4F, 0xAC57,
+    0xAC5B, 0xAC61, 0xAC63, 0xAC7F, 0xAC8B, 0xAC93, 0xAC9D, 0xACA9,
+    0xACAB, 0xACAF, 0xACBD, 0xACD9, 0xACE1, 0xACE7, 0xACEB, 0xACED,
+    0xACF1, 0xACF7, 0xACF9, 0xAD05, 0xAD3F, 0xAD45, 0xAD53, 0xAD5D,
+    0xAD5F, 0xAD65, 0xAD81, 0xADA1, 0xADA5, 0xADC3, 0xADCB, 0xADD1,
+    0xADD5, 0xADDB, 0xADE7, 0xADF3, 0xADF5, 0xADF9, 0xADFF, 0xAE05,
+    0xAE13, 0xAE23, 0xAE2B, 0xAE49, 0xAE4D, 0xAE4F, 0xAE59, 0xAE61,
+    0xAE67, 0xAE6B, 0xAE71, 0xAE8B, 0xAE8F, 0xAE9B, 0xAE9D, 0xAEA7,
+    0xAEB9, 0xAEC5, 0xAED1, 0xAEE3, 0xAEE5, 0xAEE9, 0xAEF5, 0xAEFD,
+    0xAF09, 0xAF13, 0xAF27, 0xAF2B, 0xAF33, 0xAF43, 0xAF4F, 0xAF57,
+    0xAF5D, 0xAF6D, 0xAF75, 0xAF7F, 0xAF8B, 0xAF99, 0xAF9F, 0xAFA3,
+    0xAFAB, 0xAFB7, 0xAFBB, 0xAFCF, 0xAFD5, 0xAFFD, 0xB005, 0xB015,
+    0xB01B, 0xB03F, 0xB041, 0xB047, 0xB04B, 0xB051, 0xB053, 0xB069,
+    0xB07B, 0xB07D, 0xB087, 0xB08D, 0xB0B1, 0xB0BF, 0xB0CB, 0xB0CF,
+    0xB0E1, 0xB0E9, 0xB0ED, 0xB0FB, 0xB105, 0xB107, 0xB111, 0xB119,
+    0xB11D, 0xB11F, 0xB131, 0xB141, 0xB14D, 0xB15B, 0xB165, 0xB173,
+    0xB179, 0xB17F, 0xB1A9, 0xB1B3, 0xB1B9, 0xB1BF, 0xB1D3, 0xB1DD,
+    0xB1E5, 0xB1F1, 0xB1F5, 0xB201, 0xB213, 0xB215, 0xB21F, 0xB22D,
+    0xB23F, 0xB249, 0xB25B, 0xB263, 0xB269, 0xB26D, 0xB27B, 0xB281,
+    0xB28B, 0xB2A9, 0xB2B7, 0xB2BD, 0xB2C3, 0xB2C7, 0xB2D3, 0xB2F9,
+    0xB2FD, 0xB2FF, 0xB303, 0xB309, 0xB311, 0xB31D, 0xB327, 0xB32D,
+    0xB33F, 0xB345, 0xB377, 0xB37D, 0xB381, 0xB387, 0xB393, 0xB39B,
+    0xB3A5, 0xB3C5, 0xB3CB, 0xB3E1, 0xB3E3, 0xB3ED, 0xB3F9, 0xB40B,
+    0xB40D, 0xB413, 0xB417, 0xB435, 0xB43D, 0xB443, 0xB449, 0xB45B,
+    0xB465, 0xB467, 0xB46B, 0xB477, 0xB48B, 0xB495, 0xB49D, 0xB4B5,
+    0xB4BF, 0xB4C1, 0xB4C7, 0xB4DD, 0xB4E3, 0xB4E5, 0xB4F7, 0xB501,
+    0xB50D, 0xB50F, 0xB52D, 0xB53F, 0xB54B, 0xB567, 0xB569, 0xB56F,
+    0xB573, 0xB579, 0xB587, 0xB58D, 0xB599, 0xB5A3, 0xB5AB, 0xB5AF,
+    0xB5BB, 0xB5D5, 0xB5DF, 0xB5E7, 0xB5ED, 0xB5FD, 0xB5FF, 0xB609,
+    0xB61B, 0xB629, 0xB62F, 0xB633, 0xB639, 0xB647, 0xB657, 0xB659,
+    0xB65F, 0xB663, 0xB66F, 0xB683, 0xB687, 0xB69B, 0xB69F, 0xB6A5,
+    0xB6B1, 0xB6B3, 0xB6D7, 0xB6DB, 0xB6E1, 0xB6E3, 0xB6ED, 0xB6EF,
+    0xB705, 0xB70D, 0xB713, 0xB71D, 0xB729, 0xB735, 0xB747, 0xB755,
+    0xB76D, 0xB791, 0xB795, 0xB7A9, 0xB7C1, 0xB7CB, 0xB7D1, 0xB7D3,
+    0xB7EF, 0xB7F5, 0xB807, 0xB80F, 0xB813, 0xB819, 0xB821, 0xB827,
+    0xB82B, 0xB82D, 0xB839, 0xB855, 0xB867, 0xB875, 0xB885, 0xB893,
+    0xB8A5, 0xB8AF, 0xB8B7, 0xB8BD, 0xB8C1, 0xB8C7, 0xB8CD, 0xB8D5,
+    0xB8EB, 0xB8F7, 0xB8F9, 0xB903, 0xB915, 0xB91B, 0xB91D, 0xB92F,
+    0xB939, 0xB93B, 0xB947, 0xB951, 0xB963, 0xB983, 0xB989, 0xB98D,
+    0xB993, 0xB999, 0xB9A1, 0xB9A7, 0xB9AD, 0xB9B7, 0xB9CB, 0xB9D1,
+    0xB9DD, 0xB9E7, 0xB9EF, 0xB9F9, 0xBA07, 0xBA0D, 0xBA17, 0xBA25,
+    0xBA29, 0xBA2B, 0xBA41, 0xBA53, 0xBA55, 0xBA5F, 0xBA61, 0xBA65,
+    0xBA79, 0xBA7D, 0xBA7F, 0xBAA1, 0xBAA3, 0xBAAF, 0xBAB5, 0xBABF,
+    0xBAC1, 0xBACB, 0xBADD, 0xBAE3, 0xBAF1, 0xBAFD, 0xBB09, 0xBB1F,
+    0xBB27, 0xBB2D, 0xBB3D, 0xBB43, 0xBB4B, 0xBB4F, 0xBB5B, 0xBB61,
+    0xBB69, 0xBB6D, 0xBB91, 0xBB97, 0xBB9D, 0xBBB1, 0xBBC9, 0xBBCF,
+    0xBBDB, 0xBBED, 0xBBF7, 0xBBF9, 0xBC03, 0xBC1D, 0xBC23, 0xBC33,
+    0xBC3B, 0xBC41, 0xBC45, 0xBC5D, 0xBC6F, 0xBC77, 0xBC83, 0xBC8F,
+    0xBC99, 0xBCAB, 0xBCB7, 0xBCB9, 0xBCD1, 0xBCD5, 0xBCE1, 0xBCF3,
+    0xBCFF, 0xBD0D, 0xBD17, 0xBD19, 0xBD1D, 0xBD35, 0xBD41, 0xBD4F,
+    0xBD59, 0xBD5F, 0xBD61, 0xBD67, 0xBD6B, 0xBD71, 0xBD8B, 0xBD8F,
+    0xBD95, 0xBD9B, 0xBD9D, 0xBDB3, 0xBDBB, 0xBDCD, 0xBDD1, 0xBDE3,
+    0xBDEB, 0xBDEF, 0xBE07, 0xBE09, 0xBE15, 0xBE21, 0xBE25, 0xBE27,
+    0xBE5B, 0xBE5D, 0xBE6F, 0xBE75, 0xBE79, 0xBE7F, 0xBE8B, 0xBE8D,
+    0xBE93, 0xBE9F, 0xBEA9, 0xBEB1, 0xBEB5, 0xBEB7, 0xBECF, 0xBED9,
+    0xBEDB, 0xBEE5, 0xBEE7, 0xBEF3, 0xBEF9, 0xBF0B, 0xBF33, 0xBF39,
+    0xBF4D, 0xBF5D, 0xBF5F, 0xBF6B, 0xBF71, 0xBF7B, 0xBF87, 0xBF89,
+    0xBF8D, 0xBF93, 0xBFA1, 0xBFAD, 0xBFB9, 0xBFCF, 0xBFD5, 0xBFDD,
+    0xBFE1, 0xBFE3, 0xBFF3, 0xC005, 0xC011, 0xC013, 0xC019, 0xC029,
+    0xC02F, 0xC031, 0xC037, 0xC03B, 0xC047, 0xC065, 0xC06D, 0xC07D,
+    0xC07F, 0xC091, 0xC09B, 0xC0B3, 0xC0B5, 0xC0BB, 0xC0D3, 0xC0D7,
+    0xC0D9, 0xC0EF, 0xC0F1, 0xC101, 0xC103, 0xC109, 0xC115, 0xC119,
+    0xC12B, 0xC133, 0xC137, 0xC145, 0xC149, 0xC15B, 0xC173, 0xC179,
+    0xC17B, 0xC181, 0xC18B, 0xC18D, 0xC197, 0xC1BD, 0xC1C3, 0xC1CD,
+    0xC1DB, 0xC1E1, 0xC1E7, 0xC1FF, 0xC203, 0xC205, 0xC211, 0xC221,
+    0xC22F, 0xC23F, 0xC24B, 0xC24D, 0xC253, 0xC25D, 0xC277, 0xC27B,
+    0xC27D, 0xC289, 0xC28F, 0xC293, 0xC29F, 0xC2A7, 0xC2B3, 0xC2BD,
+    0xC2CF, 0xC2D5, 0xC2E3, 0xC2FF, 0xC301, 0xC307, 0xC311, 0xC313,
+    0xC317, 0xC325, 0xC347, 0xC349, 0xC34F, 0xC365, 0xC367, 0xC371,
+    0xC37F, 0xC383, 0xC385, 0xC395, 0xC39D, 0xC3A7, 0xC3AD, 0xC3B5,
+    0xC3BF, 0xC3C7, 0xC3CB, 0xC3D1, 0xC3D3, 0xC3E3, 0xC3E9, 0xC3EF,
+    0xC401, 0xC41F, 0xC42D, 0xC433, 0xC437, 0xC455, 0xC457, 0xC461,
+    0xC46F, 0xC473, 0xC487, 0xC491, 0xC499, 0xC49D, 0xC4A5, 0xC4B7,
+    0xC4BB, 0xC4C9, 0xC4CF, 0xC4D3, 0xC4EB, 0xC4F1, 0xC4F7, 0xC509,
+    0xC51B, 0xC51D, 0xC541, 0xC547, 0xC551, 0xC55F, 0xC56B, 0xC56F,
+    0xC575, 0xC577, 0xC595, 0xC59B, 0xC59F, 0xC5A1, 0xC5A7, 0xC5C3,
+    0xC5D7, 0xC5DB, 0xC5EF, 0xC5FB, 0xC613, 0xC623, 0xC635, 0xC641,
+    0xC64F, 0xC655, 0xC659, 0xC665, 0xC685, 0xC691, 0xC697, 0xC6A1,
+    0xC6A9, 0xC6B3, 0xC6B9, 0xC6CB, 0xC6CD, 0xC6DD, 0xC6EB, 0xC6F1,
+    0xC707, 0xC70D, 0xC719, 0xC71B, 0xC72D, 0xC731, 0xC739, 0xC757,
+    0xC763, 0xC767, 0xC773, 0xC775, 0xC77F, 0xC7A5, 0xC7BB, 0xC7BD,
+    0xC7C1, 0xC7CF, 0xC7D5, 0xC7E1, 0xC7F9, 0xC7FD, 0xC7FF, 0xC803,
+    0xC811, 0xC81D, 0xC827, 0xC829, 0xC839, 0xC83F, 0xC853, 0xC857,
+    0xC86B, 0xC881, 0xC88D, 0xC88F, 0xC893, 0xC895, 0xC8A1, 0xC8B7,
+    0xC8CF, 0xC8D5, 0xC8DB, 0xC8DD, 0xC8E3, 0xC8E7, 0xC8ED, 0xC8EF,
+    0xC8F9, 0xC905, 0xC911, 0xC917, 0xC919, 0xC91F, 0xC92F, 0xC937,
+    0xC93D, 0xC941, 0xC953, 0xC95F, 0xC96B, 0xC979, 0xC97D, 0xC989,
+    0xC98F, 0xC997, 0xC99D, 0xC9AF, 0xC9B5, 0xC9BF, 0xC9CB, 0xC9D9,
+    0xC9DF, 0xC9E3, 0xC9EB, 0xCA01, 0xCA07, 0xCA09, 0xCA25, 0xCA37,
+    0xCA39, 0xCA4B, 0xCA55, 0xCA5B, 0xCA69, 0xCA73, 0xCA75, 0xCA7F,
+    0xCA8D, 0xCA93, 0xCA9D, 0xCA9F, 0xCAB5, 0xCABB, 0xCAC3, 0xCAC9,
+    0xCAD9, 0xCAE5, 0xCAED, 0xCB03, 0xCB05, 0xCB09, 0xCB17, 0xCB29,
+    0xCB35, 0xCB3B, 0xCB53, 0xCB59, 0xCB63, 0xCB65, 0xCB71, 0xCB87,
+    0xCB99, 0xCB9F, 0xCBB3, 0xCBB9, 0xCBC3, 0xCBD1, 0xCBD5, 0xCBD7,
+    0xCBDD, 0xCBE9, 0xCBFF, 0xCC0D, 0xCC19, 0xCC1D, 0xCC23, 0xCC2B,
+    0xCC41, 0xCC43, 0xCC4D, 0xCC59, 0xCC61, 0xCC89, 0xCC8B, 0xCC91,
+    0xCC9B, 0xCCA3, 0xCCA7, 0xCCD1, 0xCCE5, 0xCCE9, 0xCD09, 0xCD15,
+    0xCD1F, 0xCD25, 0xCD31, 0xCD3D, 0xCD3F, 0xCD49, 0xCD51, 0xCD57,
+    0xCD5B, 0xCD63, 0xCD67, 0xCD81, 0xCD93, 0xCD97, 0xCD9F, 0xCDBB,
+    0xCDC1, 0xCDD3, 0xCDD9, 0xCDE5, 0xCDE7, 0xCDF1, 0xCDF7, 0xCDFD,
+    0xCE0B, 0xCE15, 0xCE21, 0xCE2F, 0xCE47, 0xCE4D, 0xCE51, 0xCE65,
+    0xCE7B, 0xCE7D, 0xCE8F, 0xCE93, 0xCE99, 0xCEA5, 0xCEA7, 0xCEB7,
+    0xCEC9, 0xCED7, 0xCEDD, 0xCEE3, 0xCEE7, 0xCEED, 0xCEF5, 0xCF07,
+    0xCF0B, 0xCF19, 0xCF37, 0xCF3B, 0xCF4D, 0xCF55, 0xCF5F, 0xCF61,
+    0xCF65, 0xCF6D, 0xCF79, 0xCF7D, 0xCF89, 0xCF9B, 0xCF9D, 0xCFA9,
+    0xCFB3, 0xCFB5, 0xCFC5, 0xCFCD, 0xCFD1, 0xCFEF, 0xCFF1, 0xCFF7,
+    0xD013, 0xD015, 0xD01F, 0xD021, 0xD033, 0xD03D, 0xD04B, 0xD04F,
+    0xD069, 0xD06F, 0xD081, 0xD085, 0xD099, 0xD09F, 0xD0A3, 0xD0AB,
+    0xD0BD, 0xD0C1, 0xD0CD, 0xD0E7, 0xD0FF, 0xD103, 0xD117, 0xD12D,
+    0xD12F, 0xD141, 0xD157, 0xD159, 0xD15D, 0xD169, 0xD16B, 0xD171,
+    0xD177, 0xD17D, 0xD181, 0xD187, 0xD195, 0xD199, 0xD1B1, 0xD1BD,
+    0xD1C3, 0xD1D5, 0xD1D7, 0xD1E3, 0xD1FF, 0xD20D, 0xD211, 0xD217,
+    0xD21F, 0xD235, 0xD23B, 0xD247, 0xD259, 0xD261, 0xD265, 0xD279,
+    0xD27F, 0xD283, 0xD289, 0xD28B, 0xD29D, 0xD2A3, 0xD2A7, 0xD2B3,
+    0xD2BF, 0xD2C7, 0xD2E3, 0xD2E9, 0xD2F1, 0xD2FB, 0xD2FD, 0xD315,
+    0xD321, 0xD32B, 0xD343, 0xD34B, 0xD355, 0xD369, 0xD375, 0xD37B,
+    0xD387, 0xD393, 0xD397, 0xD3A5, 0xD3B1, 0xD3C9, 0xD3EB, 0xD3FD,
+    0xD405, 0xD40F, 0xD415, 0xD427, 0xD42F, 0xD433, 0xD43B, 0xD44B,
+    0xD459, 0xD45F, 0xD463, 0xD469, 0xD481, 0xD483, 0xD489, 0xD48D,
+    0xD493, 0xD495, 0xD4A5, 0xD4AB, 0xD4B1, 0xD4C5, 0xD4DD, 0xD4E1,
+    0xD4E3, 0xD4E7, 0xD4F5, 0xD4F9, 0xD50B, 0xD50D, 0xD513, 0xD51F,
+    0xD523, 0xD531, 0xD535, 0xD537, 0xD549, 0xD559, 0xD55F, 0xD565,
+    0xD567, 0xD577, 0xD58B, 0xD591, 0xD597, 0xD5B5, 0xD5B9, 0xD5C1,
+    0xD5C7, 0xD5DF, 0xD5EF, 0xD5F5, 0xD5FB, 0xD603, 0xD60F, 0xD62D,
+    0xD631, 0xD643, 0xD655, 0xD65D, 0xD661, 0xD67B, 0xD685, 0xD687,
+    0xD69D, 0xD6A5, 0xD6AF, 0xD6BD, 0xD6C3, 0xD6C7, 0xD6D9, 0xD6E1,
+    0xD6ED, 0xD709, 0xD70B, 0xD711, 0xD715, 0xD721, 0xD727, 0xD73F,
+    0xD745, 0xD74D, 0xD757, 0xD76B, 0xD77B, 0xD783, 0xD7A1, 0xD7A7,
+    0xD7AD, 0xD7B1, 0xD7B3, 0xD7BD, 0xD7CB, 0xD7D1, 0xD7DB, 0xD7FB,
+    0xD811, 0xD823, 0xD825, 0xD829, 0xD82B, 0xD82F, 0xD837, 0xD84D,
+    0xD855, 0xD867, 0xD873, 0xD88F, 0xD891, 0xD8A1, 0xD8AD, 0xD8BF,
+    0xD8CD, 0xD8D7, 0xD8E9, 0xD8F5, 0xD8FB, 0xD91B, 0xD925, 0xD933,
+    0xD939, 0xD943, 0xD945, 0xD94F, 0xD951, 0xD957, 0xD96D, 0xD96F,
+    0xD973, 0xD979, 0xD981, 0xD98B, 0xD991, 0xD99F, 0xD9A5, 0xD9A9,
+    0xD9B5, 0xD9D3, 0xD9EB, 0xD9F1, 0xD9F7, 0xD9FF, 0xDA05, 0xDA09,
+    0xDA0B, 0xDA0F, 0xDA15, 0xDA1D, 0xDA23, 0xDA29, 0xDA3F, 0xDA51,
+    0xDA59, 0xDA5D, 0xDA5F, 0xDA71, 0xDA77, 0xDA7B, 0xDA7D, 0xDA8D,
+    0xDA9F, 0xDAB3, 0xDABD, 0xDAC3, 0xDAC9, 0xDAE7, 0xDAE9, 0xDAF5,
+    0xDB11, 0xDB17, 0xDB1D, 0xDB23, 0xDB25, 0xDB31, 0xDB3B, 0xDB43,
+    0xDB55, 0xDB67, 0xDB6B, 0xDB73, 0xDB85, 0xDB8F, 0xDB91, 0xDBAD,
+    0xDBAF, 0xDBB9, 0xDBC7, 0xDBCB, 0xDBCD, 0xDBEB, 0xDBF7, 0xDC0D,
+    0xDC27, 0xDC31, 0xDC39, 0xDC3F, 0xDC49, 0xDC51, 0xDC61, 0xDC6F,
+    0xDC75, 0xDC7B, 0xDC85, 0xDC93, 0xDC99, 0xDC9D, 0xDC9F, 0xDCA9,
+    0xDCB5, 0xDCB7, 0xDCBD, 0xDCC7, 0xDCCF, 0xDCD3, 0xDCD5, 0xDCDF,
+    0xDCF9, 0xDD0F, 0xDD15, 0xDD17, 0xDD23, 0xDD35, 0xDD39, 0xDD53,
+    0xDD57, 0xDD5F, 0xDD69, 0xDD6F, 0xDD7D, 0xDD87, 0xDD89, 0xDD9B,
+    0xDDA1, 0xDDAB, 0xDDBF, 0xDDC5, 0xDDCB, 0xDDCF, 0xDDE7, 0xDDE9,
+    0xDDED, 0xDDF5, 0xDDFB, 0xDE0B, 0xDE19, 0xDE29, 0xDE3B, 0xDE3D,
+    0xDE41, 0xDE4D, 0xDE4F, 0xDE59, 0xDE5B, 0xDE61, 0xDE6D, 0xDE77,
+    0xDE7D, 0xDE83, 0xDE97, 0xDE9D, 0xDEA1, 0xDEA7, 0xDECD, 0xDED1,
+    0xDED7, 0xDEE3, 0xDEF1, 0xDEF5, 0xDF01, 0xDF09, 0xDF13, 0xDF1F,
+    0xDF2B, 0xDF33, 0xDF37, 0xDF3D, 0xDF4B, 0xDF55, 0xDF5B, 0xDF67,
+    0xDF69, 0xDF73, 0xDF85, 0xDF87, 0xDF99, 0xDFA3, 0xDFAB, 0xDFB5,
+    0xDFB7, 0xDFC3, 0xDFC7, 0xDFD5, 0xDFF1, 0xDFF3, 0xE003, 0xE005,
+    0xE017, 0xE01D, 0xE027, 0xE02D, 0xE035, 0xE045, 0xE053, 0xE071,
+    0xE07B, 0xE08F, 0xE095, 0xE09F, 0xE0B7, 0xE0B9, 0xE0D5, 0xE0D7,
+    0xE0E3, 0xE0F3, 0xE0F9, 0xE101, 0xE125, 0xE129, 0xE131, 0xE135,
+    0xE143, 0xE14F, 0xE159, 0xE161, 0xE16D, 0xE171, 0xE177, 0xE17F,
+    0xE183, 0xE189, 0xE197, 0xE1AD, 0xE1B5, 0xE1BB, 0xE1BF, 0xE1C1,
+    0xE1CB, 0xE1D1, 0xE1E5, 0xE1EF, 0xE1F7, 0xE1FD, 0xE203, 0xE219,
+    0xE22B, 0xE22D, 0xE23D, 0xE243, 0xE257, 0xE25B, 0xE275, 0xE279,
+    0xE287, 0xE29D, 0xE2AB, 0xE2AF, 0xE2BB, 0xE2C1, 0xE2C9, 0xE2CD,
+    0xE2D3, 0xE2D9, 0xE2F3, 0xE2FD, 0xE2FF, 0xE311, 0xE323, 0xE327,
+    0xE329, 0xE339, 0xE33B, 0xE34D, 0xE351, 0xE357, 0xE35F, 0xE363,
+    0xE369, 0xE375, 0xE377, 0xE37D, 0xE383, 0xE39F, 0xE3C5, 0xE3C9,
+    0xE3D1, 0xE3E1, 0xE3FB, 0xE3FF, 0xE401, 0xE40B, 0xE417, 0xE419,
+    0xE423, 0xE42B, 0xE431, 0xE43B, 0xE447, 0xE449, 0xE453, 0xE455,
+    0xE46D, 0xE471, 0xE48F, 0xE4A9, 0xE4AF, 0xE4B5, 0xE4C7, 0xE4CD,
+    0xE4D3, 0xE4E9, 0xE4EB, 0xE4F5, 0xE507, 0xE521, 0xE525, 0xE537,
+    0xE53F, 0xE545, 0xE54B, 0xE557, 0xE567, 0xE56D, 0xE575, 0xE585,
+    0xE58B, 0xE593, 0xE5A3, 0xE5A5, 0xE5CF, 0xE609, 0xE611, 0xE615,
+    0xE61B, 0xE61D, 0xE621, 0xE629, 0xE639, 0xE63F, 0xE653, 0xE657,
+    0xE663, 0xE66F, 0xE675, 0xE681, 0xE683, 0xE68D, 0xE68F, 0xE695,
+    0xE6AB, 0xE6AD, 0xE6B7, 0xE6BD, 0xE6C5, 0xE6CB, 0xE6D5, 0xE6E3,
+    0xE6E9, 0xE6EF, 0xE6F3, 0xE705, 0xE70D, 0xE717, 0xE71F, 0xE72F,
+    0xE73D, 0xE747, 0xE749, 0xE753, 0xE755, 0xE761, 0xE767, 0xE76B,
+    0xE77F, 0xE789, 0xE791, 0xE7C5, 0xE7CD, 0xE7D7, 0xE7DD, 0xE7DF,
+    0xE7E9, 0xE7F1, 0xE7FB, 0xE801, 0xE807, 0xE80F, 0xE819, 0xE81B,
+    0xE831, 0xE833, 0xE837, 0xE83D, 0xE84B, 0xE84F, 0xE851, 0xE869,
+    0xE875, 0xE879, 0xE893, 0xE8A5, 0xE8A9, 0xE8AF, 0xE8BD, 0xE8DB,
+    0xE8E1, 0xE8E5, 0xE8EB, 0xE8ED, 0xE903, 0xE90B, 0xE90F, 0xE915,
+    0xE917, 0xE92D, 0xE933, 0xE93B, 0xE94B, 0xE951, 0xE95F, 0xE963,
+    0xE969, 0xE97B, 0xE983, 0xE98F, 0xE995, 0xE9A1, 0xE9B9, 0xE9D7,
+    0xE9E7, 0xE9EF, 0xEA11, 0xEA19, 0xEA2F, 0xEA35, 0xEA43, 0xEA4D,
+    0xEA5F, 0xEA6D, 0xEA71, 0xEA7D, 0xEA85, 0xEA89, 0xEAAD, 0xEAB3,
+    0xEAB9, 0xEABB, 0xEAC5, 0xEAC7, 0xEACB, 0xEADF, 0xEAE5, 0xEAEB,
+    0xEAF5, 0xEB01, 0xEB07, 0xEB09, 0xEB31, 0xEB39, 0xEB3F, 0xEB5B,
+    0xEB61, 0xEB63, 0xEB6F, 0xEB81, 0xEB85, 0xEB9D, 0xEBAB, 0xEBB1,
+    0xEBB7, 0xEBC1, 0xEBD5, 0xEBDF, 0xEBED, 0xEBFD, 0xEC0B, 0xEC1B,
+    0xEC21, 0xEC29, 0xEC4D, 0xEC51, 0xEC5D, 0xEC69, 0xEC6F, 0xEC7B,
+    0xECAD, 0xECB9, 0xECBF, 0xECC3, 0xECC9, 0xECCF, 0xECD7, 0xECDD,
+    0xECE7, 0xECE9, 0xECF3, 0xECF5, 0xED07, 0xED11, 0xED1F, 0xED2F,
+    0xED37, 0xED3D, 0xED41, 0xED55, 0xED59, 0xED5B, 0xED65, 0xED6B,
+    0xED79, 0xED8B, 0xED95, 0xEDBB, 0xEDC5, 0xEDD7, 0xEDD9, 0xEDE3,
+    0xEDE5, 0xEDF1, 0xEDF5, 0xEDF7, 0xEDFB, 0xEE09, 0xEE0F, 0xEE19,
+    0xEE21, 0xEE49, 0xEE4F, 0xEE63, 0xEE67, 0xEE73, 0xEE7B, 0xEE81,
+    0xEEA3, 0xEEAB, 0xEEC1, 0xEEC9, 0xEED5, 0xEEDF, 0xEEE1, 0xEEF1,
+    0xEF1B, 0xEF27, 0xEF2F, 0xEF45, 0xEF4D, 0xEF63, 0xEF6B, 0xEF71,
+    0xEF93, 0xEF95, 0xEF9B, 0xEF9F, 0xEFAD, 0xEFB3, 0xEFC3, 0xEFC5,
+    0xEFDB, 0xEFE1, 0xEFE9, 0xF001, 0xF017, 0xF01D, 0xF01F, 0xF02B,
+    0xF02F, 0xF035, 0xF043, 0xF047, 0xF04F, 0xF067, 0xF06B, 0xF071,
+    0xF077, 0xF079, 0xF08F, 0xF0A3, 0xF0A9, 0xF0AD, 0xF0BB, 0xF0BF,
+    0xF0C5, 0xF0CB, 0xF0D3, 0xF0D9, 0xF0E3, 0xF0E9, 0xF0F1, 0xF0F7,
+    0xF107, 0xF115, 0xF11B, 0xF121, 0xF137, 0xF13D, 0xF155, 0xF175,
+    0xF17B, 0xF18D, 0xF193, 0xF1A5, 0xF1AF, 0xF1B7, 0xF1D5, 0xF1E7,
+    0xF1ED, 0xF1FD, 0xF209, 0xF20F, 0xF21B, 0xF21D, 0xF223, 0xF227,
+    0xF233, 0xF23B, 0xF241, 0xF257, 0xF25F, 0xF265, 0xF269, 0xF277,
+    0xF281, 0xF293, 0xF2A7, 0xF2B1, 0xF2B3, 0xF2B9, 0xF2BD, 0xF2BF,
+    0xF2DB, 0xF2ED, 0xF2EF, 0xF2F9, 0xF2FF, 0xF305, 0xF30B, 0xF319,
+    0xF341, 0xF359, 0xF35B, 0xF35F, 0xF367, 0xF373, 0xF377, 0xF38B,
+    0xF38F, 0xF3AF, 0xF3C1, 0xF3D1, 0xF3D7, 0xF3FB, 0xF403, 0xF409,
+    0xF40D, 0xF413, 0xF421, 0xF425, 0xF42B, 0xF445, 0xF44B, 0xF455,
+    0xF463, 0xF475, 0xF47F, 0xF485, 0xF48B, 0xF499, 0xF4A3, 0xF4A9,
+    0xF4AF, 0xF4BD, 0xF4C3, 0xF4DB, 0xF4DF, 0xF4ED, 0xF503, 0xF50B,
+    0xF517, 0xF521, 0xF529, 0xF535, 0xF547, 0xF551, 0xF563, 0xF56B,
+    0xF583, 0xF58D, 0xF595, 0xF599, 0xF5B1, 0xF5B7, 0xF5C9, 0xF5CF,
+    0xF5D1, 0xF5DB, 0xF5F9, 0xF5FB, 0xF605, 0xF607, 0xF60B, 0xF60D,
+    0xF635, 0xF637, 0xF653, 0xF65B, 0xF661, 0xF667, 0xF679, 0xF67F,
+    0xF689, 0xF697, 0xF69B, 0xF6AD, 0xF6CB, 0xF6DD, 0xF6DF, 0xF6EB,
+    0xF709, 0xF70F, 0xF72D, 0xF731, 0xF743, 0xF74F, 0xF751, 0xF755,
+    0xF763, 0xF769, 0xF773, 0xF779, 0xF781, 0xF787, 0xF791, 0xF79D,
+    0xF79F, 0xF7A5, 0xF7B1, 0xF7BB, 0xF7BD, 0xF7CF, 0xF7D3, 0xF7E7,
+    0xF7EB, 0xF7F1, 0xF7FF, 0xF805, 0xF80B, 0xF821, 0xF827, 0xF82D,
+    0xF835, 0xF847, 0xF859, 0xF863, 0xF865, 0xF86F, 0xF871, 0xF877,
+    0xF87B, 0xF881, 0xF88D, 0xF89F, 0xF8A1, 0xF8AB, 0xF8B3, 0xF8B7,
+    0xF8C9, 0xF8CB, 0xF8D1, 0xF8D7, 0xF8DD, 0xF8E7, 0xF8EF, 0xF8F9,
+    0xF8FF, 0xF911, 0xF91D, 0xF925, 0xF931, 0xF937, 0xF93B, 0xF941,
+    0xF94F, 0xF95F, 0xF961, 0xF96D, 0xF971, 0xF977, 0xF99D, 0xF9A3,
+    0xF9A9, 0xF9B9, 0xF9CD, 0xF9E9, 0xF9FD, 0xFA07, 0xFA0D, 0xFA13,
+    0xFA21, 0xFA25, 0xFA3F, 0xFA43, 0xFA51, 0xFA5B, 0xFA6D, 0xFA7B,
+    0xFA97, 0xFA99, 0xFA9D, 0xFAAB, 0xFABB, 0xFABD, 0xFAD9, 0xFADF,
+    0xFAE7, 0xFAED, 0xFB0F, 0xFB17, 0xFB1B, 0xFB2D, 0xFB2F, 0xFB3F,
+    0xFB47, 0xFB4D, 0xFB75, 0xFB7D, 0xFB8F, 0xFB93, 0xFBB1, 0xFBB7,
+    0xFBC3, 0xFBC5, 0xFBE3, 0xFBE9, 0xFBF3, 0xFC01, 0xFC29, 0xFC37,
+    0xFC41, 0xFC43, 0xFC4F, 0xFC59, 0xFC61, 0xFC65, 0xFC6D, 0xFC73,
+    0xFC79, 0xFC95, 0xFC97, 0xFC9B, 0xFCA7, 0xFCB5, 0xFCC5, 0xFCCD,
+    0xFCEB, 0xFCFB, 0xFD0D, 0xFD0F, 0xFD19, 0xFD2B, 0xFD31, 0xFD51,
+    0xFD55, 0xFD67, 0xFD6D, 0xFD6F, 0xFD7B, 0xFD85, 0xFD97, 0xFD99,
+    0xFD9F, 0xFDA9, 0xFDB7, 0xFDC9, 0xFDE5, 0xFDEB, 0xFDF3, 0xFE03,
+    0xFE05, 0xFE09, 0xFE1D, 0xFE27, 0xFE2F, 0xFE41, 0xFE4B, 0xFE4D,
+    0xFE57, 0xFE5F, 0xFE63, 0xFE69, 0xFE75, 0xFE7B, 0xFE8F, 0xFE93,
+    0xFE95, 0xFE9B, 0xFE9F, 0xFEB3, 0xFEBD, 0xFED7, 0xFEE9, 0xFEF3,
+    0xFEF5, 0xFF07, 0xFF0D, 0xFF1D, 0xFF2B, 0xFF2F, 0xFF49, 0xFF4D,
+    0xFF5B, 0xFF65, 0xFF71, 0xFF7F, 0xFF85, 0xFF8B, 0xFF8F, 0xFF9D,
+    0xFFA7, 0xFFA9, 0xFFC7, 0xFFD9, 0xFFEF, 0xFFF1,
+#endif
+};
diff --git a/security/nss/lib/freebl/mpi/stats b/security/nss/lib/freebl/mpi/stats
new file mode 100755
index 000000000..a5deb94c0
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/stats
@@ -0,0 +1,39 @@
+#!/usr/bin/perl
+
+#
+# Treat each line as a sequence of comma and/or space delimited
+# floating point numbers, and compute basic statistics on them.
+# These are written to standard output
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+$min = 1.7976931348623157E+308;
+$max = -1.7976931348623157E+308; # lowest finite double, so negative input can win
+$sum = $num = $avg = 0;
+
+while(<>) {
+    chomp;
+    # grep drops the empty first field split() yields for a leading delimiter.
+    @nums = grep { length } split(/[\s,]+/, $_);
+    next if($#nums < 0);
+
+    $num += scalar @nums;
+    foreach (@nums) {
+	$min = $_ if($_ < $min);
+	$max = $_ if($_ > $max);
+	$sum += $_;
+    }
+}
+
+if($num) {
+    $avg = $sum / $num;
+} else {
+    $min = $max = 0;
+}
+
+printf "%d\tmin=%.2f, avg=%.2f, max=%.2f, sum=%.2f\n",
+    $num, $min, $avg, $max, $sum;
+
+# end
diff --git a/security/nss/lib/freebl/mpi/target.mk b/security/nss/lib/freebl/mpi/target.mk
new file mode 100644
index 000000000..dd74564b1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/target.mk
@@ -0,0 +1,233 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+##
+## Define CFLAGS to contain any local options your compiler
+## setup requires.
+##
+## Conditional compilation options are no longer here; see
+## the file 'mpi-config.h' instead.
+##
+MPICMN = -I. -DMP_API_COMPATIBLE -DMP_IOFUNC
+CFLAGS= -O $(MPICMN)
+#CFLAGS=-ansi -fullwarn -woff 1521 -O3 $(MPICMN)
+#CFLAGS=-ansi -pedantic -Wall -O3 $(MPICMN)
+#CFLAGS=-ansi -pedantic -Wall -g -O2 -DMP_DEBUG=1 $(MPICMN)
+
+ifeq ($(TARGET),mipsIRIX)
+#IRIX
+#MPICMN += -DMP_MONT_USE_MP_MUL 
+MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+MPICMN += -DMP_USE_UINT_DIGIT
+#MPICMN += -DMP_NO_MP_WORD
+AS_OBJS = mpi_mips.o
+#ASFLAGS = -O -OPT:Olimit=4000 -dollar -fullwarn -xansi -n32 -mips3 -exceptions
+ASFLAGS = -O -OPT:Olimit=4000 -dollar -fullwarn -xansi -n32 -mips3 
+#CFLAGS=-ansi -n32 -O3 -fullwarn -woff 1429 -D_SGI_SOURCE $(MPICMN)
+CFLAGS=-ansi -n32 -O2 -fullwarn -woff 1429 -D_SGI_SOURCE $(MPICMN)
+#CFLAGS=-ansi -n32 -g -fullwarn -woff 1429 -D_SGI_SOURCE $(MPICMN)
+#CFLAGS=-ansi -64 -O2 -fullwarn -woff 1429 -D_SGI_SOURCE -DMP_NO_MP_WORD \
+ $(MPICMN)
+endif
+
+ifeq ($(TARGET),alphaOSF1)
+#Alpha/OSF1
+MPICMN += -DMP_ASSEMBLY_MULTIPLY
+AS_OBJS+= mpvalpha.o
+#CFLAGS= -O -Olimit 4000 -ieee_with_inexact -std1 -DOSF1 -D_REENTRANT $(MPICMN)
+CFLAGS= -O -Olimit 4000 -ieee_with_inexact -std1 -DOSF1 -D_REENTRANT \
+ -DMP_NO_MP_WORD $(MPICMN)
+endif
+
+ifeq ($(TARGET),v9SOLARIS)
+#Solaris 64
+SOLARIS_FPU_FLAGS = -fast -xO5 -xrestrict=%all -xchip=ultra -xarch=v9a -KPIC -mt
+#SOLARIS_FPU_FLAGS = -fast -xO5 -xrestrict=%all -xdepend -xchip=ultra -xarch=v9a -KPIC -mt
+SOLARIS_ASM_FLAGS = -xchip=ultra -xarch=v9a -KPIC -mt 
+AS_OBJS += montmulfv9.o 
+AS_OBJS += mpi_sparc.o mpv_sparcv9.o
+MPICMN += -DMP_USE_UINT_DIGIT 
+#MPICMN += -DMP_NO_MP_WORD 
+MPICMN += -DMP_ASSEMBLY_MULTIPLY 
+MPICMN += -DMP_USING_MONT_MULF
+CFLAGS= -O -KPIC -DSVR4 -DSYSV -D__svr4 -D__svr4__ -DSOLARIS -D_REENTRANT \
+ -DSOLARIS2_8 -xarch=v9 -DXP_UNIX $(MPICMN)
+#CFLAGS= -g -KPIC -DSVR4 -DSYSV -D__svr4 -D__svr4__ -DSOLARIS -D_REENTRANT \
+ -DSOLARIS2_8 -xarch=v9 -DXP_UNIX $(MPICMN)
+endif
+
+ifeq ($(TARGET),v8plusSOLARIS)
+#Solaris 32
+SOLARIS_FPU_FLAGS = -fast -xO5 -xrestrict=%all -xdepend -xchip=ultra -xarch=v8plusa -KPIC -mt
+SOLARIS_ASM_FLAGS = -xchip=ultra -xarch=v8plusa -KPIC -mt 
+AS_OBJS += montmulfv8.o 
+AS_OBJS += mpi_sparc.o mpv_sparcv8.o
+#AS_OBJS = montmulf.o
+MPICMN += -DMP_ASSEMBLY_MULTIPLY 
+MPICMN += -DMP_USING_MONT_MULF 
+MPICMN += -DMP_USE_UINT_DIGIT
+MPICMN += -DMP_NO_MP_WORD
+CFLAGS=-O -KPIC -DSVR4 -DSYSV -D__svr4 -D__svr4__ -DSOLARIS -D_REENTRANT \
+ -DSOLARIS2_6 -xarch=v8plus -DXP_UNIX $(MPICMN)
+endif
+
+ifeq ($(TARGET),v8SOLARIS)
+#Solaris 32
+#SOLARIS_FPU_FLAGS = -fast -xO5 -xrestrict=%all -xdepend -xchip=ultra -xarch=v8 -KPIC -mt
+#SOLARIS_ASM_FLAGS = -xchip=ultra -xarch=v8plusa -KPIC -mt 
+#AS_OBJS = montmulfv8.o mpi_sparc.o mpv_sparcv8.o
+#AS_OBJS = montmulf.o
+#MPICMN += -DMP_USING_MONT_MULF
+#MPICMN += -DMP_ASSEMBLY_MULTIPLY 
+MPICMN += -DMP_USE_LONG_LONG_MULTIPLY -DMP_USE_UINT_DIGIT
+MPICMN += -DMP_NO_MP_WORD
+CFLAGS=-O -KPIC -DSVR4 -DSYSV -D__svr4 -D__svr4__ -DSOLARIS -D_REENTRANT \
+ -DSOLARIS2_6 -xarch=v8 -DXP_UNIX $(MPICMN)
+endif
+
+ifeq ($(TARGET),ia64HPUX)
+#HPUX 32 on ia64  -- 64 bit digits SCREAM.
+# This one is for DD32 which is the 32-bit ABI with 64-bit registers.
+CFLAGS= +O3 -DHPUX10 -D_POSIX_C_SOURCE=199506L -Aa +Z -DHPUX -Dhppa \
+ -D_HPUX_SOURCE -Aa +e -z +p +DD32 -DHPUX11 -DXP_UNIX -Wl,+k $(MPICMN)
+#CFLAGS= -O -DHPUX10 -D_POSIX_C_SOURCE=199506L -Aa +Z -DHPUX -Dhppa \
+ -D_HPUX_SOURCE -Aa +e -z +p +DD32 -DHPUX11 -DXP_UNIX -Wl,+k $(MPICMN)
+#CFLAGS= -g -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \
+ -D_HPUX_SOURCE -Aa +e -z +p +DD32 -DHPUX11 -DXP_UNIX -Wl,+k $(MPICMN)
+endif
+
+ifeq ($(TARGET),ia64HPUX64)
+#HPUX 64 on ia64
+# This one is for DD64 which is the 64-bit ABI 
+CFLAGS= +O3 -DHPUX10 -D_POSIX_C_SOURCE=199506L -Aa +Z -DHPUX -Dhppa \
+ -D_HPUX_SOURCE -Aa +e -z +p +DD64 -DHPUX11 -DXP_UNIX -Wl,+k $(MPICMN)
+#CFLAGS= -g -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \
+ -D_HPUX_SOURCE -Aa +e -z +p +DD64 -DHPUX11 -DXP_UNIX -Wl,+k $(MPICMN)
+endif
+
+ifeq ($(TARGET),PA2.0WHPUX)
+#HPUX64 (HP PA 2.0 Wide) using MAXPY and 64-bit digits
+MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+AS_OBJS = mpi_hp.o hpma512.o hppa20.o 
+CFLAGS= -O -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \
+ -D_HPUX_SOURCE -Aa +e -z +DA2.0W +DS2.0 +O3 +DChpux -DHPUX11  -DXP_UNIX \
+ $(MPICMN)
+#CFLAGS= -g -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \
+ -D_HPUX_SOURCE -Aa +e -z +DA2.0W +DS2.0 +DChpux -DHPUX11  -DXP_UNIX \
+ $(MPICMN)
+AS = $(CC) $(CFLAGS) -c
+endif
+
+ifeq ($(TARGET),PA2.0NHPUX)
+#HPUX32 (HP PA 2.0 Narrow) hybrid model, using 32-bit digits
+# This one is for DA2.0 (N) which is the 32-bit ABI with 64-bit registers.
+MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+AS_OBJS = mpi_hp.o hpma512.o hppa20.o 
+CFLAGS= +O3 -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \
+ -D_HPUX_SOURCE -Aa +e -z +DA2.0 +DS2.0 +DChpux -DHPUX11  -DXP_UNIX \
+ -Wl,+k $(MPICMN)
+#CFLAGS= -g -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \
+ -D_HPUX_SOURCE -Aa +e -z +DA2.0 +DS2.0 +DChpux -DHPUX11  -DXP_UNIX \
+ -Wl,+k $(MPICMN)
+AS = $(CC) $(CFLAGS) -c
+endif
+
+ifeq ($(TARGET),PA1.1HPUX)
+#HPUX32 (HP PA 1.1) Pure 32 bit
+MPICMN += -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD
+#MPICMN += -DMP_USE_LONG_LONG_MULTIPLY
+CFLAGS= -O -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \
+ -D_HPUX_SOURCE +DAportable +DS1.1 -DHPUX11 -DXP_UNIX $(MPICMN)
+##CFLAGS= -g -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \
+# -D_HPUX_SOURCE +DAportable +DS1.1 -DHPUX11 -DXP_UNIX $(MPICMN)
+endif
+
+ifeq ($(TARGET),32AIX)
+# AIX, 32-bit build using the xlC_r compiler
+CC = xlC_r
+MPICMN += -DMP_USE_UINT_DIGIT
+MPICMN += -DMP_NO_DIV_WORD
+#MPICMN += -DMP_NO_MUL_WORD
+MPICMN += -DMP_NO_ADD_WORD
+MPICMN += -DMP_NO_SUB_WORD
+#MPICMN += -DMP_NO_MP_WORD
+#MPICMN += -DMP_USE_LONG_LONG_MULTIPLY
+CFLAGS = -O -DAIX -DSYSV -qarch=com -DAIX4_3 -DXP_UNIX -UDEBUG -DNDEBUG  $(MPICMN)
+#CFLAGS = -g -DAIX -DSYSV -qarch=com -DAIX4_3 -DXP_UNIX -UDEBUG -DNDEBUG  $(MPICMN)
+#CFLAGS += -pg
+endif
+
+ifeq ($(TARGET),64AIX)
+# AIX, 64-bit build (OBJECT_MODE=64) using the xlC_r compiler
+CC = xlC_r
+MPICMN += -DMP_USE_UINT_DIGIT
+CFLAGS = -O -O2 -DAIX -DSYSV -qarch=com -DAIX_64BIT -DAIX4_3 -DXP_UNIX -UDEBUG -DNDEBUG $(MPICMN)
+OBJECT_MODE=64
+export OBJECT_MODE
+endif
+
+ifeq ($(TARGET),x86LINUX)
+#Linux
+AS_OBJS = mpi_x86.o
+MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE -DMP_ASSEMBLY_DIV_2DX1D
+MPICMN += -DMP_MONT_USE_MP_MUL -DMP_IS_LITTLE_ENDIAN
+CFLAGS= -O2 -fPIC -DLINUX1_2 -Di386 -D_XOPEN_SOURCE -DLINUX2_1 -ansi -Wall \
+ -pipe -DLINUX -Dlinux -D_POSIX_SOURCE -D_BSD_SOURCE -DHAVE_STRERROR \
+ -DXP_UNIX -UDEBUG -DNDEBUG -D_REENTRANT $(MPICMN)
+#CFLAGS= -g -fPIC -DLINUX1_2 -Di386 -D_XOPEN_SOURCE -DLINUX2_1 -ansi -Wall \
+ -pipe -DLINUX -Dlinux -D_POSIX_SOURCE -D_BSD_SOURCE -DHAVE_STRERROR \
+ -DXP_UNIX -DDEBUG -UNDEBUG -D_REENTRANT $(MPICMN)
+#CFLAGS= -g -fPIC -DLINUX1_2 -Di386 -D_XOPEN_SOURCE -DLINUX2_1 -ansi -Wall \
+ -pipe -DLINUX -Dlinux -D_POSIX_SOURCE -D_BSD_SOURCE -DHAVE_STRERROR \
+ -DXP_UNIX -UDEBUG -DNDEBUG -D_REENTRANT $(MPICMN)
+endif
+
+ifeq ($(TARGET),armLINUX)
+MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+MPICMN += -DMP_USE_UINT_DIGIT 
+AS_OBJS += mpi_arm.o
+endif
+
+ifeq ($(TARGET),AMD64SOLARIS)
+ASFLAGS += -xarch=generic64
+AS_OBJS = mpi_amd64.o mpi_amd64_sun.o
+MP_CONFIG = -DMP_ASSEMBLY_MULTIPLY -DMPI_AMD64
+MP_CONFIG += -DMP_IS_LITTLE_ENDIAN
+CFLAGS = -xarch=generic64 -xO4 -I. -DMP_API_COMPATIBLE -DMP_IOFUNC $(MP_CONFIG)
+MPICMN += $(MP_CONFIG)
+# The rule target must be the mpi_amd64_sun.o entry listed in AS_OBJS.
+mpi_amd64_sun.o: mpi_amd64_sun.s
+	$(AS) -xarch=generic64 -P -D_ASM mpi_amd64_sun.s
+endif
+
+ifeq ($(TARGET),WIN32)
+ifeq ($(CPU_ARCH),x86_64)
+AS_OBJS = mpi_amd64.obj mpi_amd64_masm.obj mp_comba_amd64_masm.obj
+CFLAGS  = -Od -Z7 -MDd -W3 -nologo -DDEBUG -D_DEBUG -UNDEBUG -DDEBUG_$(USER)
+CFLAGS += -DWIN32 -DWIN64 -D_WINDOWS -D_AMD_64_ -D_M_AMD64 -DWIN95 -DXP_PC
+CFLAGS += $(MPICMN)
+# Every AS_OBJS entry must end in .obj to match the static pattern rule below.
+$(AS_OBJS): %.obj : %.asm
+	ml64 -Cp -Sn -Zi -coff -nologo -c $<
+
+$(LIBOBJS): %.obj : %.c 
+	cl $(CFLAGS) -Fo$@ -c $<
+else
+AS_OBJS = mpi_x86.obj
+MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE -DMP_ASSEMBLY_DIV_2DX1D
+MPICMN += -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD -DMP_API_COMPATIBLE 
+MPICMN += -DMP_MONT_USE_MP_MUL 
+MPICMN += -DMP_CHAR_STORE_SLOW -DMP_IS_LITTLE_ENDIAN
+CFLAGS  = -Od -Z7 -MDd -W3 -nologo -DDEBUG -D_DEBUG -UNDEBUG -DDEBUG_$(USER)
+CFLAGS += -DWIN32 -D_WINDOWS -D_X86_ -DWIN95 -DXP_PC
+CFLAGS += $(MPICMN)
+
+$(AS_OBJS): %.obj : %.asm
+	ml -Cp -Sn -Zi -coff -nologo -c $<
+
+$(LIBOBJS): %.obj : %.c 
+	cl $(CFLAGS) -Fo$@ -c $<
+
+endif
+endif
diff --git a/security/nss/lib/freebl/mpi/test-arrays.txt b/security/nss/lib/freebl/mpi/test-arrays.txt
new file mode 100644
index 000000000..6c8908c1a
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/test-arrays.txt
@@ -0,0 +1,55 @@
+#
+# Test suite table for MPI library
+#
+# Format of entries:
+# suite-name:function-name:description
+#
+# suite-name	The name used to identify this test in mpi-test
+# function-name	The function called to perform this test in mpi-test.c
+# description   A brief description of what the suite tests
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+list:test_list:print out a list of the available test suites
+copy:test_copy:test assignment of mp-int structures
+exchange:test_exch:test exchange of mp-int structures
+zero:test_zero:test zeroing of an mp-int
+set:test_set:test setting an mp-int to a small constant
+absolute-value:test_abs:test the absolute value function
+negate:test_neg:test the arithmetic negation function
+add-digit:test_add_d:test digit addition
+add:test_add:test full addition
+subtract-digit:test_sub_d:test digit subtraction
+subtract:test_sub:test full subtraction
+multiply-digit:test_mul_d:test digit multiplication
+multiply:test_mul:test full multiplication
+square:test_sqr:test full squaring function
+divide-digit:test_div_d:test digit division
+divide-2:test_div_2:test division by two
+divide-2d:test_div_2d:test division & remainder by 2^d
+divide:test_div:test full division
+expt-digit:test_expt_d:test digit exponentiation
+expt:test_expt:test full exponentiation
+expt-2:test_2expt:test power-of-two exponentiation
+modulo-digit:test_mod_d:test digit modular reduction
+modulo:test_mod:test full modular reduction
+mod-add:test_addmod:test modular addition
+mod-subtract:test_submod:test modular subtraction
+mod-multiply:test_mulmod:test modular multiplication
+mod-square:test_sqrmod:test modular squaring function
+mod-expt:test_exptmod:test full modular exponentiation
+mod-expt-digit:test_exptmod_d:test digit modular exponentiation
+mod-inverse:test_invmod:test modular inverse function
+compare-digit:test_cmp_d:test digit comparison function
+compare-zero:test_cmp_z:test zero comparison function
+compare:test_cmp:test general signed comparison
+compare-magnitude:test_cmp_mag:test general magnitude comparison
+parity:test_parity:test parity comparison functions
+gcd:test_gcd:test greatest common divisor functions
+lcm:test_lcm:test least common multiple function
+conversion:test_convert:test general radix conversion facilities
+binary:test_raw:test raw output format
+pprime:test_pprime:test probabilistic primality tester
+fermat:test_fermat:test Fermat pseudoprimality tester
diff --git a/security/nss/lib/freebl/mpi/tests/LICENSE b/security/nss/lib/freebl/mpi/tests/LICENSE
new file mode 100644
index 000000000..c2c5d0190
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/LICENSE
@@ -0,0 +1,6 @@
+Within this directory, each of the files listed below is licensed under
+the terms given in the file LICENSE-MPL, also in this directory.
+
+pi1k.txt
+pi2k.txt
+pi5k.txt
diff --git a/security/nss/lib/freebl/mpi/tests/LICENSE-MPL b/security/nss/lib/freebl/mpi/tests/LICENSE-MPL
new file mode 100644
index 000000000..41dc2327f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/LICENSE-MPL
@@ -0,0 +1,3 @@
+This Source Code Form is subject to the terms of the Mozilla Public
+License, v. 2.0. If a copy of the MPL was not distributed with this
+file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-1.c b/security/nss/lib/freebl/mpi/tests/mptest-1.c
new file mode 100644
index 000000000..449134668
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-1.c
@@ -0,0 +1,43 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 1: Simple input test (drives single-digit multiply and add,
+ *         as well as I/O routines)
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+#ifdef MAC_CW_SIOUX
+#include <console.h>
+#endif
+
+#include "mpi.h"
+
+/* Parse each command-line argument in radix 10 and print it on its own line. */
+int
+main(int argc, char *argv[])
+{
+    mp_int value;
+    int arg = 1;
+
+#ifdef MAC_CW_SIOUX
+    argc = ccommand(&argv); /* ccommand() is handed &argv; its result becomes argc */
+#endif
+
+    mp_init(&value);
+    while (arg < argc) {
+        mp_read_radix(&value, argv[arg], 10);
+        mp_print(&value, stdout);
+        fputc('\n', stdout);
+        arg++;
+    }
+    mp_clear(&value);
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-2.c b/security/nss/lib/freebl/mpi/tests/mptest-2.c
new file mode 100644
index 000000000..1505e6afd
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-2.c
@@ -0,0 +1,62 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 2: Basic addition and subtraction test
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+#include "mpi.h"
+
+/* Parse <a> and <b> in radix 10, then print a + b and a - b. */
+int
+main(int argc, char *argv[])
+{
+    mp_int a, b, c;
+
+    if (argc < 3) { /* both operands are required */
+        fprintf(stderr, "Usage: %s <a> <b>\n", argv[0]);
+        return 1;
+    }
+    printf("Test 2: Basic addition and subtraction\n\n");
+    /* First operand: parse, then echo. */
+    mp_init(&a);
+    mp_read_radix(&a, argv[1], 10);
+    printf("a = ");
+    mp_print(&a, stdout);
+    fputc('\n', stdout);
+
+    /* Second operand: parse, then echo. */
+    mp_init(&b);
+    mp_read_radix(&b, argv[2], 10);
+    printf("b = ");
+    mp_print(&b, stdout);
+    fputc('\n', stdout);
+
+    /* c first receives the sum ... */
+    mp_init(&c);
+    printf("c = a + b\n");
+    mp_add(&a, &b, &c);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    /* ... and is then reused for the difference. */
+    printf("c = a - b\n");
+    mp_sub(&a, &b, &c);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    mp_clear(&c);
+    mp_clear(&b);
+    mp_clear(&a);
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-3.c b/security/nss/lib/freebl/mpi/tests/mptest-3.c
new file mode 100644
index 000000000..86fb24654
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-3.c
@@ -0,0 +1,105 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 3: Multiplication, division, and exponentiation test
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+#include <time.h>
+
+#include "mpi.h"
+
+#define EXPT 0 /* define nonzero to get exponentiate test */
+
+/* Exercise the multiply, divide and power-of-two routines on <a> and <b>. */
+int
+main(int argc, char *argv[])
+{
+    int ix;
+    mp_int a, b, c, d;
+    mp_digit r = 0;
+
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s <a> <b>\n", argv[0]);
+        return 1;
+    }
+
+    printf("Test 3: Multiplication and division\n\n");
+    srand((unsigned int)time(NULL));
+
+    mp_init(&a);
+    mp_init(&b);
+
+    mp_read_variable_radix(&a, argv[1], 10);
+    mp_read_variable_radix(&b, argv[2], 10);
+    printf("a = ");
+    mp_print(&a, stdout);
+    fputc('\n', stdout);
+    printf("b = ");
+    mp_print(&b, stdout);
+    fputc('\n', stdout);
+
+    mp_init(&c);
+    printf("\nc = a * b\n");
+    mp_mul(&a, &b, &c);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    printf("\nc = b * 32523\n");
+    mp_mul_d(&b, 32523, &c);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    mp_init(&d);
+    printf("\nc = a / b, d = a mod b\n");
+
+    mp_div(&a, &b, &c, &d);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+    printf("d = ");
+    mp_print(&d, stdout);
+    fputc('\n', stdout);
+
+    /* Pick a divisor in 1..255: dividing by zero has no meaningful result. */
+    ix = 1 + rand() % 255;
+    printf("\nc = a / %d, r = a mod %d\n", ix, ix);
+    mp_div_d(&a, (mp_digit)ix, &c, &r);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+    /* %X expects unsigned int; a remainder modulo ix < 256 fits, so cast r. */
+    printf("r = %04X\n", (unsigned int)r);
+
+#if EXPT
+    printf("\nc = a ** b\n");
+    mp_expt(&a, &b, &c);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+#endif
+
+    ix = rand() % 256;
+    printf("\nc = 2^%d\n", ix);
+    mp_2expt(&c, ix);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    mp_clear(&d);
+    mp_clear(&c);
+    mp_clear(&b);
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-3a.c b/security/nss/lib/freebl/mpi/tests/mptest-3a.c
new file mode 100644
index 000000000..c6cea7046
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-3a.c
@@ -0,0 +1,123 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 3a: Multiplication vs. squaring timing test
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+#include <time.h>
+
+#include "mpi.h"
+#include "mpprime.h"
+
+/* Check mp_sqr() against mp_mul(a, a), then time both over random inputs. */
+int
+main(int argc, char *argv[])
+{
+    int ix, num, prec = 8;
+    double d1, d2;
+    clock_t start, finish;
+    time_t seed;
+    mp_int a, c, d;
+
+    seed = time(NULL);
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <num-tests> [<precision>]\n", argv[0]);
+        return 1;
+    }
+    if ((num = atoi(argv[1])) < 0)
+        num = -num;
+    if (!num) {
+        fprintf(stderr, "%s: must perform at least 1 test\n", argv[0]);
+        return 1;
+    }
+
+    /* A positive <precision> is a bit count, rounded up to whole digits. */
+    if (argc > 2) {
+        if ((prec = atoi(argv[2])) <= 0)
+            prec = 8;
+        else
+            prec = (prec + (DIGIT_BIT - 1)) / DIGIT_BIT;
+    }
+
+    /* Cast the bit count so the argument type matches the %u conversion. */
+    printf("Test 3a: Multiplication vs squaring timing test\n"
+           "Precision:  %d digits (%u bits)\n"
+           "# of tests: %d\n\n",
+           prec, (unsigned int)(prec * DIGIT_BIT), num);
+
+    mp_init_size(&a, prec);
+    mp_init(&c);
+    mp_init(&d);
+
+    printf("Verifying accuracy ... \n");
+    srand((unsigned int)seed);
+    for (ix = 0; ix < num; ix++) {
+        mpp_random_size(&a, prec);
+        mp_mul(&a, &a, &c);
+        mp_sqr(&a, &d);
+
+        if (mp_cmp(&c, &d) != 0) {
+            printf("Error!  Results not accurate:\n");
+            printf("a = ");
+            mp_print(&a, stdout);
+            fputc('\n', stdout);
+            printf("c = ");
+            mp_print(&c, stdout);
+            fputc('\n', stdout);
+            printf("d = ");
+            mp_print(&d, stdout);
+            fputc('\n', stdout);
+            mp_sub(&c, &d, &d);
+            printf("dif ");
+            mp_print(&d, stdout);
+            fputc('\n', stdout);
+            mp_clear(&c);
+            mp_clear(&d);
+            mp_clear(&a);
+            return 1;
+        }
+    }
+    printf("Accuracy is confirmed for the %d test samples\n", num);
+    mp_clear(&d);
+
+    printf("Testing squaring ... \n");
+    srand((unsigned int)seed);
+    start = clock();
+    for (ix = 0; ix < num; ix++) {
+        mpp_random_size(&a, prec);
+        mp_sqr(&a, &c);
+    }
+    finish = clock();
+    d2 = (double)(finish - start) / CLOCKS_PER_SEC;
+
+    /* Reseed and draw inputs exactly as the squaring loop does, for a fair timing. */
+    printf("Testing multiplication ... \n");
+    srand((unsigned int)seed);
+    start = clock();
+    for (ix = 0; ix < num; ix++) {
+        mpp_random_size(&a, prec);
+        mp_mul(&a, &a, &c);
+    }
+    finish = clock();
+    d1 = (double)(finish - start) / CLOCKS_PER_SEC;
+
+    printf("Multiplication time: %.3f sec (%.3f each)\n", d1, d1 / num);
+    printf("Squaring time:       %.3f sec (%.3f each)\n", d2, d2 / num);
+    if (d1 > 0.0) /* a run too short for clock() to resolve leaves d1 == 0 */
+        printf("Improvement:         %.2f%%\n", (1.0 - (d2 / d1)) * 100.0);
+    else
+        printf("Improvement:         n/a\n");
+
+    mp_clear(&c);
+    mp_clear(&a);
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-4.c b/security/nss/lib/freebl/mpi/tests/mptest-4.c
new file mode 100644
index 000000000..0f326ac2c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-4.c
@@ -0,0 +1,111 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 4: Modular arithmetic tests
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+#include "mpi.h"
+
+int
+main(int argc, char *argv[])
+{
+    int ix;
+    mp_int a, b, c, m;
+    mp_digit r;
+    /* Test 4 driver: takes decimal <a> <b> <m> and prints modular results. */
+    if (argc < 4) {
+        fprintf(stderr, "Usage: %s <a> <b> <m>\n", argv[0]);
+        return 1;
+    }
+
+    printf("Test 4: Modular arithmetic\n\n");
+
+    mp_init(&a);
+    mp_init(&b);
+    mp_init(&m);
+    /* Operands are parsed as base-10 strings, then echoed for reference. */
+    mp_read_radix(&a, argv[1], 10);
+    mp_read_radix(&b, argv[2], 10);
+    mp_read_radix(&m, argv[3], 10);
+    printf("a = ");
+    mp_print(&a, stdout);
+    fputc('\n', stdout);
+    printf("b = ");
+    mp_print(&b, stdout);
+    fputc('\n', stdout);
+    printf("m = ");
+    mp_print(&m, stdout);
+    fputc('\n', stdout);
+    /* c receives the result of every mp_int-valued operation below. */
+    mp_init(&c);
+    printf("\nc = a (mod m)\n");
+
+    mp_mod(&a, &m, &c);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    printf("\nc = b (mod m)\n");
+
+    mp_mod(&b, &m, &c);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    printf("\nc = b (mod 1853)\n");
+    /* A remainder mod 1853 fits in unsigned int, so the cast for %X is exact. */
+    mp_mod_d(&b, 1853, &r);
+    printf("c = %04X\n", (unsigned int)r);
+
+    printf("\nc = (a + b) mod m\n");
+
+    mp_addmod(&a, &b, &m, &c);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    printf("\nc = (a - b) mod m\n");
+
+    mp_submod(&a, &b, &m, &c);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    printf("\nc = (a * b) mod m\n");
+
+    mp_mulmod(&a, &b, &m, &c);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    printf("\nc = (a ** b) mod m\n");
+
+    mp_exptmod(&a, &b, &m, &c);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+    /* a is both input and output of mp_sqrmod, so it is squared repeatedly. */
+    printf("\nIn-place modular squaring test:\n");
+    for (ix = 0; ix < 5; ix++) {
+        printf("a = (a * a) mod m   a = ");
+        mp_sqrmod(&a, &m, &a);
+        mp_print(&a, stdout);
+        fputc('\n', stdout);
+    }
+
+    mp_clear(&c);
+    mp_clear(&m);
+    mp_clear(&b);
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-4a.c b/security/nss/lib/freebl/mpi/tests/mptest-4a.c
new file mode 100644
index 000000000..0c8e18872
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-4a.c
@@ -0,0 +1,109 @@
+/*
+ *  mptest4a - modular exponentiation speed test
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <time.h>
+
+#include <sys/time.h>
+
+#include "mpi.h"
+#include "mpprime.h"
+
+typedef struct {
+    unsigned int sec;  /* whole seconds, taken from tv_sec in now() */
+    unsigned int usec; /* microseconds, taken from tv_usec in now() */
+} instant_t; /* timestamp returned by now(); all-zero if the clock read fails */
+
+instant_t
+now(void)
+{
+    /* Sample the wall clock; a failed read is reported as the zero instant. */
+    instant_t stamp = { 0, 0 };
+    struct timeval tv;
+
+    if (gettimeofday(&tv, NULL) == 0) {
+        /* The timeval fields are narrowed to the unsigned int members. */
+        stamp.sec = tv.tv_sec;
+        stamp.usec = tv.tv_usec;
+    }
+
+    /* .sec carries tv_sec and .usec carries tv_usec from the same reading. */
+    return stamp;
+}
+
+extern mp_err s_mp_pad();
+
+int
+main(int argc, char *argv[])
+{
+    int ix, num, prec = 8;
+    unsigned int d;
+    instant_t start, finish;
+    time_t seed;
+    mp_int a, m, c;
+    /* Test 4a driver: times <num-tests> random modular exponentiations. */
+    seed = time(NULL);
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <num-tests> [<precision>]\n", argv[0]);
+        return 1;
+    }
+    /* A negative count is taken as its absolute value; zero is rejected. */
+    if ((num = atoi(argv[1])) < 0)
+        num = -num;
+
+    if (!num) {
+        fprintf(stderr, "%s: must perform at least 1 test\n", argv[0]);
+        return 1;
+    }
+    /* Optional precision in digits; non-positive values fall back to 8. */
+    if (argc > 2) {
+        if ((prec = atoi(argv[2])) <= 0)
+            prec = 8;
+    }
+    /* The cast keeps the %d argument an int however DIGIT_BIT is typed. */
+    printf("Test 4a: Modular exponentiation timing test\n"
+           "Precision:  %d digits (%d bits)\n"
+           "# of tests: %d\n\n",
+           prec, (int)(prec * DIGIT_BIT), num);
+
+    mp_init_size(&a, prec);
+    mp_init_size(&m, prec);
+    mp_init_size(&c, prec);
+    s_mp_pad(&a, prec);
+    s_mp_pad(&m, prec);
+    s_mp_pad(&c, prec);
+
+    printf("Testing modular exponentiation ... \n");
+    srand((unsigned int)seed);
+
+    start = now();
+    for (ix = 0; ix < num; ix++) {
+        mpp_random(&a);
+        mpp_random(&c);
+        mpp_random(&m);
+        mp_exptmod(&a, &c, &m, &c); /* c is the exponent and the result */
+    }
+    finish = now();
+    /* Elapsed microseconds, accumulated in wrapping unsigned int arithmetic. */
+    d = (finish.sec - start.sec) * 1000000;
+    d -= start.usec;
+    d += finish.usec;
+    /* Convert to double before dividing so the seconds figure is not truncated. */
+    printf("Total time elapsed:        %u usec\n", d);
+    printf("Time per exponentiation:   %u usec (%.3f sec)\n",
+           (d / num), (double)d / num / 1000000);
+
+    mp_clear(&c);
+    mp_clear(&a);
+    mp_clear(&m);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-4b.c b/security/nss/lib/freebl/mpi/tests/mptest-4b.c
new file mode 100644
index 000000000..1bb2f911f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-4b.c
@@ -0,0 +1,107 @@
+/*
+ * mptest-4b.c
+ *
+ * Test speed of a large modular exponentiation of a primitive element
+ * modulo a prime.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <time.h>
+
+#include <sys/time.h>
+
+#include "mpi.h"
+#include "mpprime.h"
+
+char *g_prime = /* modulus of the timed exponentiation; main() parses it as base 16 */
+    "34BD53C07350E817CCD49721020F1754527959C421C1533244769D4CF060A8B1C3DA"
+    "25094BE723FB1E2369B55FEEBBE0FAC16425161BF82684062B5EC5D7D47D1B23C117"
+    "0FA19745E44A55E148314E582EB813AC9EE5126295E2E380CACC2F6D206B293E5ED9"
+    "23B54EE961A8C69CD625CE4EC38B70C649D7F014432AEF3A1C93";
+char *g_gen = "5"; /* base of the timed exponentiation; also parsed as base 16 */
+
+typedef struct {
+    unsigned int sec;  /* whole seconds, taken from tv_sec in now() */
+    unsigned int usec; /* microseconds, taken from tv_usec in now() */
+} instant_t; /* timestamp returned by now(); all-zero if the clock read fails */
+
+instant_t
+now(void)
+{
+    /* Sample the wall clock; a failed read is reported as the zero instant. */
+    instant_t stamp = { 0, 0 };
+    struct timeval tv;
+
+    if (gettimeofday(&tv, NULL) == 0) {
+        /* The timeval fields are narrowed to the unsigned int members. */
+        stamp.sec = tv.tv_sec;
+        stamp.usec = tv.tv_usec;
+    }
+
+    /* .sec carries tv_sec and .usec carries tv_usec from the same reading. */
+    return stamp;
+}
+
+extern mp_err s_mp_pad();
+
+int
+main(int argc, char *argv[])
+{
+    instant_t start, finish;
+    mp_int prime, gen, expt, res;
+    unsigned int ix, diff;
+    int num;
+    /* Test 4b driver: times <num-tests> exponentiations of g_gen mod g_prime. */
+    srand((unsigned int)time(NULL));
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <num-tests>\n", argv[0]);
+        return 1;
+    }
+    /* Negative counts are negated; a count of zero is bumped to one. */
+    if ((num = atoi(argv[1])) < 0)
+        num = -num;
+
+    if (num == 0)
+        ++num;
+
+    mp_init(&prime);
+    mp_init(&gen);
+    mp_init(&res);
+    mp_read_radix(&prime, g_prime, 16);
+    mp_read_radix(&gen, g_gen, 16);
+    /* The exponent is sized to one digit fewer than the modulus uses. */
+    mp_init_size(&expt, USED(&prime) - 1);
+    s_mp_pad(&expt, USED(&prime) - 1);
+
+    printf("Testing %d modular exponentiations ... \n", num);
+
+    start = now();
+    for (ix = 0; ix < (unsigned int)num; ix++) { /* num is positive here */
+        mpp_random(&expt);
+        mp_exptmod(&gen, &expt, &prime, &res);
+    }
+    finish = now();
+    /* Elapsed microseconds, accumulated in wrapping unsigned int arithmetic. */
+    diff = (finish.sec - start.sec) * 1000000;
+    diff += finish.usec;
+    diff -= start.usec;
+
+    printf("%d operations took %u usec (%.3f sec)\n",
+           num, diff, (double)diff / 1000000.0);
+    printf("That is %.3f sec per operation.\n",
+           ((double)diff / 1000000.0) / num);
+
+    mp_clear(&expt);
+    mp_clear(&res);
+    mp_clear(&gen);
+    mp_clear(&prime);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-5.c b/security/nss/lib/freebl/mpi/tests/mptest-5.c
new file mode 100644
index 000000000..dff3ed470
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-5.c
@@ -0,0 +1,85 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 5: Other number theoretic functions
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+#include "mpi.h"
+
+int
+main(int argc, char *argv[])
+{
+    mp_int a, b, c, x, y;
+    /* Test 5 driver: takes decimal <a> <b> and prints GCD-related results. */
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s <a> <b>\n", argv[0]);
+        return 1;
+    }
+
+    printf("Test 5: Number theoretic functions\n\n");
+
+    mp_init(&a);
+    mp_init(&b);
+    /* Both operands are parsed as base-10 strings and echoed back. */
+    mp_read_radix(&a, argv[1], 10);
+    mp_read_radix(&b, argv[2], 10);
+
+    printf("a = ");
+    mp_print(&a, stdout);
+    fputc('\n', stdout);
+    printf("b = ");
+    mp_print(&b, stdout);
+    fputc('\n', stdout);
+
+    mp_init(&c);
+    printf("\nc = (a, b)\n");
+
+    mp_gcd(&a, &b, &c);
+    printf("Euclid: c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+    /*
+      mp_bgcd(&a, &b, &c);
+      printf("Binary: c = "); mp_print(&c, stdout); fputc('\n', stdout);
+    */
+    mp_init(&x);
+    mp_init(&y);
+    printf("\nc = (a, b) = ax + by\n");
+    /* mp_xgcd fills c, x and y; the heading above names the relation shown. */
+    mp_xgcd(&a, &b, &c, &x, &y);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+    printf("x = ");
+    mp_print(&x, stdout);
+    fputc('\n', stdout);
+    printf("y = ");
+    mp_print(&y, stdout);
+    fputc('\n', stdout);
+    /* Only MP_UNDEF is reported as "no inverse"; any other status prints c. */
+    printf("\nc = a^-1 (mod b)\n");
+    if (mp_invmod(&a, &b, &c) == MP_UNDEF) {
+        printf("a has no inverse mod b\n");
+    } else {
+        printf("c = ");
+        mp_print(&c, stdout);
+        fputc('\n', stdout);
+    }
+
+    mp_clear(&y);
+    mp_clear(&x);
+    mp_clear(&c);
+    mp_clear(&b);
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-5a.c b/security/nss/lib/freebl/mpi/tests/mptest-5a.c
new file mode 100644
index 000000000..c410a6a84
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-5a.c
@@ -0,0 +1,147 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 5a: Greatest common divisor speed test, binary vs. Euclid
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <time.h>
+
+#include <sys/time.h>
+
+#include "mpi.h"
+#include "mpprime.h"
+
+typedef struct {
+    unsigned int sec;  /* whole seconds, taken from tv_sec in now() */
+    unsigned int usec; /* microseconds, taken from tv_usec in now() */
+} instant_t; /* timestamp returned by now(); all-zero if the clock read fails */
+
+instant_t
+now(void)
+{
+    /* Sample the wall clock; a failed read is reported as the zero instant. */
+    instant_t stamp = { 0, 0 };
+    struct timeval tv;
+
+    if (gettimeofday(&tv, NULL) == 0) {
+        /* The timeval fields are narrowed to the unsigned int members. */
+        stamp.sec = tv.tv_sec;
+        stamp.usec = tv.tv_usec;
+    }
+
+    /* .sec carries tv_sec and .usec carries tv_usec from the same reading. */
+    return stamp;
+}
+
+#define PRECISION 16
+
+int
+main(int argc, char *argv[])
+{
+    int ix, num, prec = PRECISION;
+    mp_int a, b, c, d;
+    instant_t start, finish;
+    time_t seed;
+    unsigned int d1, d2;
+    /* Test 5a driver: checks mp_gcd against mp_bgcd, then times each one. */
+    seed = time(NULL);
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <num-tests>\n", argv[0]);
+        return 1;
+    }
+    /* A negative count is negated. NOTE(review): zero is not rejected here. */
+    if ((num = atoi(argv[1])) < 0)
+        num = -num;
+
+    printf("Test 5a: Euclid vs. Binary, a GCD speed test\n\n"
+           "Number of tests: %d\n"
+           "Precision:       %d digits\n\n",
+           num, prec);
+
+    mp_init_size(&a, prec);
+    mp_init_size(&b, prec);
+    mp_init(&c);
+    mp_init(&d);
+    /* Accuracy pass: both routines must agree on every random pair. */
+    printf("Verifying accuracy ... \n");
+    srand((unsigned int)seed);
+    for (ix = 0; ix < num; ix++) {
+        mpp_random_size(&a, prec);
+        mpp_random_size(&b, prec);
+
+        mp_gcd(&a, &b, &c);
+        mp_bgcd(&a, &b, &d);
+        /* On the first disagreement, dump all four values and exit with 1. */
+        if (mp_cmp(&c, &d) != 0) {
+            printf("Error!  Results not accurate:\n");
+            printf("a = ");
+            mp_print(&a, stdout);
+            fputc('\n', stdout);
+            printf("b = ");
+            mp_print(&b, stdout);
+            fputc('\n', stdout);
+            printf("c = ");
+            mp_print(&c, stdout);
+            fputc('\n', stdout);
+            printf("d = ");
+            mp_print(&d, stdout);
+            fputc('\n', stdout);
+
+            mp_clear(&a);
+            mp_clear(&b);
+            mp_clear(&c);
+            mp_clear(&d);
+            return 1;
+        }
+    }
+    mp_clear(&d); /* d is not used again after the comparison pass */
+    printf("Accuracy confirmed for the %d test samples\n", num);
+    /* Each timed loop below reseeds rand() with the seed used above. */
+    printf("Testing Euclid ... \n");
+    srand((unsigned int)seed);
+    start = now();
+    for (ix = 0; ix < num; ix++) {
+        mpp_random_size(&a, prec);
+        mpp_random_size(&b, prec);
+        mp_gcd(&a, &b, &c);
+    }
+    finish = now();
+    /* Elapsed microseconds, accumulated in wrapping unsigned int arithmetic. */
+    d1 = (finish.sec - start.sec) * 1000000;
+    d1 -= start.usec;
+    d1 += finish.usec;
+
+    printf("Testing binary ... \n");
+    srand((unsigned int)seed);
+    start = now();
+    for (ix = 0; ix < num; ix++) {
+        mpp_random_size(&a, prec);
+        mpp_random_size(&b, prec);
+        mp_bgcd(&a, &b, &c);
+    }
+    finish = now();
+
+    d2 = (finish.sec - start.sec) * 1000000;
+    d2 -= start.usec;
+    d2 += finish.usec;
+    /* NOTE(review): if d1 is 0 the ratio below divides by zero in double. */
+    printf("Euclidean algorithm time: %u usec\n", d1);
+    printf("Binary algorithm time:    %u usec\n", d2);
+    printf("Improvement:              %.2f%%\n",
+           (1.0 - ((double)d2 / (double)d1)) * 100.0);
+
+    mp_clear(&c);
+    mp_clear(&b);
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-6.c b/security/nss/lib/freebl/mpi/tests/mptest-6.c
new file mode 100644
index 000000000..4febf39c5
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-6.c
@@ -0,0 +1,78 @@
+/*
+ *  Simple test driver for MPI library
+ *
+ *  Test 6: Output functions
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+#include "mpi.h"
+
+void
+print_buf(FILE *ofp, char *buf, int len)
+{
+    int ix, brk = 0; /* brk counts the bytes on the current row, modulo 16 */
+    /* Dump len bytes of buf to ofp as two-digit hex, sixteen per line. */
+    for (ix = 0; ix < len; ix++) {
+        fprintf(ofp, "%02X ", (unsigned int)(unsigned char)buf[ix]);
+        /* Via unsigned char so bytes >= 0x80 are not sign-extended for %X. */
+        brk = (brk + 1) & 0xF;
+        if (!brk)
+            fputc('\n', ofp);
+    }
+    /* Terminate a partially filled last row. */
+    if (brk)
+        fputc('\n', ofp);
+}
+
+int
+main(int argc, char *argv[])
+{
+    int ix, size;
+    mp_int a;
+    char *buf;
+    /* Test 6 driver: prints <a> in each radix up to MAX_RADIX, then as raw bytes. */
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <a>\n", argv[0]);
+        return 1;
+    }
+
+    printf("Test 6: Output functions\n\n");
+
+    mp_init(&a);
+
+    mp_read_radix(&a, argv[1], 10);
+
+    printf("\nConverting to a string:\n");
+    /* One row per radix from 2 to MAX_RADIX: radix, buffer size, digits. */
+    printf("Rx Size Representation\n");
+    for (ix = 2; ix <= MAX_RADIX; ix++) {
+        size = mp_radix_size(&a, ix);
+        /* NOTE(review): the calloc result is used without a NULL check. */
+        buf = calloc(size, sizeof(char));
+        mp_toradix(&a, buf, ix);
+        printf("%2d: %3d: %s\n", ix, size, buf);
+        free(buf);
+    }
+
+    printf("\nRaw output:\n");
+    size = mp_raw_size(&a);
+    buf = calloc(size, sizeof(char));
+
+    printf("Size:  %d bytes\n", size);
+
+    mp_toraw(&a, buf);
+    print_buf(stdout, buf, size);
+    free(buf);
+
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-7.c b/security/nss/lib/freebl/mpi/tests/mptest-7.c
new file mode 100644
index 000000000..1e83fbf96
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-7.c
@@ -0,0 +1,85 @@
+/*
+ *  Simple test driver for MPI library
+ *
+ *  Test 7: Random and divisibility tests
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <time.h>
+
+#define MP_IOFUNC 1
+#include "mpi.h"
+
+#include "mpprime.h"
+
+int
+main(int argc, char *argv[])
+{
+    mp_digit num;
+    mp_int a, b;
+    /* Test 7 driver: takes decimal <a> <b>; prints divisibility and random values. */
+    srand(time(NULL));
+
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s <a> <b>\n", argv[0]);
+        return 1;
+    }
+
+    printf("Test 7: Random & divisibility tests\n\n");
+
+    mp_init(&a);
+    mp_init(&b);
+
+    mp_read_radix(&a, argv[1], 10);
+    mp_read_radix(&b, argv[2], 10);
+
+    printf("a = ");
+    mp_print(&a, stdout);
+    fputc('\n', stdout);
+    printf("b = ");
+    mp_print(&b, stdout);
+    fputc('\n', stdout);
+    /* Divisibility is checked in both directions. */
+    if (mpp_divis(&a, &b) == MP_YES)
+        printf("a is divisible by b\n");
+    else
+        printf("a is not divisible by b\n");
+
+    if (mpp_divis(&b, &a) == MP_YES)
+        printf("b is divisible by a\n");
+    else
+        printf("b is not divisible by a\n");
+    /* b is overwritten by three successive mpp_random() calls. */
+    printf("\nb = mpp_random()\n");
+    mpp_random(&b);
+    printf("b = ");
+    mp_print(&b, stdout);
+    fputc('\n', stdout);
+    mpp_random(&b);
+    printf("b = ");
+    mp_print(&b, stdout);
+    fputc('\n', stdout);
+    mpp_random(&b);
+    printf("b = ");
+    mp_print(&b, stdout);
+    fputc('\n', stdout);
+    /* num is passed by address; its value after the call is not read here. */
+    printf("\nTesting a for divisibility by first 170 primes\n");
+    num = 170;
+    if (mpp_divis_primes(&a, &num) == MP_YES)
+        printf("It is divisible by at least one of them\n");
+    else
+        printf("It is not divisible by any of them\n");
+
+    mp_clear(&b);
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-8.c b/security/nss/lib/freebl/mpi/tests/mptest-8.c
new file mode 100644
index 000000000..a9d3afff9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-8.c
@@ -0,0 +1,68 @@
+/*
+ *  Simple test driver for MPI library
+ *
+ *  Test 8: Probabilistic primality tester
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <time.h>
+
+#define MP_IOFUNC 1
+#include "mpi.h"
+
+#include "mpprime.h"
+
+int
+main(int argc, char *argv[])
+{
+    int ix;
+    mp_digit num;
+    mp_int a;
+    /* Test 8 driver: takes decimal <a> and reports whether it looks prime. */
+    srand(time(NULL));
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <a>\n", argv[0]);
+        return 1;
+    }
+
+    printf("Test 8: Probabilistic primality testing\n\n");
+
+    mp_init(&a);
+
+    mp_read_radix(&a, argv[1], 10);
+
+    printf("a = ");
+    mp_print(&a, stdout);
+    fputc('\n', stdout);
+    /* Screening step: a hit here jumps to CLEANUP and skips the loop below. */
+    printf("\nChecking for divisibility by small primes ... \n");
+    num = 170;
+    if (mpp_divis_primes(&a, &num) == MP_YES) {
+        printf("it is not prime\n");
+        goto CLEANUP;
+    }
+    printf("Passed that test (not divisible by any small primes).\n");
+    /* Up to ten calls to mpp_pprime(&a, 5); the first MP_NO ends the run. */
+    for (ix = 0; ix < 10; ix++) {
+        printf("\nPerforming Rabin-Miller test, iteration %d\n", ix + 1);
+
+        if (mpp_pprime(&a, 5) == MP_NO) {
+            printf("it is not prime\n");
+            goto CLEANUP;
+        }
+    }
+    printf("All tests passed; a is probably prime\n");
+    /* The exit status is 0 whichever verdict was printed. */
+CLEANUP:
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-9.c b/security/nss/lib/freebl/mpi/tests/mptest-9.c
new file mode 100644
index 000000000..133264e89
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-9.c
@@ -0,0 +1,109 @@
+/*
+ *    mptest-9.c
+ *
+ *   Test logical functions
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <time.h>
+
+#include "mpi.h"
+#include "mplogic.h"
+
+int
+main(int argc, char *argv[])
+{
+    mp_int a, b, c;
+    int pco;
+    mp_err res;
+    /* Test 9 driver: takes <a> <b> and prints the results of the mpl_* calls. */
+    printf("Test 9: Logical functions\n\n");
+    /* NOTE: the banner above is printed even when this usage check fails. */
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s <a> <b>\n", argv[0]);
+        return 1;
+    }
+    /* Operands are read as base-16 strings; c holds each result in turn. */
+    mp_init(&a);
+    mp_init(&b);
+    mp_init(&c);
+    mp_read_radix(&a, argv[1], 16);
+    mp_read_radix(&b, argv[2], 16);
+
+    printf("a       = ");
+    mp_print(&a, stdout);
+    fputc('\n', stdout);
+    printf("b       = ");
+    mp_print(&b, stdout);
+    fputc('\n', stdout);
+
+    mpl_not(&a, &c);
+    printf("~a      = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    mpl_and(&a, &b, &c);
+    printf("a & b   = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    mpl_or(&a, &b, &c);
+    printf("a | b   = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    mpl_xor(&a, &b, &c);
+    printf("a ^ b   = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+    /* Shift amounts 1, 5 and 16 in each direction, as labelled in the output. */
+    mpl_rsh(&a, &c, 1);
+    printf("a >>  1 = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+    mpl_rsh(&a, &c, 5);
+    printf("a >>  5 = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+    mpl_rsh(&a, &c, 16);
+    printf("a >> 16 = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    mpl_lsh(&a, &c, 1);
+    printf("a <<  1 = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+    mpl_lsh(&a, &c, 5);
+    printf("a <<  5 = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+    mpl_lsh(&a, &c, 16);
+    printf("a << 16 = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+    /* pco receives the count written by mpl_num_set for each operand. */
+    mpl_num_set(&a, &pco);
+    printf("population(a) = %d\n", pco);
+    mpl_num_set(&b, &pco);
+    printf("population(b) = %d\n", pco);
+    /* Any result other than MP_EVEN is reported as odd parity. */
+    res = mpl_parity(&a);
+    if (res == MP_EVEN)
+        printf("a has even parity\n");
+    else
+        printf("a has odd parity\n");
+
+    mp_clear(&c);
+    mp_clear(&b);
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-b.c b/security/nss/lib/freebl/mpi/tests/mptest-b.c
new file mode 100644
index 000000000..07f30eaf8
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-b.c
@@ -0,0 +1,230 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test GF2m: Binary Polynomial Arithmetic
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+#include "mp_gf2m.h"
+
+int
+main(int argc, char *argv[])
+{
+    int ix;
+    mp_int pp, a, b, x, y, order;
+    mp_int c, d;
+    mp_err res = MP_OKAY;
+    unsigned int p[] = { 163, 7, 6, 3, 0 }; /* term list expected for pp */
+    unsigned int ptemp[10];
+    /* Self-checking GF(2^m) driver: each stage prints success or bails out. */
+    printf("Test b: Binary Polynomial Arithmetic\n\n");
+
+    mp_init(&pp);
+    mp_init(&a);
+    mp_init(&b);
+    mp_init(&x);
+    mp_init(&y);
+    mp_init(&order);
+
+    mp_read_radix(&pp, "0800000000000000000000000000000000000000C9", 16);
+    mp_read_radix(&a, "1", 16);
+    mp_read_radix(&b, "020A601907B8C953CA1481EB10512F78744A3205FD", 16);
+    mp_read_radix(&x, "03F0EBA16286A2D57EA0991168D4994637E8343E36", 16);
+    mp_read_radix(&y, "00D51FBC6C71A0094FA2CDD545B11C5C0C797324F1", 16);
+    mp_read_radix(&order, "040000000000000000000292FE77E70C12A4234C33", 16);
+    printf("pp = ");
+    mp_print(&pp, stdout);
+    fputc('\n', stdout);
+    printf("a = ");
+    mp_print(&a, stdout);
+    fputc('\n', stdout);
+    printf("b = ");
+    mp_print(&b, stdout);
+    fputc('\n', stdout);
+    printf("x = ");
+    mp_print(&x, stdout);
+    fputc('\n', stdout);
+    printf("y = ");
+    mp_print(&y, stdout);
+    fputc('\n', stdout);
+    printf("order = ");
+    mp_print(&order, stdout);
+    fputc('\n', stdout);
+
+    mp_init(&c);
+    mp_init(&d);
+    /* NOTE: the mismatch branches below return -1 directly, skipping CLEANUP. */
+    /* Test polynomial conversion */
+    ix = mp_bpoly2arr(&pp, ptemp, 10);
+    if (
+        (ix != 5) ||
+        (ptemp[0] != p[0]) ||
+        (ptemp[1] != p[1]) ||
+        (ptemp[2] != p[2]) ||
+        (ptemp[3] != p[3]) ||
+        (ptemp[4] != p[4])) {
+        printf("Polynomial to array conversion not correct\n");
+        return -1;
+    }
+
+    printf("Polynomial conversion test #1 successful.\n");
+    MP_CHECKOK(mp_barr2poly(p, &c));
+    if (mp_cmp(&pp, &c) != 0) {
+        printf("Array to polynomial conversion not correct\n");
+        return -1;
+    }
+    printf("Polynomial conversion test #2 successful.\n");
+
+    /* Test addition */
+    MP_CHECKOK(mp_badd(&a, &a, &c));
+    if (mp_cmp_z(&c) != 0) {
+        printf("a+a should equal zero\n");
+        return -1;
+    }
+    printf("Addition test #1 successful.\n");
+    MP_CHECKOK(mp_badd(&a, &b, &c));
+    MP_CHECKOK(mp_badd(&b, &c, &c));
+    if (mp_cmp(&c, &a) != 0) {
+        printf("c = (a + b) + b should equal a\n");
+        printf("a = ");
+        mp_print(&a, stdout);
+        fputc('\n', stdout);
+        printf("c = ");
+        mp_print(&c, stdout);
+        fputc('\n', stdout);
+        return -1;
+    }
+    printf("Addition test #2 successful.\n");
+
+    /* Test multiplication */
+    mp_set(&c, 2);
+    MP_CHECKOK(mp_bmul(&b, &c, &c));
+    MP_CHECKOK(mp_badd(&b, &c, &c));
+    mp_set(&d, 3);
+    MP_CHECKOK(mp_bmul(&b, &d, &d));
+    if (mp_cmp(&c, &d) != 0) {
+        printf("c = (2 * b) + b should equal c = 3 * b\n");
+        printf("c = ");
+        mp_print(&c, stdout);
+        fputc('\n', stdout);
+        printf("d = ");
+        mp_print(&d, stdout);
+        fputc('\n', stdout);
+        return -1;
+    }
+    printf("Multiplication test #1 successful.\n");
+
+    /* Test modular reduction */
+    MP_CHECKOK(mp_bmod(&b, p, &c));
+    if (mp_cmp(&b, &c) != 0) {
+        printf("c = b mod p should equal b\n");
+        printf("b = ");
+        mp_print(&b, stdout);
+        fputc('\n', stdout);
+        printf("c = ");
+        mp_print(&c, stdout);
+        fputc('\n', stdout);
+        return -1;
+    }
+    printf("Modular reduction test #1 successful.\n");
+    MP_CHECKOK(mp_badd(&b, &pp, &c));
+    MP_CHECKOK(mp_bmod(&c, p, &c));
+    if (mp_cmp(&b, &c) != 0) {
+        printf("c = (b + p) mod p should equal b\n");
+        printf("b = ");
+        mp_print(&b, stdout);
+        fputc('\n', stdout);
+        printf("c = ");
+        mp_print(&c, stdout);
+        fputc('\n', stdout);
+        return -1;
+    }
+    printf("Modular reduction test #2 successful.\n");
+    MP_CHECKOK(mp_bmul(&b, &pp, &c));
+    MP_CHECKOK(mp_bmod(&c, p, &c));
+    if (mp_cmp_z(&c) != 0) {
+        printf("c = (b * p) mod p should equal 0\n");
+        printf("c = ");
+        mp_print(&c, stdout);
+        fputc('\n', stdout);
+        return -1;
+    }
+    printf("Modular reduction test #3 successful.\n");
+
+    /* Test modular multiplication */
+    MP_CHECKOK(mp_bmulmod(&b, &pp, p, &c));
+    if (mp_cmp_z(&c) != 0) {
+        printf("c = (b * p) mod p should equal 0\n");
+        printf("c = ");
+        mp_print(&c, stdout);
+        fputc('\n', stdout);
+        return -1;
+    }
+    printf("Modular multiplication test #1 successful.\n");
+    mp_set(&c, 1);
+    MP_CHECKOK(mp_badd(&pp, &c, &c));
+    MP_CHECKOK(mp_bmulmod(&b, &c, p, &c));
+    if (mp_cmp(&b, &c) != 0) {
+        printf("c = (b * (p + 1)) mod p should equal b\n");
+        printf("b = ");
+        mp_print(&b, stdout);
+        fputc('\n', stdout);
+        printf("c = ");
+        mp_print(&c, stdout);
+        fputc('\n', stdout);
+        return -1;
+    }
+    printf("Modular multiplication test #2 successful.\n");
+
+    /* Test modular squaring */
+    MP_CHECKOK(mp_copy(&b, &c));
+    MP_CHECKOK(mp_bmulmod(&b, &c, p, &c));
+    MP_CHECKOK(mp_bsqrmod(&b, p, &d));
+    if (mp_cmp(&c, &d) != 0) {
+        printf("c = (b * b) mod p should equal d = b^2 mod p\n");
+        printf("c = ");
+        mp_print(&c, stdout);
+        fputc('\n', stdout);
+        printf("d = ");
+        mp_print(&d, stdout);
+        fputc('\n', stdout);
+        return -1;
+    }
+    printf("Modular squaring test #1 successful.\n");
+
+    /* Test modular division */
+    MP_CHECKOK(mp_bdivmod(&b, &x, &pp, p, &c));
+    MP_CHECKOK(mp_bmulmod(&c, &x, p, &c));
+    if (mp_cmp(&b, &c) != 0) {
+        printf("c = (b / x) * x mod p should equal b\n");
+        printf("b = ");
+        mp_print(&b, stdout);
+        fputc('\n', stdout);
+        printf("c = ");
+        mp_print(&c, stdout);
+        fputc('\n', stdout);
+        return -1;
+    }
+    printf("Modular division test #1 successful.\n");
+
+CLEANUP:
+    /* Reached on success or from a failing MP_CHECKOK (assumed to set res). */
+    mp_clear(&d);
+    mp_clear(&c);
+    mp_clear(&order);
+    mp_clear(&y);
+    mp_clear(&x);
+    mp_clear(&b);
+    mp_clear(&a);
+    mp_clear(&pp);
+
+    return (res < MP_OKAY) ? -1 : 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/pi1k.txt b/security/nss/lib/freebl/mpi/tests/pi1k.txt
new file mode 100644
index 000000000..5ff6209ff
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/pi1k.txt
@@ -0,0 +1 @@
+31415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679821480865132823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461284756482337867831652712019091456485669234603486104543266482133936072602491412737245870066063155881748815209209628292540917153643678925903600113305305488204665213841469519415116094330572703657595919530921861173819326117931051185480744623799627495673518857527248912279381830119491298336733624406566430860213949463952247371907021798609437027705392171762931767523846748184676694051320005681271452635608277857713427577896091736371787214684409012249534301465495853710507922796892589235420199561121290219608640344181598136297747713099605187072113499999983729780499510597317328160963185950244594553469083026425223082533446850352619311881710100031378387528865875332083814206171776691473035982534904287554687311595628638823537875937519577818577805321712268066130019278766111959092164201989
diff --git a/security/nss/lib/freebl/mpi/tests/pi2k.txt b/security/nss/lib/freebl/mpi/tests/pi2k.txt
new file mode 100644
index 000000000..9ce82acd1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/pi2k.txt
@@ -0,0 +1 @@
+3141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067982148086513282306647093844609550582231725359408128481117450284102701938521105559644622948954930381964428810975665933446128475648233786783165271201909145648566923460348610454326648213393607260249141273724587006606315588174881520920962829254091715364367892590360011330530548820466521384146951941511609433057270365759591953092186117381932611793105118548074462379962749567351885752724891227938183011949129833673362440656643086021394946395224737190702179860943702770539217176293176752384674818467669405132000568127145263560827785771342757789609173637178721468440901224953430146549585371050792279689258923542019956112129021960864034418159813629774771309960518707211349999998372978049951059731732816096318595024459455346908302642522308253344685035261931188171010003137838752886587533208381420617177669147303598253490428755468731159562863882353787593751957781857780532171226806613001927876611195909216420198938095257201065485863278865936153381827968230301952035301852968995773622599413891249721775283479131515574857242454150695950829533116861727855889075098381754637464939319255060400927701671139009848824012858361603563707660104710181942955596198946767837449448255379774726847104047534646208046684259069491293313677028989152104752162056966024058038150193511253382430035587640247496473263914199272604269922796782354781636009341721641219924586315030286182974555706749838505494588586926995690927210797509302955321165344987202755960236480665499119881834797753566369807426542527862551818417574672890977772793800081647060016145249192173217214772350141441973568548161361157352552133475741849468438523323907394143334547762416862518983569485562099219222184272550254256887671790494601653466804988627232791786085784383827967976681454100953883786360950680064225125205117392984896084128488626945604241965285022210661186306744278622039194945047123713786960956364371917287467764657573962413890865832645995813390478027590
10
diff --git a/security/nss/lib/freebl/mpi/tests/pi5k.txt b/security/nss/lib/freebl/mpi/tests/pi5k.txt
new file mode 100644
index 000000000..901fac2ea
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/pi5k.txt
@@ -0,0 +1 @@
+3141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067982148086513282306647093844609550582231725359408128481117450284102701938521105559644622948954930381964428810975665933446128475648233786783165271201909145648566923460348610454326648213393607260249141273724587006606315588174881520920962829254091715364367892590360011330530548820466521384146951941511609433057270365759591953092186117381932611793105118548074462379962749567351885752724891227938183011949129833673362440656643086021394946395224737190702179860943702770539217176293176752384674818467669405132000568127145263560827785771342757789609173637178721468440901224953430146549585371050792279689258923542019956112129021960864034418159813629774771309960518707211349999998372978049951059731732816096318595024459455346908302642522308253344685035261931188171010003137838752886587533208381420617177669147303598253490428755468731159562863882353787593751957781857780532171226806613001927876611195909216420198938095257201065485863278865936153381827968230301952035301852968995773622599413891249721775283479131515574857242454150695950829533116861727855889075098381754637464939319255060400927701671139009848824012858361603563707660104710181942955596198946767837449448255379774726847104047534646208046684259069491293313677028989152104752162056966024058038150193511253382430035587640247496473263914199272604269922796782354781636009341721641219924586315030286182974555706749838505494588586926995690927210797509302955321165344987202755960236480665499119881834797753566369807426542527862551818417574672890977772793800081647060016145249192173217214772350141441973568548161361157352552133475741849468438523323907394143334547762416862518983569485562099219222184272550254256887671790494601653466804988627232791786085784383827967976681454100953883786360950680064225125205117392984896084128488626945604241965285022210661186306744278622039194945047123713786960956364371917287467764657573962413890865832645995813390478027590
09946576407895126946839835259570982582262052248940772671947826848260147699090264013639443745530506820349625245174939965143142980919065925093722169646151570985838741059788595977297549893016175392846813826868386894277415599185592524595395943104997252468084598727364469584865383673622262609912460805124388439045124413654976278079771569143599770012961608944169486855584840635342207222582848864815845602850601684273945226746767889525213852254995466672782398645659611635488623057745649803559363456817432411251507606947945109659609402522887971089314566913686722874894056010150330861792868092087476091782493858900971490967598526136554978189312978482168299894872265880485756401427047755513237964145152374623436454285844479526586782105114135473573952311342716610213596953623144295248493718711014576540359027993440374200731057853906219838744780847848968332144571386875194350643021845319104848100537061468067491927819119793995206141966342875444064374512371819217999839101591956181467514269123974894090718649423196156794520809514655022523160388193014209376213785595663893778708303906979207734672218256259966150142150306803844773454920260541466592520149744285073251866600213243408819071048633173464965145390579626856100550810665879699816357473638405257145910289706414011097120628043903975951567715770042033786993600723055876317635942187312514712053292819182618612586732157919841484882916447060957527069572209175671167229109816909152801735067127485832228718352093539657251210835791513698820914442100675103346711031412671113699086585163983150197016515116851714376576183515565088490998985998238734552833163550764791853589322618548963213293308985706420467525907091548141654985946163718027098199430992448895757128289059232332609729971208443357326548938239119325974636673058360414281388303203824903758985243744170291327656180937734440307074692112019130203303801976211011004492932151608424448596376698389522868478312355265821314495768572624334418930396864262434107732269780280731891544110104468232527162010526522721116603
966655730925471105578537634668206531098965269186205647693125705863566201855810072936065987648611791045334885034611365768675324944166803962657978771855608455296541266540853061434443185867697514566140680070023787765913440171274947042056223053899456131407112700040785473326993908145466464588079727082668306343285878569830523580893306575740679545716377525420211495576158140025012622859413021647155097925923099079654737612551765675135751782966645477917450112996148903046399471329621073404375189573596145890193897131117904297828564750320319869151402870808599048010941214722131794764777262241425485454033215718530614228813758504306332175182979866223717215916077166925474873898665494945011465406284336639379003976926567214638530673609657120918076383271664162748888007869256029022847210403172118608204190004229661711963779213375751149595015660496318629472654736425230817703675159067350235072835405670403867435136222247715891504953098444893330963408780769325993978054193414473774418426312986080998886874132604721
diff --git a/security/nss/lib/freebl/mpi/timetest b/security/nss/lib/freebl/mpi/timetest
new file mode 100755
index 000000000..c6f07bb30
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/timetest
@@ -0,0 +1,99 @@
+#!/bin/sh
+
+# Simple timing test for the MPI library.  Basically, we use prime
+# generation as a timing test, since it exercises most of the pathways
+# of the library fairly heavily.  The 'primegen' tool outputs a line
+# summarizing timing results.  We gather these and process them for
+# statistical information, which is collected into a file.
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Avoid using built-in shell echoes
+ECHO=/bin/echo
+MAKE=gmake
+PERL=perl
+
+# Use a fixed seed so timings will be more consistent
+# This one is the 11th-18th decimal digits of 'e'
+#export SEED=45904523
+SEED=45904523; export SEED
+
+#------------------------------------------------------------------------
+
+$ECHO "\n** Running timing tests for MPI library\n"
+# Rebuild metime through $MAKE; a failed build ends the script with status 1
+$ECHO "Bringing 'metime' up to date ... "
+if $MAKE metime ; then
+    :
+else # $MAKE returned a non-zero status
+    $ECHO "\nMake failed to build metime.\n"
+    exit 1
+fi
+# Even when make succeeds, insist on an executable ./metime before going on
+if [ ! -x ./metime ] ; then # -x: exists and is executable
+    $ECHO "\nCannot find 'metime' program, testing cannot continue.\n"
+    exit 1
+fi
+
+#------------------------------------------------------------------------
+# The same two checks, applied to primegen
+$ECHO "Bringing 'primegen' up to date ... "
+if $MAKE primegen ; then
+    :
+else
+    $ECHO "\nMake failed to build primegen.\n"
+    exit 1
+fi
+
+if [ ! -x ./primegen ] ; then
+    $ECHO "\nCannot find 'primegen' program, testing cannot continue.\n"
+    exit 1
+fi
+
+#------------------------------------------------------------------------
+
+rm -f timing-results.txt
+touch timing-results.txt # start from an empty results file
+# Operand sizes (bits) to time, and the count handed to metime for each size
+sizes="256 512 1024 2048"
+ntests=10
+# On INT or HUP: print a notice, delete temp files and partial results, exit 0
+trap 'echo "oop!";rm -f tt*.tmp timing-results.txt;exit 0' INT HUP
+
+$ECHO "\n-- Modular exponentiation\n"
+$ECHO "Modular exponentiation:" >> timing-results.txt
+
+$ECHO "Running $ntests modular exponentiations per test:"
+for size in $sizes ; do
+    $ECHO "- Gathering statistics for $size bits ... "
+    secs=`./metime $ntests $size | tail -1 | awk '{print $2}'` # 2nd field of metime's last line
+    $ECHO "$size: " $secs " seconds per op" >> timing-results.txt
+    tail -1 timing-results.txt # echo the line just appended
+done
+
+$ECHO "<done>";
+
+sizes="256 512 1024"
+ntests=1
+# Prime generation reuses $sizes and $ntests with the new values set above
+$ECHO "\n-- Prime generation\n"
+$ECHO "Prime generation:" >> timing-results.txt
+# Field 7 of every 'ticks' line, with any '(' removed, goes to the stats script
+$ECHO "Generating $ntests prime values per test:"
+for size in $sizes ; do
+    $ECHO "- Gathering statistics for $size bits ... "
+    ./primegen $size $ntests | grep ticks | awk '{print $7}' | tr -d '(' > tt$$.tmp
+    $ECHO "$size:" >> timing-results.txt
+    $PERL stats tt$$.tmp >> timing-results.txt
+    tail -1 timing-results.txt # echo the last line written by stats
+    rm -f tt$$.tmp # per-size scratch file, named after this shell's PID
+done
+
+$ECHO "<done>"
+# NOTE(review): this replaces the earlier INT/HUP trap right before exiting
+trap 'rm -f tt*.tmp timing-results.txt' INT HUP
+
+exit 0
+
diff --git a/security/nss/lib/freebl/mpi/types.pl b/security/nss/lib/freebl/mpi/types.pl
new file mode 100755
index 000000000..c5f38afa5
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/types.pl
@@ -0,0 +1,127 @@
+#!/usr/bin/perl
+
+#
+# types.pl - find recommended type definitions for digits and words
+#
+# This script scans the Makefile for the C compiler and compilation
+# flags currently in use, and using this combination, attempts to
+# compile a simple test program that outputs the sizes of the various
+# unsigned integer types, in bytes.  Armed with these, it finds all
+# the "viable" type combinations for mp_digit and mp_word, where
+# viability is defined by the requirement that mp_word be exactly two
+# times the size of mp_digit.
+#
+# Of these, the one with the largest digit size is chosen, and
+# appropriate typedef statements are written to standard output.
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+@_=split(/\//,$0);chomp($prog=pop(@_)); # $prog: last path component of $0, used in error messages
+
+# The array of integer types to be considered...
+@TYPES = ( 
+	   "unsigned char", 
+	   "unsigned short", 
+	   "unsigned int", 
+	   "unsigned long"
+);
+
+# Macro names for the maximum unsigned value of each type
+%TMAX = ( 
+	  "unsigned char"   => "UCHAR_MAX",
+	  "unsigned short"  => "USHRT_MAX",
+	  "unsigned int"    => "UINT_MAX",
+	  "unsigned long"   => "ULONG_MAX"
+);
+
+# Read the Makefile to find the C compiler (CC=...) and its flags (CFLAGS=...)
+open(MFP, "<Makefile") or die "$prog: Makefile: $!\n";
+while(<MFP>) {
+    chomp;
+    if(/^CC=(.*)$/) { # only matches with no space before or after the name
+	$cc = $1;
+	last if $cflags; # both values seen: stop reading
+    } elsif(/^CFLAGS=(.*)$/) {
+	$cflags = $1;
+	last if $cc; # both values seen: stop reading
+    }
+}
+close(MFP);
+
+# If we couldn't find that, use 'cc' by default ($cflags may stay empty)
+$cc = "cc" unless $cc;
+
+printf STDERR "Using '%s' as the C compiler.\n", $cc;
+
+print STDERR "Determining type sizes ... \n";
+open(OFP, ">tc$$.c") or die "$prog: tc$$.c: $!\n"; # probe source, named after our PID
+print OFP "#include <stdio.h>\n\nint main(void)\n{\n";
+foreach $type (@TYPES) {
+    printf OFP "\tprintf(\"%%d\\n\", (int)sizeof(%s));\n", $type; # one sizeof line per type
+}
+print OFP "\n\treturn 0;\n}\n";
+close(OFP);
+# Compile the probe with the compiler and flags read from the Makefile
+system("$cc $cflags -o tc$$ tc$$.c");
+# system()'s status is not inspected; a missing executable signals failure
+die "$prog: unable to build test program\n" unless(-x "tc$$");
+# Output line N is stored as the size of $TYPES[N]
+open(IFP, "./tc$$|") or die "$prog: can't execute test program\n";
+$ix = 0;
+while(<IFP>) {
+    chomp;
+    $size{$TYPES[$ix++]} = $_;
+}
+close(IFP);
+# Remove the probe binary and its source
+unlink("tc$$");
+unlink("tc$$.c");
+
+print STDERR "Selecting viable combinations ... \n";
+while(($type, $size) = each(%size)) {
+    push(@ts, [ $size, $type ]);
+}
+# Each @ts entry is a [ size-in-bytes, type-name ] pair
+# Sort them ascending by size
+@ts = sort { $a->[0] <=> $b->[0] } @ts;
+
+# Try all possible combinations, finding pairs in which the word size
+# is twice the digit size.  The number of possible pairs is too small
+# to bother doing this more efficiently than by brute force
+for($ix = 0; $ix <= $#ts; $ix++) {
+    $w = $ts[$ix];
+    # $w is the candidate word type; $d below is the candidate digit type
+    for($jx = 0; $jx <= $#ts; $jx++) {
+	$d = $ts[$jx];
+	# @valid collects [ digit-entry, word-entry ] pairs
+	if($w->[0] == 2 * $d->[0]) {
+	    push(@valid, [ $d, $w ]);
+	}
+    }
+}
+
+# Sort descending by digit size
+@valid = sort { $b->[0]->[0] <=> $a->[0]->[0] } @valid;
+# NOTE(review): $rec is undef if no pair qualified; that case is not checked
+# Select the maximum as the recommended combination
+$rec = shift(@valid);
+
+printf("typedef %-18s mp_sign;\n", "char");
+printf("typedef %-18s mp_digit;  /* %d byte type */\n", # chosen digit: name, then size
+       $rec->[0]->[1], $rec->[0]->[0]);
+printf("typedef %-18s mp_word;   /* %d byte type */\n", # chosen word: name, then size
+       $rec->[1]->[1], $rec->[1]->[0]);
+printf("typedef %-18s mp_size;\n", "unsigned int");
+printf("typedef %-18s mp_err;\n\n", "int");
+# Limits: bit widths come from sizeof, maxima from the macro names in %TMAX
+printf("#define %-18s (CHAR_BIT*sizeof(mp_digit))\n", "DIGIT_BIT");
+printf("#define %-18s %s\n", "DIGIT_MAX", $TMAX{$rec->[0]->[1]});
+printf("#define %-18s (CHAR_BIT*sizeof(mp_word))\n", "MP_WORD_BIT");
+printf("#define %-18s %s\n\n", "MP_WORD_MAX", $TMAX{$rec->[1]->[1]});
+printf("#define %-18s (DIGIT_MAX+1)\n\n", "RADIX");
+# DIGIT_FMT: zero-padded upper-case hex, two characters per byte of the digit
+printf("#define %-18s \"%%0%dX\"\n", "DIGIT_FMT", (2 * $rec->[0]->[0]));
+
+exit 0;
diff --git a/security/nss/lib/freebl/mpi/utils/LICENSE b/security/nss/lib/freebl/mpi/utils/LICENSE
new file mode 100644
index 000000000..5f96df7ab
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/LICENSE
@@ -0,0 +1,4 @@
+Within this directory, each of the files listed below is licensed under
+the terms given in the file LICENSE-MPL, also in this directory.
+
+PRIMES
diff --git a/security/nss/lib/freebl/mpi/utils/LICENSE-MPL b/security/nss/lib/freebl/mpi/utils/LICENSE-MPL
new file mode 100644
index 000000000..41dc2327f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/LICENSE-MPL
@@ -0,0 +1,3 @@
+This Source Code Form is subject to the terms of the Mozilla Public
+License, v. 2.0. If a copy of the MPL was not distributed with this
+file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/utils/PRIMES b/security/nss/lib/freebl/mpi/utils/PRIMES
new file mode 100644
index 000000000..ed65703ff
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/PRIMES
@@ -0,0 +1,41 @@
+Probable primes (sorted by number of significant bits)
+
+ 128: 81386202757205669562183851789305348631
+
+ 128: 180241813863264101444573802809858694397
+
+ 128: 245274683055224433281596312431122059021
+
+ 128: 187522309397665259809392608791686659539
+
+ 256: 83252422946206411852330647237287722547866360773229941071371588246436\
+      513990159
+
+ 256: 79132571131322331023736933767063051273085304521895229780914612117520\
+      058517909
+
+ 256: 72081815425552909748220041100909735706208853818662000557743644603407\
+      965465527
+
+ 256: 87504602391905701494845474079163412737334477797316409702279059573654\
+      274811271
+
+ 512: 12233064210800062190450937494718705259777386009095453001870729392786\
+      63450255179083524798507997690270500580265258111668148238355016411719\
+      9168737693316468563
+
+ 512: 12003639081420725322369909586347545220275253633035565716386136197501\
+      88208318984400479275215620499883521216480724155582768193682335576385\
+      2069481074929084063
+
+1024: 16467877625718912296741904171202513097057724053648819680815842057593\
+      20371835940722471475475803725455063836431454757000451907612224427007\
+      63984592414360595161051906727075047683803534852982766542661204179549\
+      77327573530800542562611753617736693359790119074768292178493884576587\
+      0230450429880021317876149636714743053
+
+1024: 16602953991090311275234291158294516471009930684624948451178742895360\
+      86073703307475884280944414508444679430090561246728195735962931545473\
+      40743240318558456247740186704660778277799687988031119436541068736925\
+      20563780233711166724859277827382391527748470939542560819625727876091\
+      5372193745283891895989104479029844957
diff --git a/security/nss/lib/freebl/mpi/utils/README b/security/nss/lib/freebl/mpi/utils/README
new file mode 100644
index 000000000..61c8e2efa
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/README
@@ -0,0 +1,206 @@
+This Source Code Form is subject to the terms of the Mozilla Public
+License, v. 2.0. If a copy of the MPL was not distributed with this
+file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+Additional MPI utilities
+------------------------
+
+The files 'mpprime.h' and 'mpprime.c' define some useful extensions to
+the MPI library for dealing with prime numbers (in particular, testing
+for divisibility, and the Rabin-Miller probabilistic primality test).
+
+The files 'mplogic.h' and 'mplogic.c' define extensions to the MPI
+library for doing bitwise logical operations and shifting.
+
+This document assumes you have read the help file for the MPI library
+and understand its conventions.
+
+Divisibility (mpprime.h)
+------------
+
+To test a number for divisibility by another number:
+
+mpp_divis(a, b)		- test if b|a
+mpp_divis_d(a, d)	- test if d|a
+
+Each of these functions returns MP_YES if its initial argument is
+divisible by its second, or MP_NO if it is not.  Other errors may be
+returned as appropriate (such as MP_RANGE if you try to test for
+divisibility by zero).
+
+Randomness (mpprime.h)
+----------
+
+To generate random data:
+
+mpp_random(a)		- fill a with random data
+mpp_random_size(a, p)   - fill a with p digits of random data
+
+The mpp_random_size() function increases the precision of a to at
+least p, then fills all those digits randomly.  The mpp_random()
+function fills a to its current precision (as determined by the number
+of significant digits, USED(a))
+
+Note that these functions simply use the C library's rand() function
+to fill a with random digits up to its precision.  This should be
+adequate for primality testing, but should not be used for
+cryptographic applications where truly random values are required for
+security.  
+
+You should call srand() in your driver program in order to seed the
+random generator; this function doesn't call it.
+
+Primality Testing (mpprime.h)
+-----------------
+
+mpp_divis_vector(a, v, s, w)   - is a divisible by any of the s values
+                                 in v, and if so, w = which.
+mpp_divis_primes(a, np)   - is a divisible by any of the first np primes?
+mpp_fermat(a, w)          - is a pseudoprime with respect to witness w?
+mpp_pprime(a, nt)	  - run nt iterations of Rabin-Miller on a.
+
+The mpp_divis_vector() function tests a for divisibility by each
+member of an array of digits.  The array is v, the size of that array
+is s.  Returns MP_YES if a is divisible, and stores the index of the
+offending digit in w.  Returns MP_NO if a is not divisible by any of
+the digits in the array.
+
+A small table of primes is compiled into the library (typically the
+first 128 primes, although you can change this by editing the file
+'primes.c' before you build).  The global variable prime_tab_size
+contains the number of primes in the table, and the values themselves
+are in the array prime_tab[], which is an array of mp_digit.
+
+The mpp_divis_primes() function is basically just a wrapper around
+mpp_divis_vector() that uses prime_tab[] as the test vector.  The np
+parameter is a pointer to an mp_digit -- on input, it should specify
+the number of primes to be tested against.  If a is divisible by any
+of the primes, MP_YES is returned and np is given the prime value that
+divided a (you can use this if you're factoring, for example).
+Otherwise, MP_NO is returned and np is untouched.
+
+The function mpp_fermat() performs Fermat's test, using w as a
+witness.  This test basically relies on the fact that if a is prime,
+and w is relatively prime to a, then:
+
+	w^a = w (mod a)
+
+That is,
+
+	w^(a - 1) = 1 (mod a)
+
+The function returns MP_YES if the test passes, MP_NO if it fails.  If
+w is relatively prime to a, and the test fails, a is definitely
+composite.  If w is relatively prime to a and the test passes, then a
+is either prime, or w is a false witness (the probability of this
+happening depends on the choice of w and of a ... consult a number
+theory textbook for more information about this).  
+
+Note:  If (w, a) != 1, the output of this test is meaningless.
+----
+
+The function mpp_pprime() performs the Rabin-Miller probabilistic
+primality test for nt rounds.  If all the tests pass, MP_YES is
+returned, and a is probably prime.  The probability that an answer of
+MP_YES is incorrect is no greater than 1 in 4^nt, and in fact is
+usually much less than that (this is a pessimistic estimate).  If any
+test fails, MP_NO is returned, and a is definitely composite.
+
+Bruce Schneier recommends at least 5 iterations of this test for most
+cryptographic applications; Knuth suggests that 25 are reasonable.
+Run it as many times as you feel are necessary.
+
+See the programs 'makeprime.c' and 'isprime.c' for reasonable examples
+of how to use these functions for primality testing.
+
+
+Bitwise Logic (mplogic.c)
+-------------
+
+The four commonest logical operations are implemented as:
+
+mpl_not(a, b)		- Compute bitwise (one's) complement, b = ~a
+
+mpl_and(a, b, c)	- Compute bitwise AND, c = a & b
+
+mpl_or(a, b, c)		- Compute bitwise OR, c = a | b
+
+mpl_xor(a, b, c)	- Compute bitwise XOR, c = a ^ b
+
+Left and right shifts are available as well.  These take a number to
+shift, a destination, and a shift amount.  The shift amount must be a
+digit value between 0 and DIGIT_BIT inclusive; if it is not, MP_RANGE
+will be returned and the shift will not happen.
+
+mpl_rsh(a, b, d)	- Compute logical right shift, b = a >> d
+
+mpl_lsh(a, b, d)	- Compute logical left shift, b = a << d
+
+Since these are logical shifts, they fill with zeroes (the library
+uses a signed magnitude representation, so there are no sign bits to
+extend anyway).
+
+
+Command-line Utilities
+----------------------
+
+A handful of interesting command-line utilities are provided.  These
+are:
+
+lap.c		- Find the order of a mod m.  Usage is 'lap <a> <m>'.
+                  This uses a dumb algorithm, so don't use it for 
+                  a really big modulus.
+
+invmod.c	- Find the inverse of a mod m, if it exists.  Usage
+		  is 'invmod <a> <m>'
+
+sieve.c		- A simple bitmap-based implementation of the Sieve
+		  of Eratosthenes.  Used to generate the table of 
+		  primes in primes.c.  Usage is 'sieve <nbits>'
+
+prng.c          - Uses the routines in bbs_rand.{h,c} to generate
+                  one or more 32-bit pseudo-random integers.  This
+                  is mainly an example, not intended for use in a
+                  cryptographic application (the system time is 
+                  the only source of entropy used)
+
+dec2hex.c       - Convert decimal to hexadecimal
+
+hex2dec.c       - Convert hexadecimal to decimal
+
+basecvt.c       - General radix conversion tool (supports 2-64)
+
+isprime.c       - Probabilistically test an integer for primality
+                  using the Rabin-Miller pseudoprime test combined
+                  with division by small primes.
+
+primegen.c      - Generate primes at random.
+
+exptmod.c       - Perform modular exponentiation
+
+ptab.pl		- A Perl script to munge the output of the sieve
+		  program into a compilable C structure.
+
+
+Other Files
+-----------
+
+PRIMES		- Some randomly generated numbers which are prime with
+		  extremely high probability.
+
+README		- You're reading me already.
+
+
+About the Author
+----------------
+
+This software was written by Michael J. Fromberger.  You can contact
+the author as follows:
+
+E-mail:	  <sting@linguist.dartmouth.edu>
+
+Postal:	  8000 Cummings Hall, Thayer School of Engineering
+	  Dartmouth College, Hanover, New Hampshire, USA
+
+PGP key:  http://linguist.dartmouth.edu/~sting/keys/mjf.html
+          9736 188B 5AFA 23D6 D6AA  BE0D 5856 4525 289D 9907
diff --git a/security/nss/lib/freebl/mpi/utils/basecvt.c b/security/nss/lib/freebl/mpi/utils/basecvt.c
new file mode 100644
index 000000000..0e9915406
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/basecvt.c
@@ -0,0 +1,68 @@
+/*
+ *  basecvt.c
+ *
+ *  Convert integer values specified on the command line from one input
+ *  base to another.  Accepts input and output bases between 2 and 64
+ *  inclusive.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mpi.h"
+
+#define IBASE 10
+#define OBASE 16
+#define USAGE "Usage: %s ibase obase [value]\n"
+#define MAXBASE 64
+#define MINBASE 2
+
+int
+main(int argc, char *argv[])
+{
+    int next = 1; /* index of the next unread argument */
+    int ibase = IBASE, obase = OBASE;
+    mp_int val;
+
+    /* First optional argument: input radix (IBASE is kept when absent). */
+    if (next < argc) {
+        ibase = atoi(argv[next]);
+        next++;
+        if (!(ibase >= MINBASE && ibase <= MAXBASE)) {
+            fprintf(stderr, "%s: input radix must be between %d and %d inclusive\n",
+                    argv[0], MINBASE, MAXBASE);
+            return 1;
+        }
+    }
+    /* Second optional argument: output radix (OBASE is kept when absent). */
+    if (next < argc) {
+        obase = atoi(argv[next]);
+        next++;
+        if (!(obase >= MINBASE && obase <= MAXBASE)) {
+            fprintf(stderr, "%s: output radix must be between %d and %d inclusive\n",
+                    argv[0], MINBASE, MAXBASE);
+            return 1;
+        }
+    }
+
+    /* Each remaining argument is read in ibase and printed in obase. */
+    mp_init(&val);
+    for (; next < argc; next++) {
+        char *text;
+
+        mp_read_radix(&val, argv[next], ibase);
+        text = calloc(mp_radix_size(&val, obase), sizeof(char));
+        mp_toradix(&val, text, obase);
+
+        printf("%s\n", text);
+        free(text);
+    }
+
+    mp_clear(&val);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/bbs_rand.c b/security/nss/lib/freebl/mpi/utils/bbs_rand.c
new file mode 100644
index 000000000..fed2fe2e6
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/bbs_rand.c
@@ -0,0 +1,65 @@
+/*
+ *  Blum, Blum & Shub PRNG using the MPI library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "bbs_rand.h"
+
+#define SEED 1    /* bbs_init bit: bbs_state has been mp_init()ed */
+#define MODULUS 2 /* bbs_init bit: the modulus has been parsed    */
+
+/* This modulus is the product of two randomly generated 512-bit
+   prime integers, each of which is congruent to 3 (mod 4).          */
+static char bbs_modulus[] =
+    "75A2A6E1D27393B86562B9CE7279A8403CB4258A637DAB5233465373E37837383EDC"
+    "332282B8575927BC4172CE8C147B4894050EE9D2BDEED355C121037270CA2570D127"
+    "7D2390CD1002263326635CC6B259148DE3A1A03201980A925E395E646A5E9164B0EC"
+    "28559EBA58C87447245ADD0651EDA507056A1129E3A3E16E903D64B437";
+
+static int bbs_init = 0; /* SEED / MODULUS bits, set as each part is set up */
+static mp_int bbs_state; /* the current state of the generator   */
+
+/* Suggested size of random seed data: one byte per two hex digits.  */
+int bbs_seed_size = (sizeof(bbs_modulus) / 2); /* sizeof an array, not a pointer */
+
+void
+bbs_srand(unsigned char *data, int len)
+{
+    /* The state integer is initialized lazily, on the first seeding only. */
+    if (!(bbs_init & SEED)) {
+        mp_init(&bbs_state);
+        bbs_init |= SEED;
+    }
+    /* Hand the caller's buffer to mp_read_raw() to set the generator state. */
+    mp_read_raw(&bbs_state, (char *)data, len);
+} /* end bbs_srand() */
+
+unsigned int
+bbs_rand(void)
+{
+    static mp_int modulus;
+    unsigned int out = 0;
+    unsigned int nbytes;
+
+    /* Parse the hex modulus once, the first time a value is requested. */
+    if (!(bbs_init & MODULUS)) {
+        mp_init(&modulus);
+        mp_read_radix(&modulus, bbs_modulus, 16);
+        bbs_init |= MODULUS;
+    }
+
+    /* One modular squaring per output byte; keep the low byte of digit 0. */
+    for (nbytes = 0; nbytes < sizeof(unsigned int); nbytes++) {
+        mp_sqrmod(&bbs_state, &modulus, &bbs_state);
+        out <<= CHAR_BIT;
+        out |= (unsigned int)(DIGIT(&bbs_state, 0) & UCHAR_MAX);
+    }
+
+    return out;
+
+} /* end bbs_rand() */
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS                                                  */
diff --git a/security/nss/lib/freebl/mpi/utils/bbs_rand.h b/security/nss/lib/freebl/mpi/utils/bbs_rand.h
new file mode 100644
index 000000000..d12269bf9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/bbs_rand.h
@@ -0,0 +1,24 @@
+/*
+ *  bbs_rand.h
+ *
+ *  Blum, Blum & Shub PRNG using the MPI library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _H_BBSRAND_
+#define _H_BBSRAND_
+
+#include <limits.h>
+#include "mpi.h"
+
+#define BBS_RAND_MAX UINT_MAX
+
+/* Suggested length of seed data */
+extern int bbs_seed_size;
+
+void bbs_srand(unsigned char *data, int len);
+unsigned int bbs_rand(void);
+
+#endif /* end _H_BBSRAND_ */
diff --git a/security/nss/lib/freebl/mpi/utils/bbsrand.c b/security/nss/lib/freebl/mpi/utils/bbsrand.c
new file mode 100644
index 000000000..d9151e005
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/bbsrand.c
@@ -0,0 +1,35 @@
+/*
+ *  bbsrand.c
+ *
+ *  Test driver for routines in bbs_rand.h
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <limits.h>
+
+#include "bbs_rand.h"
+
+#define NUM_TESTS 100
+
+int
+main(void)
+{
+    unsigned int seed = time(NULL); /* the current time is the only seed */
+    unsigned int round;
+
+    bbs_srand((unsigned char *)&seed, sizeof(seed));
+
+    /* Print NUM_TESTS generator outputs, numbered starting from 1. */
+    for (round = 1; round <= NUM_TESTS; round++) {
+        unsigned int value = bbs_rand();
+        printf("Test %3u: %08X\n", round, value);
+    }
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/dec2hex.c b/security/nss/lib/freebl/mpi/utils/dec2hex.c
new file mode 100644
index 000000000..ef3a52095
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/dec2hex.c
@@ -0,0 +1,40 @@
+/*
+ *  dec2hex.c
+ *
+ *  Convert decimal integers into hexadecimal
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mpi.h"
+
+int
+main(int argc, char *argv[])
+{
+    mp_int a;
+    char *hex;
+
+    /* Only argv[1] is used; any further arguments are ignored. */
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <a>\n", argv[0]);
+        return 1;
+    }
+
+    /* Read argv[1] in base 10, then render the same value in base 16. */
+    mp_init(&a);
+    mp_read_radix(&a, argv[1], 10);
+    hex = malloc(mp_radix_size(&a, 16));
+    mp_toradix(&a, hex, 16);
+
+    printf("%s\n", hex);
+
+    free(hex);
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/exptmod.c b/security/nss/lib/freebl/mpi/utils/exptmod.c
new file mode 100644
index 000000000..3ac9078f4
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/exptmod.c
@@ -0,0 +1,55 @@
+/*
+ *  exptmod.c
+ *
+ * Command line tool to perform modular exponentiation on arbitrary
+ * precision integers.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mpi.h"
+
+int
+main(int argc, char *argv[])
+{
+    mp_int a, b, m;
+    mp_err res;
+    char *str;
+    int len, rval = 0;
+
+    /* All three operands are required: argv[3] (the modulus) is read below. */
+    if (argc < 4) {
+        fprintf(stderr, "Usage: %s <a> <b> <m>\n", argv[0]);
+        return 1;
+    }
+
+    mp_init(&a);
+    mp_init(&b);
+    mp_init(&m);
+    mp_read_radix(&a, argv[1], 10);
+    mp_read_radix(&b, argv[2], 10);
+    mp_read_radix(&m, argv[3], 10);
+
+    /* a = a^b mod m, computed in place. */
+    if ((res = mp_exptmod(&a, &b, &m, &a)) != MP_OKAY) {
+        fprintf(stderr, "%s: error: %s\n", argv[0], mp_strerror(res));
+        rval = 1;
+    } else {
+        len = mp_radix_size(&a, 10);
+        str = calloc(len, sizeof(char));
+        mp_toradix(&a, str, 10);
+        printf("%s\n", str);
+        free(str);
+    }
+
+    mp_clear(&a);
+    mp_clear(&b);
+    mp_clear(&m);
+
+    return rval;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/fact.c b/security/nss/lib/freebl/mpi/utils/fact.c
new file mode 100644
index 000000000..da8e61a32
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/fact.c
@@ -0,0 +1,84 @@
+/*
+ * fact.c
+ *
+ * Compute factorial of input integer
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mpi.h"
+
+mp_err mp_fact(mp_int *a, mp_int *b);
+
+/*
+ * Print the factorial of the decimal integer given as argv[1].
+ */
+int
+main(int argc, char *argv[])
+{
+    mp_int n;
+    mp_err status;
+    char *text;
+    int exit_code = 0;
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <number>\n", argv[0]);
+        return 1;
+    }
+
+    /* Read the operand and replace it with its factorial. */
+    mp_init(&n);
+    mp_read_radix(&n, argv[1], 10);
+    status = mp_fact(&n, &n);
+
+    if (status == MP_OKAY) {
+        /* The text buffer is sized by mp_radix_size() for base 10. */
+        text = malloc(mp_radix_size(&n, 10));
+        mp_todecimal(&n, text);
+        puts(text);
+
+        free(text);
+    } else {
+        fprintf(stderr, "%s: error: %s\n", argv[0],
+                mp_strerror(status));
+        exit_code = 1;
+    }
+
+    mp_clear(&n);
+    return exit_code;
+}
+
+mp_err
+mp_fact(mp_int *a, mp_int *b)
+{
+    mp_int ix, s;
+    mp_err res = MP_OKAY;
+
+    if (mp_cmp_z(a) < 0)
+        return MP_UNDEF;
+
+    mp_init(&s);
+    mp_add_d(&s, 1, &s); /* s = 1  */
+    mp_init(&ix);
+    mp_add_d(&ix, 1, &ix); /* ix = 1 */
+
+    for (/*  */; mp_cmp(&ix, a) <= 0; mp_add_d(&ix, 1, &ix)) {
+        if ((res = mp_mul(&s, &ix, &s)) != MP_OKAY)
+            break;
+    }
+
+    mp_clear(&ix);
+
+    /* Copy out results if we got them */
+    if (res == MP_OKAY)
+        mp_copy(&s, b);
+
+    mp_clear(&s);
+
+    return res;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/gcd.c b/security/nss/lib/freebl/mpi/utils/gcd.c
new file mode 100644
index 000000000..9f11a250b
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/gcd.c
@@ -0,0 +1,95 @@
+/*
+ *  gcd.c
+ *
+ *  Greatest common divisor
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mpi.h"
+
+char *g_prog = NULL;
+
+void print_mp_int(mp_int *mp, FILE *ofp);
+
+/*
+ * Print gcd(a, b) of two decimal arguments; as 'xgcd' also print x and y.
+ */
+int
+main(int argc, char *argv[])
+{
+    mp_int a, b, x, y;
+    mp_err res;
+    const char *base;
+    int ext, rv = 0;
+
+    g_prog = argv[0];
+
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s <a> <b>\n", g_prog);
+        return 1;
+    }
+
+    mp_init(&a);
+    mp_read_radix(&a, argv[1], 10);
+    mp_init(&b);
+    mp_read_radix(&b, argv[2], 10);
+
+    /*
+     * If we were called 'xgcd', compute x, y so that g = ax + by.  Only the
+     * last path component is compared, so "./xgcd" selects extended mode
+     * too (the whole of argv[0] used to be compared).
+     */
+    base = strrchr(g_prog, '/');
+    base = (base != NULL) ? base + 1 : g_prog;
+    ext = (strcmp(base, "xgcd") == 0);
+
+    if (ext) {
+        mp_init(&x);
+        mp_init(&y);
+        res = mp_xgcd(&a, &b, &a, &x, &y);
+    } else {
+        res = mp_gcd(&a, &b, &a);
+    }
+
+    if (res != MP_OKAY) {
+        fprintf(stderr, "%s: error: %s\n", g_prog, mp_strerror(res));
+        rv = 1;
+    } else {
+        print_mp_int(&a, stdout);
+        if (ext) {
+            fputs("x = ", stdout);
+            print_mp_int(&x, stdout);
+            fputs("y = ", stdout);
+            print_mp_int(&y, stdout);
+        }
+    }
+
+    /* Single cleanup path for both success and failure. */
+    mp_clear(&a);
+    mp_clear(&b);
+    if (ext) {
+        mp_clear(&x);
+        mp_clear(&y);
+    }
+
+    return rv;
+}
+
+void
+print_mp_int(mp_int *mp, FILE *ofp)
+{
+    /* Zero-filled scratch buffer, sized by mp_radix_size() for base 10. */
+    char *digits = calloc(mp_radix_size(mp, 10), sizeof(char));
+
+    /* Render mp in decimal and emit it as one line on ofp. */
+    mp_todecimal(mp, digits);
+    fprintf(ofp, "%s\n", digits);
+
+    free(digits);
+}
diff --git a/security/nss/lib/freebl/mpi/utils/hex2dec.c b/security/nss/lib/freebl/mpi/utils/hex2dec.c
new file mode 100644
index 000000000..9b21d22e0
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/hex2dec.c
@@ -0,0 +1,40 @@
+/*
+ *  hex2dec.c
+ *
+ *  Convert hexadecimal integers into decimal
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mpi.h"
+
+int
+main(int argc, char *argv[])
+{
+    mp_int a;
+    char *buf;
+    int rv = 1;
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <a>\n", argv[0]);
+        return 1;
+    }
+    /* Parse the argument as hexadecimal, then render it in base 10. */
+    mp_init(&a);
+    mp_read_radix(&a, argv[1], 16);
+    if ((buf = malloc(mp_radix_size(&a, 10))) != NULL) {
+        mp_toradix(&a, buf, 10);
+        printf("%s\n", buf);
+        rv = 0;
+    } else { /* a failed allocation used to reach mp_toradix() unchecked */
+        fprintf(stderr, "%s: out of memory\n", argv[0]);
+    }
+    free(buf);
+    mp_clear(&a);
+    return rv;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/identest.c b/security/nss/lib/freebl/mpi/utils/identest.c
new file mode 100644
index 000000000..321d2c2b0
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/identest.c
@@ -0,0 +1,84 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "mpi.h"
+#include "mpprime.h"
+#include <sys/types.h>
+#include <time.h>
+
+#define MAX_PREC (4096 / MP_DIGIT_BIT)
+
+mp_err
+identity_test(void)
+{
+    mp_int a, b;
+    mp_int rem, quot, prod, sum, diff;
+    mp_size len_a, len_b;
+    mp_err res;
+
+    len_a = (rand() % MAX_PREC) + 1;
+    len_b = (rand() % MAX_PREC) + 1;
+
+    /* Null all digit pointers first -- presumably so CLEANUP may clear values mp_init() never reached. */
+    MP_DIGITS(&a) = 0;
+    MP_DIGITS(&b) = 0;
+    MP_DIGITS(&rem) = 0;
+    MP_DIGITS(&quot) = 0;
+    MP_DIGITS(&prod) = 0;
+    MP_DIGITS(&sum) = 0;
+    MP_DIGITS(&diff) = 0;
+
+    MP_CHECKOK(mp_init(&a));
+    MP_CHECKOK(mp_init(&b));
+    MP_CHECKOK(mp_init(&rem));
+    MP_CHECKOK(mp_init(&quot));
+    MP_CHECKOK(mp_init(&prod));
+    MP_CHECKOK(mp_init(&sum));
+    MP_CHECKOK(mp_init(&diff));
+
+    MP_CHECKOK(mpp_random_size(&a, len_a));
+    MP_CHECKOK(mpp_random_size(&b, len_b));
+
+    if (mp_cmp(&a, &b) < 0)
+        mp_exch(&a, &b);
+
+    /* Check the division identity a == (a % b) + (a / b) * b. */
+    MP_CHECKOK(mp_mod(&a, &b, &rem));        /* rem  = a%b */
+    MP_CHECKOK(mp_div(&a, &b, &quot, NULL)); /* quot = a/b */
+    MP_CHECKOK(mp_mul(&b, &quot, &prod));    /* prod = (a/b)*b */
+    MP_CHECKOK(mp_add(&rem, &prod, &sum));   /* sum  = a%b + (a/b)*b */
+    MP_CHECKOK(mp_sub(&sum, &a, &diff));     /* diff = a%b + (a/b)*b - a */
+    if (mp_cmp_z(&diff) != 0)
+        res = MP_UNDEF; /* identity violated */
+
+CLEANUP:
+    mp_clear(&diff);
+    mp_clear(&sum);
+    mp_clear(&prod);
+    mp_clear(&quot);
+    mp_clear(&rem);
+    mp_clear(&b);
+    mp_clear(&a);
+    return res;
+}
+
+int
+main(void)
+{
+    unsigned long passed = 0;
+    mp_err res;
+
+    srand((unsigned int)time(NULL));
+
+    /* Run until a test fails, printing a dot for every 100 passes. */
+    for (res = identity_test(); res == MP_OKAY; res = identity_test()) {
+        passed++;
+        if ((passed % 100) == 0)
+            fputc('.', stderr);
+    }
+    fprintf(stderr, "\ntest failed, err %d\n", res);
+    return res;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/invmod.c b/security/nss/lib/freebl/mpi/utils/invmod.c
new file mode 100644
index 000000000..9b4b04d3f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/invmod.c
@@ -0,0 +1,61 @@
+/*
+ *  invmod.c
+ *
+ *  Compute modular inverses
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "mpi.h"
+
+int
+main(int argc, char *argv[])
+{
+    mp_int a, m;
+    mp_err res;
+    int status;
+
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s <a> <m>\n", argv[0]);
+        return 1;
+    }
+
+    /* Both operands are read as decimal. */
+    mp_init(&a);
+    mp_init(&m);
+    mp_read_radix(&a, argv[1], 10);
+    mp_read_radix(&m, argv[2], 10);
+
+    /* Reduce a modulo m first when it is larger than the modulus. */
+    if (mp_cmp(&a, &m) > 0)
+        mp_mod(&a, &m, &a);
+
+    /*
+     * Exit status: 0 when the inverse was printed, 1 when mp_invmod()
+     * reports MP_UNDEF (no inverse), 2 for any other error.
+     */
+    res = mp_invmod(&a, &m, &a);
+    if (res == MP_OKAY) {
+        char *text = malloc(mp_radix_size(&a, 10));
+
+        mp_toradix(&a, text, 10);
+        printf("%s\n", text);
+        free(text);
+        status = 0;
+    } else if (res == MP_UNDEF) {
+        printf("No inverse\n");
+        status = 1;
+    } else {
+        printf("error: %s (%d)\n", mp_strerror(res), res);
+        status = 2;
+    }
+
+    mp_clear(&a);
+    mp_clear(&m);
+
+    return status;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/isprime.c b/security/nss/lib/freebl/mpi/utils/isprime.c
new file mode 100644
index 000000000..d2d86957e
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/isprime.c
@@ -0,0 +1,89 @@
+/*
+ *  isprime.c
+ *
+ *  Probabilistic primality tester command-line tool
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mpi.h"
+#include "mpprime.h"
+
+#define RM_TESTS 15  /* how many iterations of Rabin-Miller? */
+#define MINIMUM 1024 /* don't bother us with a < this        */
+
+int g_tests = RM_TESTS;
+char *g_prog = NULL;
+
+int
+main(int argc, char *argv[])
+{
+    mp_int a;
+    mp_digit np = prime_tab_size; /* from mpprime.h */
+    int res = 0;
+
+    g_prog = argv[0];
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <a>, where <a> is a decimal integer\n"
+                        "Use '0x' prefix for a hexadecimal value\n",
+                g_prog);
+        return 1;
+    }
+
+    /* Read number of tests from environment, if present */
+    {
+        char *tmp;
+
+        if ((tmp = PR_GetEnvSecure("RM_TESTS")) != NULL) {
+            if ((g_tests = atoi(tmp)) <= 0)
+                g_tests = RM_TESTS;
+        }
+    }
+
+    mp_init(&a);
+    if (argv[1][0] == '0' && argv[1][1] == 'x')
+        mp_read_radix(&a, argv[1] + 2, 16);
+    else
+        mp_read_radix(&a, argv[1], 10);
+
+    if (mp_cmp_d(&a, MINIMUM) <= 0) {
+        fprintf(stderr, "%s: please use a value greater than %d\n",
+                g_prog, MINIMUM);
+        mp_clear(&a);
+        return 1;
+    }
+
+    /* Test for divisibility by small primes */
+    if (mpp_divis_primes(&a, &np) != MP_NO) {
+        printf("Not prime (divisible by small prime %d)\n", np);
+        res = 2;
+        goto CLEANUP;
+    }
+
+    /* Test with Fermat's test, using 2 as a witness */
+    if (mpp_fermat(&a, 2) != MP_YES) {
+        printf("Not prime (failed Fermat test)\n");
+        res = 2;
+        goto CLEANUP;
+    }
+
+    /* Test with Rabin-Miller probabilistic test */
+    if (mpp_pprime(&a, g_tests) == MP_NO) {
+        printf("Not prime (failed pseudoprime test)\n");
+        res = 2;
+        goto CLEANUP;
+    }
+
+    printf("Probably prime, 1 in 4^%d chance of false positive\n", g_tests);
+
+CLEANUP:
+    mp_clear(&a);
+
+    return res;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/lap.c b/security/nss/lib/freebl/mpi/utils/lap.c
new file mode 100644
index 000000000..501e4531d
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/lap.c
@@ -0,0 +1,90 @@
+/*
+ *  lap.c
+ *
+ *  Find least annihilating power of a mod m
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#include "mpi.h"
+
+void sig_catch(int ign);
+
+volatile sig_atomic_t g_quit = 0; /* set by sig_catch(), polled by main() */
+
+/*
+ * Search for the least exponent p < m with a^p == 1 (mod m) and print it.
+ * Exit status is 1 on a usage error or when interrupted by a signal.
+ */
+int
+main(int argc, char *argv[])
+{
+    mp_int a, m, p, k;
+    char *buf;
+    int rv = 0;
+
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s <a> <m>\n", argv[0]);
+        return 1;
+    }
+
+    mp_init(&a);
+    mp_init(&m);
+    mp_init(&p);
+    mp_add_d(&p, 1, &p); /* p = 1: exponent of the power currently in k */
+
+    mp_read_radix(&a, argv[1], 10);
+    mp_read_radix(&m, argv[2], 10);
+
+    mp_init_copy(&k, &a); /* k holds the running power, starting at a^1 */
+
+    signal(SIGINT, sig_catch);
+#ifndef __OS2__
+    signal(SIGHUP, sig_catch);
+#endif
+    signal(SIGTERM, sig_catch);
+
+    while (mp_cmp(&p, &m) < 0) {
+        if (g_quit) {
+            /* Interrupted: report how far the search got, then clean up. */
+            buf = malloc(mp_radix_size(&p, 10));
+            mp_toradix(&p, buf, 10);
+            fprintf(stderr, "Terminated at: %s\n", buf);
+            free(buf);
+            rv = 1;
+            goto CLEANUP;
+        }
+        if (mp_cmp_d(&k, 1) == 0) {
+            buf = malloc(mp_radix_size(&p, 10));
+            mp_toradix(&p, buf, 10);
+            printf("%s\n", buf);
+            free(buf);
+            break;
+        }
+
+        mp_mulmod(&k, &a, &m, &k);
+        mp_add_d(&p, 1, &p);
+    }
+
+    if (mp_cmp(&p, &m) >= 0)
+        printf("No annihilating power.\n");
+
+CLEANUP:
+    /* k was previously never cleared, and the interrupt path cleared nothing. */
+    mp_clear(&k);
+    mp_clear(&p);
+    mp_clear(&m);
+    mp_clear(&a);
+    return rv;
+}
+
+void
+sig_catch(int ign)
+{
+    g_quit = 1; /* flag tested at the top of main()'s search loop; ign is unused */
+}
diff --git a/security/nss/lib/freebl/mpi/utils/makeprime.c b/security/nss/lib/freebl/mpi/utils/makeprime.c
new file mode 100644
index 000000000..401b7532b
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/makeprime.c
@@ -0,0 +1,116 @@
+/*
+ * makeprime.c
+ *
+ * A simple prime generator function (and test driver).  Prints out the
+ * first prime it finds greater than or equal to the starting value.
+ *
+ * Usage: makeprime <start>
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+/* These two must be included for make_prime() to work */
+
+#include "mpi.h"
+#include "mpprime.h"
+
+/*
+  make_prime(p, nr)
+
+  Find the smallest prime integer greater than or equal to p, where
+  primality is verified by 'nr' iterations of the Rabin-Miller
+  probabilistic primality test.  The caller is responsible for
+  generating the initial value of p.
+
+  Returns MP_OKAY if a prime has been generated, otherwise the error
+  code indicates some other problem.  The value of p is clobbered; the
+  caller should keep a copy if the value is needed.
+ */
+mp_err make_prime(mp_int *p, int nr);
+
+/* The main() is not required -- it's just a test driver */
+int
+main(int argc, char *argv[])
+{
+    mp_int start;
+    mp_err res;
+    char *buf;
+    int rv = 1;
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <start-value>\n", argv[0]);
+        return 1;
+    }
+
+    /*
+     * A leading "0x"/"0X" selects hexadecimal input.  tolower() is given an
+     * unsigned char value; passing it a negative char is undefined behaviour.
+     */
+    mp_init(&start);
+    if (argv[1][0] == '0' && tolower((unsigned char)argv[1][1]) == 'x') {
+        mp_read_radix(&start, argv[1] + 2, 16);
+    } else {
+        mp_read_radix(&start, argv[1], 10);
+    }
+    mp_abs(&start, &start);
+
+    if ((res = make_prime(&start, 5)) != MP_OKAY) {
+        fprintf(stderr, "%s: error: %s\n", argv[0], mp_strerror(res));
+    } else if ((buf = malloc(mp_radix_size(&start, 10))) == NULL) {
+        fprintf(stderr, "%s: out of memory\n", argv[0]);
+    } else {
+        mp_todecimal(&start, buf);
+        printf("%s\n", buf);
+        free(buf);
+        rv = 0;
+    }
+    mp_clear(&start);
+    return rv;
+} /* end main() */
+
+/*------------------------------------------------------------------------*/
+
+mp_err
+make_prime(mp_int *p, int nr)
+{
+    mp_err res;
+
+    /* Even candidates are bumped to the next odd value. */
+    if (mp_iseven(p))
+        mp_add_d(p, 1, p);
+
+    for (;;) {
+        mp_digit which = prime_tab_size;
+
+        /* First filter: divisibility by a few small primes. */
+        res = mpp_divis_primes(p, &which);
+        if (res != MP_YES && res != MP_NO)
+            return res;
+
+        /* Candidates with no small factor get one Fermat test, witness 2. */
+        if (res == MP_NO) {
+            res = mpp_fermat(p, 2);
+            if (res != MP_YES && res != MP_NO)
+                return res;
+
+            /* Finally, Rabin-Miller is run as often as requested. */
+            if (res == MP_YES) {
+                res = mpp_pprime(p, nr);
+                if (res != MP_NO)
+                    return res; /* MP_YES: prime found; anything else: error */
+            }
+        }
+
+        /* Candidate rejected: step to the next odd number. */
+        if ((res = mp_add_d(p, 2, p)) != MP_OKAY)
+            return res;
+    }
+} /* end make_prime() */
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS                                                  */
diff --git a/security/nss/lib/freebl/mpi/utils/metime.c b/security/nss/lib/freebl/mpi/utils/metime.c
new file mode 100644
index 000000000..122875ee0
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/metime.c
@@ -0,0 +1,102 @@
+/*
+ *  metime.c
+ *
+ * Modular exponentiation timing test
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <time.h>
+
+#include "mpi.h"
+#include "mpprime.h"
+
+double clk_to_sec(clock_t start, clock_t stop);
+
+int
+main(int argc, char *argv[])
+{
+    mp_int a, m, c;
+    clock_t start, stop;
+    double sec;
+    unsigned int seed;
+    int ix, num, prec = 8;
+    char *seed_env = PR_GetEnvSecure("SEED");
+
+    /* SEED in the environment overrides the time-based default seed. */
+    if (seed_env != NULL)
+        seed = abs(atoi(seed_env));
+    else
+        seed = (unsigned int)time(NULL);
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <num-tests> [<nbits>]\n", argv[0]);
+        return 1;
+    }
+
+    /* A negative test count is taken by magnitude; zero is rejected. */
+    num = atoi(argv[1]);
+    if (num < 0)
+        num = -num;
+    if (num == 0) {
+        fprintf(stderr, "%s: must perform at least 1 test\n", argv[0]);
+        return 1;
+    }
+
+    /* Optional bit count, rounded up to whole digits; default 8 digits. */
+    if (argc > 2) {
+        int nbits = atoi(argv[2]);
+
+        prec = (nbits <= 0) ? 8 : (nbits + (DIGIT_BIT - 1)) / DIGIT_BIT;
+    }
+
+    printf("Modular exponentiation timing test\n"
+           "Precision:  %d digits (%d bits)\n"
+           "# of tests: %d\n\n",
+           prec, prec * DIGIT_BIT, num);
+
+    mp_init_size(&a, prec);
+    mp_init_size(&m, prec);
+    mp_init_size(&c, prec);
+
+    srand(seed);
+
+    start = clock();
+    for (ix = num; ix > 0; ix--) {
+        mpp_random_size(&a, prec);
+        mpp_random_size(&c, prec);
+        mpp_random_size(&m, prec);
+        /* set msb and lsb of m */
+        DIGIT(&m, 0) |= 1;
+        DIGIT(&m, USED(&m) - 1) |= (mp_digit)1 << (DIGIT_BIT - 1);
+        if (mp_cmp(&a, &m) > 0)
+            mp_sub(&a, &m, &a);
+
+        mp_exptmod(&a, &c, &m, &c);
+    }
+    stop = clock();
+
+    sec = clk_to_sec(start, stop);
+    printf("Total:      %.3f seconds\n", sec);
+    printf("Individual: %.3f seconds\n", sec / num);
+
+    mp_clear(&c);
+    mp_clear(&a);
+    mp_clear(&m);
+
+    return 0;
+}
+
+double
+clk_to_sec(clock_t start, clock_t stop)
+{
+    return (double)(stop - start) / CLOCKS_PER_SEC; /* clock() ticks -> seconds */
+}
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS                                                  */
diff --git a/security/nss/lib/freebl/mpi/utils/pi.c b/security/nss/lib/freebl/mpi/utils/pi.c
new file mode 100644
index 000000000..7e3109786
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/pi.c
@@ -0,0 +1,171 @@
+/*
+ * pi.c
+ *
+ * Compute pi to an arbitrary number of digits.  Uses Machin's formula,
+ * like everyone else on the planet:
+ *
+ *    pi = 16 * arctan(1/5) - 4 * arctan(1/239)
+ *
+ * This is pretty effective for up to a few thousand digits, but it
+ * gets pretty slow after that.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <time.h>
+
+#include "mpi.h"
+
+mp_err arctan(mp_digit mul, mp_digit x, mp_digit prec, mp_int *sum);
+
+/* Compute pi to argv[1] decimal digits via Machin's formula and print it. */
+int
+main(int argc, char *argv[])
+{
+    mp_int sum1, sum2;
+    mp_digit ndigits;
+    mp_err res;
+    clock_t start, stop;
+    char *text;
+    int out = 1;
+
+    /* Make the user specify precision on the command line */
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <num-digits>\n", argv[0]);
+        return 1;
+    }
+
+    ndigits = abs(atoi(argv[1]));
+    if (ndigits == 0) {
+        fprintf(stderr, "%s: you must request at least 1 digit\n", argv[0]);
+        return 1;
+    }
+
+    start = clock();
+    mp_init(&sum1);
+    mp_init(&sum2);
+
+    /* sum1 = 16 * arctan(1/5)  */
+    res = arctan(16, 5, ndigits, &sum1);
+    if (res != MP_OKAY) {
+        fprintf(stderr, "%s: arctan: %s\n", argv[0], mp_strerror(res));
+        goto CLEANUP;
+    }
+
+    /* sum2 = 4 * arctan(1/239) */
+    res = arctan(4, 239, ndigits, &sum2);
+    if (res != MP_OKAY) {
+        fprintf(stderr, "%s: arctan: %s\n", argv[0], mp_strerror(res));
+        goto CLEANUP;
+    }
+
+    /* pi = sum1 - sum2         */
+    res = mp_sub(&sum1, &sum2, &sum1);
+    if (res != MP_OKAY) {
+        fprintf(stderr, "%s: mp_sub: %s\n", argv[0], mp_strerror(res));
+        goto CLEANUP;
+    }
+    stop = clock();
+
+    /* Write the output in decimal */
+    text = malloc(mp_radix_size(&sum1, 10));
+    if (text == NULL) {
+        fprintf(stderr, "%s: out of memory\n", argv[0]);
+        goto CLEANUP;
+    }
+    mp_todecimal(&sum1, text);
+    printf("%s\n", text);
+    free(text);
+
+    fprintf(stderr, "Computation took %.2f sec.\n",
+            (double)(stop - start) / CLOCKS_PER_SEC);
+    out = 0; /* every failure path above leaves the default of 1 */
+
+CLEANUP:
+    mp_clear(&sum1);
+    mp_clear(&sum2);
+
+    return out;
+}
+
+/* Compute sum := mul * arctan(1/x) scaled by 10^prec, i.e. to 'prec' digits of precision */
+mp_err
+arctan(mp_digit mul, mp_digit x, mp_digit prec, mp_int *sum)
+{
+    mp_int t, v;
+    mp_digit q = 1, rd;
+    mp_err res;
+    int sign = 1;
+    /* t: running scaled power term; v: t divided by q; q: odd divisor 1, 3, 5, ... */
+    prec += 3; /* push inaccuracies off the end */
+
+    mp_init(&t);
+    mp_set(&t, 10);
+    mp_init(&v);
+    if ((res = mp_expt_d(&t, prec, &t)) != MP_OKAY || /* get 10^prec    */
+        (res = mp_mul_d(&t, mul, &t)) != MP_OKAY ||   /* ... times mul  */
+        (res = mp_mul_d(&t, x, &t)) != MP_OKAY)       /* ... times x    */
+        goto CLEANUP;
+
+    /*
+    The extra multiplication by x in the above takes care of what
+    would otherwise have to be a special case for 1 / x^1 during the
+    first loop iteration.  A little sneaky, but effective.
+
+    We compute arctan(1/x) by the formula:
+
+         1     1       1       1
+         - - ----- + ----- - ----- + ...
+         x   3 x^3   5 x^5   7 x^7
+
+    We multiply through by 'mul' beforehand, which gives us a couple
+    more iterations and more precision
+   */
+
+    x *= x; /* works as long as x < sqrt(RADIX), which it is here */
+
+    mp_zero(sum);
+
+    do {
+        if ((res = mp_div_d(&t, x, &t, &rd)) != MP_OKAY)
+            goto CLEANUP;
+        /* On subtracted terms a nonzero remainder bumps the quotient up by one. */
+        if (sign < 0 && rd != 0)
+            mp_add_d(&t, 1, &t);
+
+        if ((res = mp_div_d(&t, q, &v, &rd)) != MP_OKAY)
+            goto CLEANUP;
+
+        if (sign < 0 && rd != 0)
+            mp_add_d(&v, 1, &v);
+
+        if (sign > 0)
+            res = mp_add(sum, &v, sum);
+        else
+            res = mp_sub(sum, &v, sum);
+
+        if (res != MP_OKAY)
+            goto CLEANUP;
+        /* Alternate the sign and step to the next odd divisor. */
+        sign *= -1;
+        q += 2;
+
+    } while (mp_cmp_z(&t) != 0);
+
+    /* Chop off inaccurate low-order digits (the three extra ones added to prec above) */
+    mp_div_d(sum, 1000, sum, NULL);
+
+CLEANUP:
+    mp_clear(&v);
+    mp_clear(&t);
+
+    return res;
+}
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS                                                  */
diff --git a/security/nss/lib/freebl/mpi/utils/primegen.c b/security/nss/lib/freebl/mpi/utils/primegen.c
new file mode 100644
index 000000000..f62a56a4e
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/primegen.c
@@ -0,0 +1,159 @@
+/*
+ *  primegen.c
+ *
+ * Generates random integers which are prime with a high degree of
+ * probability using the Miller-Rabin probabilistic primality testing
+ * algorithm.
+ *
+ * Usage:
+ *    primegen <bits> [<num>]
+ *
+ *    <bits>   - number of significant bits each prime should have
+ *    <num>    - number of primes to generate
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <time.h>
+
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpprime.h"
+
+#define NUM_TESTS 5 /* Number of Rabin-Miller iterations to test with */
+
+#ifdef DEBUG
+#define FPUTC(x, y) fputc(x, y)
+#else
+#define FPUTC(x, y)
+#endif
+
+int
+main(int argc, char *argv[])
+{
+    unsigned char *raw;
+    char *out;
+    unsigned long nTries;
+    int rawlen, bits, outlen, ngen, ix, jx;
+    int g_strong = 0;
+    mp_int testval;
+    mp_err res;
+    clock_t start, end;
+
+    /* We'll just use the C library's rand() for now, although this
+     won't be good enough for cryptographic purposes */
+    if ((out = PR_GetEnvSecure("SEED")) == NULL) {
+        srand((unsigned int)time(NULL));
+    } else {
+        srand((unsigned int)atoi(out));
+    }
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <bits> [<count> [strong]]\n", argv[0]);
+        return 1;
+    }
+
+    if ((bits = abs(atoi(argv[1]))) < CHAR_BIT) {
+        fprintf(stderr, "%s: please request at least %d bits.\n", argv[0], CHAR_BIT);
+        return 1;
+    }
+
+    /* If optional third argument is given, use that as the number of
+     primes to generate; otherwise generate one prime only.
+   */
+    if (argc < 3) {
+        ngen = 1;
+    } else {
+        ngen = abs(atoi(argv[2]));
+    }
+
+    /* If fourth argument is given, and is the word "strong", we'll
+     generate strong (Sophie Germain) primes.
+   */
+    if (argc > 3 && strcmp(argv[3], "strong") == 0)
+        g_strong = 1;
+
+    /* testval - candidate being tested; nTries - number tried so far */
+    if ((res = mp_init(&testval)) != MP_OKAY) {
+        fprintf(stderr, "%s: error: %s\n", argv[0], mp_strerror(res));
+        return 1;
+    }
+
+    if (g_strong) {
+        printf("Requested %d strong prime value(s) of %d bits.\n", ngen, bits);
+    } else {
+        printf("Requested %d prime value(s) of %d bits.\n", ngen, bits);
+    }
+
+    rawlen = (bits / CHAR_BIT) + ((bits % CHAR_BIT) ? 1 : 0) + 1;
+
+    if ((raw = calloc(rawlen, sizeof(unsigned char))) == NULL) {
+        fprintf(stderr, "%s: out of memory, sorry.\n", argv[0]);
+        mp_clear(&testval); /* was leaked on this path */
+        return 1;
+    }
+
+    /* This loop is one for each prime we need to generate */
+    for (jx = 0; jx < ngen; jx++) {
+        raw[0] = 0; /* sign is positive */
+
+        /* Pack the initializer with random bytes (unsigned product: no overflow UB) */
+        for (ix = 1; ix < rawlen; ix++)
+            raw[ix] = ((unsigned int)rand() * (unsigned int)rand()) & UCHAR_MAX;
+
+        raw[1] |= 0x80;       /* set high-order bit of test value     */
+        raw[rawlen - 1] |= 1; /* set low-order bit of test value      */
+
+        /* Make an mp_int out of the initializer */
+        mp_read_raw(&testval, (char *)raw, rawlen);
+
+        /* Initialize candidate counter */
+        nTries = 0;
+
+        start = clock(); /* time generation for this prime */
+        do {
+            res = mpp_make_prime(&testval, bits, g_strong, &nTries);
+            if (res != MP_NO)
+                break;
+            /* This code works whether digits are 16 or 32 bits */
+            res = mp_add_d(&testval, 32 * 1024, &testval);
+            res = mp_add_d(&testval, 32 * 1024, &testval);
+            FPUTC(',', stderr);
+        } while (1);
+        end = clock();
+
+        if (res != MP_YES)
+            break;
+        FPUTC('\n', stderr);
+        puts("The following value is probably prime:");
+        outlen = mp_radix_size(&testval, 10);
+        if ((out = calloc(outlen, sizeof(unsigned char))) == NULL) {
+            fprintf(stderr, "%s: out of memory, sorry.\n", argv[0]);
+            break;
+        }
+        mp_toradix(&testval, (char *)out, 10);
+        printf("10: %s\n", out);
+        mp_toradix(&testval, (char *)out, 16);
+        printf("16: %s\n\n", out);
+        free(out);
+
+        printf("Number of candidates tried: %lu\n", nTries);
+        printf("This computation took %ld clock ticks (%.2f seconds)\n",
+               (long)(end - start), ((double)(end - start) / CLOCKS_PER_SEC));
+
+        FPUTC('\n', stderr);
+    } /* end of loop to generate all requested primes */
+
+    if (res != MP_OKAY)
+        fprintf(stderr, "%s: error: %s\n", argv[0], mp_strerror(res));
+
+    free(raw);
+    mp_clear(&testval);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/prng.c b/security/nss/lib/freebl/mpi/utils/prng.c
new file mode 100644
index 000000000..38748d18e
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/prng.c
@@ -0,0 +1,57 @@
+/*
+ *  prng.c
+ *
+ *  Command-line pseudo-random number generator
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <time.h>
+
+#ifdef __OS2__
+#include <types.h>
+#include <process.h>
+#else
+#include <unistd.h>
+#endif
+
+#include "bbs_rand.h"
+
+int
+main(int argc, char *argv[])
+{
+    unsigned char *seed;
+    volatile unsigned char *wipe;
+    unsigned int ix, num = 1;
+
+    if (argc > 1) {
+        /* Check the sign as an int; a negative count used to wrap to a huge one. */
+        int want = atoi(argv[1]);
+
+        num = (want > 0) ? (unsigned int)want : 1;
+    }
+
+    srand(time(NULL) * (unsigned int)getpid());
+
+    /* Not a perfect seed, but not bad */
+    if ((seed = malloc(bbs_seed_size)) == NULL) {
+        fprintf(stderr, "%s: out of memory\n", argv[0]);
+        return 1;
+    }
+    for (ix = 0; ix < bbs_seed_size; ix++)
+        seed[ix] = rand() % (UCHAR_MAX + 1); /* full 0..UCHAR_MAX range */
+    bbs_srand(seed, bbs_seed_size);
+    for (wipe = seed, ix = 0; ix < bbs_seed_size; ix++)
+        wipe[ix] = 0; /* volatile stores keep the wipe from being elided */
+    free(seed);
+
+    while (num-- > 0) {
+        ix = bbs_rand();
+        printf("%u\n", ix);
+    }
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/ptab.pl b/security/nss/lib/freebl/mpi/utils/ptab.pl
new file mode 100755
index 000000000..ef2e565be
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/ptab.pl
@@ -0,0 +1,26 @@
+#!/usr/bin/perl
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Read one prime per input line and write C definitions of
+# prime_tab_size and prime_tab[] to standard output, eight entries
+# per row.
+while(<>) {
+    chomp;
+    push(@primes, $_);
+}
+
+# Sort before splitting off the final entry, so that the value written
+# without a trailing comma is the largest one and the table stays in
+# ascending order even when the input is not.
+@primes = sort {$a<=>$b} @primes;
+
+printf("mp_size   prime_tab_size = %d;\n", scalar(@primes));
+print "mp_digit  prime_tab[] = {\n";
+
+print "\t";
+$last = pop(@primes);
+$brk = 0;
+foreach $prime (@primes) {
+    printf("0x%04X, ", $prime);
+    $brk = ($brk + 1) % 8;
+    print "\n\t" if(!$brk);
+}
+# With no input there is no final entry to print at all.
+printf("0x%04X", $last) if(defined($last));
+# The final entry is always on the current row, so always end that row.
+print "\n";
+print "};\n\n";
+
+exit 0;
diff --git a/security/nss/lib/freebl/mpi/utils/sieve.c b/security/nss/lib/freebl/mpi/utils/sieve.c
new file mode 100644
index 000000000..57768af9e
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/sieve.c
@@ -0,0 +1,243 @@
+/*
+ * sieve.c
+ *
+ * Finds prime numbers using the Sieve of Eratosthenes
+ *
+ * This implementation uses a bitmap to represent all odd integers in a
+ * given range.  We iterate over this bitmap, crossing off the
+ * multiples of each prime we find.  At the end, all the remaining set
+ * bits correspond to prime integers.
+ *
+ * Here, we make two passes -- once we have generated a sieve-ful of
+ * primes, we copy them out, reset the sieve using the highest
+ * generated prime from the first pass as a base.  Then we cross out
+ * all the multiples of all the primes we found the first time through,
+ * and re-sieve.  In this way, we get double use of the memory we
+ * allocated for the sieve the first time through.  Since we also
+ * implicitly ignore multiples of 2, this amounts to 4 times the
+ * values.
+ *
+ * This could (and probably will) be generalized to re-use the sieve a
+ * few more times.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+
+typedef unsigned char byte;
+
+typedef struct {
+    int size;
+    byte *bits;
+    long base;
+    int next;
+    int nbits;
+} sieve;
+
+void sieve_init(sieve *sp, long base, int nbits);
+void sieve_grow(sieve *sp, int nbits);
+long sieve_next(sieve *sp);
+void sieve_reset(sieve *sp, long base);
+void sieve_cross(sieve *sp, long val);
+void sieve_clear(sieve *sp);
+
+#define S_ISSET(S, B) (((S)->bits[(B) / CHAR_BIT] >> ((B) % CHAR_BIT)) & 1)
+#define S_SET(S, B) ((S)->bits[(B) / CHAR_BIT] |= (1 << ((B) % CHAR_BIT)))
+#define S_CLR(S, B) ((S)->bits[(B) / CHAR_BIT] &= ~(1 << ((B) % CHAR_BIT)))
+#define S_VAL(S, B) ((S)->base + (2 * (B)))
+#define S_BIT(S, V) (((V) - ((S)->base)) / 2)
+
+/*
+ * Entry point: print the primes found in two consecutive sieve windows,
+ * one per line on stdout.  Progress messages go to stderr.
+ *
+ * argv[1] is the sieve width in bit positions; its sign is ignored.
+ * The first window starts at 3.  The second window reuses the same
+ * bitmap, rebased at the largest prime found in the first window.
+ *
+ * Returns 0 on success and 1 on bad usage or allocation failure.
+ */
+int
+main(int argc, char *argv[])
+{
+    sieve s;
+    long *p;
+    long width, top;
+    char *end;
+    int c, ix, cur = 0;
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <width>\n", argv[0]);
+        return 1;
+    }
+
+    /* Accept a negative width as its magnitude, but only when negating
+     * it cannot overflow.  Zero and unparsable input are rejected: an
+     * empty sieve yields no prime to rebase the second window on.  The
+     * upper bound leaves room for rounding up to a whole byte. */
+    width = strtol(argv[1], &end, 10);
+    if (width < 0 && width >= -(long)INT_MAX)
+        width = -width;
+    if (end == argv[1] || width <= 0 || width > INT_MAX - CHAR_BIT) {
+        fprintf(stderr, "Usage: %s <width>\n", argv[0]);
+        return 1;
+    }
+    c = (int)width;
+
+    fprintf(stderr, "%s: sieving to %d positions\n", argv[0], c);
+
+    sieve_init(&s, 3, c);
+    if (s.bits == NULL) {
+        /* Defensive: a NULL bitmap means the allocation failed */
+        fprintf(stderr, "%s: out of memory allocating sieve\n", argv[0]);
+        return 1;
+    }
+
+    /* First pass: run the sieve to completion, counting the primes so
+     * the copy-out array can be sized. */
+    c = 0;
+    while (sieve_next(&s) > 0) {
+        ++c;
+    }
+
+    p = calloc(c, sizeof(long));
+    if (!p) {
+        fprintf(stderr, "%s: out of memory after first half\n", argv[0]);
+        sieve_clear(&s);
+        exit(1);
+    }
+
+    fprintf(stderr, "%s: half done ... \n", argv[0]);
+
+    /* Copy out and print every surviving bit; never write past the
+     * c entries that were allocated. */
+    for (ix = 0; ix < s.nbits && cur < c; ix++) {
+        if (S_ISSET(&s, ix)) {
+            p[cur] = S_VAL(&s, ix);
+            printf("%ld\n", p[cur]);
+            ++cur;
+        }
+    }
+
+    if (cur == 0) {
+        /* Nothing survived, so there is no base for a second window */
+        free(p);
+        sieve_clear(&s);
+        return 0;
+    }
+
+    /* Remember the largest prime now: it is needed after p is freed */
+    top = p[cur - 1];
+
+    sieve_reset(&s, top);
+    fprintf(stderr, "%s: crossing off %d found primes ... \n", argv[0], cur);
+    for (ix = 0; ix < cur; ix++) {
+        sieve_cross(&s, p[ix]);
+        if (!(ix % 1000))
+            fputc('.', stderr);
+    }
+    fputc('\n', stderr);
+
+    free(p);
+
+    fprintf(stderr, "%s: sieving again from %ld ... \n", argv[0], top);
+    while (sieve_next(&s) > 0) {
+        /* Only the crossing-off side effect is wanted here */
+    }
+
+    fprintf(stderr, "%s: done!\n", argv[0]);
+    for (ix = 0; ix < s.nbits; ix++) {
+        if (S_ISSET(&s, ix)) {
+            printf("%ld\n", S_VAL(&s, ix));
+        }
+    }
+
+    sieve_clear(&s);
+
+    return 0;
+}
+
+/*
+ * Set up *sp as a bitmap with room for at least nbits candidates, all
+ * initially marked as set.  Bit B stands for the value base + 2 * B;
+ * an even base is bumped to the next odd number.  The bit count is
+ * rounded up to a whole number of bytes.
+ *
+ * On allocation failure a message is written to stderr and the sieve
+ * is left empty (bits == NULL, size == 0, nbits == 0).
+ */
+void
+sieve_init(sieve *sp, long base, int nbits)
+{
+    sp->size = (nbits / CHAR_BIT);
+
+    if (nbits % CHAR_BIT)
+        ++sp->size;
+    if (sp->size < 0)
+        sp->size = 0; /* a negative request yields an empty sieve */
+
+    if (!(base & 1))
+        ++base;
+    sp->base = base;
+
+    sp->next = 0;
+
+    /* malloc rather than calloc: every byte is overwritten just below.
+     * Ask for at least one byte so that an empty sieve is not confused
+     * with an allocation failure. */
+    sp->bits = malloc(sp->size > 0 ? (size_t)sp->size : 1);
+    if (sp->bits == NULL) {
+        fprintf(stderr, "Error: out of memory in sieve_init\n");
+        sp->size = 0;
+        sp->nbits = 0;
+        return;
+    }
+    memset(sp->bits, UCHAR_MAX, sp->size);
+
+    sp->nbits = sp->size * CHAR_BIT;
+}
+
+/*
+ * Enlarge the bitmap of *sp so that it holds at least nbits bits,
+ * rounded up to a whole number of bytes.  Existing bytes are copied
+ * over and the newly added bytes have every bit set.  A request that
+ * does not exceed the current size is ignored.
+ *
+ * On allocation failure a message is written to stderr and the sieve
+ * is left unchanged.
+ */
+void
+sieve_grow(sieve *sp, int nbits)
+{
+    int want = nbits / CHAR_BIT + ((nbits % CHAR_BIT) ? 1 : 0);
+    byte *bigger;
+    byte *fill;
+
+    if (want <= sp->size)
+        return;
+
+    bigger = calloc(want, sizeof(byte));
+    if (bigger == NULL) {
+        fprintf(stderr, "Error: out of memory in sieve_grow\n");
+        return;
+    }
+
+    /* Keep the old contents, then mark the tail as all-set */
+    memcpy(bigger, sp->bits, sp->size);
+    for (fill = bigger + sp->size; fill < bigger + want; ++fill)
+        *fill = UCHAR_MAX;
+
+    free(sp->bits);
+    sp->bits = bigger;
+    sp->size = want;
+    sp->nbits = want * CHAR_BIT;
+}
+
+/*
+ * Take the value at the current position (sp->next) as the next prime,
+ * clear the bits of its odd multiples that fall inside the sieve, and
+ * advance sp->next to the following set bit.
+ *
+ * Returns the value taken, or -1 once the current position has moved
+ * past the last bit of the sieve.
+ */
+long
+sieve_next(sieve *sp)
+{
+    long out;
+    int ix = 0;
+    long val;
+
+    /* Valid bit positions are 0 .. nbits - 1, so nbits itself is
+     * already out of range. */
+    if (sp->next >= sp->nbits)
+        return -1;
+
+    out = S_VAL(sp, sp->next);
+#ifdef DEBUG
+    fprintf(stderr, "Sieving %ld\n", out);
+#endif
+
+    /* Sieve out all multiples of the current prime */
+    val = out;
+    while (ix < sp->nbits) {
+        val += out;
+        ix = S_BIT(sp, val);
+        /* Even multiples have no bit of their own; a multiple below the
+         * base would give a negative index and is skipped as well. */
+        if ((val & 1) && ix >= 0 && ix < sp->nbits) {
+            S_CLR(sp, ix);
+#ifdef DEBUG
+            fprintf(stderr, "Crossing out %ld (bit %d)\n", val, ix);
+#endif
+        }
+    }
+
+    /* Scan ahead to the next prime */
+    ++sp->next;
+    while (sp->next < sp->nbits && !S_ISSET(sp, sp->next))
+        ++sp->next;
+
+    return out;
+}
+
+/*
+ * Clear the bit of every odd multiple of val that lies inside the
+ * sieve, starting from the first multiple that is not below sp->base.
+ * A non-positive val is ignored, since stepping by it would never
+ * leave the sieve.
+ */
+void
+sieve_cross(sieve *sp, long val)
+{
+    int ix = 0;
+    long cur = val;
+
+    if (val <= 0)
+        return;
+
+    /* Jump straight to the first multiple of val at or above the base
+     * instead of stepping there one multiple at a time. */
+    if (cur < sp->base)
+        cur += ((sp->base - cur + val - 1) / val) * val;
+
+    ix = S_BIT(sp, cur);
+    while (ix < sp->nbits) {
+        /* Only odd values are represented in the bitmap */
+        if (cur & 1)
+            S_CLR(sp, ix);
+        cur += val;
+        ix = S_BIT(sp, cur);
+    }
+}
+
+/*
+ * Reuse an existing sieve for a new window: set every bit again, move
+ * the window to start at base and rewind the scan position.  As in
+ * sieve_init, an even base is bumped to the next odd number, because
+ * the bitmap represents odd values only.
+ */
+void
+sieve_reset(sieve *sp, long base)
+{
+    if (sp->bits != NULL)
+        memset(sp->bits, UCHAR_MAX, sp->size);
+    if (!(base & 1))
+        ++base;
+    sp->base = base;
+    sp->next = 0;
+}
+
+/*
+ * Release the bitmap owned by *sp and leave the sieve empty, so that a
+ * later scan over it touches no freed memory.  Safe to call on a sieve
+ * whose bitmap is already NULL.
+ */
+void
+sieve_clear(sieve *sp)
+{
+    free(sp->bits); /* free(NULL) is a no-op */
+
+    sp->bits = NULL;
+    sp->size = 0;
+    sp->nbits = 0;
+    sp->next = 0;
+}
diff --git a/security/nss/lib/freebl/mpi/vis_32.il b/security/nss/lib/freebl/mpi/vis_32.il
new file mode 100644
index 000000000..d2e8024ac
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/vis_32.il
@@ -0,0 +1,1291 @@
+! 
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+! The interface to the VIS instructions as declared below (and in the VIS
+! User's Manual) will not change, but the macro implementation might change
+! in the future.
+
+!--------------------------------------------------------------------
+! Pure edge handling instructions
+!
+! Each template in this group is a single edge instruction that takes
+! its two operands in %o0 and %o1 and writes the result back to %o0.
+! NOTE(review): the number after the template name looks like the total
+! argument size in bytes (8 here, 16 for the same prototype in
+! vis_64.il) -- confirm against the compiler's inline-template docs.
+!
+! int vis_edge8(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge8,8
+	edge8	%o0,%o1,%o0
+	.end
+!
+! int vis_edge8l(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge8l,8
+	edge8l	%o0,%o1,%o0
+	.end
+!
+! int vis_edge16(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge16,8
+	edge16	%o0,%o1,%o0
+	.end
+!
+! int vis_edge16l(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge16l,8
+	edge16l	%o0,%o1,%o0
+	.end
+!
+! int vis_edge32(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge32,8
+	edge32	%o0,%o1,%o0
+	.end
+!
+! int vis_edge32l(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge32l,8
+	edge32l	%o0,%o1,%o0
+	.end
+
+!--------------------------------------------------------------------
+! Edge handling instructions with negative return values if cc set
+!
+! Each template in this group runs the edge instruction, builds a mask
+! in %o1 (0, replaced by -1024 when the "greater unsigned" condition
+! holds on %icc) and ORs that mask into the edge result in %o0.  The
+! condition codes tested are presumably those left by the edge
+! instruction itself -- confirm against the VIS manual.
+!
+! int vis_edge8cc(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge8cc,8
+	edge8	%o0,%o1,%o0
+	mov     0,%o1
+	movgu   %icc,-1024,%o1
+	or      %o1,%o0,%o0
+	.end
+!
+! int vis_edge8lcc(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge8lcc,8
+	edge8l	%o0,%o1,%o0
+	mov     0,%o1
+	movgu   %icc,-1024,%o1
+	or      %o1,%o0,%o0
+	.end
+!
+! int vis_edge16cc(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge16cc,8
+	edge16	%o0,%o1,%o0
+	mov     0,%o1
+	movgu   %icc,-1024,%o1
+	or      %o1,%o0,%o0
+	.end
+!
+! int vis_edge16lcc(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge16lcc,8
+	edge16l	%o0,%o1,%o0
+	mov     0,%o1
+	movgu   %icc,-1024,%o1
+	or      %o1,%o0,%o0
+	.end
+!
+! int vis_edge32cc(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge32cc,8
+	edge32	%o0,%o1,%o0
+	mov     0,%o1
+	movgu   %icc,-1024,%o1
+	or      %o1,%o0,%o0
+	.end
+!
+! int vis_edge32lcc(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge32lcc,8
+	edge32l	%o0,%o1,%o0
+	mov     0,%o1
+	movgu   %icc,-1024,%o1
+	or      %o1,%o0,%o0
+	.end
+
+!--------------------------------------------------------------------
+! Alignment instructions
+!
+! void *vis_alignaddr(void */*rs1*/, int /*rs2*/);
+!
+	.inline vis_alignaddr,8
+	alignaddr	%o0,%o1,%o0
+	.end
+!
+! void *vis_alignaddrl(void */*rs1*/, int /*rs2*/);
+!
+	.inline vis_alignaddrl,8
+	alignaddrl	%o0,%o1,%o0
+	.end
+!
+! double vis_faligndata(double /*frs1*/, double /*frs2*/);
+!
+! The code treats the register pairs %o0:%o1 and %o2:%o3 as the two
+! double arguments.  Each pair is stored to the stack slot at
+! [%sp+0x48] and reloaded into a floating-point register pair (%f4,
+! then %f10); the result is produced in %f0.
+!
+	.inline vis_faligndata,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	faligndata	%f4,%f10,%f0
+	.end
+
+!--------------------------------------------------------------------
+! Partitioned comparison instructions
+!
+! int vis_fcmple16(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fcmple16,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fcmple16	%f4,%f10,%o0
+	.end
+!
+! int vis_fcmpne16(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fcmpne16,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fcmpne16	%f4,%f10,%o0
+	.end
+!
+! int vis_fcmple32(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fcmple32,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fcmple32	%f4,%f10,%o0
+	.end
+!
+! int vis_fcmpne32(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fcmpne32,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fcmpne32	%f4,%f10,%o0
+	.end
+!
+! int vis_fcmpgt16(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fcmpgt16,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fcmpgt16	%f4,%f10,%o0
+	.end
+!
+! int vis_fcmpeq16(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fcmpeq16,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fcmpeq16	%f4,%f10,%o0
+	.end
+!
+! int vis_fcmpgt32(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fcmpgt32,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fcmpgt32	%f4,%f10,%o0
+	.end
+!
+! int vis_fcmpeq32(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fcmpeq32,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fcmpeq32	%f4,%f10,%o0
+	.end
+
+!--------------------------------------------------------------------
+! Partitioned arithmetic
+!
+! double vis_fmul8x16(float /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fmul8x16,12
+	st	%o0,[%sp+0x44]
+	ld	[%sp+0x44],%f4
+	st	%o1,[%sp+0x48]
+	st	%o2,[%sp+0x4c]
+	ldd	[%sp+0x48],%f10
+	fmul8x16	%f4,%f10,%f0
+	.end
+!
+! double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/);
+!
+	.inline vis_fmul8x16_dummy,16
+	st	%o0,[%sp+0x44]
+	ld	[%sp+0x44],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fmul8x16	%f4,%f10,%f0
+	.end
+!
+! double vis_fmul8x16au(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fmul8x16au,8
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	st	%o1,[%sp+0x48]
+	ld	[%sp+0x48],%f10
+	fmul8x16au	%f4,%f10,%f0
+	.end
+!
+! double vis_fmul8x16al(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fmul8x16al,8
+	st	%o0,[%sp+0x44]
+	ld	[%sp+0x44],%f4
+	st	%o1,[%sp+0x48]
+	ld	[%sp+0x48],%f10
+	fmul8x16al	%f4,%f10,%f0
+	.end
+!
+! double vis_fmul8sux16(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fmul8sux16,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fmul8sux16	%f4,%f10,%f0
+	.end
+!
+! double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fmul8ulx16,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fmul8ulx16	%f4,%f10,%f0
+	.end
+!
+! double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fmuld8sux16,8
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	st	%o1,[%sp+0x48]
+	ld	[%sp+0x48],%f10
+	fmuld8sux16	%f4,%f10,%f0
+	.end
+!
+! double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fmuld8ulx16,8
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	st	%o1,[%sp+0x48]
+	ld	[%sp+0x48],%f10
+	fmuld8ulx16	%f4,%f10,%f0
+	.end
+!
+! double vis_fpadd16(double /*frs1*/, double /*frs2*/);
+!
+! NOTE(review): the first operand is bounced through [%sp+0x40] here,
+! whereas the neighbouring two-double templates (e.g. vis_fpadd32,
+! vis_fpsub16) use [%sp+0x48] for both operands -- confirm that the
+! different stack slot is intentional.
+!
+	.inline vis_fpadd16,16
+	std	%o0,[%sp+0x40]
+	ldd	[%sp+0x40],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fpadd16	%f4,%f10,%f0
+	.end
+!
+! float vis_fpadd16s(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fpadd16s,8
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	st	%o1,[%sp+0x48]
+	ld	[%sp+0x48],%f10
+	fpadd16s	%f4,%f10,%f0
+	.end
+!
+! double vis_fpadd32(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fpadd32,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fpadd32	%f4,%f10,%f0
+	.end
+!
+! float vis_fpadd32s(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fpadd32s,8
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	st	%o1,[%sp+0x48]
+	ld	[%sp+0x48],%f10
+	fpadd32s	%f4,%f10,%f0
+	.end
+!
+! double vis_fpsub16(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fpsub16,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fpsub16	%f4,%f10,%f0
+	.end
+!
+! float vis_fpsub16s(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fpsub16s,8
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	st	%o1,[%sp+0x48]
+	ld	[%sp+0x48],%f10
+	fpsub16s	%f4,%f10,%f0
+	.end
+!
+! double vis_fpsub32(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fpsub32,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fpsub32	%f4,%f10,%f0
+	.end
+!
+! float vis_fpsub32s(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fpsub32s,8
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	st	%o1,[%sp+0x48]
+	ld	[%sp+0x48],%f10
+	fpsub32s	%f4,%f10,%f0
+	.end
+
+!--------------------------------------------------------------------
+! Pixel packing
+!
+! float vis_fpack16(double /*frs2*/);
+!
+	.inline vis_fpack16,8
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	fpack16	%f4,%f0
+	.end
+
+!
+! double vis_fpack16_pair(double /*frs2*/, double /*frs2*/);
+!
+	.inline vis_fpack16_pair,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fpack16	%f4,%f0
+	fpack16	%f10,%f1
+	.end
+!
+! void vis_st2_fpack16(double, double, double *)
+!
+	.inline vis_st2_fpack16,20
+ 	std	%o0,[%sp+0x48]
+ 	ldd	[%sp+0x48],%f4
+ 	std	%o2,[%sp+0x48]
+ 	ldd	[%sp+0x48],%f10
+ 	fpack16	%f4,%f0
+ 	fpack16	%f10,%f1
+ 	st	%f0,[%o4+0]
+ 	st	%f1,[%o4+4]
+ 	.end
+!
+! void vis_std_fpack16(double, double, double *)
+!
+	.inline vis_std_fpack16,20
+	std     %o0,[%sp+0x48]
+	ldd     [%sp+0x48],%f4
+	std     %o2,[%sp+0x48]
+	ldd     [%sp+0x48],%f10
+	fpack16 %f4,%f0
+	fpack16 %f10,%f1
+	std     %f0,[%o4]
+	.end
+!
+! void vis_st2_fpackfix(double, double, double *)
+!
+	.inline vis_st2_fpackfix,20
+ 	std	%o0,[%sp+0x48]
+ 	ldd	[%sp+0x48],%f4
+ 	std	%o2,[%sp+0x48]
+ 	ldd	[%sp+0x48],%f10
+ 	fpackfix %f4,%f0
+ 	fpackfix %f10,%f1
+ 	st	%f0,[%o4+0]
+ 	st	%f1,[%o4+4]
+ 	.end
+!
+! double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fpack16_to_hi,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f0
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	fpack16	%f4,%f0
+	.end
+
+! double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/);
+!
+! Loads frs1 into %f0:%f1 and frs2 into %f4:%f5, packs frs2 into the
+! scratch register %f3 and then copies %f3 into %f1.  %f0 keeps the
+! high word of frs1, so only the low word of the %f0:%f1 pair is
+! replaced by the packed value.
+!
+	.inline vis_fpack16_to_lo,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f0
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	fpack16	%f4,%f3
+	fmovs	%f3,%f1		/* without this, optimizer goes wrong */
+	.end
+
+!
+! double vis_fpack32(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fpack32,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fpack32	%f4,%f10,%f0
+	.end
+!
+! float vis_fpackfix(double /*frs2*/);
+!
+	.inline vis_fpackfix,8
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	fpackfix	%f4,%f0
+	.end
+!
+! double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/);
+!
+	.inline vis_fpackfix_pair,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f6
+	fpackfix	%f4,%f0
+	fpackfix	%f6,%f1
+	.end
+
+!--------------------------------------------------------------------
+! Motion estimation
+!
+! double vis_pdist(double /*frs1*/, double /*frs2*/, double /*frd*/);
+!
+	.inline vis_pdist,24
+	std	%o4,[%sp+0x48]
+	ldd	[%sp+0x48],%f0
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	pdist	%f4,%f10,%f0
+	.end
+
+!--------------------------------------------------------------------
+! Channel merging
+!
+! double vis_fpmerge(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fpmerge,8
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	st	%o1,[%sp+0x48]
+	ld	[%sp+0x48],%f10
+	fpmerge	%f4,%f10,%f0
+	.end
+
+!--------------------------------------------------------------------
+! Pixel expansion
+!
+! double vis_fexpand(float /*frs2*/);
+!
+	.inline vis_fexpand,4
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	fexpand	%f4,%f0
+	.end
+
+! double vis_fexpand_hi(double /*frs2*/);
+!
+	.inline vis_fexpand_hi,8
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	fexpand	%f4,%f0
+	.end
+
+! double vis_fexpand_lo(double /*frs2*/);
+!
+! Loads the double into %f4:%f5, copies its low word (%f5) into %f2 and
+! expands that single word into %f0.
+!
+	.inline vis_fexpand_lo,8
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	fmovs	%f5, %f2
+	fexpand	%f2,%f0
+	.end
+
+!--------------------------------------------------------------------
+! Bitwise logical operations
+!
+! double vis_fnor(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fnor,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fnor	%f4,%f10,%f0
+	.end
+!
+! float vis_fnors(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fnors,8
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	st	%o1,[%sp+0x48]
+	ld	[%sp+0x48],%f10
+	fnors	%f4,%f10,%f0
+	.end
+!
+! double vis_fandnot(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fandnot,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fandnot1	%f4,%f10,%f0
+	.end
+!
+! float vis_fandnots(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fandnots,8
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	st	%o1,[%sp+0x48]
+	ld	[%sp+0x48],%f10
+	fandnot1s	%f4,%f10,%f0
+	.end
+!
+! double vis_fnot(double /*frs1*/);
+!
+	.inline vis_fnot,8
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	fnot1	%f4,%f0
+	.end
+!
+! float vis_fnots(float /*frs1*/);
+!
+	.inline vis_fnots,4
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	fnot1s	%f4,%f0
+	.end
+!
+! double vis_fxor(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fxor,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fxor	%f4,%f10,%f0
+	.end
+!
+! float vis_fxors(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fxors,8
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	st	%o1,[%sp+0x48]
+	ld	[%sp+0x48],%f10
+	fxors	%f4,%f10,%f0
+	.end
+!
+! double vis_fnand(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fnand,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fnand	%f4,%f10,%f0
+	.end
+!
+! float vis_fnands(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fnands,8
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	st	%o1,[%sp+0x48]
+	ld	[%sp+0x48],%f10
+	fnands	%f4,%f10,%f0
+	.end
+!
+! double vis_fand(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fand,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fand	%f4,%f10,%f0
+	.end
+!
+! float vis_fands(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fands,8
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	st	%o1,[%sp+0x48]
+	ld	[%sp+0x48],%f10
+	fands	%f4,%f10,%f0
+	.end
+!
+! double vis_fxnor(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fxnor,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fxnor	%f4,%f10,%f0
+	.end
+!
+! float vis_fxnors(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fxnors,8
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	st	%o1,[%sp+0x48]
+	ld	[%sp+0x48],%f10
+	fxnors	%f4,%f10,%f0
+	.end
+!
+! double vis_fsrc(double /*frs1*/);
+!
+	.inline vis_fsrc,8
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	fsrc1	%f4,%f0
+	.end
+!
+! float vis_fsrcs(float /*frs1*/);
+!
+	.inline vis_fsrcs,4
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	fsrc1s	%f4,%f0
+	.end
+!
+! double vis_fornot(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fornot,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	fornot1	%f4,%f10,%f0
+	.end
+!
+! float vis_fornots(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fornots,8
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	st	%o1,[%sp+0x48]
+	ld	[%sp+0x48],%f10
+	fornot1s	%f4,%f10,%f0
+	.end
+!
+! double vis_for(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_for,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	std	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f10
+	for	%f4,%f10,%f0
+	.end
+!
+! float vis_fors(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fors,8
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	st	%o1,[%sp+0x48]
+	ld	[%sp+0x48],%f10
+	fors	%f4,%f10,%f0
+	.end
+!
+! double vis_fzero(/* void */)
+!
+	.inline	vis_fzero,0
+	fzero	%f0
+	.end
+!
+! float vis_fzeros(/* void */)
+!
+	.inline	vis_fzeros,0
+	fzeros	%f0
+	.end
+!
+! double vis_fone(/* void */)
+!
+	.inline	vis_fone,0
+	fone	%f0
+	.end
+!
+! float vis_fones(/* void */)
+!
+	.inline	vis_fones,0
+	fones	%f0
+	.end
+
+!--------------------------------------------------------------------
+! Partial store instructions
+!
+! vis_stdfa_ASI_PST8P(double frd, void *rs1, int rmask)
+!
+	.inline vis_stdfa_ASI_PST8P,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	stda	%f4,[%o2]%o3,0xc0	! ASI_PST8_P
+	.end
+!
+! vis_stdfa_ASI_PST8PL(double frd, void *rs1, int rmask)
+!
+	.inline vis_stdfa_ASI_PST8PL,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	stda	%f4,[%o2]%o3,0xc8	! ASI_PST8_PL
+	.end
+!
+! vis_stdfa_ASI_PST8P_int_pair(void *rs1, void *rs2, void *rs3, int rmask);
+!
+! Loads one 32-bit word from [rs1] into %f4 and one from [rs2] into
+! %f5, then issues the partial store of the %f4 pair to [rs3] (%o2)
+! with rmask (%o3) as the mask operand.
+!
+	.inline vis_stdfa_ASI_PST8P_int_pair,16
+        ld	[%o0],%f4
+        ld	[%o1],%f5
+	stda	%f4,[%o2]%o3,0xc0	! ASI_PST8_P
+	.end
+!
+! vis_stdfa_ASI_PST8S(double frd, void *rs1, int rmask)
+!
+	.inline vis_stdfa_ASI_PST8S,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	stda	%f4,[%o2]%o3,0xc1	! ASI_PST8_S
+	.end
+!
+! vis_stdfa_ASI_PST16P(double frd, void *rs1, int rmask)
+!
+	.inline vis_stdfa_ASI_PST16P,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	stda	%f4,[%o2]%o3,0xc2	! ASI_PST16_P
+	.end
+!
+! vis_stdfa_ASI_PST16S(double frd, void *rs1, int rmask)
+!
+	.inline vis_stdfa_ASI_PST16S,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	stda	%f4,[%o2]%o3,0xc3	! ASI_PST16_S
+	.end
+!
+! vis_stdfa_ASI_PST32P(double frd, void *rs1, int rmask)
+!
+	.inline vis_stdfa_ASI_PST32P,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	stda	%f4,[%o2]%o3,0xc4	! ASI_PST32_P
+	.end
+!
+! vis_stdfa_ASI_PST32S(double frd, void *rs1, int rmask)
+!
+	.inline vis_stdfa_ASI_PST32S,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	stda	%f4,[%o2]%o3,0xc5	! ASI_PST32_S
+	.end
+
+!--------------------------------------------------------------------
+! Short store instructions
+!
+! vis_stdfa_ASI_FL8P(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_FL8P,12
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	stda	%f4,[%o2]0xd0	! ASI_FL8_P
+	.end
+!
+! vis_stdfa_ASI_FL8P_index(double frd, void *rs1, long index)
+!
+	.inline vis_stdfa_ASI_FL8P_index,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	stda	%f4,[%o2+%o3]0xd0 ! ASI_FL8_P
+	.end
+!
+! vis_stdfa_ASI_FL8S(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_FL8S,12
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	stda	%f4,[%o2]0xd1	! ASI_FL8_S
+	.end
+!
+! vis_stdfa_ASI_FL16P(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_FL16P,12
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	stda	%f4,[%o2]0xd2	! ASI_FL16_P
+	.end
+!
+! vis_stdfa_ASI_FL16P_index(double frd, void *rs1, long index)
+!
+	.inline vis_stdfa_ASI_FL16P_index,16
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	stda	%f4,[%o2+%o3]0xd2 ! ASI_FL16_P
+	.end
+!
+! vis_stdfa_ASI_FL16S(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_FL16S,12
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	stda	%f4,[%o2]0xd3	! ASI_FL16_S
+	.end
+!
+! vis_stdfa_ASI_FL8PL(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_FL8PL,12
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	stda	%f4,[%o2]0xd8	! ASI_FL8_PL
+	.end
+!
+! vis_stdfa_ASI_FL8SL(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_FL8SL,12
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	stda	%f4,[%o2]0xd9	! ASI_FL8_SL
+	.end
+!
+! vis_stdfa_ASI_FL16PL(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_FL16PL,12
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	stda	%f4,[%o2]0xda	! ASI_FL16_PL
+	.end
+!
+! vis_stdfa_ASI_FL16SL(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_FL16SL,12
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	stda	%f4,[%o2]0xdb	! ASI_FL16_SL
+	.end
+
+!--------------------------------------------------------------------
+! Short load instructions
+!
+! double vis_lddfa_ASI_FL8P(void *rs1)
+!
+	.inline vis_lddfa_ASI_FL8P,4
+	ldda	[%o0]0xd0,%f4	! ASI_FL8_P
+	fmovd	%f4,%f0	        ! Compiler can clean this up
+	.end
+!
+! double vis_lddfa_ASI_FL8P_index(void *rs1, long index)
+!
+	.inline vis_lddfa_ASI_FL8P_index,8
+	ldda	[%o0+%o1]0xd0,%f4
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL8P_hi(void *rs1, unsigned int index)
+!
+! The load offset is index shifted right arithmetically by 16, i.e. the
+! upper halfword of index, sign-extended.
+!
+	.inline vis_lddfa_ASI_FL8P_hi,8
+	sra     %o1,16,%o1
+	ldda	[%o0+%o1]0xd0,%f4
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL8P_lo(void *rs1, unsigned int index)
+!
+! The load offset is the lower halfword of index, sign-extended: the
+! shift left by 16 discards the upper halfword and the arithmetic shift
+! right by 16 brings the lower one back with its sign.
+!
+	.inline vis_lddfa_ASI_FL8P_lo,8
+	sll     %o1,16,%o1
+	sra     %o1,16,%o1
+	ldda	[%o0+%o1]0xd0,%f4
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL8S(void *rs1)
+!
+	.inline vis_lddfa_ASI_FL8S,4
+	ldda	[%o0]0xd1,%f4	! ASI_FL8_S
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL16P(void *rs1)
+!
+	.inline vis_lddfa_ASI_FL16P,4
+	ldda	[%o0]0xd2,%f4	! ASI_FL16_P
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL16P_index(void *rs1, long index)
+!
+	.inline vis_lddfa_ASI_FL16P_index,8
+	ldda	[%o0+%o1]0xd2,%f4 ! ASI_FL16_P
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL16S(void *rs1)
+!
+	.inline vis_lddfa_ASI_FL16S,4
+	ldda	[%o0]0xd3,%f4	! ASI_FL16_S
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL8PL(void *rs1)
+!
+	.inline vis_lddfa_ASI_FL8PL,4
+	ldda	[%o0]0xd8,%f4	! ASI_FL8_PL
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL8PL_index(void *rs1, long index)
+!
+	.inline vis_lddfa_ASI_FL8PL_index,8
+	ldda	[%o0+%o1]0xd8,%f4	! ASI_FL8_PL
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL8SL(void *rs1)
+!
+	.inline vis_lddfa_ASI_FL8SL,4
+	ldda	[%o0]0xd9,%f4	! ASI_FL8_SL
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL16PL(void *rs1)
+!
+	.inline vis_lddfa_ASI_FL16PL,4
+	ldda	[%o0]0xda,%f4	! ASI_FL16_PL
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL16PL_index(void *rs1, long index)
+!
+	.inline vis_lddfa_ASI_FL16PL_index,8
+	ldda	[%o0+%o1]0xda,%f4	! ASI_FL16_PL
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL16SL(void *rs1)
+!
+	.inline vis_lddfa_ASI_FL16SL,4
+	ldda	[%o0]0xdb,%f4	! ASI_FL16_SL
+	fmovd	%f4,%f0
+	.end
+
+!--------------------------------------------------------------------
+! Graphics status register
+!
+! unsigned int vis_read_gsr(void)
+!
+	.inline vis_read_gsr,0
+	rd	%gsr,%o0
+	.end
+!
+! void vis_write_gsr(unsigned int /* GSR */)
+!
+	.inline vis_write_gsr,4
+	wr	%g0,%o0,%gsr
+	.end
+
+!--------------------------------------------------------------------
+! Voxel texture mapping
+!
+! unsigned long vis_array8(unsigned long long /*rs1 */, int /*rs2*/)
+!
+! The code treats %o0 as the upper and %o1 as the lower 32 bits of rs1
+! and rebuilds the 64-bit value in %o3 before the array8 instruction;
+! rs2 is taken from %o2.
+!
+	.inline	vis_array8,12
+	sllx	%o0,32,%o0
+	srl	%o1,0,%o1	! clear the most significant 32 bits of %o1
+	or	%o0,%o1,%o3	! join %o0 and %o1 into %o3
+	array8	%o3,%o2,%o0
+	.end
+!
+! unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/)
+!
+	.inline	vis_array16,12
+	sllx	%o0,32,%o0
+	srl	%o1,0,%o1	! clear the most significant 32 bits of %o1
+	or	%o0,%o1,%o3	! join %o0 and %o1 into %o3
+	array16	%o3,%o2,%o0
+	.end
+!
+! unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/)
+!
+	.inline	vis_array32,12
+	sllx	%o0,32,%o0
+	srl	%o1,0,%o1	! clear the most significant 32 bits of %o1
+	or	%o0,%o1,%o3	! join %o0 and %o1 into %o3
+	array32	%o3,%o2,%o0
+	.end
+
+!--------------------------------------------------------------------
+! Register aliasing and type casts
+!
+! float vis_read_hi(double /* frs1 */);
+!
+	.inline vis_read_hi,8
+	std	%o0,[%sp+0x48]	! store double frs1
+	ldd	[%sp+0x48],%f0	! %f0:%f1 = double frs1; return %f0;
+	.end
+!
+! float vis_read_lo(double /* frs1 */);
+!
+	.inline vis_read_lo,8
+	std	%o0,[%sp+0x48]	! store double frs1
+	ldd	[%sp+0x48],%f0	! %f0:%f1 = double frs1;
+	fmovs	%f1,%f0		! %f0 = low word (frs1); return %f0;
+	.end
+!
+! double vis_write_hi(double /* frs1 */, float /* frs2 */);
+!
+	.inline vis_write_hi,12
+	std	%o0,[%sp+0x48]	! store double frs1;
+	ldd	[%sp+0x48],%f0	! %f0:%f1 = double frs1;
+	st	%o2,[%sp+0x44]	! store float frs2;
+	ld	[%sp+0x44],%f2	! %f2 = float frs2;
+	fmovs	%f2,%f0		! %f0 = float frs2; return %f0:f1;
+	.end
+!
+! double vis_write_lo(double /* frs1 */, float /* frs2 */);
+!
+	.inline vis_write_lo,12
+	std	%o0,[%sp+0x48]	! store double frs1;
+	ldd	[%sp+0x48],%f0	! %f0:%f1 = double frs1;
+	st	%o2,[%sp+0x44]	! store float frs2;
+	ld	[%sp+0x44],%f2	! %f2 = float frs2;
+	fmovs	%f2,%f1		! %f1 = float frs2; return %f0:f1;
+	.end
+!
+! double vis_freg_pair(float /* frs1 */, float /* frs2 */);
+!
+	.inline vis_freg_pair,8
+	st	%o0,[%sp+0x48]	! store float frs1
+	ld	[%sp+0x48],%f0
+	st	%o1,[%sp+0x48]	! store float frs2
+	ld	[%sp+0x48],%f1
+	.end
+!
+! float vis_to_float(unsigned int /*value*/);
+!
+	.inline vis_to_float,4
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f0
+	.end
+!
+! double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/);
+!
+	.inline vis_to_double,8
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f0
+	.end
+!
+! double vis_to_double_dup(unsigned int /*value*/);
+!
+! Moves value through the stack slot into %f1 and copies it to %f0, so
+! both words of the %f0:%f1 pair hold the same 32-bit pattern.
+!
+	.inline vis_to_double_dup,4
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f1
+	fmovs	%f1,%f0		! duplicate value
+	.end
+!
+! double vis_ll_to_double(unsigned long long /*value*/);
+!
+	.inline vis_ll_to_double,8
+	std     %o0,[%sp+0x48]
+	ldd     [%sp+0x48],%f0
+	.end
+
+!--------------------------------------------------------------------
+! Address space identifier (ASI) register
+!
+! unsigned int vis_read_asi(void)
+!
+	.inline vis_read_asi,0
+	rd	%asi,%o0
+	.end
+!
+! void vis_write_asi(unsigned int /* ASI */)
+!
+	.inline vis_write_asi,4
+	wr	%g0,%o0,%asi
+	.end
+
+!--------------------------------------------------------------------
+! Load/store from/into alternate space
+!
+! float vis_ldfa_ASI_REG(void *rs1)
+!
+	.inline vis_ldfa_ASI_REG,4
+	lda	[%o0+0]%asi,%f4
+	fmovs	%f4,%f0	        ! Compiler can clean this up
+	.end
+!
+! float vis_ldfa_ASI_P(void *rs1)
+!
+	.inline vis_ldfa_ASI_P,4
+	lda	[%o0]0x80,%f4	! ASI_P
+	fmovs	%f4,%f0	        ! Compiler can clean this up
+	.end
+!
+! float vis_ldfa_ASI_PL(void *rs1)
+!
+	.inline vis_ldfa_ASI_PL,4
+	lda	[%o0]0x88,%f4	! ASI_PL
+	fmovs	%f4,%f0	        ! Compiler can clean this up
+	.end
+!
+! double vis_lddfa_ASI_REG(void *rs1)
+!
+	.inline vis_lddfa_ASI_REG,4
+	ldda	[%o0+0]%asi,%f4
+	fmovd	%f4,%f0	        ! Compiler can clean this up
+	.end
+!
+! double vis_lddfa_ASI_P(void *rs1)
+!
+	.inline vis_lddfa_ASI_P,4
+	ldda	[%o0]0x80,%f4	! ASI_P
+	fmovd	%f4,%f0	        ! Compiler can clean this up
+	.end
+!
+! double vis_lddfa_ASI_PL(void *rs1)
+!
+	.inline vis_lddfa_ASI_PL,4
+	ldda	[%o0]0x88,%f4	! ASI_PL
+	fmovd	%f4,%f0	        ! Compiler can clean this up
+	.end
+!
+! vis_stfa_ASI_REG(float frs, void *rs1)
+!
+	.inline vis_stfa_ASI_REG,8
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	sta	%f4,[%o1+0]%asi
+	.end
+!
+! vis_stfa_ASI_P(float frs, void *rs1)
+!
+	.inline vis_stfa_ASI_P,8
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	sta	%f4,[%o1]0x80	! ASI_P
+	.end
+!
+! vis_stfa_ASI_PL(float frs, void *rs1)
+!
+	.inline vis_stfa_ASI_PL,8
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+	sta	%f4,[%o1]0x88	! ASI_PL
+	.end
+!
+! vis_stdfa_ASI_REG(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_REG,12
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	stda	%f4,[%o2+0]%asi
+	.end
+!
+! vis_stdfa_ASI_P(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_P,12
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	stda	%f4,[%o2]0x80	! ASI_P
+	.end
+!
+! vis_stdfa_ASI_PL(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_PL,12
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+	stda	%f4,[%o2]0x88	! ASI_PL
+	.end
+!
+! unsigned short vis_lduha_ASI_REG(void *rs1)
+!
+	.inline vis_lduha_ASI_REG,4
+	lduha	[%o0+0]%asi,%o0
+	.end
+!
+! unsigned short vis_lduha_ASI_P(void *rs1)
+!
+	.inline vis_lduha_ASI_P,4
+	lduha	[%o0]0x80,%o0	! ASI_P
+	.end
+!
+! unsigned short vis_lduha_ASI_PL(void *rs1)
+!
+	.inline vis_lduha_ASI_PL,4
+	lduha	[%o0]0x88,%o0	! ASI_PL
+	.end
+!
+! unsigned short vis_lduha_ASI_P_index(void *rs1, long index)
+!
+	.inline vis_lduha_ASI_P_index,8
+	lduha	[%o0+%o1]0x80,%o0	! ASI_P
+	.end
+!
+! unsigned short vis_lduha_ASI_PL_index(void *rs1, long index)
+!
+	.inline vis_lduha_ASI_PL_index,8
+	lduha	[%o0+%o1]0x88,%o0	! ASI_PL
+	.end
+
+!--------------------------------------------------------------------
+! Prefetch
+!
+! void vis_prefetch_read(void * /*address*/);
+!
+	.inline vis_prefetch_read,4
+	prefetch	[%o0+0],0
+	.end
+!
+! void vis_prefetch_write(void * /*address*/);
+!
+	.inline vis_prefetch_write,4
+	prefetch	[%o0+0],2
+	.end
diff --git a/security/nss/lib/freebl/mpi/vis_64.il b/security/nss/lib/freebl/mpi/vis_64.il
new file mode 100644
index 000000000..cbe2b5aa2
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/vis_64.il
@@ -0,0 +1,997 @@
+! 
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+! This file is to be used in place of vis.il in 64-bit builds.
+
+!--------------------------------------------------------------------
+! Pure edge handling instructions
+! Each template issues one edge* instruction on %o0,%o1 and leaves the result in %o0.
+! int vis_edge8(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge8,16
+	edge8	%o0,%o1,%o0
+	.end
+!
+! int vis_edge8l(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge8l,16
+	edge8l	%o0,%o1,%o0
+	.end
+!
+! int vis_edge16(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge16,16
+	edge16	%o0,%o1,%o0
+	.end
+!
+! int vis_edge16l(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge16l,16
+	edge16l	%o0,%o1,%o0
+	.end
+!
+! int vis_edge32(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge32,16
+	edge32	%o0,%o1,%o0
+	.end
+!
+! int vis_edge32l(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge32l,16
+	edge32l	%o0,%o1,%o0
+	.end
+
+!--------------------------------------------------------------------
+! Edge handling instructions with negative return values if cc set
+! After the edge op, %o1 is zeroed, set to -1024 by movgu on %xcc, and OR-ed into %o0 (sets bits 10 and up).
+! int vis_edge8cc(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge8cc,16
+	edge8	%o0,%o1,%o0
+	mov     0,%o1
+	movgu   %xcc,-1024,%o1
+	or      %o1,%o0,%o0
+	.end
+!
+! int vis_edge8lcc(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge8lcc,16
+	edge8l	%o0,%o1,%o0
+	mov     0,%o1
+	movgu   %xcc,-1024,%o1
+	or      %o1,%o0,%o0
+	.end
+!
+! int vis_edge16cc(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge16cc,16
+	edge16	%o0,%o1,%o0
+	mov     0,%o1
+	movgu   %xcc,-1024,%o1
+	or      %o1,%o0,%o0
+	.end
+!
+! int vis_edge16lcc(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge16lcc,16
+	edge16l	%o0,%o1,%o0
+	mov     0,%o1
+	movgu   %xcc,-1024,%o1
+	or      %o1,%o0,%o0
+	.end
+!
+! int vis_edge32cc(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge32cc,16
+	edge32	%o0,%o1,%o0
+	mov     0,%o1
+	movgu   %xcc,-1024,%o1
+	or      %o1,%o0,%o0
+	.end
+!
+! int vis_edge32lcc(void */*frs1*/, void */*frs2*/);
+!
+	.inline vis_edge32lcc,16
+	edge32l	%o0,%o1,%o0
+	mov     0,%o1
+	movgu   %xcc,-1024,%o1
+	or      %o1,%o0,%o0
+	.end
+
+!--------------------------------------------------------------------
+! Alignment instructions
+! alignaddr/alignaddrl work on %o0,%o1 and return in %o0; faligndata works on %f0,%f2 and returns in %f0.
+! void *vis_alignaddr(void */*rs1*/, int /*rs2*/);
+!
+	.inline vis_alignaddr,12
+	alignaddr	%o0,%o1,%o0
+	.end
+!
+! void *vis_alignaddrl(void */*rs1*/, int /*rs2*/);
+!
+	.inline vis_alignaddrl,12
+	alignaddrl	%o0,%o1,%o0
+	.end
+!
+! double vis_faligndata(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_faligndata,16
+	faligndata	%f0,%f2,%f0
+	.end
+
+!--------------------------------------------------------------------
+! Partitioned comparison instructions
+! Each template compares the doubles in %f0 and %f2 and returns the instruction's integer result in %o0.
+! int vis_fcmple16(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fcmple16,16
+	fcmple16	%f0,%f2,%o0
+	.end
+!
+! int vis_fcmpne16(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fcmpne16,16
+	fcmpne16	%f0,%f2,%o0
+	.end
+!
+! int vis_fcmple32(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fcmple32,16
+	fcmple32	%f0,%f2,%o0
+	.end
+!
+! int vis_fcmpne32(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fcmpne32,16
+	fcmpne32	%f0,%f2,%o0
+	.end
+!
+! int vis_fcmpgt16(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fcmpgt16,16
+	fcmpgt16	%f0,%f2,%o0
+	.end
+!
+! int vis_fcmpeq16(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fcmpeq16,16
+	fcmpeq16	%f0,%f2,%o0
+	.end
+!
+! int vis_fcmpgt32(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fcmpgt32,16
+	fcmpgt32	%f0,%f2,%o0
+	.end
+!
+! int vis_fcmpeq32(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fcmpeq32,16
+	fcmpeq32	%f0,%f2,%o0
+	.end
+
+!--------------------------------------------------------------------
+! Partitioned arithmetic
+! Double operands are read from %f0/%f2, float operands from %f1/%f3; every result is written to %f0.
+! double vis_fmul8x16(float /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fmul8x16,12
+	fmul8x16	%f1,%f2,%f0
+	.end
+! Here frs2 is read from %f4 (not %f2); presumably the int dummy takes the second argument slot.
+! double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/);
+!
+	.inline vis_fmul8x16_dummy,16
+	fmul8x16	%f1,%f4,%f0
+	.end
+!
+! double vis_fmul8x16au(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fmul8x16au,8
+	fmul8x16au	%f1,%f3,%f0
+	.end
+!
+! double vis_fmul8x16al(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fmul8x16al,8
+	fmul8x16al	%f1,%f3,%f0
+	.end
+!
+! double vis_fmul8sux16(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fmul8sux16,16
+	fmul8sux16	%f0,%f2,%f0
+	.end
+!
+! double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fmul8ulx16,16
+	fmul8ulx16	%f0,%f2,%f0
+	.end
+!
+! double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fmuld8sux16,8
+	fmuld8sux16	%f1,%f3,%f0
+	.end
+!
+! double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fmuld8ulx16,8
+	fmuld8ulx16	%f1,%f3,%f0
+	.end
+!
+! double vis_fpadd16(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fpadd16,16
+	fpadd16	%f0,%f2,%f0
+	.end
+!
+! float vis_fpadd16s(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fpadd16s,8
+	fpadd16s	%f1,%f3,%f0
+	.end
+!
+! double vis_fpadd32(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fpadd32,16
+	fpadd32	%f0,%f2,%f0
+	.end
+!
+! float vis_fpadd32s(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fpadd32s,8
+	fpadd32s	%f1,%f3,%f0
+	.end
+!
+! double vis_fpsub16(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fpsub16,16
+	fpsub16	%f0,%f2,%f0
+	.end
+!
+! float vis_fpsub16s(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fpsub16s,8
+	fpsub16s	%f1,%f3,%f0
+	.end
+!
+! double vis_fpsub32(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fpsub32,16
+	fpsub32	%f0,%f2,%f0
+	.end
+!
+! float vis_fpsub32s(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fpsub32s,8
+	fpsub32s	%f1,%f3,%f0
+	.end
+
+!--------------------------------------------------------------------
+! Pixel packing
+! The *_pair, st2 and std forms pack %f0 into %f0 and %f2 into %f1, giving two packed words in %f0:%f1.
+! float vis_fpack16(double /*frs2*/);
+!
+	.inline vis_fpack16,8
+	fpack16	%f0,%f0
+	.end
+!
+! double vis_fpack16_pair(double /*frs2*/, double /*frs2*/);
+!
+	.inline vis_fpack16_pair,16
+	fpack16	%f0,%f0
+	fpack16	%f2,%f1
+	.end
+! Packs both doubles, then stores the two words separately at [%o2+0] and [%o2+4].
+! void vis_st2_fpack16(double, double, double *);
+!
+	.inline vis_st2_fpack16,24
+ 	fpack16	%f0,%f0
+ 	fpack16	%f2,%f1
+ 	st	%f0,[%o2+0]
+ 	st	%f1,[%o2+4]
+ 	.end
+! Same packing as vis_st2_fpack16, but written with a single std of %f0 to [%o2].
+! void vis_std_fpack16(double, double, double *);
+!
+	.inline vis_std_fpack16,24
+	fpack16	%f0,%f0
+	fpack16	%f2,%f1
+	std	%f0,[%o2]
+	.end
+!
+! void vis_st2_fpackfix(double, double, double *);
+!
+	.inline vis_st2_fpackfix,24
+ 	fpackfix %f0,%f0
+ 	fpackfix %f2,%f1
+ 	st	%f0,[%o2+0]
+ 	st	%f1,[%o2+4]
+ 	.end
+! Packs frs2 (%f2) into %f0 only; %f1 is not written by this template.
+! double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fpack16_to_hi,16
+	fpack16	%f2,%f0
+	.end
+
+! double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/);
+! Packs frs2 (%f2) into %f3 and then copies %f3 to %f1; %f0 is not written.
+	.inline vis_fpack16_to_lo,16
+	fpack16	%f2,%f3
+	fmovs	%f3,%f1		/* without this, optimizer goes wrong */
+	.end
+
+!
+! double vis_fpack32(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fpack32,16
+	fpack32	%f0,%f2,%f0
+	.end
+!
+! float vis_fpackfix(double /*frs2*/);
+!
+	.inline vis_fpackfix,8
+	fpackfix	%f0,%f0
+	.end
+!
+! double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/);
+!
+	.inline vis_fpackfix_pair,16
+	fpackfix	%f0,%f0
+	fpackfix	%f2,%f1
+	.end
+
+!--------------------------------------------------------------------
+! Motion estimation
+! pdist reads pxls1 (%f2) and pxls2 (%f4); %f0 holds accum on entry and the result on return.
+! double vis_pxldist64(double accum /*frd*/, double pxls1 /*frs1*/,
+!		       double pxls2 /*frs2*/);
+! NOTE(review): vis_proto.h declares vis_pdist(frs1, frs2, frd), not this name -- confirm callers.
+	.inline vis_pxldist64,24
+	pdist	%f2,%f4,%f0
+	.end
+
+!--------------------------------------------------------------------
+! Channel merging
+! Merges the two float arguments in %f1 and %f3 with fpmerge; the double result is in %f0.
+! double vis_fpmerge(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fpmerge,8
+	fpmerge	%f1,%f3,%f0
+	.end
+
+!--------------------------------------------------------------------
+! Pixel expansion
+! The float form expands %f1; the _hi/_lo forms expand %f0 or %f1 of the double argument. Result in %f0.
+! double vis_fexpand(float /*frs2*/);
+!
+	.inline vis_fexpand,4
+	fexpand	%f1,%f0
+	.end
+
+! double vis_fexpand_hi(double /*frs2*/);
+! Expands %f0, the first word of the double argument.
+	.inline vis_fexpand_hi,8
+	fexpand	%f0,%f0
+	.end
+
+! double vis_fexpand_lo(double /*frs2*/);
+! Expands %f1, the second word of the double argument.
+	.inline vis_fexpand_lo,8
+	fexpand	%f1,%f0
+	.end
+
+!--------------------------------------------------------------------
+! Bitwise logical operations
+! Double forms use %f0,%f2 and float forms %f1,%f3; results go to %f0. andnot/not/src/ornot use the "1" opcodes.
+! double vis_fnor(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fnor,16
+	fnor	%f0,%f2,%f0
+	.end
+!
+! float vis_fnors(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fnors,8
+	fnors	%f1,%f3,%f0
+	.end
+!
+! double vis_fandnot(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fandnot,16
+	fandnot1 %f0,%f2,%f0
+	.end
+!
+! float vis_fandnots(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fandnots,8
+	fandnot1s %f1,%f3,%f0
+	.end
+!
+! double vis_fnot(double /*frs1*/);
+!
+	.inline vis_fnot,8
+	fnot1	%f0,%f0
+	.end
+!
+! float vis_fnots(float /*frs1*/);
+!
+	.inline vis_fnots,4
+	fnot1s	%f1,%f0
+	.end
+!
+! double vis_fxor(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fxor,16
+	fxor	%f0,%f2,%f0
+	.end
+!
+! float vis_fxors(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fxors,8
+	fxors	%f1,%f3,%f0
+	.end
+!
+! double vis_fnand(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fnand,16
+	fnand	%f0,%f2,%f0
+	.end
+!
+! float vis_fnands(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fnands,8
+	fnands	%f1,%f3,%f0
+	.end
+!
+! double vis_fand(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fand,16
+	fand	%f0,%f2,%f0
+	.end
+!
+! float vis_fands(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fands,8
+	fands	%f1,%f3,%f0
+	.end
+!
+! double vis_fxnor(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fxnor,16
+	fxnor	%f0,%f2,%f0
+	.end
+!
+! float vis_fxnors(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fxnors,8
+	fxnors	%f1,%f3,%f0
+	.end
+!
+! double vis_fsrc(double /*frs1*/);
+!
+	.inline vis_fsrc,8
+	fsrc1	%f0,%f0
+	.end
+!
+! float vis_fsrcs(float /*frs1*/);
+!
+	.inline vis_fsrcs,4
+	fsrc1s	%f1,%f0
+	.end
+!
+! double vis_fornot(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_fornot,16
+	fornot1	%f0,%f2,%f0
+	.end
+!
+! float vis_fornots(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fornots,8
+	fornot1s %f1,%f3,%f0
+	.end
+!
+! double vis_for(double /*frs1*/, double /*frs2*/);
+!
+	.inline vis_for,16
+	for	%f0,%f2,%f0
+	.end
+!
+! float vis_fors(float /*frs1*/, float /*frs2*/);
+!
+	.inline vis_fors,8
+	fors	%f1,%f3,%f0
+	.end
+! The four constant generators below take no arguments and write only %f0.
+! double vis_fzero(/* void */)
+!
+	.inline	vis_fzero,0
+	fzero	%f0
+	.end
+!
+! float vis_fzeros(/* void */)
+!
+	.inline	vis_fzeros,0
+	fzeros	%f0
+	.end
+!
+! double vis_fone(/* void */)
+!
+	.inline	vis_fone,0
+	fone	%f0
+	.end
+!
+! float vis_fones(/* void */)
+!
+	.inline	vis_fones,0
+	fones	%f0
+	.end
+
+!--------------------------------------------------------------------
+! Partial store instructions
+! Each template stores %f0 to the address in %o1 with the mask in %o2; the ASI is the immediate operand.
+! void vis_stdfa_ASI_PST8P(double frd, void *rs1, int rmask)
+!
+	.inline vis_stdfa_ASI_PST8P,20
+	stda	%f0,[%o1]%o2,0xc0	! ASI_PST8_P
+	.end
+!
+! void vis_stdfa_ASI_PST8PL(double frd, void *rs1, int rmask)
+!
+	.inline vis_stdfa_ASI_PST8PL,20
+	stda	%f0,[%o1]%o2,0xc8	! ASI_PST8_PL
+	.end
+! Loads one word each from [%o0] and [%o1] into %f4/%f5, then stores %f4 to [%o2] with mask %o3.
+! void vis_stdfa_ASI_PST8P_int_pair(void *rs1, void *rs2, void *rs3, int rmask);
+!
+	.inline vis_stdfa_ASI_PST8P_int_pair,28
+        ld	[%o0],%f4
+        ld	[%o1],%f5
+	stda	%f4,[%o2]%o3,0xc0	! ASI_PST8_P
+	.end
+!
+! void vis_stdfa_ASI_PST8S(double frd, void *rs1, int rmask)
+!
+	.inline vis_stdfa_ASI_PST8S,20
+	stda	%f0,[%o1]%o2,0xc1	! ASI_PST8_S
+	.end
+!
+! void vis_stdfa_ASI_PST16P(double frd, void *rs1, int rmask)
+!
+	.inline vis_stdfa_ASI_PST16P,20
+	stda	%f0,[%o1]%o2,0xc2	! ASI_PST16_P
+	.end
+!
+! void vis_stdfa_ASI_PST16S(double frd, void *rs1, int rmask)
+!
+	.inline vis_stdfa_ASI_PST16S,20
+	stda	%f0,[%o1]%o2,0xc3	! ASI_PST16_S
+	.end
+!
+! void vis_stdfa_ASI_PST32P(double frd, void *rs1, int rmask)
+!
+	.inline vis_stdfa_ASI_PST32P,20
+	stda	%f0,[%o1]%o2,0xc4	! ASI_PST32_P
+	.end
+!
+! void vis_stdfa_ASI_PST32S(double frd, void *rs1, int rmask)
+!
+	.inline vis_stdfa_ASI_PST32S,20
+	stda	%f0,[%o1]%o2,0xc5	! ASI_PST32_S
+	.end
+
+!--------------------------------------------------------------------
+! Short store instructions
+! Each template stores %f0 to [%o1] (or [%o1+%o2] in the _index forms) using the immediate ASI shown.
+! void vis_stdfa_ASI_FL8P(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_FL8P,16
+	stda	%f0,[%o1]0xd0	! ASI_FL8_P
+	.end
+!
+! void vis_stdfa_ASI_FL8P_index(double frd, void *rs1, long index)
+!
+	.inline vis_stdfa_ASI_FL8P_index,24
+	stda	%f0,[%o1+%o2]0xd0 ! ASI_FL8_P
+	.end
+!
+! void vis_stdfa_ASI_FL8S(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_FL8S,16
+	stda	%f0,[%o1]0xd1	! ASI_FL8_S
+	.end
+!
+! void vis_stdfa_ASI_FL16P(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_FL16P,16
+	stda	%f0,[%o1]0xd2	! ASI_FL16_P
+	.end
+!
+! void vis_stdfa_ASI_FL16P_index(double frd, void *rs1, long index)
+!
+	.inline vis_stdfa_ASI_FL16P_index,24
+	stda	%f0,[%o1+%o2]0xd2 ! ASI_FL16_P
+	.end
+!
+! void vis_stdfa_ASI_FL16S(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_FL16S,16
+	stda	%f0,[%o1]0xd3	! ASI_FL16_S
+	.end
+!
+! void vis_stdfa_ASI_FL8PL(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_FL8PL,16
+	stda	%f0,[%o1]0xd8	! ASI_FL8_PL
+	.end
+!
+! void vis_stdfa_ASI_FL8SL(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_FL8SL,16
+	stda	%f0,[%o1]0xd9	! ASI_FL8_SL
+	.end
+!
+! void vis_stdfa_ASI_FL16PL(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_FL16PL,16
+	stda	%f0,[%o1]0xda	! ASI_FL16_PL
+	.end
+!
+! void vis_stdfa_ASI_FL16SL(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_FL16SL,16
+	stda	%f0,[%o1]0xdb	! ASI_FL16_SL
+	.end
+
+!--------------------------------------------------------------------
+! Short load instructions
+! Each template loads into %f4 from [%o0] (or [%o0+%o1]) with the immediate ASI, then copies %f4 to %f0.
+! double vis_lddfa_ASI_FL8P(void *rs1)
+!
+	.inline vis_lddfa_ASI_FL8P,8
+	ldda	[%o0]0xd0,%f4	! ASI_FL8_P
+	fmovd	%f4,%f0	        ! Compiler can clean this up
+	.end
+!
+! double vis_lddfa_ASI_FL8P_index(void *rs1, long index)
+!
+	.inline vis_lddfa_ASI_FL8P_index,16
+	ldda	[%o0+%o1]0xd0,%f4
+	fmovd	%f4,%f0
+	.end
+! The offset is index shifted right arithmetically by 16 (sra), i.e. its upper halfword.
+! double vis_lddfa_ASI_FL8P_hi(void *rs1, unsigned int index)
+!
+	.inline vis_lddfa_ASI_FL8P_hi,12
+	sra     %o1,16,%o1
+	ldda	[%o0+%o1]0xd0,%f4
+	fmovd	%f4,%f0
+	.end
+! The offset is the lower halfword of index: shifted left 16 (sll), then back right arithmetically (sra).
+! double vis_lddfa_ASI_FL8P_lo(void *rs1, unsigned int index)
+!
+	.inline vis_lddfa_ASI_FL8P_lo,12
+	sll     %o1,16,%o1
+	sra     %o1,16,%o1
+	ldda	[%o0+%o1]0xd0,%f4
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL8S(void *rs1)
+!
+	.inline vis_lddfa_ASI_FL8S,8
+	ldda	[%o0]0xd1,%f4	! ASI_FL8_S
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL16P(void *rs1)
+!
+	.inline vis_lddfa_ASI_FL16P,8
+	ldda	[%o0]0xd2,%f4	! ASI_FL16_P
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL16P_index(void *rs1, long index)
+!
+	.inline vis_lddfa_ASI_FL16P_index,16
+	ldda	[%o0+%o1]0xd2,%f4 ! ASI_FL16_P
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL16S(void *rs1)
+!
+	.inline vis_lddfa_ASI_FL16S,8
+	ldda	[%o0]0xd3,%f4	! ASI_FL16_S
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL8PL(void *rs1)
+!
+	.inline vis_lddfa_ASI_FL8PL,8
+	ldda	[%o0]0xd8,%f4	! ASI_FL8_PL
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL8PL_index(void *rs1, long index)
+!
+	.inline vis_lddfa_ASI_FL8PL_index,16
+	ldda	[%o0+%o1]0xd8,%f4	! ASI_FL8_PL
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL8SL(void *rs1)
+!
+	.inline vis_lddfa_ASI_FL8SL,8
+	ldda	[%o0]0xd9,%f4	! ASI_FL8_SL
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL16PL(void *rs1)
+!
+	.inline vis_lddfa_ASI_FL16PL,8
+	ldda	[%o0]0xda,%f4	! ASI_FL16_PL
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL16PL_index(void *rs1, long index)
+!
+	.inline vis_lddfa_ASI_FL16PL_index,16
+	ldda	[%o0+%o1]0xda,%f4	! ASI_FL16_PL
+	fmovd	%f4,%f0
+	.end
+!
+! double vis_lddfa_ASI_FL16SL(void *rs1)
+!
+	.inline vis_lddfa_ASI_FL16SL,8
+	ldda	[%o0]0xdb,%f4	! ASI_FL16_SL
+	fmovd	%f4,%f0
+	.end
+
+!--------------------------------------------------------------------
+! Graphics status register
+! The read template copies %gsr to %o0; the write template sets %gsr from %o0 (wr with %g0 as first source).
+! unsigned int vis_read_gsr(void)
+!
+	.inline vis_read_gsr,0
+	rd	%gsr,%o0
+	.end
+!
+! void vis_write_gsr(unsigned int /* GSR */)
+!
+	.inline vis_write_gsr,4
+	wr	%g0,%o0,%gsr
+	.end
+
+!--------------------------------------------------------------------
+! Voxel texture mapping
+! Each template issues one array* instruction on %o0,%o1 and returns the result in %o0.
+! unsigned long vis_array8(unsigned long long /*rs1 */, int /*rs2*/)
+!
+	.inline	vis_array8,12
+	array8	%o0,%o1,%o0
+	.end
+!
+! unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/)
+!
+	.inline	vis_array16,12
+	array16	%o0,%o1,%o0
+	.end
+!
+! unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/)
+!
+	.inline	vis_array32,12
+	array32	%o0,%o1,%o0
+	.end
+
+!--------------------------------------------------------------------
+! Register aliasing and type casts
+! A double occupies the pair %f0:%f1; these templates move single words in and out of that pair.
+! float vis_read_hi(double /* frs1 */);
+!
+	.inline vis_read_hi,8
+	fmovs	%f0,%f0
+	.end
+!
+! float vis_read_lo(double /* frs1 */);
+!
+	.inline vis_read_lo,8
+	fmovs	%f1,%f0		! %f0 = low word (frs1); return %f0;
+	.end
+!
+! double vis_write_hi(double /* frs1 */, float /* frs2 */);
+!
+	.inline vis_write_hi,12
+	fmovs	%f3,%f0		! %f3 = float frs2; return %f0:f1;
+	.end
+!
+! double vis_write_lo(double /* frs1 */, float /* frs2 */);
+!
+	.inline vis_write_lo,12
+	fmovs	%f3,%f1		! %f3 = float frs2; return %f0:f1;
+	.end
+!
+! double vis_freg_pair(float /* frs1 */, float /* frs2 */);
+!
+	.inline vis_freg_pair,8
+	fmovs	%f1,%f0		! %f1 = float frs1; put in hi;
+	fmovs	%f3,%f1		! %f3 = float frs2; put in lo; return %f0:f1;
+	.end
+! The vis_to_* templates move integer registers to FP registers through the scratch slot [%sp+2183].
+! float vis_to_float(unsigned int /*value*/);
+!
+	.inline vis_to_float,4
+	st	%o0,[%sp+2183]
+	ld	[%sp+2183],%f0
+	.end
+! value1 goes to %f0 and value2 to %f1, one word at a time through the same scratch slot.
+! double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/);
+!
+	.inline vis_to_double,8
+	st	%o0,[%sp+2183]
+	ld	[%sp+2183],%f0
+	st	%o1,[%sp+2183]
+	ld	[%sp+2183],%f1
+	.end
+!
+! double vis_to_double_dup(unsigned int /*value*/);
+!
+	.inline vis_to_double_dup,4
+	st	%o0,[%sp+2183]
+	ld	[%sp+2183],%f1
+	fmovs	%f1,%f0		! duplicate value
+	.end
+! Stores all 64 bits of %o0 with stx, then reloads them into %f0 with a single ldd.
+! double vis_ll_to_double(unsigned long long /*value*/);
+!
+	.inline vis_ll_to_double,8
+	stx     %o0,[%sp+2183]
+	ldd     [%sp+2183],%f0
+        .end
+
+!--------------------------------------------------------------------
+! Address space identifier (ASI) register
+! The read template copies %asi to %o0; the write template sets %asi from %o0 (wr with %g0 as first source).
+! unsigned int vis_read_asi(void)
+!
+	.inline vis_read_asi,0
+	rd	%asi,%o0
+	.end
+!
+! void vis_write_asi(unsigned int /* ASI */)
+!
+	.inline vis_write_asi,4
+	wr	%g0,%o0,%asi
+	.end
+
+!--------------------------------------------------------------------
+! Load/store from/into alternate space
+! _REG forms use the %asi register; _P and _PL use immediates 0x80 and 0x88. FP loads go via %f4 to %f0.
+! float vis_ldfa_ASI_REG(void *rs1)
+!
+	.inline vis_ldfa_ASI_REG,8
+	lda	[%o0+0]%asi,%f4
+	fmovs	%f4,%f0	        ! Compiler can clean this up
+	.end
+!
+! float vis_ldfa_ASI_P(void *rs1)
+!
+	.inline vis_ldfa_ASI_P,8
+	lda	[%o0]0x80,%f4	! ASI_P
+	fmovs	%f4,%f0	        ! Compiler can clean this up
+	.end
+!
+! float vis_ldfa_ASI_PL(void *rs1)
+!
+	.inline vis_ldfa_ASI_PL,8
+	lda	[%o0]0x88,%f4	! ASI_PL
+	fmovs	%f4,%f0	        ! Compiler can clean this up
+	.end
+!
+! double vis_lddfa_ASI_REG(void *rs1)
+!
+	.inline vis_lddfa_ASI_REG,8
+	ldda	[%o0+0]%asi,%f4
+	fmovd	%f4,%f0	        ! Compiler can clean this up
+	.end
+!
+! double vis_lddfa_ASI_P(void *rs1)
+!
+	.inline vis_lddfa_ASI_P,8
+	ldda	[%o0]0x80,%f4	! ASI_P
+	fmovd	%f4,%f0	        ! Compiler can clean this up
+	.end
+!
+! double vis_lddfa_ASI_PL(void *rs1)
+!
+	.inline vis_lddfa_ASI_PL,8
+	ldda	[%o0]0x88,%f4	! ASI_PL
+	fmovd	%f4,%f0	        ! Compiler can clean this up
+	.end
+! The float stores read their value straight from %f1 and the double stores from %f0; address in %o1.
+! void vis_stfa_ASI_REG(float frs, void *rs1)
+!
+	.inline vis_stfa_ASI_REG,12
+	sta	%f1,[%o1+0]%asi
+	.end
+!
+! void vis_stfa_ASI_P(float frs, void *rs1)
+!
+	.inline vis_stfa_ASI_P,12
+	sta	%f1,[%o1]0x80	! ASI_P
+	.end
+!
+! void vis_stfa_ASI_PL(float frs, void *rs1)
+!
+	.inline vis_stfa_ASI_PL,12
+	sta	%f1,[%o1]0x88	! ASI_PL
+	.end
+!
+! void vis_stdfa_ASI_REG(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_REG,16
+	stda	%f0,[%o1+0]%asi
+	.end
+!
+! void vis_stdfa_ASI_P(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_P,16
+	stda	%f0,[%o1]0x80	! ASI_P
+	.end
+!
+! void vis_stdfa_ASI_PL(double frd, void *rs1)
+!
+	.inline vis_stdfa_ASI_PL,16
+	stda	%f0,[%o1]0x88	! ASI_PL
+	.end
+! The lduha templates load through %o0 (plus %o1 in the _index forms) and return in %o0.
+! unsigned short vis_lduha_ASI_REG(void *rs1)
+!
+	.inline vis_lduha_ASI_REG,8
+	lduha	[%o0+0]%asi,%o0
+	.end
+!
+! unsigned short vis_lduha_ASI_P(void *rs1)
+!
+	.inline vis_lduha_ASI_P,8
+	lduha	[%o0]0x80,%o0	! ASI_P
+	.end
+!
+! unsigned short vis_lduha_ASI_PL(void *rs1)
+!
+	.inline vis_lduha_ASI_PL,8
+	lduha	[%o0]0x88,%o0	! ASI_PL
+	.end
+!
+! unsigned short vis_lduha_ASI_P_index(void *rs1, long index)
+!
+	.inline vis_lduha_ASI_P_index,16
+	lduha	[%o0+%o1]0x80,%o0	! ASI_P
+	.end
+!
+! unsigned short vis_lduha_ASI_PL_index(void *rs1, long index)
+!
+	.inline vis_lduha_ASI_PL_index,16
+	lduha	[%o0+%o1]0x88,%o0	! ASI_PL
+	.end
+
+!--------------------------------------------------------------------
+! Prefetch
+! Both templates prefetch the address in %o0; read passes 0 and write passes 2 as the second operand.
+! void vis_prefetch_read(void * /*address*/);
+!
+	.inline vis_prefetch_read,8
+	prefetch	[%o0+0],0
+	.end
+!
+! void vis_prefetch_write(void * /*address*/);
+!
+	.inline vis_prefetch_write,8
+	prefetch	[%o0+0],2
+	.end
diff --git a/security/nss/lib/freebl/mpi/vis_proto.h b/security/nss/lib/freebl/mpi/vis_proto.h
new file mode 100644
index 000000000..275de59df
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/vis_proto.h
@@ -0,0 +1,234 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Prototypes for the inline templates in vis.il (vis_64.il in 64-bit builds)
+ */
+
+#ifndef VIS_PROTO_H
+#define VIS_PROTO_H
+
+#pragma ident "@(#)vis_proto.h	1.3	97/03/30 SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/* Pure edge handling instructions */
+int vis_edge8(void * /*frs1*/, void * /*frs2*/);
+int vis_edge8l(void * /*frs1*/, void * /*frs2*/);
+int vis_edge16(void * /*frs1*/, void * /*frs2*/);
+int vis_edge16l(void * /*frs1*/, void * /*frs2*/);
+int vis_edge32(void * /*frs1*/, void * /*frs2*/);
+int vis_edge32l(void * /*frs1*/, void * /*frs2*/);
+
+/* Edge handling instructions with negative return values if cc set. */
+int vis_edge8cc(void * /*frs1*/, void * /*frs2*/);
+int vis_edge8lcc(void * /*frs1*/, void * /*frs2*/);
+int vis_edge16cc(void * /*frs1*/, void * /*frs2*/);
+int vis_edge16lcc(void * /*frs1*/, void * /*frs2*/);
+int vis_edge32cc(void * /*frs1*/, void * /*frs2*/);
+int vis_edge32lcc(void * /*frs1*/, void * /*frs2*/);
+
+/* Alignment instructions. */
+void *vis_alignaddr(void * /*rs1*/, int /*rs2*/);
+void *vis_alignaddrl(void * /*rs1*/, int /*rs2*/);
+double vis_faligndata(double /*frs1*/, double /*frs2*/);
+
+/* Partitioned comparison instructions. */
+int vis_fcmple16(double /*frs1*/, double /*frs2*/);
+int vis_fcmpne16(double /*frs1*/, double /*frs2*/);
+int vis_fcmple32(double /*frs1*/, double /*frs2*/);
+int vis_fcmpne32(double /*frs1*/, double /*frs2*/);
+int vis_fcmpgt16(double /*frs1*/, double /*frs2*/);
+int vis_fcmpeq16(double /*frs1*/, double /*frs2*/);
+int vis_fcmpgt32(double /*frs1*/, double /*frs2*/);
+int vis_fcmpeq32(double /*frs1*/, double /*frs2*/);
+
+/* Partitioned multiplication.  The direct vis_fmul8x16 prototype is disabled; a macro in this header supplies it. */
+#if 0
+double vis_fmul8x16(float /*frs1*/, double /*frs2*/);
+#endif
+double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/);
+double vis_fmul8x16au(float /*frs1*/, float /*frs2*/);
+double vis_fmul8x16al(float /*frs1*/, float /*frs2*/);
+double vis_fmul8sux16(double /*frs1*/, double /*frs2*/);
+double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/);
+double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/);
+double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/);
+
+/* Partitioned addition & subtraction. */
+double vis_fpadd16(double /*frs1*/, double /*frs2*/);
+float vis_fpadd16s(float /*frs1*/, float /*frs2*/);
+double vis_fpadd32(double /*frs1*/, double /*frs2*/);
+float vis_fpadd32s(float /*frs1*/, float /*frs2*/);
+double vis_fpsub16(double /*frs1*/, double /*frs2*/);
+float vis_fpsub16s(float /*frs1*/, float /*frs2*/);
+double vis_fpsub32(double /*frs1*/, double /*frs2*/);
+float vis_fpsub32s(float /*frs1*/, float /*frs2*/);
+
+/* Pixel packing & clamping. */
+float vis_fpack16(double /*frs2*/);
+double vis_fpack32(double /*frs1*/, double /*frs2*/);
+float vis_fpackfix(double /*frs2*/);
+
+/* Combined pack ops: two packs per call, returned as a pair or stored through the pointer argument. */
+double vis_fpack16_pair(double /*frs2*/, double /*frs2*/);
+double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/);
+void vis_st2_fpack16(double, double, double *);
+void vis_std_fpack16(double, double, double *);
+void vis_st2_fpackfix(double, double, double *);
+
+double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/);
+double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/);
+
+/* Motion estimation.  NOTE(review): vis_64.il names its pdist template vis_pxldist64 -- confirm vis_pdist exists. */
+double vis_pdist(double /*frs1*/, double /*frs2*/, double /*frd*/);
+
+/* Channel merging. */
+double vis_fpmerge(float /*frs1*/, float /*frs2*/);
+
+/* Pixel expansion. */
+double vis_fexpand(float /*frs2*/);
+double vis_fexpand_hi(double /*frs2*/);
+double vis_fexpand_lo(double /*frs2*/);
+
+/* Bitwise logical operators: double forms, plus float forms with an "s" suffix. */
+double vis_fnor(double /*frs1*/, double /*frs2*/);
+float vis_fnors(float /*frs1*/, float /*frs2*/);
+double vis_fandnot(double /*frs1*/, double /*frs2*/);
+float vis_fandnots(float /*frs1*/, float /*frs2*/);
+double vis_fnot(double /*frs1*/);
+float vis_fnots(float /*frs1*/);
+double vis_fxor(double /*frs1*/, double /*frs2*/);
+float vis_fxors(float /*frs1*/, float /*frs2*/);
+double vis_fnand(double /*frs1*/, double /*frs2*/);
+float vis_fnands(float /*frs1*/, float /*frs2*/);
+double vis_fand(double /*frs1*/, double /*frs2*/);
+float vis_fands(float /*frs1*/, float /*frs2*/);
+double vis_fxnor(double /*frs1*/, double /*frs2*/);
+float vis_fxnors(float /*frs1*/, float /*frs2*/);
+double vis_fsrc(double /*frs1*/);
+float vis_fsrcs(float /*frs1*/);
+double vis_fornot(double /*frs1*/, double /*frs2*/);
+float vis_fornots(float /*frs1*/, float /*frs2*/);
+double vis_for(double /*frs1*/, double /*frs2*/);
+float vis_fors(float /*frs1*/, float /*frs2*/);
+double vis_fzero(void);
+float vis_fzeros(void);
+double vis_fone(void);
+float vis_fones(void);
+
+/* Partial stores: data, destination address, and a mask argument. */
+void vis_stdfa_ASI_PST8P(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST8PL(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST8P_int_pair(void * /*rs1*/, void * /*rs2*/,
+                                  void * /*rs3*/, int /*rmask*/);
+void vis_stdfa_ASI_PST8S(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST16P(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST16S(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST32P(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST32S(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+
+/* Byte & short stores. */
+void vis_stdfa_ASI_FL8P(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL8P_index(double /*frd*/, void * /*rs1*/, long /*index*/);
+void vis_stdfa_ASI_FL8S(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL16P(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL16P_index(double /*frd*/, void * /*rs1*/, long /*index*/);
+void vis_stdfa_ASI_FL16S(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL8PL(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL8SL(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL16PL(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL16SL(double /*frd*/, void * /*rs1*/);
+
+/* Byte & short loads.  NOTE(review): vis_64.il also has FL8PL_index / FL16PL_index loads with no prototype here. */
+double vis_lddfa_ASI_FL8P(void * /*rs1*/);
+double vis_lddfa_ASI_FL8P_index(void * /*rs1*/, long /*index*/);
+double vis_lddfa_ASI_FL8P_hi(void * /*rs1*/, unsigned int /*index*/);
+double vis_lddfa_ASI_FL8P_lo(void * /*rs1*/, unsigned int /*index*/);
+double vis_lddfa_ASI_FL8S(void * /*rs1*/);
+double vis_lddfa_ASI_FL16P(void * /*rs1*/);
+double vis_lddfa_ASI_FL16P_index(void * /*rs1*/, long /*index*/);
+double vis_lddfa_ASI_FL16S(void * /*rs1*/);
+double vis_lddfa_ASI_FL8PL(void * /*rs1*/);
+double vis_lddfa_ASI_FL8SL(void * /*rs1*/);
+double vis_lddfa_ASI_FL16PL(void * /*rs1*/);
+double vis_lddfa_ASI_FL16SL(void * /*rs1*/);
+
+/* Direct write to GSR, read from GSR */
+void vis_write_gsr(unsigned int /*GSR*/);
+unsigned int vis_read_gsr(void);
+
+/* Voxel texture mapping (declared only when long long is available). */
+#if !defined(_NO_LONGLONG)
+unsigned long vis_array8(unsigned long long /*rs1*/, int /*rs2*/);
+unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/);
+unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/);
+#endif /* !defined(_NO_LONGLONG) */
+
+/* Register aliasing and type casts. */
+float vis_read_hi(double /*frs1*/);
+float vis_read_lo(double /*frs1*/);
+double vis_write_hi(double /*frs1*/, float /*frs2*/);
+double vis_write_lo(double /*frs1*/, float /*frs2*/);
+double vis_freg_pair(float /*frs1*/, float /*frs2*/);
+float vis_to_float(unsigned int /*value*/);
+double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/);
+double vis_to_double_dup(unsigned int /*value*/);
+#if !defined(_NO_LONGLONG)
+double vis_ll_to_double(unsigned long long /*value*/);
+#endif /* !defined(_NO_LONGLONG) */
+
+/* Miscellany (no inlines): vis_64.il has no template for these two. */
+void vis_error(char * /*fmt*/, int /*a0*/);
+void vis_sim_init(void);
+
+/* For better performance: vis_fmul8x16 expands to the _dummy variant with 0 as the placeholder int. */
+#define vis_fmul8x16(farg, darg) vis_fmul8x16_dummy((farg), 0, (darg))
+
+/* Nicknames for explicit ASI loads and stores. */
+#define vis_st_u8 vis_stdfa_ASI_FL8P
+#define vis_st_u8_i vis_stdfa_ASI_FL8P_index
+#define vis_st_u8_le vis_stdfa_ASI_FL8PL
+#define vis_st_u16 vis_stdfa_ASI_FL16P
+#define vis_st_u16_i vis_stdfa_ASI_FL16P_index
+#define vis_st_u16_le vis_stdfa_ASI_FL16PL
+
+#define vis_ld_u8 vis_lddfa_ASI_FL8P
+#define vis_ld_u8_i vis_lddfa_ASI_FL8P_index
+#define vis_ld_u8_le vis_lddfa_ASI_FL8PL
+#define vis_ld_u16 vis_lddfa_ASI_FL16P
+#define vis_ld_u16_i vis_lddfa_ASI_FL16P_index
+#define vis_ld_u16_le vis_lddfa_ASI_FL16PL
+
+#define vis_pst_8 vis_stdfa_ASI_PST8P
+#define vis_pst_16 vis_stdfa_ASI_PST16P
+#define vis_pst_32 vis_stdfa_ASI_PST32P
+
+#define vis_st_u8s vis_stdfa_ASI_FL8S
+#define vis_st_u8s_le vis_stdfa_ASI_FL8SL
+#define vis_st_u16s vis_stdfa_ASI_FL16S
+#define vis_st_u16s_le vis_stdfa_ASI_FL16SL
+
+#define vis_ld_u8s vis_lddfa_ASI_FL8S
+#define vis_ld_u8s_le vis_lddfa_ASI_FL8SL
+#define vis_ld_u16s vis_lddfa_ASI_FL16S
+#define vis_ld_u16s_le vis_lddfa_ASI_FL16SL
+
+#define vis_pst_8s vis_stdfa_ASI_PST8S
+#define vis_pst_16s vis_stdfa_ASI_PST16S
+#define vis_pst_32s vis_stdfa_ASI_PST32S
+
+/* "<" and ">=" may be implemented in terms of ">" and "<=" by swapping the two operands. */
+#define vis_fcmplt16(a, b) vis_fcmpgt16((b), (a))
+#define vis_fcmplt32(a, b) vis_fcmpgt32((b), (a))
+#define vis_fcmpge16(a, b) vis_fcmple16((b), (a))
+#define vis_fcmpge32(a, b) vis_fcmple32((b), (a))
+
+#ifdef __cplusplus
+} /* End of extern "C" */
+#endif /* __cplusplus */
+
+#endif /* VIS_PROTO_H */
diff --git a/security/nss/lib/freebl/nsslowhash.c b/security/nss/lib/freebl/nsslowhash.c
new file mode 100644
index 000000000..5ed039689
--- /dev/null
+++ b/security/nss/lib/freebl/nsslowhash.c
@@ -0,0 +1,150 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+#include "prtypes.h"
+#include "secerr.h"
+#include "blapi.h"
+#include "hasht.h"
+#include "plhash.h"
+#include "nsslowhash.h"
+#include "blapii.h"
+
+struct NSSLOWInitContextStr {
+    int count; /* no function in nsslowhash.c reads or updates this field */
+};
+
+struct NSSLOWHASHContextStr {
+    const SECHashObject *hashObj; /* method table obtained from HASH_GetRawHashObject() */
+    void *hashCtxt;               /* state returned by hashObj->create(), released by hashObj->destroy() */
+};
+
+static int
+nsslow_GetFIPSEnabled(void)
+{
+/* Builds without LINUX skip the probe below and always report 1. */
+#ifdef LINUX
+    char flag;
+    size_t got;
+    FILE *fp = fopen("/proc/sys/crypto/fips_enabled", "r");
+    /* An unreadable kernel flag is reported as "not enabled". */
+    if (fp == NULL) {
+        return 0;
+    }
+    got = fread(&flag, 1, 1, fp);
+    fclose(fp);
+    /* Only a first byte of '1' counts as enabled. */
+    if (got != 1 || flag != '1') {
+        return 0;
+    }
+#endif
+    return 1;
+}
+
+static NSSLOWInitContext dummyContext = { 0 }; /* the single context NSSLOW_Init() hands out */
+static PRBool post_failed = PR_TRUE;           /* stays true until NSSLOW_Init() completes successfully */
+
+NSSLOWInitContext *
+NSSLOW_Init(void)
+{
+    PRBool entryFailed;
+
+#ifdef FREEBL_NO_DEPEND
+    (void)FREEBL_InitStubs();
+#endif
+
+    /* When FIPS mode is reported as enabled, the FIPS entry check must
+     * pass; otherwise the library stays marked as failed. */
+    entryFailed = nsslow_GetFIPSEnabled() &&
+                  BL_FIPSEntryOK(PR_TRUE) != SECSuccess;
+    if (entryFailed) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    }
+    post_failed = entryFailed;
+
+    return entryFailed ? NULL : &dummyContext;
+}
+
+void
+NSSLOW_Shutdown(NSSLOWInitContext *context)
+{
+    /* Nothing is torn down here; the handle is only checked via PORT_Assert. */
+    PORT_Assert(&dummyContext == context);
+}
+
+void
+NSSLOW_Reset(NSSLOWInitContext *context)
+{
+    /* Nothing is reset here; the handle is only checked via PORT_Assert. */
+    PORT_Assert(&dummyContext == context);
+}
+
+NSSLOWHASHContext *
+NSSLOWHASH_NewContext(NSSLOWInitContext *initContext,
+                      HASH_HashType hashType)
+{
+    NSSLOWHASHContext *hash = NULL;
+
+    /* Refuse to hash until NSSLOW_Init() has cleared post_failed. */
+    if (post_failed) {
+        PORT_SetError(SEC_ERROR_PKCS11_DEVICE_ERROR);
+        return NULL;
+    }
+    /* Only the handle returned by NSSLOW_Init() is accepted. */
+    if (initContext != &dummyContext) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return NULL;
+    }
+
+    hash = PORT_ZNew(NSSLOWHASHContext);
+    if (hash == NULL) {
+        return NULL;
+    }
+    /* Look up the method table, then let it build its private state;
+     * a failure at either step releases the wrapper again. */
+    hash->hashObj = HASH_GetRawHashObject(hashType);
+    if (hash->hashObj != NULL) {
+        hash->hashCtxt = hash->hashObj->create();
+    }
+    if (hash->hashObj == NULL || hash->hashCtxt == NULL) {
+        PORT_Free(hash);
+        return NULL;
+    }
+    return hash;
+}
+/* Start a digest on context by calling its hash object's begin() hook; no 'return expr' because the function is void. */
+void
+NSSLOWHASH_Begin(NSSLOWHASHContext *context)
+{
+    context->hashObj->begin(context->hashCtxt);
+}
+/* Pass buf and len to context's hash object update() hook; no 'return expr' because the function is void. */
+void
+NSSLOWHASH_Update(NSSLOWHASHContext *context, const unsigned char *buf,
+                  unsigned int len)
+{
+    context->hashObj->update(context->hashCtxt, buf, len);
+}
+/* Finish the digest through the hash object's end() hook, forwarding buf, ret and len unchanged; the function is void. */
+void
+NSSLOWHASH_End(NSSLOWHASHContext *context, unsigned char *buf,
+               unsigned int *ret, unsigned int len)
+{
+    context->hashObj->end(context->hashCtxt, buf, ret, len);
+}
+
+void
+NSSLOWHASH_Destroy(NSSLOWHASHContext *context)
+{
+    (*context->hashObj->destroy)(context->hashCtxt, PR_TRUE); /* hash state first */
+    PORT_Free(context);                                       /* then the wrapper itself */
+}
+
+unsigned int
+NSSLOWHASH_Length(NSSLOWHASHContext *context)
+{
+    return context->hashObj->length; /* the length field of the context's hash method table */
+}
diff --git a/security/nss/lib/freebl/nsslowhash.h b/security/nss/lib/freebl/nsslowhash.h
new file mode 100644
index 000000000..d8f058715
--- /dev/null
+++ b/security/nss/lib/freebl/nsslowhash.h
@@ -0,0 +1,33 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Provide FIPS validated hashing for applications that only need hashing.
+ * NOTE: mac'ing requires keys and will not work in this interface.
+ * Also NOTE: this only works with Hashing. Only the FIPS interface is enabled.
+ */
+
+#ifndef _NSSLOWHASH_H_
+#define _NSSLOWHASH_H_
+
+typedef struct NSSLOWInitContextStr NSSLOWInitContext; /* struct body is not defined in this header */
+typedef struct NSSLOWHASHContextStr NSSLOWHASHContext; /* struct body is not defined in this header */
+
+NSSLOWInitContext *NSSLOW_Init(void);
+void NSSLOW_Shutdown(NSSLOWInitContext *context);
+void NSSLOW_Reset(NSSLOWInitContext *context);
+NSSLOWHASHContext *NSSLOWHASH_NewContext(
+    NSSLOWInitContext *initContext,
+    HASH_HashType hashType); /* this header does not declare HASH_HashType; nsslowhash.c includes hasht.h before it */
+void NSSLOWHASH_Begin(NSSLOWHASHContext *context);
+void NSSLOWHASH_Update(NSSLOWHASHContext *context,
+                       const unsigned char *buf,
+                       unsigned int len);
+void NSSLOWHASH_End(NSSLOWHASHContext *context,
+                    unsigned char *buf,
+                    unsigned int *ret, unsigned int len);
+void NSSLOWHASH_Destroy(NSSLOWHASHContext *context);
+unsigned int NSSLOWHASH_Length(NSSLOWHASHContext *context);
+
+#endif
diff --git a/security/nss/lib/freebl/os2_rand.c b/security/nss/lib/freebl/os2_rand.c
new file mode 100644
index 000000000..407b08014
--- /dev/null
+++ b/security/nss/lib/freebl/os2_rand.c
@@ -0,0 +1,334 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define INCL_DOS
+#define INCL_DOSERRORS
+#include <os2.h>
+#include "secrng.h"
+#include "prerror.h"
+#include <stdlib.h>
+#include <time.h>
+#include <stdio.h>
+#include <sys/stat.h>
+
+static BOOL
+clockTickTime(unsigned long *phigh, unsigned long *plow)
+{
+    QWORD ticks = { 0, 0 };
+
+    /* On any timer error, leave *phigh and *plow untouched. */
+    if (DosTmrQueryTime(&ticks) != NO_ERROR) {
+        return FALSE;
+    }
+
+    /* Hand back the two halves of the timer value separately. */
+    *phigh = ticks.ulHi;
+    *plow = ticks.ulLo;
+    return TRUE;
+}
+
+size_t
+RNG_GetNoise(void *buf, size_t maxbuf)
+{
+    unsigned long tickHi = 0;
+    unsigned long tickLo = 0;
+    clock_t cpuTicks;
+    time_t now;
+    char *out = (char *)buf;
+    size_t used = 0;
+    size_t take;
+
+    if (maxbuf == 0) {
+        return 0;
+    }
+
+    /* A failed timer query leaves both halves at zero; they are still copied. */
+    clockTickTime(&tickHi, &tickLo);
+
+    /* Low timer word first: it holds the fastest-changing bits. */
+    take = (maxbuf - used < sizeof(tickLo)) ? maxbuf - used : sizeof(tickLo);
+    memcpy(out + used, &tickLo, take);
+    used += take;
+    if (used == maxbuf) {
+        return used;
+    }
+
+    /* Then the high timer word, if room remains. */
+    take = (maxbuf - used < sizeof(tickHi)) ? maxbuf - used : sizeof(tickHi);
+    memcpy(out + used, &tickHi, take);
+    used += take;
+    if (used == maxbuf) {
+        return used;
+    }
+
+    /* The value reported by clock(). */
+    cpuTicks = clock();
+    take = (maxbuf - used < sizeof(cpuTicks)) ? maxbuf - used : sizeof(cpuTicks);
+    memcpy(out + used, &cpuTicks, take);
+    used += take;
+    if (used == maxbuf) {
+        return used;
+    }
+
+    /* Finally the calendar time reported by time(). */
+    time(&now);
+    take = (maxbuf - used < sizeof(now)) ? maxbuf - used : sizeof(now);
+    memcpy(out + used, &now, take);
+    used += take;
+
+    return used;
+}
+/* Call func with the full path of each FILE_NORMAL entry in <boot drive>:\OS2\ ; returns FALSE only when the boot drive cannot be determined. */
+static BOOL
+EnumSystemFiles(void (*func)(const char *))
+{
+    APIRET rc;
+    ULONG sysInfo = 0;
+    char sysDir[_MAX_PATH] = "";
+    char filename[_MAX_PATH];
+    HDIR hdir = HDIR_CREATE;
+    ULONG numFiles = 1;
+    FILEFINDBUF3 fileBuf = { 0 };
+    ULONG buflen = sizeof(FILEFINDBUF3);
+
+    /* No boot drive means no search pattern; never search with an unset filename. */
+    if (DosQuerySysInfo(QSV_BOOT_DRIVE, QSV_BOOT_DRIVE, (PVOID)&sysInfo,
+                        sizeof(ULONG)) != NO_ERROR)
+        return FALSE;
+    /* drive number 1 maps to 'A'; sysDir becomes "<letter>:\OS2\" */
+    sysDir[0] = (char)(sysInfo + 'A' - 1);
+    strcpy(sysDir + 1, ":\\OS2\\");
+    strcpy(filename, sysDir);
+    strcat(filename, "*.*");
+
+    rc = DosFindFirst(filename, &hdir, FILE_NORMAL, &fileBuf, buflen,
+                      &numFiles, FIL_STANDARD);
+    if (rc != NO_ERROR) {
+        printf("DosFindFirst error code = %lu", (unsigned long)rc);
+        return TRUE; /* as before: a failed search is reported, not treated as fatal */
+    }
+
+    do {
+        /* pass the full pathname to the callback; a name too long for filename is skipped */
+        int len = snprintf(filename, sizeof(filename), "%s%s", sysDir, fileBuf.achName);
+        if (len > 0 && (size_t)len < sizeof(filename))
+            (*func)(filename);
+        numFiles = 1;
+        rc = DosFindNext(hdir, &fileBuf, buflen, &numFiles);
+        if (rc != NO_ERROR && rc != ERROR_NO_MORE_FILES)
+            printf("DosFindNext error code = %lu\n", (unsigned long)rc);
+    } while (rc == NO_ERROR);
+
+    rc = DosFindClose(hdir);
+    if (rc != NO_ERROR)
+        printf("DosFindClose error code = %lu", (unsigned long)rc);
+
+    return TRUE;
+}
+
+static int dwNumFiles, dwReadEvery, dwFileToRead = 0; /* state shared by the EnumSystemFiles() callbacks */
+
+static void
+CountFiles(const char *file)
+{
+    dwNumFiles += 1; /* every enumerated name is tallied; file itself is not examined */
+}
+
+static void
+ReadFiles(const char *file)
+{
+    /* Feed only every dwReadEvery-th enumerated file to the RNG. */
+    if (!(dwNumFiles % dwReadEvery))
+        RNG_FileForRNG(file);
+    ++dwNumFiles;
+}
+
+static void
+ReadSingleFile(const char *filename)
+{
+    unsigned char scratch[1024];
+    FILE *fp = fopen((char *)filename, "rb");
+
+    if (fp == NULL)
+        return;
+    /* Read to end of file and discard the data; nothing is kept. */
+    while (fread(scratch, 1, sizeof(scratch), fp) != 0) {
+    }
+    fclose(fp);
+}
+
+static void
+ReadOneFile(const char *file)
+{
+    /* Only the entry whose ordinal equals dwFileToRead is read. */
+    if (dwFileToRead == dwNumFiles)
+        ReadSingleFile(file);
+
+    ++dwNumFiles;
+}
+
+static void
+ReadSystemFiles(void)
+{
+    /* Pass 1: count the entries EnumSystemFiles() reports. */
+    dwNumFiles = 0;
+    if (EnumSystemFiles(CountFiles) == FALSE) {
+        return;
+    }
+
+    /* The count itself is mixed into the generator. */
+    RNG_RandomUpdate(&dwNumFiles, sizeof(dwNumFiles));
+    if (dwNumFiles == 0) {
+        return;
+    }
+
+    /* Pass 2: feed files at a stride of one tenth of the count; with
+     * fewer than ten files the stride is 1, so every file is fed. */
+    dwReadEvery = (dwNumFiles / 10 != 0) ? dwNumFiles / 10 : 1;
+    dwNumFiles = 0;
+    EnumSystemFiles(ReadFiles);
+}
+/* Feed OS/2 process, clock, country, directory, disk and system data, plus a sample of system files, to RNG_RandomUpdate(). */
+void
+RNG_SystemInfoForRNG(void)
+{
+    unsigned long *plong = 0;
+    PTIB ptib;
+    PPIB ppib;
+    APIRET rc = NO_ERROR;
+    DATETIME dt;
+    COUNTRYCODE cc = { 0 };
+    COUNTRYINFO ci = { 0 };
+    unsigned long actual = 0;
+    char path[_MAX_PATH] = "";
+    char fullpath[_MAX_PATH] = "";
+    unsigned long pathlength = sizeof(path) - 1; /* one byte less, so '\\' + path always fits in fullpath */
+    FSALLOCATE fsallocate;
+    FILESTATUS3 fstatus;
+    unsigned long defaultdrive = 0;
+    unsigned long logicaldrives = 0;
+    unsigned long sysInfo[QSV_MAX] = { 0 };
+    char buffer[20];
+    int nBytes = RNG_GetNoise(buffer, sizeof(buffer));
+
+    RNG_RandomUpdate(buffer, nBytes);
+
+    /* allocate memory and use address and memory */
+    plong = (unsigned long *)malloc(sizeof(*plong));
+    RNG_RandomUpdate(&plong, sizeof(plong));
+    if (plong != NULL) /* a failed allocation contributes only its (null) address */
+        RNG_RandomUpdate(plong, sizeof(*plong));
+    free(plong);
+
+    /* process info */
+    rc = DosGetInfoBlocks(&ptib, &ppib);
+    if (rc == NO_ERROR) {
+        RNG_RandomUpdate(ptib, sizeof(*ptib));
+        RNG_RandomUpdate(ppib, sizeof(*ppib));
+    }
+
+    /* time */
+    rc = DosGetDateTime(&dt);
+    if (rc == NO_ERROR) {
+        RNG_RandomUpdate(&dt, sizeof(dt));
+    }
+
+    /* country */
+    rc = DosQueryCtryInfo(sizeof(ci), &cc, &ci, &actual);
+    if (rc == NO_ERROR) {
+        RNG_RandomUpdate(&cc, sizeof(cc));
+        RNG_RandomUpdate(&ci, sizeof(ci));
+        RNG_RandomUpdate(&actual, sizeof(actual));
+    }
+
+    /* current directory */
+    rc = DosQueryCurrentDir(0, path, &pathlength);
+    strcat(fullpath, "\\");
+    strcat(fullpath, path);
+    if (rc == NO_ERROR) {
+        RNG_RandomUpdate(fullpath, strlen(fullpath));
+        // path info
+        rc = DosQueryPathInfo(fullpath, FIL_STANDARD, &fstatus, sizeof(fstatus));
+        if (rc == NO_ERROR) {
+            RNG_RandomUpdate(&fstatus, sizeof(fstatus));
+        }
+    }
+
+    /* file system info */
+    rc = DosQueryFSInfo(0, FSIL_ALLOC, &fsallocate, sizeof(fsallocate));
+    if (rc == NO_ERROR) {
+        RNG_RandomUpdate(&fsallocate, sizeof(fsallocate));
+    }
+
+    /* drive info */
+    rc = DosQueryCurrentDisk(&defaultdrive, &logicaldrives);
+    if (rc == NO_ERROR) {
+        RNG_RandomUpdate(&defaultdrive, sizeof(defaultdrive));
+        RNG_RandomUpdate(&logicaldrives, sizeof(logicaldrives));
+    }
+
+    /* system info */
+    rc = DosQuerySysInfo(1L, QSV_MAX, (PVOID)&sysInfo, sizeof(ULONG) * QSV_MAX);
+    if (rc == NO_ERROR) {
+        RNG_RandomUpdate(&sysInfo, sizeof(sysInfo));
+    }
+
+    // now let's do some files
+    ReadSystemFiles();
+
+    /* more noise */
+    nBytes = RNG_GetNoise(buffer, sizeof(buffer));
+    RNG_RandomUpdate(buffer, nBytes);
+}
+/* Mix filename's stat() data and file contents into the RNG; a file that cannot be stat()ed is skipped silently. */
+void
+RNG_FileForRNG(const char *filename)
+{
+    struct stat stat_buf;
+    unsigned char buffer[1024];
+    FILE *file = 0;
+    int nBytes = 0;
+    static int totalFileBytes = 0;
+
+    if (stat((char *)filename, &stat_buf) < 0)
+        return;
+
+    RNG_RandomUpdate((unsigned char *)&stat_buf, sizeof(stat_buf));
+
+    /* Binary mode, as in ReadSingleFile(): text-mode translation must not alter or cut short what is read. */
+    file = fopen((char *)filename, "rb");
+    if (file != NULL) {
+        for (;;) {
+            size_t bytes = fread(buffer, 1, sizeof(buffer), file);
+            if (bytes == 0)
+                break;
+            RNG_RandomUpdate(buffer, bytes);
+            totalFileBytes += bytes;
+            /* The total persists across calls: once past 250000, each later file stops after its first chunk. */
+            if (totalFileBytes > 250000)
+                break;
+        }
+        fclose(file);
+    }
+
+    nBytes = RNG_GetNoise(buffer, 20);
+    RNG_RandomUpdate(buffer, nBytes);
+}
+
+static void
+rng_systemJitter(void)
+{
+    /* Walk the system directory, reading only entry number dwFileToRead. */
+    dwNumFiles = 0;
+    EnumSystemFiles(ReadOneFile);
+
+    /* Move on to the next entry, wrapping to 0 once the count is reached. */
+    dwFileToRead = (dwFileToRead + 1 < dwNumFiles) ? dwFileToRead + 1 : 0;
+}
+
+size_t
+RNG_SystemRNG(void *dest, size_t maxLen)
+{
+    return rng_systemFromNoise(dest, maxLen); /* pure forwarder; rng_systemFromNoise is not defined in os2_rand.c */
+}
diff --git a/security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c b/security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c
new file mode 100644
index 000000000..3c803c167
--- /dev/null
+++ b/security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c
@@ -0,0 +1,881 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This implementation of poly1305 is by Andrew Moon
+ * (https://github.com/floodyberry/poly1305-donna) and released as public
+ * domain. It implements SIMD vectorization based on the algorithm described in
+ * http://cr.yp.to/papers.html#neoncrypto. Unrolled to 2 powers, i.e. 64 byte
+ * block size. */
+
+#include <emmintrin.h>
+#include <stdint.h>
+
+#include "poly1305.h"
+#include "blapii.h"
+
+#define ALIGN(x) __attribute__((aligned(x)))
+#define INLINE inline
+#define U8TO64_LE(m) (*(uint64_t *)(m))
+#define U8TO32_LE(m) (*(uint32_t *)(m))
+#define U64TO8_LE(m, v) (*(uint64_t *)(m)) = v
+/* The three load/store macros above access memory directly through a cast pointer; they do no byte swapping. */
+typedef __m128i xmmi;
+typedef unsigned __int128 uint128_t;
+/* Vector constants, each repeated in 32-bit slots 0 and 2: the 26-bit limb mask, the value 5, and bit 24. */
+static const uint32_t ALIGN(16) poly1305_x64_sse2_message_mask[4] = { (1 << 26) - 1, 0, (1 << 26) - 1, 0 };
+static const uint32_t ALIGN(16) poly1305_x64_sse2_5[4] = { 5, 0, 5, 0 };
+static const uint32_t ALIGN(16) poly1305_x64_sse2_1shl128[4] = { (1 << 24), 0, (1 << 24), 0 };
+/* Returns a + b in 128-bit unsigned arithmetic. */
+static uint128_t INLINE
+add128(uint128_t a, uint128_t b)
+{
+    return a + b;
+}
+/* Returns a + b, with the 64-bit b widened to 128 bits for the addition. */
+static uint128_t INLINE
+add128_64(uint128_t a, uint64_t b)
+{
+    return a + b;
+}
+/* Returns the full 128-bit product of two 64-bit values (a is widened before the multiply). */
+static uint128_t INLINE
+mul64x64_128(uint64_t a, uint64_t b)
+{
+    return (uint128_t)a * b;
+}
+/* Returns the low 64 bits of a. */
+static uint64_t INLINE
+lo128(uint128_t a)
+{
+    return (uint64_t)a;
+}
+/* Returns the low 64 bits of v shifted right by shift. */
+static uint64_t INLINE
+shr128(uint128_t v, const int shift)
+{
+    return (uint64_t)(v >> shift);
+}
+/* Joins hi:lo into one 128-bit value and returns the low 64 bits of it shifted right by shift. */
+static uint64_t INLINE
+shr128_pair(uint64_t hi, uint64_t lo, const int shift)
+{
+    return (uint64_t)((((uint128_t)hi << 64) | lo) >> shift);
+}
+/* One table of multipliers: poly1305_first_block() fills R20..R24 with 26-bit limbs and S21..S24 with 5 times R21..R24. */
+typedef struct poly1305_power_t {
+    union {
+        xmmi v;
+        uint64_t u[2];
+        uint32_t d[4];
+    } R20, R21, R22, R23, R24, S21, S22, S23, S24;
+} poly1305_power;
+/* Working state; poly1305_aligned_state() locates it at a 64-byte boundary inside the caller's poly1305_state. */
+typedef struct poly1305_state_internal_t {
+    poly1305_power P[2]; /* 288 bytes, top 32 bit halves unused = 144 bytes of free storage */
+    union {
+        xmmi H[5]; /*  80 bytes  */
+        uint64_t HH[10];
+    };
+    /* uint64_t r0,r1,r2;       [24 bytes] */
+    /* uint64_t pad0,pad1;      [16 bytes] */
+    uint64_t started;      /*   8 bytes  */
+    uint64_t leftover;     /*   8 bytes  */
+    uint8_t buffer[64];    /*  64 bytes  */
+} poly1305_state_internal; /* 448 bytes total + 63 bytes for alignment = 511 bytes raw */
+/* Round state up to the next 64-byte boundary and view it as the internal state; uintptr_t is the pointer-sized integer type. */
+static poly1305_state_internal INLINE
+    *
+    poly1305_aligned_state(poly1305_state *state)
+{
+    return (poly1305_state_internal *)(((uintptr_t)state + 63) & ~(uintptr_t)63);
+}
+
+/* copy 0-63 bytes: each set bit of bytes (32, 16, 8, 4, 2, 1) moves one chunk of that size, largest first */
+static void INLINE NO_SANITIZE_ALIGNMENT
+poly1305_block_copy(uint8_t *dst, const uint8_t *src, size_t bytes)
+{
+    size_t offset = src - dst; /* src is always addressed as dst + offset, so only dst is advanced */
+    if (bytes & 32) {
+        _mm_storeu_si128((xmmi *)(dst + 0), _mm_loadu_si128((xmmi *)(dst + offset + 0)));
+        _mm_storeu_si128((xmmi *)(dst + 16), _mm_loadu_si128((xmmi *)(dst + offset + 16)));
+        dst += 32;
+    }
+    if (bytes & 16) {
+        _mm_storeu_si128((xmmi *)dst, _mm_loadu_si128((xmmi *)(dst + offset)));
+        dst += 16;
+    }
+    if (bytes & 8) {
+        *(uint64_t *)dst = *(uint64_t *)(dst + offset);
+        dst += 8;
+    }
+    if (bytes & 4) {
+        *(uint32_t *)dst = *(uint32_t *)(dst + offset);
+        dst += 4;
+    }
+    if (bytes & 2) {
+        *(uint16_t *)dst = *(uint16_t *)(dst + offset);
+        dst += 2;
+    }
+    if (bytes & 1) {
+        *(uint8_t *)dst = *(uint8_t *)(dst + offset);
+    }
+}
+
+/* zero 0-15 bytes: only bits 8, 4, 2 and 1 of bytes are examined, so (bytes & 15) bytes of dst are cleared */
+static void INLINE
+poly1305_block_zero(uint8_t *dst, size_t bytes)
+{
+    if (bytes & 8) {
+        *(uint64_t *)dst = 0;
+        dst += 8;
+    }
+    if (bytes & 4) {
+        *(uint32_t *)dst = 0;
+        dst += 4;
+    }
+    if (bytes & 2) {
+        *(uint16_t *)dst = 0;
+        dst += 2;
+    }
+    if (bytes & 1) {
+        *(uint8_t *)dst = 0;
+    }
+}
+
+static size_t INLINE
+poly1305_min(size_t a, size_t b)
+{
+    return (b <= a) ? b : a; /* the smaller of the two sizes */
+}
+/* Set up state from a 32-byte key: bytes 0-15 are clamped into r0..r2, bytes 16-31 are kept as the pad; H and the counters start at zero. */
+void
+Poly1305Init(poly1305_state *state, const unsigned char key[32])
+{
+    poly1305_state_internal *st = poly1305_aligned_state(state);
+    poly1305_power *p;
+    uint64_t r0, r1, r2;
+    uint64_t t0, t1;
+
+    /* clamp key */
+    t0 = U8TO64_LE(key + 0);
+    t1 = U8TO64_LE(key + 8);
+    r0 = t0 & 0xffc0fffffff; /* 44-bit-wide mask */
+    t0 >>= 44;
+    t0 |= t1 << 20;
+    r1 = t0 & 0xfffffc0ffff; /* 44-bit-wide mask */
+    t1 >>= 24;
+    r2 = t1 & 0x00ffffffc0f;
+
+    /* store r in un-used space of st->P[1] */
+    p = &st->P[1];
+    p->R20.d[1] = (uint32_t)(r0);
+    p->R20.d[3] = (uint32_t)(r0 >> 32);
+    p->R21.d[1] = (uint32_t)(r1);
+    p->R21.d[3] = (uint32_t)(r1 >> 32);
+    p->R22.d[1] = (uint32_t)(r2);
+    p->R22.d[3] = (uint32_t)(r2 >> 32);
+
+    /* store pad */
+    p->R23.d[1] = U8TO32_LE(key + 16);
+    p->R23.d[3] = U8TO32_LE(key + 20);
+    p->R24.d[1] = U8TO32_LE(key + 24);
+    p->R24.d[3] = U8TO32_LE(key + 28);
+
+    /* H = 0 */
+    st->H[0] = _mm_setzero_si128();
+    st->H[1] = _mm_setzero_si128();
+    st->H[2] = _mm_setzero_si128();
+    st->H[3] = _mm_setzero_si128();
+    st->H[4] = _mm_setzero_si128();
+
+    st->started = 0;
+    st->leftover = 0;
+}
+/* Builds the r^2 table in P[1] and the r^4 table in P[0] from the saved r, restores the saved r/pad words, then loads 32 bytes of m into H. */
+static void
+poly1305_first_block(poly1305_state_internal *st, const uint8_t *m)
+{
+    const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
+    const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5);
+    const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128);
+    xmmi T5, T6;
+    poly1305_power *p;
+    uint128_t d[3];
+    uint64_t r0, r1, r2;
+    uint64_t r20, r21, r22, s22;
+    uint64_t pad0, pad1;
+    uint64_t c;
+    uint64_t i;
+
+    /* pull out stored info */
+    p = &st->P[1];
+
+    r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
+    r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
+    r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
+    pad0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1];
+    pad1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1];
+
+    /* compute powers r^2,r^4 */
+    r20 = r0;
+    r21 = r1;
+    r22 = r2;
+    for (i = 0; i < 2; i++) {
+        s22 = r22 * (5 << 2);
+
+        d[0] = add128(mul64x64_128(r20, r20), mul64x64_128(r21 * 2, s22));
+        d[1] = add128(mul64x64_128(r22, s22), mul64x64_128(r20 * 2, r21));
+        d[2] = add128(mul64x64_128(r21, r21), mul64x64_128(r22 * 2, r20));
+
+        r20 = lo128(d[0]) & 0xfffffffffff;
+        c = shr128(d[0], 44);
+        d[1] = add128_64(d[1], c);
+        r21 = lo128(d[1]) & 0xfffffffffff;
+        c = shr128(d[1], 44);
+        d[2] = add128_64(d[2], c);
+        r22 = lo128(d[2]) & 0x3ffffffffff;
+        c = shr128(d[2], 42);
+        r20 += c * 5;
+        c = (r20 >> 44);
+        r20 = r20 & 0xfffffffffff;
+        r21 += c;
+
+        p->R20.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)(r20)&0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0));
+        p->R21.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r20 >> 26) | (r21 << 18)) & 0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0));
+        p->R22.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >> 8)) & 0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0));
+        p->R23.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >> 34) | (r22 << 10)) & 0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0));
+        p->R24.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r22 >> 16))), _MM_SHUFFLE(1, 0, 1, 0));
+        p->S21.v = _mm_mul_epu32(p->R21.v, FIVE);
+        p->S22.v = _mm_mul_epu32(p->R22.v, FIVE);
+        p->S23.v = _mm_mul_epu32(p->R23.v, FIVE);
+        p->S24.v = _mm_mul_epu32(p->R24.v, FIVE);
+        p--; /* first pass filled P[1]; the second pass fills P[0] */
+    }
+
+    /* put saved info back */
+    p = &st->P[1];
+    p->R20.d[1] = (uint32_t)(r0);
+    p->R20.d[3] = (uint32_t)(r0 >> 32);
+    p->R21.d[1] = (uint32_t)(r1);
+    p->R21.d[3] = (uint32_t)(r1 >> 32);
+    p->R22.d[1] = (uint32_t)(r2);
+    p->R22.d[3] = (uint32_t)(r2 >> 32);
+    p->R23.d[1] = (uint32_t)(pad0);
+    p->R23.d[3] = (uint32_t)(pad0 >> 32);
+    p->R24.d[1] = (uint32_t)(pad1);
+    p->R24.d[3] = (uint32_t)(pad1 >> 32);
+
+    /* H = [Mx,My] */
+    T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi64((xmmi *)(m + 16)));
+    T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi64((xmmi *)(m + 24)));
+    st->H[0] = _mm_and_si128(MMASK, T5);
+    st->H[1] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
+    T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
+    st->H[2] = _mm_and_si128(MMASK, T5);
+    st->H[3] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
+    st->H[4] = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
+}
+/* Consumes m in 64-byte steps, updating H with the P[0] and P[1] tables as the inline comments describe; a tail shorter than 64 bytes is not read. */
+static void
+poly1305_blocks(poly1305_state_internal *st, const uint8_t *m, size_t bytes)
+{
+    const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
+    const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5);
+    const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128);
+
+    poly1305_power *p;
+    xmmi H0, H1, H2, H3, H4;
+    xmmi T0, T1, T2, T3, T4, T5, T6;
+    xmmi M0, M1, M2, M3, M4;
+    xmmi C1, C2;
+
+    H0 = st->H[0];
+    H1 = st->H[1];
+    H2 = st->H[2];
+    H3 = st->H[3];
+    H4 = st->H[4];
+
+    while (bytes >= 64) {
+        /* H *= [r^4,r^4] */
+        p = &st->P[0];
+        T0 = _mm_mul_epu32(H0, p->R20.v);
+        T1 = _mm_mul_epu32(H0, p->R21.v);
+        T2 = _mm_mul_epu32(H0, p->R22.v);
+        T3 = _mm_mul_epu32(H0, p->R23.v);
+        T4 = _mm_mul_epu32(H0, p->R24.v);
+        T5 = _mm_mul_epu32(H1, p->S24.v);
+        T6 = _mm_mul_epu32(H1, p->R20.v);
+        T0 = _mm_add_epi64(T0, T5);
+        T1 = _mm_add_epi64(T1, T6);
+        T5 = _mm_mul_epu32(H2, p->S23.v);
+        T6 = _mm_mul_epu32(H2, p->S24.v);
+        T0 = _mm_add_epi64(T0, T5);
+        T1 = _mm_add_epi64(T1, T6);
+        T5 = _mm_mul_epu32(H3, p->S22.v);
+        T6 = _mm_mul_epu32(H3, p->S23.v);
+        T0 = _mm_add_epi64(T0, T5);
+        T1 = _mm_add_epi64(T1, T6);
+        T5 = _mm_mul_epu32(H4, p->S21.v);
+        T6 = _mm_mul_epu32(H4, p->S22.v);
+        T0 = _mm_add_epi64(T0, T5);
+        T1 = _mm_add_epi64(T1, T6);
+        T5 = _mm_mul_epu32(H1, p->R21.v);
+        T6 = _mm_mul_epu32(H1, p->R22.v);
+        T2 = _mm_add_epi64(T2, T5);
+        T3 = _mm_add_epi64(T3, T6);
+        T5 = _mm_mul_epu32(H2, p->R20.v);
+        T6 = _mm_mul_epu32(H2, p->R21.v);
+        T2 = _mm_add_epi64(T2, T5);
+        T3 = _mm_add_epi64(T3, T6);
+        T5 = _mm_mul_epu32(H3, p->S24.v);
+        T6 = _mm_mul_epu32(H3, p->R20.v);
+        T2 = _mm_add_epi64(T2, T5);
+        T3 = _mm_add_epi64(T3, T6);
+        T5 = _mm_mul_epu32(H4, p->S23.v);
+        T6 = _mm_mul_epu32(H4, p->S24.v);
+        T2 = _mm_add_epi64(T2, T5);
+        T3 = _mm_add_epi64(T3, T6);
+        T5 = _mm_mul_epu32(H1, p->R23.v);
+        T4 = _mm_add_epi64(T4, T5);
+        T5 = _mm_mul_epu32(H2, p->R22.v);
+        T4 = _mm_add_epi64(T4, T5);
+        T5 = _mm_mul_epu32(H3, p->R21.v);
+        T4 = _mm_add_epi64(T4, T5);
+        T5 = _mm_mul_epu32(H4, p->R20.v);
+        T4 = _mm_add_epi64(T4, T5);
+
+        /* H += [Mx,My]*[r^2,r^2] */
+        T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi64((xmmi *)(m + 16)));
+        T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi64((xmmi *)(m + 24)));
+        M0 = _mm_and_si128(MMASK, T5);
+        M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
+        T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
+        M2 = _mm_and_si128(MMASK, T5);
+        M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
+        M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
+
+        p = &st->P[1];
+        T5 = _mm_mul_epu32(M0, p->R20.v);
+        T6 = _mm_mul_epu32(M0, p->R21.v);
+        T0 = _mm_add_epi64(T0, T5);
+        T1 = _mm_add_epi64(T1, T6);
+        T5 = _mm_mul_epu32(M1, p->S24.v);
+        T6 = _mm_mul_epu32(M1, p->R20.v);
+        T0 = _mm_add_epi64(T0, T5);
+        T1 = _mm_add_epi64(T1, T6);
+        T5 = _mm_mul_epu32(M2, p->S23.v);
+        T6 = _mm_mul_epu32(M2, p->S24.v);
+        T0 = _mm_add_epi64(T0, T5);
+        T1 = _mm_add_epi64(T1, T6);
+        T5 = _mm_mul_epu32(M3, p->S22.v);
+        T6 = _mm_mul_epu32(M3, p->S23.v);
+        T0 = _mm_add_epi64(T0, T5);
+        T1 = _mm_add_epi64(T1, T6);
+        T5 = _mm_mul_epu32(M4, p->S21.v);
+        T6 = _mm_mul_epu32(M4, p->S22.v);
+        T0 = _mm_add_epi64(T0, T5);
+        T1 = _mm_add_epi64(T1, T6);
+        T5 = _mm_mul_epu32(M0, p->R22.v);
+        T6 = _mm_mul_epu32(M0, p->R23.v);
+        T2 = _mm_add_epi64(T2, T5);
+        T3 = _mm_add_epi64(T3, T6);
+        T5 = _mm_mul_epu32(M1, p->R21.v);
+        T6 = _mm_mul_epu32(M1, p->R22.v);
+        T2 = _mm_add_epi64(T2, T5);
+        T3 = _mm_add_epi64(T3, T6);
+        T5 = _mm_mul_epu32(M2, p->R20.v);
+        T6 = _mm_mul_epu32(M2, p->R21.v);
+        T2 = _mm_add_epi64(T2, T5);
+        T3 = _mm_add_epi64(T3, T6);
+        T5 = _mm_mul_epu32(M3, p->S24.v);
+        T6 = _mm_mul_epu32(M3, p->R20.v);
+        T2 = _mm_add_epi64(T2, T5);
+        T3 = _mm_add_epi64(T3, T6);
+        T5 = _mm_mul_epu32(M4, p->S23.v);
+        T6 = _mm_mul_epu32(M4, p->S24.v);
+        T2 = _mm_add_epi64(T2, T5);
+        T3 = _mm_add_epi64(T3, T6);
+        T5 = _mm_mul_epu32(M0, p->R24.v);
+        T4 = _mm_add_epi64(T4, T5);
+        T5 = _mm_mul_epu32(M1, p->R23.v);
+        T4 = _mm_add_epi64(T4, T5);
+        T5 = _mm_mul_epu32(M2, p->R22.v);
+        T4 = _mm_add_epi64(T4, T5);
+        T5 = _mm_mul_epu32(M3, p->R21.v);
+        T4 = _mm_add_epi64(T4, T5);
+        T5 = _mm_mul_epu32(M4, p->R20.v);
+        T4 = _mm_add_epi64(T4, T5);
+
+        /* H += [Mx,My] */
+        T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 32)), _mm_loadl_epi64((xmmi *)(m + 48)));
+        T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 40)), _mm_loadl_epi64((xmmi *)(m + 56)));
+        M0 = _mm_and_si128(MMASK, T5);
+        M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
+        T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
+        M2 = _mm_and_si128(MMASK, T5);
+        M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
+        M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
+
+        T0 = _mm_add_epi64(T0, M0);
+        T1 = _mm_add_epi64(T1, M1);
+        T2 = _mm_add_epi64(T2, M2);
+        T3 = _mm_add_epi64(T3, M3);
+        T4 = _mm_add_epi64(T4, M4);
+
+        /* reduce */
+        C1 = _mm_srli_epi64(T0, 26);
+        C2 = _mm_srli_epi64(T3, 26);
+        T0 = _mm_and_si128(T0, MMASK);
+        T3 = _mm_and_si128(T3, MMASK);
+        T1 = _mm_add_epi64(T1, C1);
+        T4 = _mm_add_epi64(T4, C2);
+        C1 = _mm_srli_epi64(T1, 26);
+        C2 = _mm_srli_epi64(T4, 26);
+        T1 = _mm_and_si128(T1, MMASK);
+        T4 = _mm_and_si128(T4, MMASK);
+        T2 = _mm_add_epi64(T2, C1);
+        T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
+        C1 = _mm_srli_epi64(T2, 26);
+        C2 = _mm_srli_epi64(T0, 26);
+        T2 = _mm_and_si128(T2, MMASK);
+        T0 = _mm_and_si128(T0, MMASK);
+        T3 = _mm_add_epi64(T3, C1);
+        T1 = _mm_add_epi64(T1, C2);
+        C1 = _mm_srli_epi64(T3, 26);
+        T3 = _mm_and_si128(T3, MMASK);
+        T4 = _mm_add_epi64(T4, C1);
+
+        /* H = (H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx,My]) */
+        H0 = T0;
+        H1 = T1;
+        H2 = T2;
+        H3 = T3;
+        H4 = T4;
+
+        m += 64;
+        bytes -= 64;
+    }
+
+    st->H[0] = H0;
+    st->H[1] = H1;
+    st->H[2] = H2;
+    st->H[3] = H3;
+    st->H[4] = H4;
+}
+
+static size_t
+poly1305_combine(poly1305_state_internal *st, const uint8_t *m, size_t bytes)
+{
+    const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
+    const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128);
+    const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5);
+
+    poly1305_power *p;
+    xmmi H0, H1, H2, H3, H4;
+    xmmi M0, M1, M2, M3, M4;
+    xmmi T0, T1, T2, T3, T4, T5, T6;
+    xmmi C1, C2;
+
+    uint64_t r0, r1, r2;
+    uint64_t t0, t1, t2, t3, t4;
+    uint64_t c;
+    size_t consumed = 0;
+
+    H0 = st->H[0];
+    H1 = st->H[1];
+    H2 = st->H[2];
+    H3 = st->H[3];
+    H4 = st->H[4];
+
+    /* p = [r^2,r^2] */
+    p = &st->P[1];
+
+    if (bytes >= 32) {
+        /* H *= [r^2,r^2] */
+        T0 = _mm_mul_epu32(H0, p->R20.v);
+        T1 = _mm_mul_epu32(H0, p->R21.v);
+        T2 = _mm_mul_epu32(H0, p->R22.v);
+        T3 = _mm_mul_epu32(H0, p->R23.v);
+        T4 = _mm_mul_epu32(H0, p->R24.v);
+        T5 = _mm_mul_epu32(H1, p->S24.v);
+        T6 = _mm_mul_epu32(H1, p->R20.v);
+        T0 = _mm_add_epi64(T0, T5);
+        T1 = _mm_add_epi64(T1, T6);
+        T5 = _mm_mul_epu32(H2, p->S23.v);
+        T6 = _mm_mul_epu32(H2, p->S24.v);
+        T0 = _mm_add_epi64(T0, T5);
+        T1 = _mm_add_epi64(T1, T6);
+        T5 = _mm_mul_epu32(H3, p->S22.v);
+        T6 = _mm_mul_epu32(H3, p->S23.v);
+        T0 = _mm_add_epi64(T0, T5);
+        T1 = _mm_add_epi64(T1, T6);
+        T5 = _mm_mul_epu32(H4, p->S21.v);
+        T6 = _mm_mul_epu32(H4, p->S22.v);
+        T0 = _mm_add_epi64(T0, T5);
+        T1 = _mm_add_epi64(T1, T6);
+        T5 = _mm_mul_epu32(H1, p->R21.v);
+        T6 = _mm_mul_epu32(H1, p->R22.v);
+        T2 = _mm_add_epi64(T2, T5);
+        T3 = _mm_add_epi64(T3, T6);
+        T5 = _mm_mul_epu32(H2, p->R20.v);
+        T6 = _mm_mul_epu32(H2, p->R21.v);
+        T2 = _mm_add_epi64(T2, T5);
+        T3 = _mm_add_epi64(T3, T6);
+        T5 = _mm_mul_epu32(H3, p->S24.v);
+        T6 = _mm_mul_epu32(H3, p->R20.v);
+        T2 = _mm_add_epi64(T2, T5);
+        T3 = _mm_add_epi64(T3, T6);
+        T5 = _mm_mul_epu32(H4, p->S23.v);
+        T6 = _mm_mul_epu32(H4, p->S24.v);
+        T2 = _mm_add_epi64(T2, T5);
+        T3 = _mm_add_epi64(T3, T6);
+        T5 = _mm_mul_epu32(H1, p->R23.v);
+        T4 = _mm_add_epi64(T4, T5);
+        T5 = _mm_mul_epu32(H2, p->R22.v);
+        T4 = _mm_add_epi64(T4, T5);
+        T5 = _mm_mul_epu32(H3, p->R21.v);
+        T4 = _mm_add_epi64(T4, T5);
+        T5 = _mm_mul_epu32(H4, p->R20.v);
+        T4 = _mm_add_epi64(T4, T5);
+
+        /* H += [Mx,My] */
+        T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi64((xmmi *)(m + 16)));
+        T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi64((xmmi *)(m + 24)));
+        M0 = _mm_and_si128(MMASK, T5);
+        M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
+        T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
+        M2 = _mm_and_si128(MMASK, T5);
+        M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
+        M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
+
+        T0 = _mm_add_epi64(T0, M0);
+        T1 = _mm_add_epi64(T1, M1);
+        T2 = _mm_add_epi64(T2, M2);
+        T3 = _mm_add_epi64(T3, M3);
+        T4 = _mm_add_epi64(T4, M4);
+
+        /* reduce */
+        C1 = _mm_srli_epi64(T0, 26);
+        C2 = _mm_srli_epi64(T3, 26);
+        T0 = _mm_and_si128(T0, MMASK);
+        T3 = _mm_and_si128(T3, MMASK);
+        T1 = _mm_add_epi64(T1, C1);
+        T4 = _mm_add_epi64(T4, C2);
+        C1 = _mm_srli_epi64(T1, 26);
+        C2 = _mm_srli_epi64(T4, 26);
+        T1 = _mm_and_si128(T1, MMASK);
+        T4 = _mm_and_si128(T4, MMASK);
+        T2 = _mm_add_epi64(T2, C1);
+        T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
+        C1 = _mm_srli_epi64(T2, 26);
+        C2 = _mm_srli_epi64(T0, 26);
+        T2 = _mm_and_si128(T2, MMASK);
+        T0 = _mm_and_si128(T0, MMASK);
+        T3 = _mm_add_epi64(T3, C1);
+        T1 = _mm_add_epi64(T1, C2);
+        C1 = _mm_srli_epi64(T3, 26);
+        T3 = _mm_and_si128(T3, MMASK);
+        T4 = _mm_add_epi64(T4, C1);
+
+        /* H = (H*[r^2,r^2] + [Mx,My]) */
+        H0 = T0;
+        H1 = T1;
+        H2 = T2;
+        H3 = T3;
+        H4 = T4;
+
+        consumed = 32;
+    }
+
+    /* finalize, H *= [r^2,r] */
+    r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
+    r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
+    r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
+
+    p->R20.d[2] = (uint32_t)(r0)&0x3ffffff;
+    p->R21.d[2] = (uint32_t)((r0 >> 26) | (r1 << 18)) & 0x3ffffff;
+    p->R22.d[2] = (uint32_t)((r1 >> 8)) & 0x3ffffff;
+    p->R23.d[2] = (uint32_t)((r1 >> 34) | (r2 << 10)) & 0x3ffffff;
+    p->R24.d[2] = (uint32_t)((r2 >> 16));
+    p->S21.d[2] = p->R21.d[2] * 5;
+    p->S22.d[2] = p->R22.d[2] * 5;
+    p->S23.d[2] = p->R23.d[2] * 5;
+    p->S24.d[2] = p->R24.d[2] * 5;
+
+    /* H *= [r^2,r] */
+    T0 = _mm_mul_epu32(H0, p->R20.v);
+    T1 = _mm_mul_epu32(H0, p->R21.v);
+    T2 = _mm_mul_epu32(H0, p->R22.v);
+    T3 = _mm_mul_epu32(H0, p->R23.v);
+    T4 = _mm_mul_epu32(H0, p->R24.v);
+    T5 = _mm_mul_epu32(H1, p->S24.v);
+    T6 = _mm_mul_epu32(H1, p->R20.v);
+    T0 = _mm_add_epi64(T0, T5);
+    T1 = _mm_add_epi64(T1, T6);
+    T5 = _mm_mul_epu32(H2, p->S23.v);
+    T6 = _mm_mul_epu32(H2, p->S24.v);
+    T0 = _mm_add_epi64(T0, T5);
+    T1 = _mm_add_epi64(T1, T6);
+    T5 = _mm_mul_epu32(H3, p->S22.v);
+    T6 = _mm_mul_epu32(H3, p->S23.v);
+    T0 = _mm_add_epi64(T0, T5);
+    T1 = _mm_add_epi64(T1, T6);
+    T5 = _mm_mul_epu32(H4, p->S21.v);
+    T6 = _mm_mul_epu32(H4, p->S22.v);
+    T0 = _mm_add_epi64(T0, T5);
+    T1 = _mm_add_epi64(T1, T6);
+    T5 = _mm_mul_epu32(H1, p->R21.v);
+    T6 = _mm_mul_epu32(H1, p->R22.v);
+    T2 = _mm_add_epi64(T2, T5);
+    T3 = _mm_add_epi64(T3, T6);
+    T5 = _mm_mul_epu32(H2, p->R20.v);
+    T6 = _mm_mul_epu32(H2, p->R21.v);
+    T2 = _mm_add_epi64(T2, T5);
+    T3 = _mm_add_epi64(T3, T6);
+    T5 = _mm_mul_epu32(H3, p->S24.v);
+    T6 = _mm_mul_epu32(H3, p->R20.v);
+    T2 = _mm_add_epi64(T2, T5);
+    T3 = _mm_add_epi64(T3, T6);
+    T5 = _mm_mul_epu32(H4, p->S23.v);
+    T6 = _mm_mul_epu32(H4, p->S24.v);
+    T2 = _mm_add_epi64(T2, T5);
+    T3 = _mm_add_epi64(T3, T6);
+    T5 = _mm_mul_epu32(H1, p->R23.v);
+    T4 = _mm_add_epi64(T4, T5);
+    T5 = _mm_mul_epu32(H2, p->R22.v);
+    T4 = _mm_add_epi64(T4, T5);
+    T5 = _mm_mul_epu32(H3, p->R21.v);
+    T4 = _mm_add_epi64(T4, T5);
+    T5 = _mm_mul_epu32(H4, p->R20.v);
+    T4 = _mm_add_epi64(T4, T5);
+
+    C1 = _mm_srli_epi64(T0, 26);
+    C2 = _mm_srli_epi64(T3, 26);
+    T0 = _mm_and_si128(T0, MMASK);
+    T3 = _mm_and_si128(T3, MMASK);
+    T1 = _mm_add_epi64(T1, C1);
+    T4 = _mm_add_epi64(T4, C2);
+    C1 = _mm_srli_epi64(T1, 26);
+    C2 = _mm_srli_epi64(T4, 26);
+    T1 = _mm_and_si128(T1, MMASK);
+    T4 = _mm_and_si128(T4, MMASK);
+    T2 = _mm_add_epi64(T2, C1);
+    T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
+    C1 = _mm_srli_epi64(T2, 26);
+    C2 = _mm_srli_epi64(T0, 26);
+    T2 = _mm_and_si128(T2, MMASK);
+    T0 = _mm_and_si128(T0, MMASK);
+    T3 = _mm_add_epi64(T3, C1);
+    T1 = _mm_add_epi64(T1, C2);
+    C1 = _mm_srli_epi64(T3, 26);
+    T3 = _mm_and_si128(T3, MMASK);
+    T4 = _mm_add_epi64(T4, C1);
+
+    /* H = H[0]+H[1] */
+    H0 = _mm_add_epi64(T0, _mm_srli_si128(T0, 8));
+    H1 = _mm_add_epi64(T1, _mm_srli_si128(T1, 8));
+    H2 = _mm_add_epi64(T2, _mm_srli_si128(T2, 8));
+    H3 = _mm_add_epi64(T3, _mm_srli_si128(T3, 8));
+    H4 = _mm_add_epi64(T4, _mm_srli_si128(T4, 8));
+
+    t0 = _mm_cvtsi128_si32(H0);
+    c = (t0 >> 26);
+    t0 &= 0x3ffffff;
+    t1 = _mm_cvtsi128_si32(H1) + c;
+    c = (t1 >> 26);
+    t1 &= 0x3ffffff;
+    t2 = _mm_cvtsi128_si32(H2) + c;
+    c = (t2 >> 26);
+    t2 &= 0x3ffffff;
+    t3 = _mm_cvtsi128_si32(H3) + c;
+    c = (t3 >> 26);
+    t3 &= 0x3ffffff;
+    t4 = _mm_cvtsi128_si32(H4) + c;
+    c = (t4 >> 26);
+    t4 &= 0x3ffffff;
+    t0 = t0 + (c * 5);
+    c = (t0 >> 26);
+    t0 &= 0x3ffffff;
+    t1 = t1 + c;
+
+    st->HH[0] = ((t0) | (t1 << 26)) & 0xfffffffffffull;
+    st->HH[1] = ((t1 >> 18) | (t2 << 8) | (t3 << 34)) & 0xfffffffffffull;
+    st->HH[2] = ((t3 >> 10) | (t4 << 16)) & 0x3ffffffffffull;
+
+    return consumed;
+}
+
+void
+Poly1305Update(poly1305_state *state, const unsigned char *m, size_t bytes)
+{ /* Absorb |bytes| bytes from |m|; input that cannot be processed yet is kept in st->buffer. */
+    poly1305_state_internal *st = poly1305_aligned_state(state);
+    size_t want;
+
+    /* need at least 32 initial bytes to start the accelerated branch */
+    if (!st->started) {
+        if ((st->leftover == 0) && (bytes > 32)) {
+            poly1305_first_block(st, m); /* nothing buffered: start straight from the caller's data */
+            m += 32;
+            bytes -= 32;
+        } else {
+            want = poly1305_min(32 - st->leftover, bytes); /* top the buffer up to at most 32 bytes */
+            poly1305_block_copy(st->buffer + st->leftover, m, want);
+            bytes -= want;
+            m += want;
+            st->leftover += want;
+            if ((st->leftover < 32) || (bytes == 0)) /* stay unstarted unless input follows the first 32 bytes */
+                return;
+            poly1305_first_block(st, st->buffer);
+            st->leftover = 0;
+        }
+        st->started = 1;
+    }
+
+    /* handle leftover */
+    if (st->leftover) {
+        want = poly1305_min(64 - st->leftover, bytes); /* fill the buffer towards one 64-byte chunk */
+        poly1305_block_copy(st->buffer + st->leftover, m, want);
+        bytes -= want;
+        m += want;
+        st->leftover += want;
+        if (st->leftover < 64) /* still short of 64 bytes: keep buffering */
+            return;
+        poly1305_blocks(st, st->buffer, 64);
+        st->leftover = 0;
+    }
+
+    /* process 64 byte blocks */
+    if (bytes >= 64) {
+        want = (bytes & ~63); /* largest multiple of 64 that is <= bytes */
+        poly1305_blocks(st, m, want);
+        m += want;
+        bytes -= want;
+    }
+
+    if (bytes) { /* fewer than 64 bytes remain: keep them for a later call or for Poly1305Finish */
+        poly1305_block_copy(st->buffer + st->leftover, m, bytes);
+        st->leftover += bytes;
+    }
+}
+
+void
+Poly1305Finish(poly1305_state *state, unsigned char mac[16])
+{ /* Process the buffered tail, fully reduce the accumulator, add the pad and write the 16-byte tag to |mac|. */
+    poly1305_state_internal *st = poly1305_aligned_state(state);
+    size_t leftover = st->leftover;
+    uint8_t *m = st->buffer;
+    uint128_t d[3];
+    uint64_t h0, h1, h2;
+    uint64_t t0, t1;
+    uint64_t g0, g1, g2, c, nc;
+    uint64_t r0, r1, r2, s1, s2;
+    poly1305_power *p;
+
+    if (st->started) {
+        size_t consumed = poly1305_combine(st, m, leftover); /* returns how many buffered bytes it used (0 or 32) */
+        leftover -= consumed;
+        m += consumed;
+    }
+
+    /* st->HH will either be 0 or have the combined result */
+    h0 = st->HH[0];
+    h1 = st->HH[1];
+    h2 = st->HH[2];
+
+    p = &st->P[1];
+    r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; /* r0..r2 are rebuilt from 32-bit lanes d[1] (low) and d[3] (high) */
+    r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
+    r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
+    s1 = r1 * (5 << 2); /* 20 * r1: multiplier for product terms that wrap past the top limb */
+    s2 = r2 * (5 << 2);
+
+    if (leftover < 16)
+        goto poly1305_donna_atmost15bytes;
+
+poly1305_donna_atleast16bytes:
+    t0 = U8TO64_LE(m + 0); /* load one full 16-byte block */
+    t1 = U8TO64_LE(m + 8);
+    h0 += t0 & 0xfffffffffff; /* add it to h as 44/44/40-bit pieces */
+    t0 = shr128_pair(t1, t0, 44);
+    h1 += t0 & 0xfffffffffff;
+    h2 += (t1 >> 24) | ((uint64_t)1 << 40); /* bit 40 of the third limb is 2^128 (limbs start at bits 0, 44, 88) */
+
+poly1305_donna_mul:
+    d[0] = add128(add128(mul64x64_128(h0, r0), mul64x64_128(h1, s2)), mul64x64_128(h2, s1)); /* h *= r, column by column */
+    d[1] = add128(add128(mul64x64_128(h0, r1), mul64x64_128(h1, r0)), mul64x64_128(h2, s2));
+    d[2] = add128(add128(mul64x64_128(h0, r2), mul64x64_128(h1, r1)), mul64x64_128(h2, r0));
+    h0 = lo128(d[0]) & 0xfffffffffff; /* carry-propagate back into 44/44/42-bit limbs */
+    c = shr128(d[0], 44);
+    d[1] = add128_64(d[1], c);
+    h1 = lo128(d[1]) & 0xfffffffffff;
+    c = shr128(d[1], 44);
+    d[2] = add128_64(d[2], c);
+    h2 = lo128(d[2]) & 0x3ffffffffff;
+    c = shr128(d[2], 42);
+    h0 += c * 5; /* carry out of bit 130 re-enters the low limb multiplied by 5 */
+
+    m += 16;
+    leftover -= 16;
+    if (leftover >= 16)
+        goto poly1305_donna_atleast16bytes;
+
+/* final bytes */
+poly1305_donna_atmost15bytes:
+    if (!leftover)
+        goto poly1305_donna_finish;
+
+    m[leftover++] = 1; /* terminate the partial block with a 0x01 byte ... */
+    poly1305_block_zero(m + leftover, 16 - leftover); /* ... then zero-fill up to 16 bytes */
+    leftover = 16; /* the multiply step subtracts 16, so control returns here with leftover == 0 */
+
+    t0 = U8TO64_LE(m + 0);
+    t1 = U8TO64_LE(m + 8);
+    h0 += t0 & 0xfffffffffff;
+    t0 = shr128_pair(t1, t0, 44);
+    h1 += t0 & 0xfffffffffff;
+    h2 += (t1 >> 24); /* no 2^128 bit for a partial block; the 0x01 byte above marks its end instead */
+
+    goto poly1305_donna_mul;
+
+poly1305_donna_finish:
+    c = (h0 >> 44); /* full carry propagation across the three limbs */
+    h0 &= 0xfffffffffff;
+    h1 += c;
+    c = (h1 >> 44);
+    h1 &= 0xfffffffffff;
+    h2 += c;
+    c = (h2 >> 42);
+    h2 &= 0x3ffffffffff;
+    h0 += c * 5;
+
+    g0 = h0 + 5; /* g = h + 5 - 2^130 */
+    c = (g0 >> 44);
+    g0 &= 0xfffffffffff;
+    g1 = h1 + c;
+    c = (g1 >> 44);
+    g1 &= 0xfffffffffff;
+    g2 = h2 + c - ((uint64_t)1 << 42);
+
+    c = (g2 >> 63) - 1; /* all-ones when the subtraction did not borrow (top bit of g2 clear), else 0 */
+    nc = ~c;
+    h0 = (h0 & nc) | (g0 & c); /* branch-free select: g when c is all-ones, otherwise h */
+    h1 = (h1 & nc) | (g1 & c);
+    h2 = (h2 & nc) | (g2 & c);
+
+    /* pad */
+    t0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1]; /* pad words are rebuilt from lanes of R23 and R24 */
+    t1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1];
+    h0 += (t0 & 0xfffffffffff);
+    c = (h0 >> 44);
+    h0 &= 0xfffffffffff;
+    t0 = shr128_pair(t1, t0, 44);
+    h1 += (t0 & 0xfffffffffff) + c;
+    c = (h1 >> 44);
+    h1 &= 0xfffffffffff;
+    t1 = (t1 >> 24);
+    h2 += (t1) + c;
+
+    U64TO8_LE(mac + 0, ((h0) | (h1 << 44))); /* repack the limbs into two 64-bit words and store them */
+    U64TO8_LE(mac + 8, ((h1 >> 20) | (h2 << 24)));
+}
diff --git a/security/nss/lib/freebl/poly1305.c b/security/nss/lib/freebl/poly1305.c
new file mode 100644
index 000000000..eb3e3cd55
--- /dev/null
+++ b/security/nss/lib/freebl/poly1305.c
@@ -0,0 +1,314 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This implementation of poly1305 is by Andrew Moon
+ * (https://github.com/floodyberry/poly1305-donna) and released as public
+ * domain. */
+
+#include <string.h>
+
+#include "poly1305.h"
+
+#if defined(_MSC_VER) && _MSC_VER < 1600
+#include "prtypes.h"
+typedef PRUint32 uint32_t;
+typedef PRUint64 uint64_t;
+#else
+#include <stdint.h>
+#endif
+
+#if defined(NSS_X86) || defined(NSS_X64)
+/* We can assume little-endian. */
+static uint32_t
+U8TO32_LE(const unsigned char *m)
+{
+    uint32_t r;
+    memcpy(&r, m, sizeof(r)); /* native-order 4-byte load; memcpy imposes no alignment requirement on m */
+    return r; /* equals the little-endian value only because this branch assumes a little-endian host */
+}
+
+static void
+U32TO8_LE(unsigned char *m, uint32_t v)
+{
+    memcpy(m, &v, sizeof(v)); /* native-order 4-byte store; little-endian on the hosts this branch is built for */
+}
+#else
+static uint32_t
+U8TO32_LE(const unsigned char *m)
+{
+    return (uint32_t)m[0] | /* byte 0 is least significant: little-endian load on any host */
+           (uint32_t)m[1] << 8 |
+           (uint32_t)m[2] << 16 |
+           (uint32_t)m[3] << 24;
+}
+
+static void
+U32TO8_LE(unsigned char *m, uint32_t v)
+{
+    m[0] = v; /* each assignment keeps only the low 8 bits; least significant byte is stored first */
+    m[1] = v >> 8;
+    m[2] = v >> 16;
+    m[3] = v >> 24;
+}
+#endif
+
+static uint64_t
+mul32x32_64(uint32_t a, uint32_t b)
+{
+    return (uint64_t)a * b; /* widen before multiplying so the full 64-bit product is kept */
+}
+
+struct poly1305_state_st {
+    uint32_t r0, r1, r2, r3, r4; /* multiplier limbs, derived from key[0..15] in Poly1305Init */
+    uint32_t s1, s2, s3, s4;     /* 5 * r1 .. 5 * r4, precomputed in Poly1305Init */
+    uint32_t h0, h1, h2, h3, h4; /* running accumulator limbs, zeroed in Poly1305Init */
+    unsigned char buf[16];       /* holds a partial 16-byte block between Poly1305Update calls */
+    unsigned int buf_used;       /* number of valid bytes currently in buf */
+    unsigned char key[16];       /* copy of key[16..31], added to the result in Poly1305Finish */
+};
+
+/* update updates |state| given some amount of input data. This function may
+ * only be called with a |len| that is not a multiple of 16 at the end of the
+ * data. Otherwise the input must be buffered into 16 byte blocks. */
+static void
+update(struct poly1305_state_st *state, const unsigned char *in,
+       size_t len)
+{
+    uint32_t t0, t1, t2, t3;
+    uint64_t t[5];
+    uint32_t b;
+    uint64_t c;
+    size_t j;
+    unsigned char mp[16]; /* scratch copy used to pad a final partial block */
+
+    if (len < 16)
+        goto poly1305_donna_atmost15bytes;
+
+poly1305_donna_16bytes:
+    t0 = U8TO32_LE(in); /* load one full 16-byte block as four little-endian words */
+    t1 = U8TO32_LE(in + 4);
+    t2 = U8TO32_LE(in + 8);
+    t3 = U8TO32_LE(in + 12);
+
+    in += 16;
+    len -= 16;
+
+    state->h0 += t0 & 0x3ffffff; /* add the block to h, re-sliced into 26-bit limbs */
+    state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff;
+    state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff;
+    state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff;
+    state->h4 += (t3 >> 8) | (1 << 24); /* bit 24 of limb 4 is 2^128 (limb 4 starts at bit 104) */
+
+poly1305_donna_mul:
+    t[0] = mul32x32_64(state->h0, state->r0) + /* t[] = h * r by columns; s_i (= 5 * r_i) handles wrapped terms */
+           mul32x32_64(state->h1, state->s4) +
+           mul32x32_64(state->h2, state->s3) +
+           mul32x32_64(state->h3, state->s2) +
+           mul32x32_64(state->h4, state->s1);
+    t[1] = mul32x32_64(state->h0, state->r1) +
+           mul32x32_64(state->h1, state->r0) +
+           mul32x32_64(state->h2, state->s4) +
+           mul32x32_64(state->h3, state->s3) +
+           mul32x32_64(state->h4, state->s2);
+    t[2] = mul32x32_64(state->h0, state->r2) +
+           mul32x32_64(state->h1, state->r1) +
+           mul32x32_64(state->h2, state->r0) +
+           mul32x32_64(state->h3, state->s4) +
+           mul32x32_64(state->h4, state->s3);
+    t[3] = mul32x32_64(state->h0, state->r3) +
+           mul32x32_64(state->h1, state->r2) +
+           mul32x32_64(state->h2, state->r1) +
+           mul32x32_64(state->h3, state->r0) +
+           mul32x32_64(state->h4, state->s4);
+    t[4] = mul32x32_64(state->h0, state->r4) +
+           mul32x32_64(state->h1, state->r3) +
+           mul32x32_64(state->h2, state->r2) +
+           mul32x32_64(state->h3, state->r1) +
+           mul32x32_64(state->h4, state->r0);
+
+    state->h0 = (uint32_t)t[0] & 0x3ffffff; /* keep 26 bits per limb, pushing the excess into the next column */
+    c = (t[0] >> 26);
+    t[1] += c;
+    state->h1 = (uint32_t)t[1] & 0x3ffffff;
+    b = (uint32_t)(t[1] >> 26);
+    t[2] += b;
+    state->h2 = (uint32_t)t[2] & 0x3ffffff;
+    b = (uint32_t)(t[2] >> 26);
+    t[3] += b;
+    state->h3 = (uint32_t)t[3] & 0x3ffffff;
+    b = (uint32_t)(t[3] >> 26);
+    t[4] += b;
+    state->h4 = (uint32_t)t[4] & 0x3ffffff;
+    b = (uint32_t)(t[4] >> 26);
+    state->h0 += b * 5; /* carry out of limb 4 re-enters limb 0 multiplied by 5; h0 may now exceed 26 bits */
+
+    if (len >= 16)
+        goto poly1305_donna_16bytes;
+
+/* final bytes */
+poly1305_donna_atmost15bytes:
+    if (!len)
+        return;
+
+    for (j = 0; j < len; j++) /* copy the partial block ... */
+        mp[j] = in[j];
+    mp[j++] = 1; /* ... append a 0x01 byte ... */
+    for (; j < 16; j++) /* ... and zero-fill up to 16 bytes */
+        mp[j] = 0;
+    len = 0; /* after the multiply step both tests above fail on len, so the function returns */
+
+    t0 = U8TO32_LE(mp + 0);
+    t1 = U8TO32_LE(mp + 4);
+    t2 = U8TO32_LE(mp + 8);
+    t3 = U8TO32_LE(mp + 12);
+
+    state->h0 += t0 & 0x3ffffff;
+    state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff;
+    state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff;
+    state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff;
+    state->h4 += (t3 >> 8); /* no 2^128 bit here; the 0x01 byte appended above marks the end of the data */
+
+    goto poly1305_donna_mul;
+}
+
+void
+Poly1305Init(poly1305_state *statep, const unsigned char key[32])
+{
+    struct poly1305_state_st *state = (struct poly1305_state_st *)statep; /* the caller's opaque buffer is used directly as the state struct */
+    uint32_t t0, t1, t2, t3;
+
+    t0 = U8TO32_LE(key + 0); /* the first 16 key bytes become the multiplier r */
+    t1 = U8TO32_LE(key + 4);
+    t2 = U8TO32_LE(key + 8);
+    t3 = U8TO32_LE(key + 12);
+
+    /* precompute multipliers */
+    state->r0 = t0 & 0x3ffffff; /* slice r into five 26-bit limbs */
+    t0 >>= 26;
+    t0 |= t1 << 6;
+    state->r1 = t0 & 0x3ffff03; /* this and the following masks also force some bits of r to zero */
+    t1 >>= 20;
+    t1 |= t2 << 12;
+    state->r2 = t1 & 0x3ffc0ff;
+    t2 >>= 14;
+    t2 |= t3 << 18;
+    state->r3 = t2 & 0x3f03fff;
+    t3 >>= 8;
+    state->r4 = t3 & 0x00fffff;
+
+    state->s1 = state->r1 * 5; /* s_i = 5 * r_i, consumed by the multiply step in update() */
+    state->s2 = state->r2 * 5;
+    state->s3 = state->r3 * 5;
+    state->s4 = state->r4 * 5;
+
+    /* init state */
+    state->h0 = 0;
+    state->h1 = 0;
+    state->h2 = 0;
+    state->h3 = 0;
+    state->h4 = 0;
+
+    state->buf_used = 0;
+    memcpy(state->key, key + 16, sizeof(state->key)); /* the last 16 key bytes are saved for Poly1305Finish */
+}
+
+void
+Poly1305Update(poly1305_state *statep, const unsigned char *in,
+               size_t in_len)
+{
+    unsigned int i;
+    struct poly1305_state_st *state = (struct poly1305_state_st *)statep;
+
+    if (state->buf_used) { /* a partial block is pending: complete it first */
+        unsigned int todo = 16 - state->buf_used;
+        if (todo > in_len)
+            todo = in_len;
+        for (i = 0; i < todo; i++)
+            state->buf[state->buf_used + i] = in[i];
+        state->buf_used += todo;
+        in_len -= todo;
+        in += todo;
+
+        if (state->buf_used == 16) { /* buffer now holds a whole block */
+            update(state, state->buf, 16);
+            state->buf_used = 0;
+        }
+    }
+
+    if (in_len >= 16) {
+        size_t todo = in_len & ~0xf; /* largest multiple of 16 that is <= in_len */
+        update(state, in, todo);
+        in += todo;
+        in_len &= 0xf;
+    }
+
+    if (in_len) { /* fewer than 16 bytes remain: stash them for a later call or Poly1305Finish */
+        for (i = 0; i < in_len; i++)
+            state->buf[i] = in[i];
+        state->buf_used = in_len;
+    }
+}
+
+void
+Poly1305Finish(poly1305_state *statep, unsigned char mac[16])
+{
+    struct poly1305_state_st *state = (struct poly1305_state_st *)statep;
+    uint64_t f0, f1, f2, f3;
+    uint32_t g0, g1, g2, g3, g4;
+    uint32_t b, nb;
+
+    if (state->buf_used) /* fewer than 16 bytes are buffered; update() pads and absorbs them */
+        update(state, state->buf, state->buf_used);
+
+    b = state->h0 >> 26; /* full carry propagation across the five 26-bit limbs */
+    state->h0 = state->h0 & 0x3ffffff;
+    state->h1 += b;
+    b = state->h1 >> 26;
+    state->h1 = state->h1 & 0x3ffffff;
+    state->h2 += b;
+    b = state->h2 >> 26;
+    state->h2 = state->h2 & 0x3ffffff;
+    state->h3 += b;
+    b = state->h3 >> 26;
+    state->h3 = state->h3 & 0x3ffffff;
+    state->h4 += b;
+    b = state->h4 >> 26;
+    state->h4 = state->h4 & 0x3ffffff;
+    state->h0 += b * 5;
+
+    g0 = state->h0 + 5; /* g = h + 5 - 2^130 */
+    b = g0 >> 26;
+    g0 &= 0x3ffffff;
+    g1 = state->h1 + b;
+    b = g1 >> 26;
+    g1 &= 0x3ffffff;
+    g2 = state->h2 + b;
+    b = g2 >> 26;
+    g2 &= 0x3ffffff;
+    g3 = state->h3 + b;
+    b = g3 >> 26;
+    g3 &= 0x3ffffff;
+    g4 = state->h4 + b - (1 << 26);
+
+    b = (g4 >> 31) - 1; /* all-ones when the subtraction did not borrow (top bit of g4 clear), else 0 */
+    nb = ~b;
+    state->h0 = (state->h0 & nb) | (g0 & b); /* branch-free select: g when b is all-ones, otherwise h */
+    state->h1 = (state->h1 & nb) | (g1 & b);
+    state->h2 = (state->h2 & nb) | (g2 & b);
+    state->h3 = (state->h3 & nb) | (g3 & b);
+    state->h4 = (state->h4 & nb) | (g4 & b);
+
+    f0 = ((state->h0) | (state->h1 << 26)) + (uint64_t)U8TO32_LE(&state->key[0]); /* repack limbs into 32-bit words and add the saved key half */
+    f1 = ((state->h1 >> 6) | (state->h2 << 20)) + (uint64_t)U8TO32_LE(&state->key[4]);
+    f2 = ((state->h2 >> 12) | (state->h3 << 14)) + (uint64_t)U8TO32_LE(&state->key[8]);
+    f3 = ((state->h3 >> 18) | (state->h4 << 8)) + (uint64_t)U8TO32_LE(&state->key[12]);
+
+    U32TO8_LE(&mac[0], (uint32_t)f0);
+    f1 += (f0 >> 32); /* bits above 32 carry into the next word */
+    U32TO8_LE(&mac[4], (uint32_t)f1);
+    f2 += (f1 >> 32);
+    U32TO8_LE(&mac[8], (uint32_t)f2);
+    f3 += (f2 >> 32);
+    U32TO8_LE(&mac[12], (uint32_t)f3); /* any carry out of the last word is discarded */
+}
diff --git a/security/nss/lib/freebl/poly1305.h b/security/nss/lib/freebl/poly1305.h
new file mode 100644
index 000000000..0a463483f
--- /dev/null
+++ b/security/nss/lib/freebl/poly1305.h
@@ -0,0 +1,28 @@
+/*
+ * poly1305.h - header file for Poly1305 implementation.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef FREEBL_POLY1305_H_
+#define FREEBL_POLY1305_H_
+
+typedef unsigned char poly1305_state[512];
+
+/* Poly1305Init sets up |state| so that it can be used to calculate an
+ * authentication tag with the one-time key |key|. Note that |key| is a
+ * one-time key and therefore there is no `reset' method because that would
+ * enable several messages to be authenticated with the same key. */
+extern void Poly1305Init(poly1305_state* state, const unsigned char key[32]);
+
+/* Poly1305Update processes |inLen| bytes from |in|. It can be called zero or
+ * more times after Poly1305Init. */
+extern void Poly1305Update(poly1305_state* state, const unsigned char* in,
+                           size_t inLen);
+
+/* Poly1305Finish completes the poly1305 calculation and writes a 16 byte
+ * authentication tag to |mac|. */
+extern void Poly1305Finish(poly1305_state* state, unsigned char mac[16]);
+
+#endif /* FREEBL_POLY1305_H_ */
diff --git a/security/nss/lib/freebl/pqg.c b/security/nss/lib/freebl/pqg.c
new file mode 100644
index 000000000..2f24afd24
--- /dev/null
+++ b/security/nss/lib/freebl/pqg.c
@@ -0,0 +1,1878 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * PQG parameter generation/verification.  Based on FIPS 186-3.
+ */
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "blapi.h"
+#include "secitem.h"
+#include "mpi.h"
+#include "mpprime.h"
+#include "mplogic.h"
+#include "secmpi.h"
+
+#define MAX_ITERATIONS 1000 /* Maximum number of iterations of primegen */
+
+typedef enum {
+    FIPS186_1_TYPE,   /* Probabilistic */
+    FIPS186_3_TYPE,   /* Probabilistic */
+    FIPS186_3_ST_TYPE /* Shawe-Taylor provable */
+} pqgGenType;
+
+/*
+ * These test iterations are quite a bit larger than we previously had.
+ * This is because FIPS 186-3 is worried about the primes in PQG generation.
+ * It may be possible to purposefully construct composites which need more
+ * iterations of Miller-Rabin than your normal randomly selected
+ * numbers. There are 3 ways to counter this: 1) use one of the cool provably
+ * prime algorithms (which would require a lot more work than DSA-2 deserves),
+ * 2) add a Lucas primality test (which requires coding a Lucas primality test),
+ * or 3) use a larger M-R test count. I chose the latter. It increases the time
+ * that it takes to prove the selected prime, but it shouldn't increase the
+ * overall time to run the algorithm (non-primes should still fail M-R
+ * relatively quickly). If you want to get that last bit of performance,
+ * implement Lucas and adjust these two functions.  See FIPS 186-3 Appendix C
+ * and F for more information.
+ */
+static int
+prime_testcount_p(int L, int N)
+{
+    switch (L) { /* the count depends only on L; N is not consulted */
+        case 1024:
+            return 40;
+        case 2048:
+            return 56;
+        case 3072:
+            return 64;
+        default:
+            break;
+    }
+    return 50; /* L = 512-960 */
+}
+
+/* The q numbers are different if you run M-R followed by Lucas. I created
+ * a separate function so if someone wanted to add the Lucas check, they
+ * could do so fairly easily */
+static int
+prime_testcount_q(int L, int N)
+{
+    return prime_testcount_p(L, N); /* currently identical to the count used for p */
+}
+
+/*
+ * generic function to make sure our input matches DSA2 requirements
+ * this gives us one place to go if we need to bump the requirements in the
+ * future.
+ */
+static SECStatus
+pqg_validate_dsa2(unsigned int L, unsigned int N)
+{
+    /* Only the (L, N) pairs accepted below succeed; anything else sets SEC_ERROR_INVALID_ARGS. */
+    switch (L) {
+        case 1024:
+            if (N != DSA1_Q_BITS) { /* L = 1024 is only valid with the DSA1 subprime size */
+                PORT_SetError(SEC_ERROR_INVALID_ARGS);
+                return SECFailure;
+            }
+            break;
+        case 2048:
+            if ((N != 224) && (N != 256)) { /* L = 2048 allows N = 224 or N = 256 */
+                PORT_SetError(SEC_ERROR_INVALID_ARGS);
+                return SECFailure;
+            }
+            break;
+        case 3072:
+            if (N != 256) { /* L = 3072 allows only N = 256 */
+                PORT_SetError(SEC_ERROR_INVALID_ARGS);
+                return SECFailure;
+            }
+            break;
+        default: /* any other L is rejected */
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+    }
+    return SECSuccess;
+}
+
+static unsigned int
+pqg_get_default_N(unsigned int L)
+{
+    unsigned int N = 0; /* 0 is returned, with the error code set, for an unsupported L */
+    switch (L) {
+        case 1024:
+            N = DSA1_Q_BITS;
+            break;
+        case 2048:
+            N = 224; /* the smaller of the two N values pqg_validate_dsa2 accepts for L = 2048 */
+            break;
+        case 3072:
+            N = 256;
+            break;
+        default:
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            break; /* N already set to zero */
+    }
+    return N;
+}
+
+/*
+ * Select the lowest hash algorithm usable
+ */
+static HASH_HashType
+getFirstHash(unsigned int L, unsigned int N)
+{
+    if (N < 224) { /* the choice depends only on N; L is not consulted */
+        return HASH_AlgSHA1;
+    }
+    if (N < 256) {
+        return HASH_AlgSHA224;
+    }
+    if (N < 384) {
+        return HASH_AlgSHA256;
+    }
+    if (N < 512) {
+        return HASH_AlgSHA384;
+    }
+    return HASH_AlgSHA512; /* N >= 512 */
+}
+
+/*
+ * find the next usable hash algorithm
+ */
+static HASH_HashType
+getNextHash(HASH_HashType hashtype)
+{
+    switch (hashtype) { /* step along SHA1 -> SHA224 -> SHA256 -> SHA384 -> SHA512 */
+        case HASH_AlgSHA1:
+            hashtype = HASH_AlgSHA224;
+            break;
+        case HASH_AlgSHA224:
+            hashtype = HASH_AlgSHA256;
+            break;
+        case HASH_AlgSHA256:
+            hashtype = HASH_AlgSHA384;
+            break;
+        case HASH_AlgSHA384:
+            hashtype = HASH_AlgSHA512;
+            break;
+        case HASH_AlgSHA512:
+        default: /* SHA512 and any unlisted type have no successor */
+            hashtype = HASH_AlgTOTAL;
+            break;
+    }
+    return hashtype;
+}
+
+static unsigned int
+HASH_ResultLen(HASH_HashType type)
+{
+    const SECHashObject *hash_obj = HASH_GetRawHashObject(type);
+    PORT_Assert(hash_obj != NULL);
+    if (hash_obj == NULL) { /* explicit check in addition to the assertion above */
+        /* type is always a valid HashType. Thus a null hash_obj must be a bug */
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return 0; /* 0 signals failure to the caller */
+    }
+    PORT_Assert(hash_obj->length != 0);
+    return hash_obj->length; /* the length field of the hash object for |type| */
+}
+
+static SECStatus
+HASH_HashBuf(HASH_HashType type, unsigned char *dest,
+             const unsigned char *src, PRUint32 src_len)
+{
+    const SECHashObject *hash_obj = HASH_GetRawHashObject(type);
+    void *hashcx = NULL;
+    unsigned int dummy; /* receives the output length from end(); not used afterwards */
+
+    if (hash_obj == NULL) {
+        return SECFailure;
+    }
+
+    hashcx = hash_obj->create();
+    if (hashcx == NULL) {
+        return SECFailure;
+    }
+    hash_obj->begin(hashcx);
+    hash_obj->update(hashcx, src, src_len);
+    hash_obj->end(hashcx, dest, &dummy, hash_obj->length); /* dest must have room for hash_obj->length bytes */
+    hash_obj->destroy(hashcx, PR_TRUE);
+    return SECSuccess;
+}
+
+unsigned int
+PQG_GetLength(const SECItem *obj)
+{
+    unsigned int len = obj->len; /* obj itself is dereferenced unconditionally and must not be NULL */
+
+    if (obj->data == NULL) {
+        return 0;
+    }
+    if (len > 1 && obj->data[0] == 0) { /* do not count a single leading zero byte */
+        len--;
+    }
+    return len;
+}
+
+SECStatus
+PQG_Check(const PQGParams *params)
+{
+    unsigned int L, N;
+    SECStatus rv = SECSuccess;
+
+    if (params == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    L = PQG_GetLength(&params->prime) * PR_BITS_PER_BYTE; /* sizes in bits, derived from the byte lengths */
+    N = PQG_GetLength(&params->subPrime) * PR_BITS_PER_BYTE;
+
+    if (L < 1024) {
+        int j;
+
+        /* handle DSA1 pqg parameters with less than 1024 bits */
+        if (N != DSA1_Q_BITS) {
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+        }
+        j = PQG_PBITS_TO_INDEX(L);
+        if (j < 0) { /* a negative index is treated as an invalid prime size */
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            rv = SECFailure;
+        }
+    } else {
+        /* handle DSA2 parameters (includes DSA1, 1024 bits) */
+        rv = pqg_validate_dsa2(L, N);
+    }
+    return rv;
+}
+
+HASH_HashType
+PQG_GetHashType(const PQGParams *params)
+{
+    unsigned int L, N;
+
+    if (params == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return HASH_AlgNULL; /* returned only for a NULL argument */
+    }
+
+    L = PQG_GetLength(&params->prime) * PR_BITS_PER_BYTE; /* sizes in bits, derived from the byte lengths */
+    N = PQG_GetLength(&params->subPrime) * PR_BITS_PER_BYTE;
+    return getFirstHash(L, N); /* the hash is selected from N; the sizes are not validated here */
+}
+
+/* Get a seed for generating P and Q.  seed->len bytes are obtained from the
+** global random number generator; seed->data is allocated from the arena
+** when the caller has not supplied a buffer.
+*/
+static SECStatus
+getPQseed(SECItem *seed, PLArenaPool *arena)
+{
+    SECStatus rv;
+
+    if (!seed->data) { /* allocate seed->len bytes only when the caller supplied no buffer */
+        seed->data = (unsigned char *)PORT_ArenaZAlloc(arena, seed->len);
+    }
+    if (!seed->data) { /* still NULL: the allocation failed */
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    rv = RNG_GenerateGlobalRandomBytes(seed->data, seed->len);
+    /*
+     * NIST CMVP disallows a sequence of 20 bytes with the most
+     * significant byte equal to 0.  Perhaps they interpret
+     * "a sequence of at least 160 bits" as "a number >= 2^159".
+     * So we always set the most significant bit to 1. (bug 334533)
+     */
+    seed->data[0] |= 0x80; /* done whether or not the RNG call succeeded; rv reports its status */
+    return rv;
+}
+
+/* Generate a candidate h value.  If in testing mode, use the h value
+** specified in FIPS 186-1 appendix 5, h = 2.  Otherwise, obtain bytes
+** from the global random number generator.
+*/
+static SECStatus
+generate_h_candidate(SECItem *hit, mp_int *H)
+{
+    SECStatus rv = SECSuccess;
+    mp_err err = MP_OKAY;
+#ifdef FIPS_186_1_A5_TEST
+    memset(hit->data, 0, hit->len);
+    hit->data[hit->len - 1] = 0x02; /* last byte 0x02, every other byte zero */
+#else
+    rv = RNG_GenerateGlobalRandomBytes(hit->data, hit->len); /* fills the caller-provided hit buffer */
+#endif
+    if (rv)
+        return SECFailure;
+    err = mp_read_unsigned_octets(H, hit->data, hit->len); /* H = integer read from the bytes of hit */
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+static SECStatus
+addToSeed(const SECItem *seed,
+          unsigned long addend,
+          int seedlen, /* g in 186-1 */
+          SECItem *seedout)
+{
+    mp_int s, sum, modulus, tmp;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    MP_DIGITS(&s) = 0; /* presumably lets mp_clear() in cleanup run safely on a never-initialized mp_int */
+    MP_DIGITS(&sum) = 0;
+    MP_DIGITS(&modulus) = 0;
+    MP_DIGITS(&tmp) = 0;
+    CHECK_MPI_OK(mp_init(&s)); /* NOTE(review): CHECK_MPI_OK presumably jumps to cleanup on error - confirm */
+    CHECK_MPI_OK(mp_init(&sum));
+    CHECK_MPI_OK(mp_init(&modulus)); /* modulus is initialized and cleared but otherwise unused here */
+    SECITEM_TO_MPINT(*seed, &s); /* s = seed */
+    /* seed += addend */
+    if (addend < MP_DIGIT_MAX) {
+        CHECK_MPI_OK(mp_add_d(&s, (mp_digit)addend, &s)); /* addend fits in one digit */
+    } else {
+        CHECK_MPI_OK(mp_init(&tmp)); /* otherwise go through a temporary mp_int */
+        CHECK_MPI_OK(mp_set_ulong(&tmp, addend));
+        CHECK_MPI_OK(mp_add(&s, &tmp, &s));
+    }
+    /*sum = s mod 2**seedlen */
+    CHECK_MPI_OK(mp_div_2d(&s, (mp_digit)seedlen, NULL, &sum)); /* quotient discarded (NULL); remainder goes to sum */
+    if (seedout->data != NULL) { /* release any previous contents before overwriting */
+        SECITEM_ZfreeItem(seedout, PR_FALSE);
+    }
+    MPINT_TO_SECITEM(&sum, seedout, NULL);
+cleanup:
+    mp_clear(&s);
+    mp_clear(&sum);
+    mp_clear(&modulus);
+    mp_clear(&tmp);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        return SECFailure;
+    }
+    return rv;
+}
+
+/* Compute Hash[(SEED + addend) mod 2**g]
+** Result is placed in shaOutBuf.
+** This computation is used in steps 2 and 7 of FIPS 186 Appendix 2.2  and
+** step 11.2 of FIPS 186-3 Appendix A.1.1.2 .
+*/
+static SECStatus
+addToSeedThenHash(HASH_HashType hashtype,
+                  const SECItem *seed,
+                  unsigned long addend,
+                  int seedlen, /* g in 186-1 */
+                  unsigned char *hashOutBuf)
+{
+    SECItem str = { 0, 0, 0 }; /* starts empty; filled in by addToSeed */
+    SECStatus rv;
+    rv = addToSeed(seed, addend, seedlen, &str); /* str = (seed + addend) mod 2**seedlen */
+    if (rv != SECSuccess) {
+        return rv;
+    }
+    rv = HASH_HashBuf(hashtype, hashOutBuf, str.data, str.len); /* hash result */
+    if (str.data) /* the temporary is freed whether or not hashing succeeded */
+        SECITEM_ZfreeItem(&str, PR_FALSE);
+    return rv;
+}
+
+/*
+**  Perform steps 2 and 3 of FIPS 186-1, appendix 2.2.
+**  Generate Q (160 bits, top and bottom bits set) from seed using SHA-1.
+*/
+static SECStatus
+makeQfromSeed(
+    unsigned int g,      /* input.  Length of seed in bits. */
+    const SECItem *seed, /* input.  */
+    mp_int *Q)           /* output. */
+{
+    unsigned char sha1[SHA1_LENGTH];
+    unsigned char sha2[SHA1_LENGTH];
+    unsigned char U[SHA1_LENGTH];
+    SECStatus rv = SECSuccess;
+    mp_err err = MP_OKAY;
+    int i;
+    /* ******************************************************************
+    ** Step 2.
+    ** "Compute U = SHA[SEED] XOR SHA[(SEED+1) mod 2**g]."
+    **/
+    CHECK_SEC_OK(SHA1_HashBuf(sha1, seed->data, seed->len));
+    CHECK_SEC_OK(addToSeedThenHash(HASH_AlgSHA1, seed, 1, g, sha2));
+    for (i = 0; i < SHA1_LENGTH; ++i)
+        U[i] = sha1[i] ^ sha2[i];
+    /* ******************************************************************
+    ** Step 3.
+    ** "Form Q from U by setting the most significant bit (the 2**159 bit)
+    **  and the least significant bit to 1.  In terms of boolean operations,
+    **  Q = U OR 2**159 OR 1.  Note that 2**159 < Q < 2**160."
+    */
+    U[0] |= 0x80; /* U is MSB first */
+    U[SHA1_LENGTH - 1] |= 0x01;
+    err = mp_read_unsigned_octets(Q, U, SHA1_LENGTH);
+cleanup:
+    memset(U, 0, SHA1_LENGTH); /* scrub the intermediate hash values on every exit path */
+    memset(sha1, 0, SHA1_LENGTH);
+    memset(sha2, 0, SHA1_LENGTH);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        return SECFailure;
+    }
+    return rv;
+}
+
+/*
+**  Perform steps 6 and 7 of FIPS 186-3, appendix A.1.1.2.
+**  Generate Q (N bits, top and bottom bits set) from seed.
+*/
+static SECStatus
+makeQ2fromSeed(
+    HASH_HashType hashtype, /* selected Hashing algorithm */
+    unsigned int N,         /* input.  Length of q in bits. */
+    const SECItem *seed,    /* input.  */
+    mp_int *Q)              /* output. */
+{
+    unsigned char U[HASH_LENGTH_MAX];
+    SECStatus rv = SECSuccess;
+    mp_err err = MP_OKAY;
+    int N_bytes = N / PR_BITS_PER_BYTE; /* length of N in bytes rather than bits */
+    int hashLen = HASH_ResultLen(hashtype);
+    int offset = 0; /* index in U of the first of the low-order N_bytes of the hash */
+
+    /* ******************************************************************
+    ** Step 6.
+    ** "Compute U = hash[SEED] mod 2**(N-1)."
+    **/
+    CHECK_SEC_OK(HASH_HashBuf(hashtype, U, seed->data, seed->len));
+    /* mod 2**N . Step 7 will explicitly set the top bit to 1, so no need
+     * to handle mod 2**(N-1); only the last N_bytes of the digest are kept */
+    if (hashLen > N_bytes) {
+        offset = hashLen - N_bytes;
+    }
+    /* ******************************************************************
+    ** Step 7.
+    ** computed_q = 2**(N-1) + U + 1 - (U mod 2)
+    **
+    ** This is the same as:
+    ** computed_q = 2**(N-1) | U | 1;
+    */
+    U[offset] |= 0x80; /* U is MSB first */
+    U[hashLen - 1] |= 0x01;
+    err = mp_read_unsigned_octets(Q, &U[offset], N_bytes);
+cleanup:
+    memset(U, 0, HASH_LENGTH_MAX); /* scrub the hash output on every exit path */
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        return SECFailure;
+    }
+    return rv;
+}
+
+/*
+**  Perform steps from FIPS 186-3, Appendix A.1.2.1 and Appendix C.6
+**
+**  This generates a provable prime from two smaller primes (c0 and q). The
+**  resulting prime p has p-1 = 2*t*q*c0, a multiple of both. q can be 1.
+**
+** This implements steps 4 through 22 of FIPS 186-3 A.1.2.1 and
+**                 steps 16 through 34 of FIPS 186-3 C.6
+*/
+#define MAX_ST_SEED_BITS (HASH_LENGTH_MAX * PR_BITS_PER_BYTE)
+static SECStatus
+makePrimefromPrimesShaweTaylor(
+    HASH_HashType hashtype,          /* selected Hashing algorithm */
+    unsigned int length,             /* input. Length of prime in bits. */
+    mp_int *c0,                      /* seed prime */
+    mp_int *q,                       /* sub prime, can be 1 */
+    mp_int *prime,                   /* output.  */
+    SECItem *prime_seed,             /* input/output.  */
+    unsigned int *prime_gen_counter) /* input/output.  */
+{
+    mp_int c;
+    mp_int c0_2;
+    mp_int t;
+    mp_int a;
+    mp_int z;
+    mp_int two_length_minus_1;
+    SECStatus rv = SECFailure;
+    int hashlen = HASH_ResultLen(hashtype);
+    int outlen = hashlen * PR_BITS_PER_BYTE;
+    int offset;
+    unsigned char bit, mask;
+    /* x needs to hold roundup(L/outlen)*outlen bits.
+     * This can be no larger than L+outlen-1, so we set its size to
+     * our max L + max outlen (in bytes) and know we are safe */
+    unsigned char x[DSA_MAX_P_BITS / 8 + HASH_LENGTH_MAX];
+    mp_err err = MP_OKAY;
+    int i;
+    int iterations;
+    int old_counter;
+
+    MP_DIGITS(&c) = 0;
+    MP_DIGITS(&c0_2) = 0;
+    MP_DIGITS(&t) = 0;
+    MP_DIGITS(&a) = 0;
+    MP_DIGITS(&z) = 0;
+    MP_DIGITS(&two_length_minus_1) = 0;
+    CHECK_MPI_OK(mp_init(&c));
+    CHECK_MPI_OK(mp_init(&c0_2));
+    CHECK_MPI_OK(mp_init(&t));
+    CHECK_MPI_OK(mp_init(&a));
+    CHECK_MPI_OK(mp_init(&z));
+    CHECK_MPI_OK(mp_init(&two_length_minus_1));
+
+    /*
+    ** There is a slight mapping of variable names depending on which
+    ** FIPS 186 steps are being carried out. The mapping is as follows:
+    **  variable          A.1.2.1           C.6
+    **    c0                p0               c0
+    **    q                 q                1
+    **    c                 p                c
+    **    c0_2            2*p0*q            2*c0
+    **    length            L               length
+    **    prime_seed       pseed            prime_seed
+    **  prime_gen_counter pgen_counter     prime_gen_counter
+    **
+    ** Also note: our iterations variable is actually iterations+1, since
+    ** iterations+1 works better in C.
+    */
+
+    /* Step 4/16 iterations = ceiling(length/outlen)-1 */
+    iterations = (length + outlen - 1) / outlen; /* NOTE: iterations +1 */
+    /* Step 5/17 old_counter = prime_gen_counter */
+    old_counter = *prime_gen_counter;
+    /*
+    ** Comment: Generate a pseudorandom integer x in the interval
+    ** [2**(length-1), 2**length].
+    **
+    ** Step 6/18 x = 0
+    */
+    PORT_Memset(x, 0, sizeof(x));
+    /*
+    ** Step 7/19 for i = 0 to iterations do
+    **  x = x + (HASH(prime_seed + i) * 2^(i*outlen))
+    */
+    for (i = 0; i < iterations; i++) {
+        /* MAX_ST_SEED_BITS is bigger than prime_seed should get to */
+        CHECK_SEC_OK(addToSeedThenHash(hashtype, prime_seed, i,
+                                       MAX_ST_SEED_BITS, &x[(iterations - i - 1) * hashlen]));
+    }
+    /* Step 8/20 prime_seed = prime_seed + iterations + 1 */
+    CHECK_SEC_OK(addToSeed(prime_seed, iterations, MAX_ST_SEED_BITS,
+                           prime_seed));
+    /*
+    ** Step 9/21 x = 2 ** (length-1) + (x mod 2 ** (length-1))
+    **
+    **   This step mathematically sets the high bit and clears out
+    **  all the other bits higher than length. 'x' is stored
+    **  in the x array, MSB first. The above formula gives us an 'x'
+    **  which is length bits long and has the high bit set. We also know
+    **  that length <= iterations*outlen since
+    **  iterations=ceiling(length/outlen). First we find the offset in
+    **  bytes into the array where the high bit is.
+    */
+    offset = (outlen * iterations - length) / PR_BITS_PER_BYTE;
+    /* now we want to set the 'high bit', since length may not be a
+     * multiple of 8,*/
+    bit = 1 << ((length - 1) & 0x7); /* select the proper bit in the byte */
+    /* we need to zero out the rest of the bits in the byte above */
+    mask = (bit - 1);
+    /* now we set it */
+    x[offset] = (mask & x[offset]) | bit;
+    /*
+    ** Comment: Generate a candidate prime c in the interval
+    ** [2**(length-1), 2**length].
+    **
+    ** Step 10 t = ceiling(x/(2q(p0)))
+    ** Step 22 t = ceiling(x/(2(c0)))
+    */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&t, &x[offset],
+                                         hashlen * iterations - offset)); /* t = x */
+    CHECK_MPI_OK(mp_mul(c0, q, &c0_2));                                   /* c0_2 is now c0*q */
+    CHECK_MPI_OK(mp_add(&c0_2, &c0_2, &c0_2));                            /* c0_2 is now 2*q*c0 */
+    CHECK_MPI_OK(mp_add(&t, &c0_2, &t));                                  /* t = x+2*q*c0 */
+    CHECK_MPI_OK(mp_sub_d(&t, (mp_digit)1, &t));                          /* t = x+2*q*c0 -1 */
+    /* t = floor((x+2qc0-1)/2qc0) = ceil(x/2qc0) */
+    CHECK_MPI_OK(mp_div(&t, &c0_2, &t, NULL));
+    /*
+    ** step 11: if (2tqp0 +1 > 2**length), then t = ceiling(2**(length-1)/2qp0)
+    ** step 12: t = 2tqp0 +1.
+    **
+    ** step 23: if (2tc0 +1 > 2**length), then t = ceiling(2**(length-1)/2c0)
+    ** step 24: t = 2tc0 +1.
+    */
+    CHECK_MPI_OK(mp_2expt(&two_length_minus_1, length - 1));
+step_23:
+    CHECK_MPI_OK(mp_mul(&t, &c0_2, &c));                /* c = t*2qc0 */
+    CHECK_MPI_OK(mp_add_d(&c, (mp_digit)1, &c));        /* c= 2tqc0 + 1*/
+    if (mpl_significant_bits(&c) > length) {            /* c needs more than length bits */
+        CHECK_MPI_OK(mp_sub_d(&c0_2, (mp_digit)1, &t)); /* t = 2qc0-1 */
+        /* t = 2**(length-1) + 2qc0 -1 */
+        CHECK_MPI_OK(mp_add(&two_length_minus_1, &t, &t));
+        /* t = floor((2**(length-1)+2qc0 -1)/2qc0)
+         *   = ceil(2**(length-1)/2qc0) */
+        CHECK_MPI_OK(mp_div(&t, &c0_2, &t, NULL));
+        CHECK_MPI_OK(mp_mul(&t, &c0_2, &c));
+        CHECK_MPI_OK(mp_add_d(&c, (mp_digit)1, &c)); /* c= 2tqc0 + 1*/
+    }
+    /* Step 13/25 prime_gen_counter = prime_gen_counter + 1*/
+    (*prime_gen_counter)++;
+    /*
+    ** Comment: Test the candidate prime c for primality; first pick an
+    ** integer a between 2 and c-2.
+    **
+    ** Step 14/26 a=0
+    */
+    PORT_Memset(x, 0, sizeof(x)); /* use x for a */
+    /*
+    ** Step 15/27 for i = 0 to iterations do
+    **  a = a + (HASH(prime_seed + i) * 2^(i*outlen))
+    **
+    ** NOTE: we reuse the x array for 'a' initially.
+    */
+    for (i = 0; i < iterations; i++) {
+        /* MAX_ST_SEED_BITS is bigger than prime_seed should get to */
+        CHECK_SEC_OK(addToSeedThenHash(hashtype, prime_seed, i,
+                                       MAX_ST_SEED_BITS, &x[(iterations - i - 1) * hashlen]));
+    }
+    /* Step 16/28 prime_seed = prime_seed + iterations + 1 */
+    CHECK_SEC_OK(addToSeed(prime_seed, iterations, MAX_ST_SEED_BITS,
+                           prime_seed));
+    /* Step 17/29 a = 2 + (a mod (c-3)). */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&a, x, iterations * hashlen));
+    CHECK_MPI_OK(mp_sub_d(&c, (mp_digit)3, &z)); /* z = c -3 */
+    CHECK_MPI_OK(mp_mod(&a, &z, &a));            /* a = a mod (c-3) */
+    CHECK_MPI_OK(mp_add_d(&a, (mp_digit)2, &a)); /* a = 2 + (a mod (c-3)) */
+    /*
+    ** Step 18 z = a**(2tq) mod p.
+    ** Step 30 z = a**(2t) mod c.
+    */
+    CHECK_MPI_OK(mp_mul(&t, q, &z));          /* z = tq */
+    CHECK_MPI_OK(mp_add(&z, &z, &z));         /* z = 2tq */
+    CHECK_MPI_OK(mp_exptmod(&a, &z, &c, &z)); /* z = a**(2tq) mod c */
+    /*
+    ** Step 19 if (( 1 == GCD(z-1,p)) and ( 1 == z**p0 mod p )), then
+    ** Step 31 if (( 1 == GCD(z-1,c)) and ( 1 == z**c0 mod c )), then
+    */
+    CHECK_MPI_OK(mp_sub_d(&z, (mp_digit)1, &a)); /* a is reused: a = z-1 */
+    CHECK_MPI_OK(mp_gcd(&a, &c, &a));            /* a = GCD(z-1, c) */
+    if (mp_cmp_d(&a, (mp_digit)1) == 0) {
+        CHECK_MPI_OK(mp_exptmod(&z, c0, &c, &a)); /* a = z**c0 mod c */
+        if (mp_cmp_d(&a, (mp_digit)1) == 0) {
+            /* Step 31.1 prime = c */
+            CHECK_MPI_OK(mp_copy(&c, prime));
+            /*
+        ** Step 31.2 return Success, prime, prime_seed,
+        **    prime_gen_counter
+        */
+            rv = SECSuccess;
+            goto cleanup;
+        }
+    }
+    /*
+    ** Step 20/32 If (prime_gen_counter > 4 * length + old_counter) then
+    **   return (FAILURE, 0, 0, 0).
+    ** NOTE: the test is reversed, so we fall through on failure to the
+    ** cleanup routine
+    */
+    if (*prime_gen_counter < (4 * length + old_counter)) {
+        /* Step 21/33 t = t + 1 */
+        CHECK_MPI_OK(mp_add_d(&t, (mp_digit)1, &t));
+        /* Step 22/34 Go to step 11/23 */
+        goto step_23;
+    }
+
+    /* if (prime_gen_counter >= 4*length + old_counter), fall through to failure */
+    rv = SECFailure; /* really is already set, but paranoia is good */
+
+cleanup:
+    mp_clear(&c);
+    mp_clear(&c0_2);
+    mp_clear(&t);
+    mp_clear(&a);
+    mp_clear(&z);
+    mp_clear(&two_length_minus_1);
+    PORT_Memset(x, 0, sizeof(x));
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv == SECFailure) { /* the "(FAILURE, 0, 0, 0)" return: clear all three outputs */
+        mp_zero(prime);
+        if (prime_seed->data) {
+            SECITEM_FreeItem(prime_seed, PR_FALSE);
+        }
+        *prime_gen_counter = 0;
+    }
+    return rv;
+}
+
+/*
+**  Perform steps from  FIPS 186-3, Appendix C.6
+**
+**  This generates a provable prime from a seed (recursing for length >= 33)
+*/
+static SECStatus
+makePrimefromSeedShaweTaylor(
+    HASH_HashType hashtype,          /* selected Hashing algorithm */
+    unsigned int length,             /* input.  Length of prime in bits. */
+    const SECItem *input_seed,       /* input.  */
+    mp_int *prime,                   /* output.  */
+    SECItem *prime_seed,             /* output.  */
+    unsigned int *prime_gen_counter) /* output.  */
+{
+    mp_int c;
+    mp_int c0;
+    mp_int one;
+    SECStatus rv = SECFailure;
+    int hashlen = HASH_ResultLen(hashtype);
+    int outlen = hashlen * PR_BITS_PER_BYTE;
+    int offset;
+    unsigned char bit, mask;
+    unsigned char x[HASH_LENGTH_MAX * 2]; /* room for two digests: Hash(seed) and Hash(seed+1) */
+    mp_digit dummy;
+    mp_err err = MP_OKAY;
+    int i;
+
+    MP_DIGITS(&c) = 0;
+    MP_DIGITS(&c0) = 0;
+    MP_DIGITS(&one) = 0;
+    CHECK_MPI_OK(mp_init(&c));
+    CHECK_MPI_OK(mp_init(&c0));
+    CHECK_MPI_OK(mp_init(&one));
+
+    /* Step 1. if length < 2 then return (FAILURE, 0, 0, 0) */
+    if (length < 2) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+    /* Step 2. if length >= 33 then goto step 14 */
+    if (length >= 33) {
+        mp_zero(&one);
+        CHECK_MPI_OK(mp_add_d(&one, (mp_digit)1, &one)); /* one = 1, passed as q below */
+
+        /* Step 14 (status, c0, prime_seed, prime_gen_counter) =
+    ** ST_Random_Prime(ceil(length/2)+1, input_seed); (length+1)/2 is ceil(length/2)
+    */
+        rv = makePrimefromSeedShaweTaylor(hashtype, (length + 1) / 2 + 1,
+                                          input_seed, &c0, prime_seed, prime_gen_counter);
+        /* Step 15 if FAILURE is returned, return (FAILURE, 0, 0, 0). */
+        if (rv != SECSuccess) {
+            goto cleanup;
+        }
+        /* Steps 16-34 */
+        rv = makePrimefromPrimesShaweTaylor(hashtype, length, &c0, &one,
+                                            prime, prime_seed, prime_gen_counter);
+        goto cleanup; /* we're done, one way or the other */
+    }
+    /* Step 3 prime_seed = input_seed */
+    CHECK_SEC_OK(SECITEM_CopyItem(NULL, prime_seed, input_seed));
+    /* Step 4 prime_gen_count = 0 */
+    *prime_gen_counter = 0;
+
+step_5:
+    /* Step 5 c = Hash(prime_seed) xor Hash(prime_seed+1). */
+    CHECK_SEC_OK(HASH_HashBuf(hashtype, x, prime_seed->data, prime_seed->len));
+    CHECK_SEC_OK(addToSeedThenHash(hashtype, prime_seed, 1,
+                                   MAX_ST_SEED_BITS, &x[hashlen]));
+    for (i = 0; i < hashlen; i++) {
+        x[i] = x[i] ^ x[i + hashlen];
+    }
+    /* Step 6 c = 2**(length-1) + (c mod 2**(length-1)) */
+    /*   This step mathematically sets the high bit and clears out
+    **  all the other bits higher than length. Right now c is stored
+    **  in the x array, MSB first. The above formula gives us a c which
+    **  is length bits long and has the high bit set. We also know that
+    **  length < outlen since the smallest outlen is 160 bits and the largest
+    **  length at this point is 32 bits. So first we find the offset in bytes
+    **  into the array where the high bit is.
+    */
+    offset = (outlen - length) / PR_BITS_PER_BYTE;
+    /* now we want to set the 'high bit'. We have to calculate this since
+     * length may not be a multiple of 8.*/
+    bit = 1 << ((length - 1) & 0x7); /* select the proper bit in the byte */
+    /* we need to zero out the rest of the bits  in the byte above */
+    mask = (bit - 1);
+    /* now we set it */
+    x[offset] = (mask & x[offset]) | bit;
+    /* Step 7 c = 2*floor(c/2) + 1 */
+    /* set the low bit. much easier to find (the end of the array) */
+    x[hashlen - 1] |= 1;
+    /* now that we've set our bits, we can create our candidate "c" */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&c, &x[offset], hashlen - offset));
+    /* Step 8 prime_gen_counter = prime_gen_counter + 1 */
+    (*prime_gen_counter)++;
+    /* Step 9 prime_seed = prime_seed + 2 */
+    CHECK_SEC_OK(addToSeed(prime_seed, 2, MAX_ST_SEED_BITS, prime_seed));
+    /* Step 10 Perform deterministic primality test on c. For example, since
+    ** c is small, its primality can be tested by trial division. See
+    ** Appendix C.7.
+    **
+    ** We in fact test with trial division. mpi has a built-in trial divider
+    ** that divides by all primes up to 2^16.
+    */
+    if (prime_tab[prime_tab_size - 1] < 0xFFF1) { /* 0xFFF1 = 65521, the largest prime below 2^16 */
+        /* we aren't testing all the primes between 0 and 2^16, we really
+     * can't use this construction. Just fail. */
+        rv = SECFailure;
+        goto cleanup;
+    }
+    dummy = prime_tab_size; /* presumably the number of prime_tab entries to try; result in dummy is unused */
+    err = mpp_divis_primes(&c, &dummy);
+    /* Step 11 if c is prime then */
+    if (err == MP_NO) {
+        /* Step 11.1 prime = c */
+        CHECK_MPI_OK(mp_copy(&c, prime));
+        /* Step 11.2 return SUCCESS prime, prime_seed, prime_gen_counter */
+        err = MP_OKAY;
+        rv = SECSuccess;
+        goto cleanup;
+    } else if (err != MP_YES) {
+        goto cleanup; /* function failed, bail out */
+    } else {
+        /* reset mp_err */
+        err = MP_OKAY;
+    }
+    /*
+    ** Step 12 if (prime_gen_counter > (4*length))
+    ** then return (FAILURE, 0, 0, 0))
+    ** Step 13 goto step 5
+    */
+    if (*prime_gen_counter <= (4 * length)) {
+        goto step_5;
+    }
+    /* if (prime_gen_counter > 4*length), fall through to failure */
+    rv = SECFailure; /* really is already set, but paranoia is good */
+
+cleanup:
+    mp_clear(&c);
+    mp_clear(&c0);
+    mp_clear(&one);
+    PORT_Memset(x, 0, sizeof(x));
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv == SECFailure) { /* the "(FAILURE, 0, 0, 0)" return: clear all three outputs */
+        mp_zero(prime);
+        if (prime_seed->data) {
+            SECITEM_FreeItem(prime_seed, PR_FALSE);
+        }
+        *prime_gen_counter = 0;
+    }
+    return rv;
+}
+
+/* Find the generation type and hash algorithm that reproduce Q from seed.
+ * Tries FIPS186_1 (only when L <= 1024), then FIPS186_3 per hash, then FIPS186_3 Shawe-Taylor.
+ * Returns SECSuccess and fills the outputs on the first match, SECFailure otherwise. */
+static SECStatus
+findQfromSeed(
+    unsigned int L,             /* input.  Length of p in bits. */
+    unsigned int N,             /* input.  Length of q in bits. */
+    unsigned int g,             /* input.  Length of seed in bits. */
+    const SECItem *seed,        /* input.  */
+    mp_int *Q,                  /* input. The q value to match. */
+    mp_int *Q_,                 /* output. Scratch/result: the last candidate q computed. */
+    unsigned int *qseed_len,    /* output */
+    HASH_HashType *hashtypePtr, /* output. Hash uses */
+    pqgGenType *typePtr)        /* output. Generation Type used */
+{
+    HASH_HashType hashtype;
+    SECItem firstseed = { 0, 0, 0 };
+    SECItem qseed = { 0, 0, 0 };
+    SECStatus rv;
+
+    *qseed_len = 0; /* only set if FIPS186_3_ST_TYPE */
+
+    /* handle legacy small DSA first can only be FIPS186_1_TYPE */
+    if (L < 1024) {
+        rv = makeQfromSeed(g, seed, Q_);
+        if ((rv == SECSuccess) && (mp_cmp(Q, Q_) == 0)) {
+            *hashtypePtr = HASH_AlgSHA1;
+            *typePtr = FIPS186_1_TYPE;
+            return SECSuccess;
+        }
+        return SECFailure;
+    }
+    /* 1024 could use FIPS186_1 or FIPS186_3 algorithms, we need to try
+     * them both */
+    if (L == 1024) {
+        rv = makeQfromSeed(g, seed, Q_);
+        if (rv == SECSuccess) {
+            if (mp_cmp(Q, Q_) == 0) {
+                *hashtypePtr = HASH_AlgSHA1;
+                *typePtr = FIPS186_1_TYPE;
+                return SECSuccess;
+            }
+        }
+        /* fall through for FIPS186_3 types */
+    }
+    /* at this point we know we aren't using FIPS186_1, start trying FIPS186_3
+     * with appropriate hash types */
+    for (hashtype = getFirstHash(L, N); hashtype != HASH_AlgTOTAL;
+         hashtype = getNextHash(hashtype)) {
+        rv = makeQ2fromSeed(hashtype, N, seed, Q_);
+        if (rv != SECSuccess) {
+            continue;
+        }
+        if (mp_cmp(Q, Q_) == 0) {
+            *hashtypePtr = hashtype;
+            *typePtr = FIPS186_3_TYPE;
+            return SECSuccess;
+        }
+    }
+    /*
+     * OK finally try FIPS186_3 Shawe-Taylor
+     */
+    firstseed = *seed; /* shallow copy: shares seed->data, so firstseed is never freed */
+    firstseed.len = seed->len / 3; /* presumably seed is three equal-length seeds, first seed leading; verify against the generator */
+    for (hashtype = getFirstHash(L, N); hashtype != HASH_AlgTOTAL;
+         hashtype = getNextHash(hashtype)) {
+        unsigned int count;
+
+        rv = makePrimefromSeedShaweTaylor(hashtype, N, &firstseed, Q_,
+                                          &qseed, &count);
+        if (rv != SECSuccess) {
+            continue;
+        }
+        if (mp_cmp(Q, Q_) == 0) {
+            /* check qseed as well: it must equal the trailing qseed.len bytes of seed */
+            int offset = seed->len - qseed.len;
+            if ((offset < 0) ||
+                (PORT_Memcmp(&seed->data[offset], qseed.data, qseed.len) != 0)) {
+                /* we found q, but the seeds don't match. This isn't an
+         * accident, someone has been tweaking the seeds, just
+         * fail at this point. */
+                SECITEM_FreeItem(&qseed, PR_FALSE);
+                return SECFailure;
+            }
+            *qseed_len = qseed.len;
+            *hashtypePtr = hashtype;
+            *typePtr = FIPS186_3_ST_TYPE;
+            SECITEM_FreeItem(&qseed, PR_FALSE);
+            return SECSuccess;
+        }
+        SECITEM_FreeItem(&qseed, PR_FALSE); /* release this hash's qseed before trying the next one */
+    }
+    /* no hash algorithms found which match seed to Q, fail */
+    return SECFailure;
+}
+
+/*
+**  Perform steps 7, 8 and 9 of FIPS 186, appendix 2.2.
+**  which are the same as steps 11.1-11.5 of FIPS 186-3, App A.1.1.2
+**  Generate P from Q, seed, L, and offset.
+*/
+static SECStatus
+makePfromQandSeed(
+    HASH_HashType hashtype, /* selected Hashing algorithm */
+    unsigned int L,         /* Length of P in bits.  Per FIPS 186. */
+    unsigned int N,         /* Length of Q in bits.  Per FIPS 186. (not referenced in the body) */
+    unsigned int offset,    /* Per FIPS 186, App 2.2. & 186-3 App A.1.1.2 */
+    unsigned int seedlen,   /* input. Length of seed in bits. (g in 186-1)*/
+    const SECItem *seed,    /* input.  */
+    const mp_int *Q,        /* input.  */
+    mp_int *P)              /* output. */
+{
+    unsigned int j;       /* Per FIPS 186-3 App. A.1.1.2  (k in 186-1)*/
+    unsigned int n;       /* Per FIPS 186, appendix 2.2. */
+    mp_digit b;           /* Per FIPS 186, appendix 2.2. */
+    unsigned int outlen;  /* Per FIPS 186-3 App. A.1.1.2 */
+    unsigned int hashlen; /* outlen in bytes */
+    unsigned char V_j[HASH_LENGTH_MAX];
+    mp_int W, X, c, twoQ, V_n, tmp;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    /* Initialize bignums */
+    MP_DIGITS(&W) = 0;
+    MP_DIGITS(&X) = 0;
+    MP_DIGITS(&c) = 0;
+    MP_DIGITS(&twoQ) = 0;
+    MP_DIGITS(&V_n) = 0;
+    MP_DIGITS(&tmp) = 0;
+    CHECK_MPI_OK(mp_init(&W));
+    CHECK_MPI_OK(mp_init(&X));
+    CHECK_MPI_OK(mp_init(&c));
+    CHECK_MPI_OK(mp_init(&twoQ));
+    CHECK_MPI_OK(mp_init(&tmp));
+    CHECK_MPI_OK(mp_init(&V_n));
+
+    hashlen = HASH_ResultLen(hashtype);
+    outlen = hashlen * PR_BITS_PER_BYTE;
+
+    /* L - 1 = n*outlen + b */
+    n = (L - 1) / outlen;
+    b = (L - 1) % outlen;
+
+    /* ******************************************************************
+    ** Step 11.1 (Step 7 in 186-1)
+    **  "for j = 0 ... n let
+    **           V_j = SHA[(SEED + offset + j) mod 2**seedlen]."
+    **
+    ** Step 11.2 (Step 8 in 186-1)
+    **   "W = V_0 + (V_1 * 2**outlen) + ... + (V_n-1 * 2**((n-1)*outlen))
+    **         + ((V_n mod 2**b) * 2**(n*outlen))"
+    */
+    for (j = 0; j < n; ++j) { /* Do the first n terms of V_j */
+        /* Do step 11.1 for iteration j.
+    ** V_j = HASH[(seed + offset + j) mod 2**seedlen]
+    */
+        CHECK_SEC_OK(addToSeedThenHash(hashtype, seed, offset + j, seedlen, V_j));
+        /* Do step 11.2 for iteration j.
+    ** W += V_j * 2**(j*outlen)
+    */
+        OCTETS_TO_MPINT(V_j, &tmp, hashlen);           /* get bignum V_j     */
+        CHECK_MPI_OK(mpl_lsh(&tmp, &tmp, j * outlen)); /* tmp=V_j << j*outlen */
+        CHECK_MPI_OK(mp_add(&W, &tmp, &W));            /* W += tmp           */
+    }
+    /* Step 11.2, continued.
+    **   [W += ((V_n mod 2**b) * 2**(n*outlen))]
+    */
+    CHECK_SEC_OK(addToSeedThenHash(hashtype, seed, offset + n, seedlen, V_j));
+    OCTETS_TO_MPINT(V_j, &V_n, hashlen);           /* get bignum V_n     */
+    CHECK_MPI_OK(mp_div_2d(&V_n, b, NULL, &tmp));  /* tmp = V_n mod 2**b */
+    CHECK_MPI_OK(mpl_lsh(&tmp, &tmp, n * outlen)); /* tmp = tmp << n*outlen */
+    CHECK_MPI_OK(mp_add(&W, &tmp, &W));            /* W += tmp           */
+    /* Step 11.3, (Step 8 in 186-1)
+    ** "X = W + 2**(L-1).
+    **  Note that 0 <= W < 2**(L-1) and hence 2**(L-1) <= X < 2**L."
+    */
+    CHECK_MPI_OK(mpl_set_bit(&X, (mp_size)(L - 1), 1)); /* X = 2**(L-1) */
+    CHECK_MPI_OK(mp_add(&X, &W, &X));                   /* X += W       */
+    /*************************************************************
+    ** Step 11.4. (Step 9 in 186-1)
+    ** "c = X mod 2q"
+    */
+    CHECK_MPI_OK(mp_mul_2(Q, &twoQ));    /* 2q           */
+    CHECK_MPI_OK(mp_mod(&X, &twoQ, &c)); /* c = X mod 2q */
+    /*************************************************************
+    ** Step 11.5. (Step 9 in 186-1)
+    ** "p = X - (c - 1).
+    **  Note that p is congruent to 1 mod 2q."
+    */
+    CHECK_MPI_OK(mp_sub_d(&c, 1, &c)); /* c -= 1       */
+    CHECK_MPI_OK(mp_sub(&X, &c, P));   /* P = X - c    */
+cleanup:
+    mp_clear(&W);
+    mp_clear(&X);
+    mp_clear(&c);
+    mp_clear(&twoQ);
+    mp_clear(&V_n);
+    mp_clear(&tmp);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        return SECFailure;
+    }
+    return rv;
+}
+
+/*
+** Generate G from h, P, and Q: G = (H ** ((P-1)/Q)) mod P.
+** Returns SECFailure (with *passed left PR_FALSE) when H or G ends up <= 1, or on an MPI error. */
+static SECStatus
+makeGfromH(const mp_int *P, /* input.  */
+           const mp_int *Q, /* input.  */
+           mp_int *H,       /* input and output. */
+           mp_int *G,       /* output. */
+           PRBool *passed)  /* output. PR_TRUE only when a G > 1 was produced */
+{
+    mp_int exp, pm1;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    *passed = PR_FALSE;
+    MP_DIGITS(&exp) = 0;
+    MP_DIGITS(&pm1) = 0;
+    CHECK_MPI_OK(mp_init(&exp));
+    CHECK_MPI_OK(mp_init(&pm1));
+    CHECK_MPI_OK(mp_sub_d(P, 1, &pm1));   /* P - 1            */
+    if (mp_cmp(H, &pm1) >= 0)             /* H >= P-1         */
+        CHECK_MPI_OK(mp_sub(H, &pm1, H)); /* H = H mod (P-1)  */
+    /* Let b = 2**n (smallest power of 2 greater than P).
+    ** Since P-1 >= b/2, and H < b, quotient(H/(P-1)) = 0 or 1
+    ** so the above operation safely computes H mod (P-1)
+    */
+    /* Check for H = to 0 or 1.  Regen H if so.  (Regen means return error). */
+    if (mp_cmp_d(H, 1) <= 0) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+    /* Compute G, according to the equation  G = (H ** ((P-1)/Q)) mod P */
+    CHECK_MPI_OK(mp_div(&pm1, Q, &exp, NULL)); /* exp = (P-1)/Q      */
+    CHECK_MPI_OK(mp_exptmod(H, &exp, P, G));   /* G = H ** exp mod P */
+    /* Check for G == 0 or G == 1, return error if so. */
+    if (mp_cmp_d(G, 1) <= 0) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+    *passed = PR_TRUE;
+cleanup:
+    mp_clear(&exp);
+    mp_clear(&pm1);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/*
+** Generate G from seed, index, P, and Q: G = HASH(seed || "ggen" || index || count)**((P-1)/Q) mod P
+** for the first count in 1..0xFFFF that yields G >= 2. SECFailure if none does, or on any error. */
+static SECStatus
+makeGfromIndex(HASH_HashType hashtype,
+               const mp_int *P,     /* input.  */
+               const mp_int *Q,     /* input.  */
+               const SECItem *seed, /* input. */
+               unsigned char index, /* input. */
+               mp_int *G)           /* input/output */
+{
+    mp_int e, pm1, W;
+    unsigned int count;
+    unsigned char data[HASH_LENGTH_MAX]; /* holds the 2-byte count, then receives the digest */
+    unsigned int len;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    const SECHashObject *hashobj = NULL;
+    void *hashcx = NULL;
+
+    MP_DIGITS(&e) = 0;
+    MP_DIGITS(&pm1) = 0;
+    MP_DIGITS(&W) = 0;
+    CHECK_MPI_OK(mp_init(&e));
+    CHECK_MPI_OK(mp_init(&pm1));
+    CHECK_MPI_OK(mp_init(&W));
+
+    /* initialize our hash stuff */
+    hashobj = HASH_GetRawHashObject(hashtype);
+    if (hashobj == NULL) {
+        /* shouldn't happen */
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    hashcx = hashobj->create();
+    if (hashcx == NULL) { /* NOTE(review): no error code is set here; presumably create() sets one */
+        rv = SECFailure;
+        goto cleanup;
+    }
+
+    CHECK_MPI_OK(mp_sub_d(P, 1, &pm1)); /* P - 1            */
+    /* Step 3 e = (p-1)/q */
+    CHECK_MPI_OK(mp_div(&pm1, Q, &e, NULL)); /* e = (P-1)/Q      */
+/* Steps 4, 5, and 6 */
+/* count is a 16 bit value in the spec. We actually represent count
+     * as more than 16 bits so we can easily detect the 16 bit overflow */
+#define MAX_COUNT 0x10000
+    for (count = 1; count < MAX_COUNT; count++) {
+        /* step 7
+         * U = domain_param_seed || "ggen" || index || count
+         * step 8
+         * W = HASH(U)
+         */
+        hashobj->begin(hashcx);
+        hashobj->update(hashcx, seed->data, seed->len);
+        hashobj->update(hashcx, (unsigned char *)"ggen", 4);
+        hashobj->update(hashcx, &index, 1);
+        data[0] = (count >> 8) & 0xff; /* count is hashed as two bytes, high byte first */
+        data[1] = count & 0xff;
+        hashobj->update(hashcx, data, 2);
+        hashobj->end(hashcx, data, &len, sizeof(data)); /* data now holds the digest, len its length */
+        OCTETS_TO_MPINT(data, &W, len);
+        /* step 9. g = W**e mod p */
+        CHECK_MPI_OK(mp_exptmod(&W, &e, P, G));
+        /* step 10. if (g < 2) then goto step 5 */
+        /* NOTE: this weird construct is to keep the flow according to the spec.
+     * the continue puts us back to step 5 of the for loop */
+        if (mp_cmp_d(G, 2) < 0) {
+            continue;
+        }
+        break; /* step 11 follows step 10 if the test condition is false */
+    }
+    if (count >= MAX_COUNT) { /* the loop ran out of 16-bit count values without finding G >= 2 */
+        rv = SECFailure; /* last part of step 6 */
+    }
+/* step 11.
+     * return valid G */
+cleanup:
+    PORT_Memset(data, 0, sizeof(data));
+    if (hashcx) {
+        hashobj->destroy(hashcx, PR_TRUE);
+    }
+    mp_clear(&e);
+    mp_clear(&pm1);
+    mp_clear(&W);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/* This code uses labels and gotos, so that it can follow the numbered
+** steps in the algorithms from FIPS 186-3 appendix A.1.1.2 very closely,
+** and so that the correctness of this code can be easily verified.
+** So, please forgive the ugly c code.
+**
+** pqg_ParamGen generates DSA domain parameters and their verification data.
+**
+**   L, N      - requested bit lengths of P and Q. The caller has already
+**               checked that the pair is acceptable (step 1).
+**   type      - FIPS186_1_TYPE: Q via makeQfromSeed, G via a random h.
+**               FIPS186_3_TYPE: Q via makeQ2fromSeed, G via index.
+**               anything else (FIPS186_3_ST_TYPE): Shawe-Taylor primes,
+**               G via index.
+**   seedBytes - seed length in bytes; must be at least N/8.
+**   pParams,
+**   pVfy      - in/out, must not be NULL.
+**               If *pParams is NULL, fresh arena-backed PQGParams and
+**               PQGVerify structures are created and stored through both
+**               pointers on success.
+**               If *pParams is not NULL, only G is generated: P and Q are
+**               read from *pParams, the seed and one-byte index from *pVfy,
+**               and the result is written to (*pParams)->base.
+**
+** Returns SECSuccess on success. On any failure SECFailure is returned and
+** the structures allocated here are released.
+**/
+static SECStatus
+pqg_ParamGen(unsigned int L, unsigned int N, pqgGenType type,
+             unsigned int seedBytes, PQGParams **pParams, PQGVerify **pVfy)
+{
+    unsigned int n;       /* Per FIPS 186, app 2.2. 186-3 app A.1.1.2 */
+    unsigned int seedlen; /* Per FIPS 186-3 app A.1.1.2  (was 'g' 186-1)*/
+    unsigned int counter; /* Per FIPS 186, app 2.2. 186-3 app A.1.1.2 */
+    unsigned int offset;  /* Per FIPS 186, app 2.2. 186-3 app A.1.1.2 */
+    unsigned int outlen;  /* Per FIPS 186-3, appendix A.1.1.2. */
+    unsigned int maxCount;
+    HASH_HashType hashtype;
+    SECItem *seed; /* Per FIPS 186, app 2.2. 186-3 app A.1.1.2 */
+    PLArenaPool *arena = NULL;
+    PQGParams *params = NULL;
+    PQGVerify *verify = NULL;
+    PRBool passed;
+    SECItem hit = { 0, 0, 0 };
+    SECItem firstseed = { 0, 0, 0 };
+    SECItem qseed = { 0, 0, 0 };
+    SECItem pseed = { 0, 0, 0 };
+    mp_int P, Q, G, H, l, p0;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECFailure;
+    int iterations = 0;
+
+    /* Step 1. L and N already checked by caller*/
+    /* Step 2. if (seedlen < N) return INVALID; */
+    if (seedBytes < N / PR_BITS_PER_BYTE || !pParams || !pVfy) {
+        /* nothing has been allocated yet, so a plain return is safe here */
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Initialize bignums */
+    MP_DIGITS(&P) = 0;
+    MP_DIGITS(&Q) = 0;
+    MP_DIGITS(&G) = 0;
+    MP_DIGITS(&H) = 0;
+    MP_DIGITS(&l) = 0;
+    MP_DIGITS(&p0) = 0;
+    CHECK_MPI_OK(mp_init(&P));
+    CHECK_MPI_OK(mp_init(&Q));
+    CHECK_MPI_OK(mp_init(&G));
+    CHECK_MPI_OK(mp_init(&H));
+    CHECK_MPI_OK(mp_init(&l));
+    CHECK_MPI_OK(mp_init(&p0));
+
+    /* From here on every exit must go through cleanup so that the bignums
+     * initialized above are released. */
+
+    /* parameters have been passed in, only generate G */
+    if (*pParams != NULL) {
+        /* we only support G index generation if generating separate from PQ */
+        if ((*pVfy == NULL) || (type == FIPS186_1_TYPE) ||
+            ((*pVfy)->h.len != 1) || ((*pVfy)->h.data == NULL) ||
+            ((*pVfy)->seed.data == NULL) || ((*pVfy)->seed.len == 0)) {
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            /* params and verify are still NULL, so cleanup leaves the
+             * caller's structures alone; rv is still SECFailure */
+            goto cleanup;
+        }
+        params = *pParams;
+        verify = *pVfy;
+
+        /* fill in P Q,  */
+        SECITEM_TO_MPINT((*pParams)->prime, &P);
+        SECITEM_TO_MPINT((*pParams)->subPrime, &Q);
+        hashtype = getFirstHash(L, N);
+        CHECK_SEC_OK(makeGfromIndex(hashtype, &P, &Q, &(*pVfy)->seed,
+                                    (*pVfy)->h.data[0], &G));
+        MPINT_TO_SECITEM(&G, &(*pParams)->base, (*pParams)->arena);
+        goto cleanup;
+    }
+    /* Initialize an arena for the params. */
+    arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE);
+    if (!arena) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        goto cleanup; /* rv is still SECFailure */
+    }
+    params = (PQGParams *)PORT_ArenaZAlloc(arena, sizeof(PQGParams));
+    if (!params) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        PORT_FreeArena(arena, PR_TRUE);
+        goto cleanup; /* rv is still SECFailure */
+    }
+    params->arena = arena;
+    /* Initialize an arena for the verify. */
+    arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE);
+    if (!arena) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        /* rv is still SECFailure, so cleanup frees params->arena */
+        goto cleanup;
+    }
+    verify = (PQGVerify *)PORT_ArenaZAlloc(arena, sizeof(PQGVerify));
+    if (!verify) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        PORT_FreeArena(arena, PR_TRUE);
+        /* rv is still SECFailure, so cleanup frees params->arena */
+        goto cleanup;
+    }
+    verify->arena = arena;
+    seed = &verify->seed;
+    arena = NULL;
+
+    /* Select Hash and Compute lengths. */
+    /* getFirstHash gives us the smallest acceptable hash for this key
+     * strength */
+    hashtype = getFirstHash(L, N);
+    outlen = HASH_ResultLen(hashtype) * PR_BITS_PER_BYTE;
+
+    /* Step 3: n = Ceil(L/outlen)-1; (same as n = Floor((L-1)/outlen)) */
+    n = (L - 1) / outlen;
+    /* Step 4: (skipped since we don't use b): b = L -1 - (n*outlen); */
+    seedlen = seedBytes * PR_BITS_PER_BYTE; /* bits in seed */
+step_5:
+    /* ******************************************************************
+    ** Step 5. (Step 1 in 186-1)
+    ** "Choose an arbitrary sequence of at least N bits and call it SEED.
+    **  Let g be the length of SEED in bits."
+    */
+    if (++iterations > MAX_ITERATIONS) { /* give up after a while */
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        /* We get here via "goto step_5", where rv may hold SECSuccess from
+         * the last CHECK_SEC_OK and err may hold a stale primality-test
+         * verdict. Force a failure, and clear err so that cleanup does not
+         * replace the error code set above. */
+        err = MP_OKAY;
+        rv = SECFailure;
+        goto cleanup;
+    }
+    seed->len = seedBytes;
+    CHECK_SEC_OK(getPQseed(seed, verify->arena));
+    /* ******************************************************************
+    ** Step 6. (Step 2 in 186-1)
+    **
+    ** "Compute U = SHA[SEED] XOR SHA[(SEED+1) mod 2**g].  (186-1)"
+    ** "Compute U = HASH[SEED] 2**(N-1).  (186-3)"
+    **
+    ** Step 7. (Step 3 in 186-1)
+    ** "Form Q from U by setting the most significant bit (the 2**159 bit)
+    **  and the least significant bit to 1.  In terms of boolean operations,
+    **  Q = U OR 2**159 OR 1.  Note that 2**159 < Q < 2**160. (186-1)"
+    **
+    ** "q = 2**(N-1) + U + 1 - (U mod 2) (186-3)
+    **
+    ** Note: Both formulations are the same for U < 2**(N-1) and N=160
+    **
+    ** If using Shawe-Taylor, we do the entire A.1.2.1.2 steps in the block
+    ** FIPS186_3_ST_TYPE.
+    */
+    if (type == FIPS186_1_TYPE) {
+        CHECK_SEC_OK(makeQfromSeed(seedlen, seed, &Q));
+    } else if (type == FIPS186_3_TYPE) {
+        CHECK_SEC_OK(makeQ2fromSeed(hashtype, N, seed, &Q));
+    } else {
+        /* FIPS186_3_ST_TYPE */
+        unsigned int qgen_counter, pgen_counter;
+
+        /* Step 1 (L,N) already checked for acceptability */
+
+        firstseed = *seed;
+        qgen_counter = 0;
+        /* Step 2. Use N and firstseed to generate random prime q
+         * using Appendix C.6 */
+        CHECK_SEC_OK(makePrimefromSeedShaweTaylor(hashtype, N, &firstseed, &Q,
+                                                  &qseed, &qgen_counter));
+        /* Step 3. Use floor(L/2+1) and qseed to generate random prime p0
+         * using Appendix C.6 */
+        pgen_counter = 0;
+        CHECK_SEC_OK(makePrimefromSeedShaweTaylor(hashtype, (L + 1) / 2 + 1,
+                                                  &qseed, &p0, &pseed, &pgen_counter));
+        /* Steps 4-22 FIPS 186-3 appendix A.1.2.1.2 */
+        CHECK_SEC_OK(makePrimefromPrimesShaweTaylor(hashtype, L,
+                                                    &p0, &Q, &P, &pseed, &pgen_counter));
+
+        /* combine all the seeds: firstseed || pseed || qseed */
+        seed->len = firstseed.len + qseed.len + pseed.len;
+        seed->data = PORT_ArenaZAlloc(verify->arena, seed->len);
+        if (seed->data == NULL) {
+            /* rv holds SECSuccess from the call above; do not report it */
+            PORT_SetError(SEC_ERROR_NO_MEMORY);
+            rv = SECFailure;
+            goto cleanup;
+        }
+        PORT_Memcpy(seed->data, firstseed.data, firstseed.len);
+        PORT_Memcpy(seed->data + firstseed.len, pseed.data, pseed.len);
+        PORT_Memcpy(seed->data + firstseed.len + pseed.len, qseed.data, qseed.len);
+        counter = 0; /* (qgen_counter << 16) | pgen_counter; */
+
+        /* we've generated both P and Q now, skip to generating G */
+        goto generate_G;
+    }
+    /* ******************************************************************
+    ** Step 8. (Step 4 in 186-1)
+    ** "Use a robust primality testing algorithm to test whether q is prime."
+    **
+    ** Appendix 2.1 states that a Rabin test with at least 50 iterations
+    ** "will give an acceptable probability of error."
+    */
+    err = mpp_pprime(&Q, prime_testcount_q(L, N));
+    passed = (err == MP_YES) ? SECSuccess : SECFailure;
+    /* ******************************************************************
+    ** Step 9. (Step 5 in 186-1) "If q is not prime, goto step 5 (1 in 186-1)."
+    */
+    if (passed != SECSuccess)
+        goto step_5;
+    /* ******************************************************************
+    ** Step 10.
+    **      offset = 1;
+    **(     Step 6b 186-1)"Let counter = 0 and offset = 2."
+    */
+    offset = (type == FIPS186_1_TYPE) ? 2 : 1;
+    /*
+    ** Step 11. (Step 6a,13a,14 in 186-1)
+    **  For counter = 0 to (4L-1) do
+    **
+    */
+    maxCount = L >= 1024 ? (4 * L - 1) : 4095;
+    for (counter = 0; counter <= maxCount; counter++) {
+        /* ******************************************************************
+        ** Step 11.1  (Step 7 in 186-1)
+        ** "for j = 0 ... n let
+        **          V_j = HASH[(SEED + offset + j) mod 2**seedlen]."
+        **
+        ** Step 11.2 (Step 8 in 186-1)
+        ** "W = V_0 + V_1*2**outlen+...+ V_n-1 * 2**((n-1)*outlen) +
+        **                               ((Vn* mod 2**b)*2**(n*outlen))"
+        ** Step 11.3 (Step 8 in 186-1)
+        ** "X = W + 2**(L-1)
+        **  Note that 0 <= W < 2**(L-1) and hence 2**(L-1) <= X < 2**L."
+        **
+        ** Step 11.4 (Step 9 in 186-1).
+        ** "c = X mod 2q"
+        **
+        ** Step 11.5 (Step 9 in 186-1).
+        ** " p = X - (c - 1).
+        **  Note that p is congruent to 1 mod 2q."
+        */
+        CHECK_SEC_OK(makePfromQandSeed(hashtype, L, N, offset, seedlen,
+                                       seed, &Q, &P));
+        /*************************************************************
+        ** Step 11.6. (Step 10 in 186-1)
+        ** "if p < 2**(L-1), then goto step 11.9. (step 13 in 186-1)"
+        */
+        CHECK_MPI_OK(mpl_set_bit(&l, (mp_size)(L - 1), 1)); /* l = 2**(L-1) */
+        if (mp_cmp(&P, &l) < 0)
+            goto step_11_9;
+        /************************************************************
+        ** Step 11.7 (step 11 in 186-1)
+        ** "Perform a robust primality test on p."
+        */
+        err = mpp_pprime(&P, prime_testcount_p(L, N));
+        passed = (err == MP_YES) ? SECSuccess : SECFailure;
+        /* ******************************************************************
+        ** Step 11.8. "If p is determined to be prime return VALID
+        ** values of p, q, seed and counter."
+        */
+        if (passed == SECSuccess)
+            break;
+    step_11_9:
+        /* ******************************************************************
+        ** Step 11.9.  "offset = offset + n + 1."
+        */
+        offset += n + 1;
+    }
+    /* ******************************************************************
+    ** Step 12.  "goto step 5."
+    **
+    ** NOTE: if counter <= maxCount, then we exited the loop at Step 11.8
+    ** and now need to return p,q, seed, and counter.
+    */
+    if (counter > maxCount)
+        goto step_5;
+
+generate_G:
+    /* ******************************************************************
+    ** returning p, q, seed and counter
+    */
+    if (type == FIPS186_1_TYPE) {
+        /* Generate g. This is called the "Unverifiable Generation of g"
+         * in FIPS 186-3 Appendix A.2.1. For compatibility we maintain
+         * this version of the code */
+        SECITEM_AllocItem(NULL, &hit, L / 8); /* h is no longer than p */
+        if (!hit.data) {
+            /* rv holds SECSuccess from the P search; do not report it */
+            PORT_SetError(SEC_ERROR_NO_MEMORY);
+            rv = SECFailure;
+            goto cleanup;
+        }
+        do {
+            /* loop generate h until 1<h<p-1 and (h**[(p-1)/q])mod p > 1 */
+            CHECK_SEC_OK(generate_h_candidate(&hit, &H));
+            CHECK_SEC_OK(makeGfromH(&P, &Q, &H, &G, &passed));
+        } while (passed != PR_TRUE);
+        MPINT_TO_SECITEM(&H, &verify->h, verify->arena);
+    } else {
+        unsigned char index = 1; /* default to 1 */
+        verify->h.data = (unsigned char *)PORT_ArenaZAlloc(verify->arena, 1);
+        if (verify->h.data == NULL) {
+            /* rv holds SECSuccess from an earlier step; do not report it */
+            PORT_SetError(SEC_ERROR_NO_MEMORY);
+            rv = SECFailure;
+            goto cleanup;
+        }
+        verify->h.len = 1;
+        verify->h.data[0] = index;
+        /* Generate g, using the FIPS 186-3 Appendix A.2.3 */
+        CHECK_SEC_OK(makeGfromIndex(hashtype, &P, &Q, seed, index, &G));
+    }
+    /* All generation is done.  Now, save the PQG params.  */
+    MPINT_TO_SECITEM(&P, &params->prime, params->arena);
+    MPINT_TO_SECITEM(&Q, &params->subPrime, params->arena);
+    MPINT_TO_SECITEM(&G, &params->base, params->arena);
+    verify->counter = counter;
+    *pParams = params;
+    *pVfy = verify;
+cleanup:
+    if (pseed.data) {
+        PORT_Free(pseed.data);
+    }
+    if (qseed.data) {
+        PORT_Free(qseed.data);
+    }
+    mp_clear(&P);
+    mp_clear(&Q);
+    mp_clear(&G);
+    mp_clear(&H);
+    mp_clear(&l);
+    mp_clear(&p0);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv) {
+        /* NOTE(review): in the G-only path params/verify alias the caller's
+         * *pParams/*pVfy, so a failure there frees arenas the caller still
+         * references. Confirm that callers expect this before relying on
+         * or changing it. */
+        if (params) {
+            PORT_FreeArena(params->arena, PR_TRUE);
+        }
+        if (verify) {
+            PORT_FreeArena(verify->arena, PR_TRUE);
+        }
+    }
+    if (hit.data) {
+        SECITEM_FreeItem(&hit, PR_FALSE);
+    }
+    return rv;
+}
+
+/*
+ * Generate FIPS 186-1 style PQG parameters for size index j (0..8).
+ * P is 512 + 64*j bits long and the seed is as long as P.
+ * Fails with SEC_ERROR_INVALID_ARGS when j is out of range or either
+ * output pointer is NULL.
+ */
+SECStatus
+PQG_ParamGen(unsigned int j, PQGParams **pParams, PQGVerify **pVfy)
+{
+    unsigned int primeBits; /* Length of P in bits.  Per FIPS 186. */
+
+    if (!pParams || !pVfy || j > 8) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    primeBits = 512 + (j * 64);
+    /* the seed gets one byte for every eight bits of P */
+    return pqg_ParamGen(primeBits, DSA1_Q_BITS, FIPS186_1_TYPE,
+                        primeBits / 8, pParams, pVfy);
+}
+
+/*
+ * Same as PQG_ParamGen, but the caller chooses the seed length in bytes.
+ * Fails with SEC_ERROR_INVALID_ARGS when j is out of range or either
+ * output pointer is NULL; seedBytes itself is checked by pqg_ParamGen.
+ */
+SECStatus
+PQG_ParamGenSeedLen(unsigned int j, unsigned int seedBytes,
+                    PQGParams **pParams, PQGVerify **pVfy)
+{
+    unsigned int primeBits; /* Length of P in bits.  Per FIPS 186. */
+
+    if (!pParams || !pVfy || j > 8) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    primeBits = 512 + (j * 64);
+    return pqg_ParamGen(primeBits, DSA1_Q_BITS, FIPS186_1_TYPE, seedBytes,
+                        pParams, pVfy);
+}
+
+/*
+ * Generate PQG parameters with Shawe-Taylor primes (FIPS186_3_ST_TYPE).
+ * N == 0 selects the default subprime length for L, and seedBytes == 0
+ * selects a seed of N/8 bytes. The (L, N) pair is validated before any
+ * generation starts.
+ */
+SECStatus
+PQG_ParamGenV2(unsigned int L, unsigned int N, unsigned int seedBytes,
+               PQGParams **pParams, PQGVerify **pVfy)
+{
+    unsigned int subPrimeBits = (N != 0) ? N : pqg_get_default_N(L);
+    /* seedBytes == L/8 for probable primes, N/8 for Shawe-Taylor Primes */
+    unsigned int seedLen = (seedBytes != 0) ? seedBytes : subPrimeBits / 8;
+
+    if (pqg_validate_dsa2(L, subPrimeBits) != SECSuccess) {
+        /* error code already set */
+        return SECFailure;
+    }
+    return pqg_ParamGen(L, subPrimeBits, FIPS186_3_ST_TYPE, seedLen,
+                        pParams, pVfy);
+}
+
+/*
+ * verify can use vfy structures returned from either FIPS186-1 or
+ * FIPS186-2, and can handle differences in selected Hash functions to
+ * generate the parameters.
+ *
+ *   params - P, Q and optionally G. With an empty G only P and Q are
+ *            checked, which requires vfy->counter != -1.
+ *   vfy    - seed (required), counter (-1 means "only verify G") and the
+ *            optional h.
+ *   result - receives the verdict:
+ *              SECSuccess    every check that could be performed passed;
+ *              SECFailure    a parameter check failed;
+ *              SECWouldBlock G passed the partial checks, but there is no
+ *                            h and the parameters are not FIPS186_1_TYPE.
+ *
+ * The return value only reports whether the verification could be run:
+ * SECFailure for invalid arguments or an internal error, SECSuccess
+ * otherwise. A failed parameter check is reported through *result.
+ */
+SECStatus
+PQG_VerifyParams(const PQGParams *params,
+                 const PQGVerify *vfy, SECStatus *result)
+{
+    SECStatus rv = SECSuccess;
+    unsigned int g, n, L, N, offset, outlen;
+    mp_int p0, P, Q, G, P_, Q_, G_, r, h;
+    mp_err err = MP_OKAY;
+    int j;
+    unsigned int counter_max = 0; /* handle legacy L < 1024 */
+    unsigned int qseed_len;
+    SECItem pseed_ = { 0, 0, 0 };
+    HASH_HashType hashtype;
+    pqgGenType type;
+
+/* Record a failed parameter check in *result and stop verifying. */
+#define CHECKPARAM(cond)      \
+    if (!(cond)) {            \
+        *result = SECFailure; \
+        goto cleanup;         \
+    }
+    if (!params || !vfy || !result) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* always need at least p, q, and seed for any meaningful check */
+    if ((params->prime.len == 0) || (params->subPrime.len == 0) ||
+        (vfy->seed.len == 0)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* we want to either check PQ or G or both. If we don't have G, make
+     * sure we have count so we can check P. */
+    if ((params->base.len == 0) && (vfy->counter == -1)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    MP_DIGITS(&p0) = 0;
+    MP_DIGITS(&P) = 0;
+    MP_DIGITS(&Q) = 0;
+    MP_DIGITS(&G) = 0;
+    MP_DIGITS(&P_) = 0;
+    MP_DIGITS(&Q_) = 0;
+    MP_DIGITS(&G_) = 0;
+    MP_DIGITS(&r) = 0;
+    MP_DIGITS(&h) = 0;
+    CHECK_MPI_OK(mp_init(&p0));
+    CHECK_MPI_OK(mp_init(&P));
+    CHECK_MPI_OK(mp_init(&Q));
+    CHECK_MPI_OK(mp_init(&G));
+    CHECK_MPI_OK(mp_init(&P_));
+    CHECK_MPI_OK(mp_init(&Q_));
+    CHECK_MPI_OK(mp_init(&G_));
+    CHECK_MPI_OK(mp_init(&r));
+    CHECK_MPI_OK(mp_init(&h));
+    *result = SECSuccess;
+    SECITEM_TO_MPINT(params->prime, &P);
+    SECITEM_TO_MPINT(params->subPrime, &Q);
+    /* if G isn't specified, just check P and Q */
+    if (params->base.len != 0) {
+        SECITEM_TO_MPINT(params->base, &G);
+    }
+    /* 1.  Check (L,N) pair */
+    N = mpl_significant_bits(&Q);
+    L = mpl_significant_bits(&P);
+    if (L < 1024) {
+        /* handle DSA1 pqg parameters with less than 1024 bits*/
+        CHECKPARAM(N == DSA1_Q_BITS);
+        j = PQG_PBITS_TO_INDEX(L);
+        CHECKPARAM(j >= 0 && j <= 8);
+        counter_max = 4096;
+    } else {
+        /* handle DSA2 parameters (includes DSA1, 1024 bits) */
+        CHECKPARAM(pqg_validate_dsa2(L, N) == SECSuccess);
+        counter_max = 4 * L;
+    }
+    /* 3.  G < P */
+    if (params->base.len != 0) {
+        CHECKPARAM(mp_cmp(&G, &P) < 0);
+    }
+    /* 4.  P % Q == 1 */
+    CHECK_MPI_OK(mp_mod(&P, &Q, &r));
+    CHECKPARAM(mp_cmp_d(&r, 1) == 0);
+    /* 5.  Q is prime */
+    CHECKPARAM(mpp_pprime(&Q, prime_testcount_q(L, N)) == MP_YES);
+    /* 6.  P is prime */
+    CHECKPARAM(mpp_pprime(&P, prime_testcount_p(L, N)) == MP_YES);
+    /* Steps 7-12 are done only if the optional PQGVerify is supplied. */
+    /* continue processing P */
+    /* 7.  counter < 4*L */
+    CHECKPARAM((vfy->counter == -1) || (vfy->counter < counter_max));
+    /* 8.  g >= N and g < 2*L   (g is length of seed in bits) */
+    g = vfy->seed.len * 8;
+    CHECKPARAM(g >= N && g < counter_max / 2);
+    /* 9.  Q generated from SEED matches Q in PQGParams. */
+    /* This function checks all possible hash and generation types to
+     * find a Q_ which matches Q. */
+    CHECKPARAM(findQfromSeed(L, N, g, &vfy->seed, &Q, &Q_, &qseed_len,
+                             &hashtype, &type) == SECSuccess);
+    CHECKPARAM(mp_cmp(&Q, &Q_) == 0);
+    if (type == FIPS186_3_ST_TYPE) {
+        SECItem qseed = { 0, 0, 0 };
+        SECItem pseed = { 0, 0, 0 };
+        unsigned int first_seed_len;
+        unsigned int pgen_counter = 0;
+
+        /* extract pseed and qseed from domain_parameter_seed, which is
+         * first_seed || pseed || qseed. qseed is first_seed + small_integer
+         * pseed is qseed + small_integer. This means most of the time
+         * first_seed.len == qseed.len == pseed.len. Rarely qseed.len and/or
+         * pseed.len will be one greater than first_seed.len, so we can
+         * depend on the fact that
+         *   first_seed.len = floor(domain_parameter_seed.len/3).
+         * findQfromSeed returned qseed.len, so we can calculate pseed.len as
+         *   pseed.len = domain_parameter_seed.len - first_seed.len - qseed.len
+         * this is probably over kill, since 99.999% of the time they will all
+         * be equal.
+         *
+         * With the lengths, we can now find the offsets;
+         * first_seed.data = domain_parameter_seed.data + 0
+         * pseed.data = domain_parameter_seed.data + first_seed.len
+         * qseed.data = domain_parameter_seed.data
+         *         + domain_parameter_seed.len - qseed.len
+         *
+         */
+        first_seed_len = vfy->seed.len / 3;
+        CHECKPARAM(qseed_len < vfy->seed.len);
+        CHECKPARAM(first_seed_len * 8 > N - 1);
+        CHECKPARAM(first_seed_len + qseed_len < vfy->seed.len);
+        qseed.len = qseed_len;
+        qseed.data = vfy->seed.data + vfy->seed.len - qseed.len;
+        pseed.len = vfy->seed.len - (first_seed_len + qseed_len);
+        pseed.data = vfy->seed.data + first_seed_len;
+
+        /*
+         * now complete FIPS 186-3 A.1.2.1.2. Step 1 was completed
+         * above in our initial checks, Step 2 was completed by
+         * findQfromSeed */
+
+        /* Step 3 (status, c0, prime_seed, prime_gen_counter) =
+        ** (ST_Random_Prime((ceil(length/2)+1, input_seed)
+        */
+        CHECK_SEC_OK(makePrimefromSeedShaweTaylor(hashtype, (L + 1) / 2 + 1,
+                                                  &qseed, &p0, &pseed_, &pgen_counter));
+        /* Steps 4-22 FIPS 186-3 appendix A.1.2.1.2 */
+        CHECK_SEC_OK(makePrimefromPrimesShaweTaylor(hashtype, L,
+                                                    &p0, &Q_, &P_, &pseed_, &pgen_counter));
+        CHECKPARAM(mp_cmp(&P, &P_) == 0);
+        /* make sure pseed wasn't tampered with (since it is part of
+         * calculating G) */
+        CHECKPARAM(SECITEM_CompareItem(&pseed, &pseed_) == SECEqual);
+    } else if (vfy->counter == -1) {
+        /* If counter is set to -1, we are really only verifying G, skip
+         * the remainder of the checks for P */
+        CHECKPARAM(type != FIPS186_1_TYPE); /* we only do this for DSA2 */
+    } else {
+        /* 10. P generated from (L, counter, g, SEED, Q) matches P
+         * in PQGParams. */
+        outlen = HASH_ResultLen(hashtype) * PR_BITS_PER_BYTE;
+        n = (L - 1) / outlen;
+        offset = vfy->counter * (n + 1) + ((type == FIPS186_1_TYPE) ? 2 : 1);
+        CHECK_SEC_OK(makePfromQandSeed(hashtype, L, N, offset, g, &vfy->seed,
+                                       &Q, &P_));
+        CHECKPARAM(mp_cmp(&P, &P_) == 0);
+    }
+
+    /* now check G, skip if don't have a g */
+    if (params->base.len == 0)
+        goto cleanup;
+
+    /* first Always check that G is OK  FIPS186-3 A.2.2  & A.2.4*/
+    /* 1. 2 < G < P-1 */
+    /* P is prime, p-1 == zero 1st bit */
+    CHECK_MPI_OK(mpl_set_bit(&P, 0, 0));
+    CHECKPARAM(mp_cmp_d(&G, 2) > 0 && mp_cmp(&G, &P) < 0);
+    CHECK_MPI_OK(mpl_set_bit(&P, 0, 1)); /* set it back */
+    /* 2. verify g**q mod p == 1 */
+    CHECK_MPI_OK(mp_exptmod(&G, &Q, &P, &h)); /* h = G ** Q mod P */
+    CHECKPARAM(mp_cmp_d(&h, 1) == 0);
+
+    /* no h, the above is the best we can do */
+    if (vfy->h.len == 0) {
+        if (type != FIPS186_1_TYPE) {
+            *result = SECWouldBlock;
+        }
+        goto cleanup;
+    }
+
+    /*
+     * If h is one byte and FIPS186-3 was used to generate Q (we've verified
+     * Q was generated from seed already), then we assume that FIPS 186-3
+     * appendix A.2.3 was used to generate G. Otherwise we assume A.2.1 was
+     * used to generate G.
+     */
+    if ((vfy->h.len == 1) && (type != FIPS186_1_TYPE)) {
+        /* A.2.3 */
+        CHECK_SEC_OK(makeGfromIndex(hashtype, &P, &Q, &vfy->seed,
+                                    vfy->h.data[0], &G_));
+        CHECKPARAM(mp_cmp(&G, &G_) == 0);
+    } else {
+        int passed;
+        /* A.2.1 */
+        SECITEM_TO_MPINT(vfy->h, &h);
+        /* 11. 1 < h < P-1 */
+        /* P is prime, p-1 == zero 1st bit */
+        CHECK_MPI_OK(mpl_set_bit(&P, 0, 0));
+        /* the range check applies to the supplied h; G itself was already
+         * range checked above */
+        CHECKPARAM(mp_cmp_d(&h, 1) > 0 && mp_cmp(&h, &P) < 0);
+        CHECK_MPI_OK(mpl_set_bit(&P, 0, 1)); /* set it back */
+        /* 12. G generated from h matches G in PQGParams. */
+        CHECK_SEC_OK(makeGfromH(&P, &Q, &h, &G_, &passed));
+        CHECKPARAM(passed && mp_cmp(&G, &G_) == 0);
+    }
+cleanup:
+    mp_clear(&p0);
+    mp_clear(&P);
+    mp_clear(&Q);
+    mp_clear(&G);
+    mp_clear(&P_);
+    mp_clear(&Q_);
+    mp_clear(&G_);
+    mp_clear(&r);
+    mp_clear(&h);
+    if (pseed_.data) {
+        SECITEM_FreeItem(&pseed_, PR_FALSE);
+    }
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/**************************************************************************
+ *  Free the PQGParams struct and the things it points to.                *
+ **************************************************************************/
+void
+PQG_DestroyParams(PQGParams *params)
+{
+    if (!params) {
+        return;
+    }
+    if (params->arena) {
+        /* arena-backed parameters: release the arena, without zeroing it */
+        PORT_FreeArena(params->arena, PR_FALSE);
+        return;
+    }
+    /* No arena: release each item individually. PR_FALSE tells
+     * SECITEM_FreeItem not to free the SECItem struct itself, which is
+     * embedded in params; params is freed last. */
+    SECITEM_FreeItem(&params->prime, PR_FALSE);
+    SECITEM_FreeItem(&params->subPrime, PR_FALSE);
+    SECITEM_FreeItem(&params->base, PR_FALSE);
+    PORT_Free(params);
+}
+
+/**************************************************************************
+ *  Free the PQGVerify struct and the things it points to.                *
+ **************************************************************************/
+
+void
+PQG_DestroyVerify(PQGVerify *vfy)
+{
+    if (!vfy) {
+        return;
+    }
+    if (vfy->arena) {
+        /* arena-backed verify data: release the arena, without zeroing it */
+        PORT_FreeArena(vfy->arena, PR_FALSE);
+        return;
+    }
+    /* No arena: release each item individually. PR_FALSE tells
+     * SECITEM_FreeItem not to free the SECItem struct itself, which is
+     * embedded in vfy; vfy is freed last. */
+    SECITEM_FreeItem(&vfy->seed, PR_FALSE);
+    SECITEM_FreeItem(&vfy->h, PR_FALSE);
+    PORT_Free(vfy);
+}
diff --git a/security/nss/lib/freebl/pqg.h b/security/nss/lib/freebl/pqg.h
new file mode 100644
index 000000000..c4eecd590
--- /dev/null
+++ b/security/nss/lib/freebl/pqg.h
@@ -0,0 +1,25 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ *  pqg.h
+ *
+ *  header file for pqg functions exported just to freebl
+ */
+
+#ifndef _PQG_H_
+#define _PQG_H_ 1
+
+/* PQG_GetLength returns the significant bytes in the SECItem object (that is,
+ * the length of the object minus any leading zeros). Any SECItem may be used,
+ * though this function is usually used for P, Q, or G values. */
+unsigned int PQG_GetLength(const SECItem *obj);
+/* Check to see that the PQG parameters match a NIST defined DSA size;
+ * returns SECFailure and sets SEC_ERROR_INVALID_ARGS if they don't.
+ * See blapi.h for legal DSA PQG sizes. */
+SECStatus PQG_Check(const PQGParams *params);
+/* Return the preferred hash algorithm for the given PQGParams. */
+HASH_HashType PQG_GetHashType(const PQGParams *params);
+
+#endif /* _PQG_H_ */
diff --git a/security/nss/lib/freebl/rawhash.c b/security/nss/lib/freebl/rawhash.c
new file mode 100644
index 000000000..551727b89
--- /dev/null
+++ b/security/nss/lib/freebl/rawhash.c
@@ -0,0 +1,154 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "nspr.h"
+#include "hasht.h"
+#include "blapi.h" /* below the line */
+#include "secerr.h"
+
+/* "new context" entry for the null hash: there is no state, so it always
+ * returns NULL. */
+static void *
+null_hash_new_context(void)
+{
+    return NULL;
+}
+
+/* "clone context" stub: asserts that the context passed in is NULL and
+ * returns NULL. Every entry of SECRawHashObjects uses it as its clone
+ * function. */
+static void *
+null_hash_clone_context(void *v)
+{
+    PORT_Assert(v == NULL);
+    return NULL;
+}
+
+/* "begin" entry for the null hash: intentionally does nothing. */
+static void
+null_hash_begin(void *v)
+{
+}
+
+/* "update" entry for the null hash: the input is ignored. */
+static void
+null_hash_update(void *v, const unsigned char *input, unsigned int length)
+{
+}
+
+/* "end" entry for the null hash: reports a zero-length digest through
+ * outLen and leaves output untouched; maxOut is not consulted. */
+static void
+null_hash_end(void *v, unsigned char *output, unsigned int *outLen,
+              unsigned int maxOut)
+{
+    *outLen = 0;
+}
+
+/* "destroy context" entry for the null hash: there is nothing to free, it
+ * only asserts that the context is NULL. The PRBool argument is unused. */
+static void
+null_hash_destroy_context(void *v, PRBool b)
+{
+    PORT_Assert(v == NULL);
+}
+
+/*
+ * Table of raw hash implementations. HASH_GetRawHashObject indexes it
+ * directly with a HASH_HashType value, so the order of the entries matters.
+ *
+ * Each initializer lists, in order: digest length, new-context function,
+ * clone function, destroy function, begin, update, end, block length,
+ * hash type and the end_raw function.
+ *
+ * Every entry uses null_hash_clone_context as its clone function, and MD2
+ * has no end_raw function (NULL).
+ */
+const SECHashObject SECRawHashObjects[] = {
+    /* HASH_AlgNULL: no-op hash built from the null_hash_* stubs */
+    { 0,
+      (void *(*)(void))null_hash_new_context,
+      (void *(*)(void *))null_hash_clone_context,
+      (void (*)(void *, PRBool))null_hash_destroy_context,
+      (void (*)(void *))null_hash_begin,
+      (void (*)(void *, const unsigned char *, unsigned int))null_hash_update,
+      (void (*)(void *, unsigned char *, unsigned int *,
+                unsigned int))null_hash_end,
+      0,
+      HASH_AlgNULL,
+      (void (*)(void *, unsigned char *, unsigned int *,
+                unsigned int))null_hash_end },
+    /* HASH_AlgMD2 */
+    {
+        MD2_LENGTH,
+        (void *(*)(void))MD2_NewContext,
+        (void *(*)(void *))null_hash_clone_context,
+        (void (*)(void *, PRBool))MD2_DestroyContext,
+        (void (*)(void *))MD2_Begin,
+        (void (*)(void *, const unsigned char *, unsigned int))MD2_Update,
+        (void (*)(void *, unsigned char *, unsigned int *, unsigned int))MD2_End,
+        MD2_BLOCK_LENGTH,
+        HASH_AlgMD2,
+        NULL /* end_raw */
+    },
+    /* HASH_AlgMD5 */
+    { MD5_LENGTH,
+      (void *(*)(void))MD5_NewContext,
+      (void *(*)(void *))null_hash_clone_context,
+      (void (*)(void *, PRBool))MD5_DestroyContext,
+      (void (*)(void *))MD5_Begin,
+      (void (*)(void *, const unsigned char *, unsigned int))MD5_Update,
+      (void (*)(void *, unsigned char *, unsigned int *, unsigned int))MD5_End,
+      MD5_BLOCK_LENGTH,
+      HASH_AlgMD5,
+      (void (*)(void *, unsigned char *, unsigned int *, unsigned int))MD5_EndRaw },
+    /* HASH_AlgSHA1 */
+    { SHA1_LENGTH,
+      (void *(*)(void))SHA1_NewContext,
+      (void *(*)(void *))null_hash_clone_context,
+      (void (*)(void *, PRBool))SHA1_DestroyContext,
+      (void (*)(void *))SHA1_Begin,
+      (void (*)(void *, const unsigned char *, unsigned int))SHA1_Update,
+      (void (*)(void *, unsigned char *, unsigned int *, unsigned int))SHA1_End,
+      SHA1_BLOCK_LENGTH,
+      HASH_AlgSHA1,
+      (void (*)(void *, unsigned char *, unsigned int *, unsigned int))
+          SHA1_EndRaw },
+    /* HASH_AlgSHA256 */
+    { SHA256_LENGTH,
+      (void *(*)(void))SHA256_NewContext,
+      (void *(*)(void *))null_hash_clone_context,
+      (void (*)(void *, PRBool))SHA256_DestroyContext,
+      (void (*)(void *))SHA256_Begin,
+      (void (*)(void *, const unsigned char *, unsigned int))SHA256_Update,
+      (void (*)(void *, unsigned char *, unsigned int *,
+                unsigned int))SHA256_End,
+      SHA256_BLOCK_LENGTH,
+      HASH_AlgSHA256,
+      (void (*)(void *, unsigned char *, unsigned int *,
+                unsigned int))SHA256_EndRaw },
+    /* HASH_AlgSHA384 */
+    { SHA384_LENGTH,
+      (void *(*)(void))SHA384_NewContext,
+      (void *(*)(void *))null_hash_clone_context,
+      (void (*)(void *, PRBool))SHA384_DestroyContext,
+      (void (*)(void *))SHA384_Begin,
+      (void (*)(void *, const unsigned char *, unsigned int))SHA384_Update,
+      (void (*)(void *, unsigned char *, unsigned int *,
+                unsigned int))SHA384_End,
+      SHA384_BLOCK_LENGTH,
+      HASH_AlgSHA384,
+      (void (*)(void *, unsigned char *, unsigned int *,
+                unsigned int))SHA384_EndRaw },
+    /* HASH_AlgSHA512 */
+    { SHA512_LENGTH,
+      (void *(*)(void))SHA512_NewContext,
+      (void *(*)(void *))null_hash_clone_context,
+      (void (*)(void *, PRBool))SHA512_DestroyContext,
+      (void (*)(void *))SHA512_Begin,
+      (void (*)(void *, const unsigned char *, unsigned int))SHA512_Update,
+      (void (*)(void *, unsigned char *, unsigned int *,
+                unsigned int))SHA512_End,
+      SHA512_BLOCK_LENGTH,
+      HASH_AlgSHA512,
+      (void (*)(void *, unsigned char *, unsigned int *,
+                unsigned int))SHA512_EndRaw },
+    /* HASH_AlgSHA224 */
+    { SHA224_LENGTH,
+      (void *(*)(void))SHA224_NewContext,
+      (void *(*)(void *))null_hash_clone_context,
+      (void (*)(void *, PRBool))SHA224_DestroyContext,
+      (void (*)(void *))SHA224_Begin,
+      (void (*)(void *, const unsigned char *, unsigned int))SHA224_Update,
+      (void (*)(void *, unsigned char *, unsigned int *,
+                unsigned int))SHA224_End,
+      SHA224_BLOCK_LENGTH,
+      HASH_AlgSHA224,
+      (void (*)(void *, unsigned char *, unsigned int *,
+                unsigned int))SHA224_EndRaw },
+};
+
+/*
+ * Look up the raw hash object for hashType.
+ * Only values strictly between HASH_AlgNULL and HASH_AlgTOTAL are accepted;
+ * anything else sets SEC_ERROR_INVALID_ARGS and yields NULL.
+ */
+const SECHashObject *
+HASH_GetRawHashObject(HASH_HashType hashType)
+{
+    if (hashType > HASH_AlgNULL && hashType < HASH_AlgTOTAL) {
+        return &SECRawHashObjects[hashType];
+    }
+    PORT_SetError(SEC_ERROR_INVALID_ARGS);
+    return NULL;
+}
diff --git a/security/nss/lib/freebl/ret_cr16.s b/security/nss/lib/freebl/ret_cr16.s
new file mode 100644
index 000000000..1f53fc900
--- /dev/null
+++ b/security/nss/lib/freebl/ret_cr16.s
@@ -0,0 +1,27 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef __LP64__
+        .LEVEL   2.0W
+#else
+        .LEVEL   1.1
+#endif
+
+	.CODE	; equivalent to the following two lines
+;       .SPACE   $TEXT$,SORT=8
+;       .SUBSPA  $CODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,CODE_ONLY,SORT=24
+
+; ret_cr16: leaf routine (FRAME=0, NO_CALLS) that copies control register
+; %cr16 into %ret0 and branches back through %rp.
+ret_cr16
+	.PROC
+	.CALLINFO 	FRAME=0, NO_CALLS
+	.EXPORT 	ret_cr16,ENTRY
+	.ENTRY
+;	BV		%r0(%rp)
+	BV		0(%rp)
+; NOTE(review): presumably the MFCTL below sits in the delay slot of the
+; BV above and so executes before control returns - confirm against the
+; PA-RISC architecture manual.
+	MFCTL		%cr16,%ret0
+; NOTE(review): the BV and NOP that follow look unreachable once the first
+; BV has been taken - confirm whether they are intentional.
+        BV %r0(%rp)
+        .EXIT
+        NOP
+        .PROCEND
+        .END
diff --git a/security/nss/lib/freebl/rijndael.c b/security/nss/lib/freebl/rijndael.c
new file mode 100644
index 000000000..4bb182693
--- /dev/null
+++ b/security/nss/lib/freebl/rijndael.c
@@ -0,0 +1,1375 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prinit.h"
+#include "prenv.h"
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "blapi.h"
+#include "rijndael.h"
+
+#include "cts.h"
+#include "ctr.h"
+#include "gcm.h"
+
+#ifdef USE_HW_AES
+#include "intel-aes.h"
+#endif
+
+#include "mpi.h"
+
+#ifdef USE_HW_AES
+static int has_intel_aes = 0;
+static PRBool use_hw_aes = PR_FALSE;
+
+#ifdef INTEL_GCM
+#include "intel-gcm.h"
+static int has_intel_avx = 0;
+static int has_intel_clmul = 0;
+static PRBool use_hw_gcm = PR_FALSE;
+#if defined(_MSC_VER) && !defined(_M_IX86)
+#include <intrin.h> /* for _xgetbv() */
+#endif
+#endif
+#endif /* USE_HW_AES */
+
+/*
+ * There are currently five ways to build this code, varying in performance
+ * and code size.
+ *
+ * RIJNDAEL_INCLUDE_TABLES         Include all tables from rijndael32.tab
+ * RIJNDAEL_GENERATE_TABLES        Generate tables on first
+ *                                 encryption/decryption, then store them;
+ *                                 use the function gfm
+ * RIJNDAEL_GENERATE_TABLES_MACRO  Same as above, but use macros to do
+ *                                 the generation
+ * RIJNDAEL_GENERATE_VALUES        Do not store tables, generate the table
+ *                                 values "on-the-fly", using gfm
+ * RIJNDAEL_GENERATE_VALUES_MACRO  Same as above, but use macros
+ *
+ * The default is RIJNDAEL_INCLUDE_TABLES.
+ */
+
+/*
+ * When building RIJNDAEL_INCLUDE_TABLES, includes S**-1, Rcon, T[0..4],
+ *                                                 T**-1[0..4], IMXC[0..4]
+ * When building anything else, includes S, S**-1, Rcon
+ */
+#include "rijndael32.tab"
+
+#if defined(RIJNDAEL_INCLUDE_TABLES)
+/*
+ * RIJNDAEL_INCLUDE_TABLES
+ */
+#define T0(i) _T0[i] /* forward T-tables, read straight from rijndael32.tab */
+#define T1(i) _T1[i]
+#define T2(i) _T2[i]
+#define T3(i) _T3[i]
+#define TInv0(i) _TInv0[i] /* inverse T-tables */
+#define TInv1(i) _TInv1[i]
+#define TInv2(i) _TInv2[i]
+#define TInv3(i) _TInv3[i]
+#define IMXC0(b) _IMXC0[b] /* inverse mix column tables */
+#define IMXC1(b) _IMXC1[b]
+#define IMXC2(b) _IMXC2[b]
+#define IMXC3(b) _IMXC3[b]
+/* The S-box is not stored here: the cast keeps the low byte of a T entry */
+#ifdef IS_LITTLE_ENDIAN
+#define SBOX(b) ((PRUint8)_T3[b])
+#else
+#define SBOX(b) ((PRUint8)_T1[b])
+#endif
+#define SINV(b) (_SInv[b]) /* inverse S-box, stored as its own table */
+
+#else /* not RIJNDAEL_INCLUDE_TABLES */
+
+/*
+ * T-table value generation.  WORD4 packs four bytes, b0 first in memory.
+ */
+
+#ifdef IS_LITTLE_ENDIAN
+#define WORD4(b0, b1, b2, b3) \
+    ((((PRUint32)(b3)) << 24) | \
+     (((PRUint32)(b2)) << 16) | \
+     (((PRUint32)(b1)) << 8) |  \
+     ((PRUint32)(b0)))
+#else
+#define WORD4(b0, b1, b2, b3) \
+    ((((PRUint32)(b0)) << 24) | \
+     (((PRUint32)(b1)) << 16) | \
+     (((PRUint32)(b2)) << 8) |  \
+     ((PRUint32)(b3)))
+#endif
+
+/*
+ * Define the S and S**-1 tables (both have been stored)
+ */
+#define SBOX(b) (_S[b])
+#define SINV(b) (_SInv[b])
+
+/*
+ * xtime(a) = a * 02 in GF(2**8); not masked to 8 bits, evaluates a twice
+ */
+#define XTIME(a) \
+    (((a) & 0x80) ? (((a) << 1) ^ 0x1b) : ((a) << 1))
+
+/* Choose GFM method (macros or function) */
+#if defined(RIJNDAEL_GENERATE_TABLES_MACRO) || \
+    defined(RIJNDAEL_GENERATE_VALUES_MACRO)
+
+/*
+ * Galois field GF(2**8) multipliers, in macro form
+ */
+#define GFM01(a) \
+    (a) /* a * 01 = a, the identity */
+#define GFM02(a) \
+    (XTIME(a) & 0xff) /* a * 02 = xtime(a) */
+#define GFM04(a) \
+    (GFM02(GFM02(a))) /* a * 04 = xtime**2(a) */
+#define GFM08(a) \
+    (GFM02(GFM04(a))) /* a * 08 = xtime**3(a) */
+#define GFM03(a) \
+    (GFM01(a) ^ GFM02(a)) /* a * 03 = a * (01 + 02) */
+#define GFM09(a) \
+    (GFM01(a) ^ GFM08(a)) /* a * 09 = a * (01 + 08) */
+#define GFM0B(a) \
+    (GFM01(a) ^ GFM02(a) ^ GFM08(a)) /* a * 0B = a * (01 + 02 + 08) */
+#define GFM0D(a) \
+    (GFM01(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0D = a * (01 + 04 + 08) */
+#define GFM0E(a) \
+    (GFM02(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0E = a * (02 + 04 + 08) */
+
+#else /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_VALUES */
+
+/* GF_MULTIPLY
+ *
+ * multiply two bytes in GF(2**8) by shift-and-add; XTIME reduces with 0x1b
+ */
+PRUint8
+gfm(PRUint8 a, PRUint8 b) /* NOTE(review): not static; confirm nothing else links to it */
+{
+    PRUint8 res = 0;
+    while (b > 0) {
+        res = (b & 0x01) ? res ^ a : res; /* add a when the low bit of b is set */
+        a = XTIME(a);                     /* a *= 02; the PRUint8 store drops bit 8 */
+        b >>= 1;                          /* move on to the next bit of b */
+    }
+    return res;
+}
+
+#define GFM01(a) \
+    (a) /* a * 01 = a, the identity */
+#define GFM02(a) \
+    (XTIME(a) & 0xff) /* a * 02 = xtime(a) */
+#define GFM03(a) \
+    (gfm(a, 0x03)) /* a * 03 */
+#define GFM09(a) \
+    (gfm(a, 0x09)) /* a * 09 */
+#define GFM0B(a) \
+    (gfm(a, 0x0B)) /* a * 0B */
+#define GFM0D(a) \
+    (gfm(a, 0x0D)) /* a * 0D */
+#define GFM0E(a) \
+    (gfm(a, 0x0E)) /* a * 0E */
+
+#endif /* choosing GFM function */
+
+/*
+ * The T-tables: S-box output scaled by 02/01/01/03, rotated per table
+ */
+#define G_T0(i) \
+    (WORD4(GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i))))
+#define G_T1(i) \
+    (WORD4(GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i))))
+#define G_T2(i) \
+    (WORD4(GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i))))
+#define G_T3(i) \
+    (WORD4(GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i))))
+
+/*
+ * The inverse T-tables: inverse S-box output scaled by 0E/09/0D/0B, rotated
+ */
+#define G_TInv0(i) \
+    (WORD4(GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i))))
+#define G_TInv1(i) \
+    (WORD4(GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i))))
+#define G_TInv2(i) \
+    (WORD4(GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i))))
+#define G_TInv3(i) \
+    (WORD4(GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i))))
+
+/*
+ * The inverse mix column tables: the raw byte scaled by 0E/09/0D/0B (no S-box)
+ */
+#define G_IMXC0(i) \
+    (WORD4(GFM0E(i), GFM09(i), GFM0D(i), GFM0B(i)))
+#define G_IMXC1(i) \
+    (WORD4(GFM0B(i), GFM0E(i), GFM09(i), GFM0D(i)))
+#define G_IMXC2(i) \
+    (WORD4(GFM0D(i), GFM0B(i), GFM0E(i), GFM09(i)))
+#define G_IMXC3(i) \
+    (WORD4(GFM09(i), GFM0D(i), GFM0B(i), GFM0E(i)))
+
+/* Now choose the T-table indexing method */
+#if defined(RIJNDAEL_GENERATE_VALUES)
+/* Compute inverse T-table tx (0..3), entry i, on demand; nothing is stored */
+static PRUint32
+gen_TInvXi(PRUint8 tx, PRUint8 i)
+{
+    PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E;
+    si01 = SINV(i);     /* inverse S-box output */
+    si02 = XTIME(si01); /* successive doublings; each PRUint8 store drops bit 8 */
+    si04 = XTIME(si02);
+    si08 = XTIME(si04);
+    si03 = si02 ^ si01; /* remaining multiples are XOR sums of 01, 02, 04, 08 */
+    si09 = si08 ^ si01;
+    si0B = si08 ^ si03;
+    si0D = si09 ^ si04;
+    si0E = si08 ^ si04 ^ si02;
+    switch (tx) {
+        case 0:
+            return WORD4(si0E, si09, si0D, si0B);
+        case 1:
+            return WORD4(si0B, si0E, si09, si0D);
+        case 2:
+            return WORD4(si0D, si0B, si0E, si09);
+        case 3:
+            return WORD4(si09, si0D, si0B, si0E);
+    }
+    return -1; /* tx outside 0..3; converted to the PRUint32 return type */
+}
+#define T0(i) G_T0(i)
+#define T1(i) G_T1(i)
+#define T2(i) G_T2(i)
+#define T3(i) G_T3(i)
+#define TInv0(i) gen_TInvXi(0, i)
+#define TInv1(i) gen_TInvXi(1, i)
+#define TInv2(i) gen_TInvXi(2, i)
+#define TInv3(i) gen_TInvXi(3, i)
+#define IMXC0(b) G_IMXC0(b)
+#define IMXC1(b) G_IMXC1(b)
+#define IMXC2(b) G_IMXC2(b)
+#define IMXC3(b) G_IMXC3(b)
+#elif defined(RIJNDAEL_GENERATE_VALUES_MACRO)
+/* generate values for the tables with macros */
+#define T0(i) G_T0(i)
+#define T1(i) G_T1(i)
+#define T2(i) G_T2(i)
+#define T3(i) G_T3(i)
+#define TInv0(i) G_TInv0(i)
+#define TInv1(i) G_TInv1(i)
+#define TInv2(i) G_TInv2(i)
+#define TInv3(i) G_TInv3(i)
+#define IMXC0(b) G_IMXC0(b)
+#define IMXC1(b) G_IMXC1(b)
+#define IMXC2(b) G_IMXC2(b)
+#define IMXC3(b) G_IMXC3(b)
+#else /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_TABLES_MACRO */
+/* Generate T and T**-1 table values and store, then index */
+/* The inverse mix column tables are still generated */
+#define T0(i) rijndaelTables->T0[i] /* rijndaelTables must already be set */
+#define T1(i) rijndaelTables->T1[i]
+#define T2(i) rijndaelTables->T2[i]
+#define T3(i) rijndaelTables->T3[i]
+#define TInv0(i) rijndaelTables->TInv0[i]
+#define TInv1(i) rijndaelTables->TInv1[i]
+#define TInv2(i) rijndaelTables->TInv2[i]
+#define TInv3(i) rijndaelTables->TInv3[i]
+#define IMXC0(b) G_IMXC0(b)
+#define IMXC1(b) G_IMXC1(b)
+#define IMXC2(b) G_IMXC2(b)
+#define IMXC3(b) G_IMXC3(b)
+#endif /* choose T-table indexing method */
+
+#endif /* not RIJNDAEL_INCLUDE_TABLES */
+
+#if defined(RIJNDAEL_GENERATE_TABLES) || \
+    defined(RIJNDAEL_GENERATE_TABLES_MACRO)
+
+/* Code to generate and store the tables */
+
+struct rijndael_tables_str { /* the eight tables built by init_rijndael_tables */
+    PRUint32 T0[256]; /* forward T-tables, one entry per byte value */
+    PRUint32 T1[256];
+    PRUint32 T2[256];
+    PRUint32 T3[256];
+    PRUint32 TInv0[256]; /* inverse T-tables, one entry per byte value */
+    PRUint32 TInv1[256];
+    PRUint32 TInv2[256];
+    PRUint32 TInv3[256];
+};
+
+static struct rijndael_tables_str *rijndaelTables = NULL; /* stays NULL until the tables are complete */
+static PRCallOnceType coRTInit = { 0, 0, 0 }; /* presumably handed to PR_CallOnce by a caller outside this view */
+static PRStatus
+init_rijndael_tables(void)
+{
+    PRUint32 i;
+    PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E;
+    struct rijndael_tables_str *rts;
+    rts = (struct rijndael_tables_str *)
+        PORT_Alloc(sizeof(struct rijndael_tables_str));
+    if (!rts)
+        return PR_FAILURE; /* allocation failed; rijndaelTables is left untouched */
+    for (i = 0; i < 256; i++) {
+        /* The forward values */
+        si01 = SBOX(i);
+        si02 = XTIME(si01); /* the PRUint8 store drops the overflow bit */
+        si03 = si02 ^ si01;
+        rts->T0[i] = WORD4(si02, si01, si01, si03);
+        rts->T1[i] = WORD4(si03, si02, si01, si01);
+        rts->T2[i] = WORD4(si01, si03, si02, si01);
+        rts->T3[i] = WORD4(si01, si01, si03, si02);
+        /* The inverse values */
+        si01 = SINV(i);
+        si02 = XTIME(si01);
+        si04 = XTIME(si02);
+        si08 = XTIME(si04);
+        si03 = si02 ^ si01; /* remaining multiples are XOR sums of 01, 02, 04, 08 */
+        si09 = si08 ^ si01;
+        si0B = si08 ^ si03;
+        si0D = si09 ^ si04;
+        si0E = si08 ^ si04 ^ si02;
+        rts->TInv0[i] = WORD4(si0E, si09, si0D, si0B);
+        rts->TInv1[i] = WORD4(si0B, si0E, si09, si0D);
+        rts->TInv2[i] = WORD4(si0D, si0B, si0E, si09);
+        rts->TInv3[i] = WORD4(si09, si0D, si0B, si0E);
+    }
+    /* wait until all the values are in to set */
+    rijndaelTables = rts; /* the allocation is never freed in this function */
+    return PR_SUCCESS;
+}
+
+#endif /* code to generate tables */
+
+/**************************************************************************
+ * Stuff related to the Rijndael key schedule
+ * SUBBYTE: S-box applied to each byte of a word (argument evaluated 4x).
+ * ROTBYTE: rotate a word by one byte; direction depends on endianness.
+ *************************************************************************/
+
+#define SUBBYTE(w)                                  \
+    ((((PRUint32)SBOX(((w) >> 24) & 0xff)) << 24) | \
+     (((PRUint32)SBOX(((w) >> 16) & 0xff)) << 16) | \
+     (((PRUint32)SBOX(((w) >> 8) & 0xff)) << 8) |   \
+     (((PRUint32)SBOX((w)&0xff))))
+
+#ifdef IS_LITTLE_ENDIAN
+#define ROTBYTE(b) \
+    (((b) >> 8) | ((b) << 24))
+#else
+#define ROTBYTE(b) \
+    (((b) << 8) | ((b) >> 24))
+#endif
+
+/* rijndael_key_expansion7
+ *
+ * Generate the expanded key from the key input by the user.
+ * XXX
+ * Nk == 7 (224 key bits) is a weird case.  Since Nk > 6, an added SubByte
+ * transformation is done periodically.  The period is every 4 bytes, and
+ * since 7%4 != 0 this happens at different times for each key word (unlike
+ * Nk == 8 where it happens twice in every key word, in the same positions).
+ * For now, I'm implementing this case "dumbly", w/o any unrolling.
+ */
+static SECStatus
+rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int Nk)
+{
+    unsigned int i;
+    PRUint32 *W;  /* base of the expanded key in cx */
+    PRUint32 *pW; /* trails i by one word: points at W[i - 1] on loop entry */
+    PRUint32 tmp;
+    W = cx->expandedKey;
+    /* 1.  the first Nk words contain the cipher key */
+    memcpy(W, key, Nk * 4);
+    i = Nk;
+    /* 2.  loop until full expanded key is obtained */
+    pW = W + i - 1;
+    for (; i < cx->Nb * (cx->Nr + 1); ++i) {
+        tmp = *pW++; /* read W[i - 1]; pW now addresses W[i] */
+        if (i % Nk == 0)
+            tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; /* first word of each Nk-word group */
+        else if (i % Nk == 4)
+            tmp = SUBBYTE(tmp); /* the added SubByte step described above */
+        *pW = W[i - Nk] ^ tmp;
+    }
+    return SECSuccess; /* no failure path in this function */
+}
+
+/* rijndael_key_expansion
+ *
+ * Generate the expanded key from the key input by the user.
+ */
+static SECStatus
+rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)
+{
+    unsigned int i;
+    PRUint32 *W;  /* base of the expanded key in cx */
+    PRUint32 *pW; /* trails i by one word: points at W[i - 1] before each step */
+    PRUint32 tmp;
+    unsigned int round_key_words = cx->Nb * (cx->Nr + 1);
+    if (Nk == 7)
+        return rijndael_key_expansion7(cx, key, Nk); /* Nk == 7 never reaches the code below */
+    W = cx->expandedKey;
+    /* The first Nk words contain the input cipher key */
+    memcpy(W, key, Nk * 4);
+    i = Nk;
+    pW = W + i - 1;
+    /* Loop over all sets of Nk words, except the last */
+    while (i < round_key_words - Nk) {
+        tmp = *pW++;
+        tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; /* first word of the set */
+        *pW = W[i++ - Nk] ^ tmp;
+        tmp = *pW++; /* next three words: plain XOR chain */
+        *pW = W[i++ - Nk] ^ tmp;
+        tmp = *pW++;
+        *pW = W[i++ - Nk] ^ tmp;
+        tmp = *pW++;
+        *pW = W[i++ - Nk] ^ tmp;
+        if (Nk == 4)
+            continue;
+        switch (Nk) { /* unrolled tail: each case deliberately runs into the next */
+            case 8:
+                tmp = *pW++;
+                tmp = SUBBYTE(tmp);
+                *pW = W[i++ - Nk] ^ tmp; /* fall through */
+            case 7:
+                tmp = *pW++;
+                *pW = W[i++ - Nk] ^ tmp; /* fall through */
+            case 6:
+                tmp = *pW++;
+                *pW = W[i++ - Nk] ^ tmp; /* fall through */
+            case 5:
+                tmp = *pW++;
+                *pW = W[i++ - Nk] ^ tmp;
+        }
+    }
+    /* Generate the last word */
+    tmp = *pW++;
+    tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
+    *pW = W[i++ - Nk] ^ tmp;
+    /* There may be overflow here, if Nk % (Nb * (Nr + 1)) > 0.  However,
+     * since the above loop generated all but the last Nk key words, there
+     * is no more need for the SubByte transformation.
+     */
+    if (Nk < 8) {
+        for (; i < round_key_words; ++i) {
+            tmp = *pW++;
+            *pW = W[i - Nk] ^ tmp;
+        }
+    } else {
+        /* except in the case when Nk == 8.  Then one more SubByte may have
+         * to be performed, at i % Nk == 4.
+         */
+        for (; i < round_key_words; ++i) {
+            tmp = *pW++;
+            if (i % Nk == 4)
+                tmp = SUBBYTE(tmp);
+            *pW = W[i - Nk] ^ tmp;
+        }
+    }
+    return SECSuccess; /* the only other result is whatever the Nk == 7 helper returns */
+}
+
+/* rijndael_invkey_expansion
+ *
+ * Generate the expanded key for the inverse cipher from the key input by
+ * the user.
+ */
+static SECStatus
+rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)
+{
+    unsigned int r;
+    PRUint32 *roundkeyw; /* word currently being transformed in place */
+    PRUint8 *b;          /* byte view of that same word */
+    int Nb = cx->Nb;
+    /* begins like usual key expansion ... */
+    if (rijndael_key_expansion(cx, key, Nk) != SECSuccess)
+        return SECFailure;
+    /* ... but has the additional step of InvMixColumn,
+     * excepting the first and last round keys.
+     */
+    roundkeyw = cx->expandedKey + cx->Nb; /* skip round key 0 */
+    for (r = 1; r < cx->Nr; ++r) {
+        /* each key word, roundkeyw, represents a column in the key
+         * matrix.  Each column is multiplied by the InvMixColumn matrix.
+         *   [ 0E 0B 0D 09 ]   [ b0 ]
+         *   [ 09 0E 0B 0D ] * [ b1 ]
+         *   [ 0D 09 0E 0B ]   [ b2 ]
+         *   [ 0B 0D 09 0E ]   [ b3 ]
+         */
+        b = (PRUint8 *)roundkeyw;
+        *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
+        b = (PRUint8 *)roundkeyw;
+        *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
+        b = (PRUint8 *)roundkeyw;
+        *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
+        b = (PRUint8 *)roundkeyw;
+        *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
+        if (Nb <= 4)
+            continue; /* four columns per round key: already done */
+        switch (Nb) { /* extra columns: each case deliberately runs into the next */
+            case 8:
+                b = (PRUint8 *)roundkeyw;
+                *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
+                               IMXC2(b[2]) ^ IMXC3(b[3]); /* fall through */
+            case 7:
+                b = (PRUint8 *)roundkeyw;
+                *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
+                               IMXC2(b[2]) ^ IMXC3(b[3]); /* fall through */
+            case 6:
+                b = (PRUint8 *)roundkeyw;
+                *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
+                               IMXC2(b[2]) ^ IMXC3(b[3]); /* fall through */
+            case 5:
+                b = (PRUint8 *)roundkeyw;
+                *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
+                               IMXC2(b[2]) ^ IMXC3(b[3]);
+        }
+    }
+    return SECSuccess;
+}
+/**************************************************************************
+ *
+ * Stuff related to Rijndael encryption/decryption, optimized for
+ * a 128-bit blocksize.
+ *
+ *************************************************************************/
+
+#ifdef IS_LITTLE_ENDIAN
+#define BYTE0WORD(w) ((w)&0x000000ff) /* BYTEnWORD keeps one byte of w in place, zeroing the rest */
+#define BYTE1WORD(w) ((w)&0x0000ff00)
+#define BYTE2WORD(w) ((w)&0x00ff0000)
+#define BYTE3WORD(w) ((w)&0xff000000)
+#else
+#define BYTE0WORD(w) ((w)&0xff000000) /* same byte numbering, masks mirrored for the other byte order */
+#define BYTE1WORD(w) ((w)&0x0000ff00 << 8)
+#define BYTE2WORD(w) ((w)&0x0000ff00)
+#define BYTE3WORD(w) ((w)&0x000000ff)
+#endif
+
+typedef union {
+    PRUint32 w[4]; /* the state as four words */
+    PRUint8 b[16]; /* the same storage as sixteen bytes */
+} rijndael_state;
+
+#define COLUMN_0(state) state.w[0]
+#define COLUMN_1(state) state.w[1]
+#define COLUMN_2(state) state.w[2]
+#define COLUMN_3(state) state.w[3]
+
+#define STATE_BYTE(i) state.b[i] /* relies on a variable named state being in scope */
+
+static SECStatus NO_SANITIZE_ALIGNMENT
+rijndael_encryptBlock128(AESContext *cx,
+                         unsigned char *output,
+                         const unsigned char *input)
+{
+    unsigned int r;
+    PRUint32 *roundkeyw; /* walks forward through cx->expandedKey */
+    rijndael_state state;
+    PRUint32 C0, C1, C2, C3; /* the four columns of the next state */
+#if defined(NSS_X86_OR_X64) /* here the caller's buffers are used as they are */
+#define pIn input
+#define pOut output
+#else
+    unsigned char *pIn, *pOut;
+    PRUint32 inBuf[4], outBuf[4];
+
+    if ((ptrdiff_t)input & 0x3) { /* input not 4-byte aligned: read from an aligned copy */
+        memcpy(inBuf, input, sizeof inBuf);
+        pIn = (unsigned char *)inBuf;
+    } else {
+        pIn = (unsigned char *)input;
+    }
+    if ((ptrdiff_t)output & 0x3) { /* output not 4-byte aligned: stage the result in outBuf */
+        pOut = (unsigned char *)outBuf;
+    } else {
+        pOut = (unsigned char *)output;
+    }
+#endif
+    roundkeyw = cx->expandedKey;
+    /* Step 1: Add Round Key 0 to initial state */
+    COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw++;
+    COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw++;
+    COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw++;
+    COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw++;
+    /* Step 2: Loop over rounds [1..NR-1] */
+    for (r = 1; r < cx->Nr; ++r) {
+        /* Do ShiftRow, ByteSub, and MixColumn all at once */
+        C0 = T0(STATE_BYTE(0)) ^
+             T1(STATE_BYTE(5)) ^
+             T2(STATE_BYTE(10)) ^
+             T3(STATE_BYTE(15));
+        C1 = T0(STATE_BYTE(4)) ^
+             T1(STATE_BYTE(9)) ^
+             T2(STATE_BYTE(14)) ^
+             T3(STATE_BYTE(3));
+        C2 = T0(STATE_BYTE(8)) ^
+             T1(STATE_BYTE(13)) ^
+             T2(STATE_BYTE(2)) ^
+             T3(STATE_BYTE(7));
+        C3 = T0(STATE_BYTE(12)) ^
+             T1(STATE_BYTE(1)) ^
+             T2(STATE_BYTE(6)) ^
+             T3(STATE_BYTE(11));
+        /* Round key addition */
+        COLUMN_0(state) = C0 ^ *roundkeyw++; /* state is overwritten only after all four columns are computed */
+        COLUMN_1(state) = C1 ^ *roundkeyw++;
+        COLUMN_2(state) = C2 ^ *roundkeyw++;
+        COLUMN_3(state) = C3 ^ *roundkeyw++;
+    }
+    /* Step 3: Do the last round */
+    /* Final round has no MixColumn: each mask keeps one byte of a T entry */
+    C0 = ((BYTE0WORD(T2(STATE_BYTE(0)))) |
+          (BYTE1WORD(T3(STATE_BYTE(5)))) |
+          (BYTE2WORD(T0(STATE_BYTE(10)))) |
+          (BYTE3WORD(T1(STATE_BYTE(15))))) ^
+         *roundkeyw++;
+    C1 = ((BYTE0WORD(T2(STATE_BYTE(4)))) |
+          (BYTE1WORD(T3(STATE_BYTE(9)))) |
+          (BYTE2WORD(T0(STATE_BYTE(14)))) |
+          (BYTE3WORD(T1(STATE_BYTE(3))))) ^
+         *roundkeyw++;
+    C2 = ((BYTE0WORD(T2(STATE_BYTE(8)))) |
+          (BYTE1WORD(T3(STATE_BYTE(13)))) |
+          (BYTE2WORD(T0(STATE_BYTE(2)))) |
+          (BYTE3WORD(T1(STATE_BYTE(7))))) ^
+         *roundkeyw++;
+    C3 = ((BYTE0WORD(T2(STATE_BYTE(12)))) |
+          (BYTE1WORD(T3(STATE_BYTE(1)))) |
+          (BYTE2WORD(T0(STATE_BYTE(6)))) |
+          (BYTE3WORD(T1(STATE_BYTE(11))))) ^
+         *roundkeyw++;
+    *((PRUint32 *)pOut) = C0;
+    *((PRUint32 *)(pOut + 4)) = C1;
+    *((PRUint32 *)(pOut + 8)) = C2;
+    *((PRUint32 *)(pOut + 12)) = C3;
+#if defined(NSS_X86_OR_X64)
+#undef pIn
+#undef pOut
+#else
+    if ((ptrdiff_t)output & 0x3) {
+        memcpy(output, outBuf, sizeof outBuf); /* copy the staged block to the unaligned destination */
+    }
+#endif
+    return SECSuccess; /* no failure path in this function */
+}
+
+static SECStatus NO_SANITIZE_ALIGNMENT
+rijndael_decryptBlock128(AESContext *cx,
+                         unsigned char *output,
+                         const unsigned char *input)
+{
+    int r;
+    PRUint32 *roundkeyw; /* walks backward through cx->expandedKey */
+    rijndael_state state;
+    PRUint32 C0, C1, C2, C3; /* the four columns of the next state */
+#if defined(NSS_X86_OR_X64) /* here the caller's buffers are used as they are */
+#define pIn input
+#define pOut output
+#else
+    unsigned char *pIn, *pOut;
+    PRUint32 inBuf[4], outBuf[4];
+
+    if ((ptrdiff_t)input & 0x3) { /* input not 4-byte aligned: read from an aligned copy */
+        memcpy(inBuf, input, sizeof inBuf);
+        pIn = (unsigned char *)inBuf;
+    } else {
+        pIn = (unsigned char *)input;
+    }
+    if ((ptrdiff_t)output & 0x3) { /* output not 4-byte aligned: stage the result in outBuf */
+        pOut = (unsigned char *)outBuf;
+    } else {
+        pOut = (unsigned char *)output;
+    }
+#endif
+    roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3; /* fourth word of round key Nr; the +3 presumes 4-word round keys */
+    /* reverse the final key addition */
+    COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw--;
+    COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw--;
+    COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw--;
+    COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw--;
+    /* Loop over rounds in reverse [NR..1] */
+    for (r = cx->Nr; r > 1; --r) {
+        /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */
+        C0 = TInv0(STATE_BYTE(0)) ^
+             TInv1(STATE_BYTE(13)) ^
+             TInv2(STATE_BYTE(10)) ^
+             TInv3(STATE_BYTE(7));
+        C1 = TInv0(STATE_BYTE(4)) ^
+             TInv1(STATE_BYTE(1)) ^
+             TInv2(STATE_BYTE(14)) ^
+             TInv3(STATE_BYTE(11));
+        C2 = TInv0(STATE_BYTE(8)) ^
+             TInv1(STATE_BYTE(5)) ^
+             TInv2(STATE_BYTE(2)) ^
+             TInv3(STATE_BYTE(15));
+        C3 = TInv0(STATE_BYTE(12)) ^
+             TInv1(STATE_BYTE(9)) ^
+             TInv2(STATE_BYTE(6)) ^
+             TInv3(STATE_BYTE(3));
+        /* Invert the key addition step */
+        COLUMN_3(state) = C3 ^ *roundkeyw--; /* state is overwritten only after all four columns are computed */
+        COLUMN_2(state) = C2 ^ *roundkeyw--;
+        COLUMN_1(state) = C1 ^ *roundkeyw--;
+        COLUMN_0(state) = C0 ^ *roundkeyw--;
+    }
+    /* inverse sub */
+    pOut[0] = SINV(STATE_BYTE(0)); /* the state byte order below also undoes the row shift */
+    pOut[1] = SINV(STATE_BYTE(13));
+    pOut[2] = SINV(STATE_BYTE(10));
+    pOut[3] = SINV(STATE_BYTE(7));
+    pOut[4] = SINV(STATE_BYTE(4));
+    pOut[5] = SINV(STATE_BYTE(1));
+    pOut[6] = SINV(STATE_BYTE(14));
+    pOut[7] = SINV(STATE_BYTE(11));
+    pOut[8] = SINV(STATE_BYTE(8));
+    pOut[9] = SINV(STATE_BYTE(5));
+    pOut[10] = SINV(STATE_BYTE(2));
+    pOut[11] = SINV(STATE_BYTE(15));
+    pOut[12] = SINV(STATE_BYTE(12));
+    pOut[13] = SINV(STATE_BYTE(9));
+    pOut[14] = SINV(STATE_BYTE(6));
+    pOut[15] = SINV(STATE_BYTE(3));
+    /* final key addition */
+    *((PRUint32 *)(pOut + 12)) ^= *roundkeyw--;
+    *((PRUint32 *)(pOut + 8)) ^= *roundkeyw--;
+    *((PRUint32 *)(pOut + 4)) ^= *roundkeyw--;
+    *((PRUint32 *)pOut) ^= *roundkeyw--;
+#if defined(NSS_X86_OR_X64)
+#undef pIn
+#undef pOut
+#else
+    if ((ptrdiff_t)output & 0x3) {
+        memcpy(output, outBuf, sizeof outBuf); /* copy the staged block to the unaligned destination */
+    }
+#endif
+    return SECSuccess; /* no failure path in this function */
+}
+
+/**************************************************************************
+ *
+ * Stuff related to general Rijndael encryption/decryption, for blocksizes
+ * greater than 128 bits.
+ *
+ * XXX This code is currently untested!  So far, AES specs have only been
+ *     released for 128 bit blocksizes.  This will be tested, but for now
+ *     only the code above has been tested using known values.
+ *
+ *************************************************************************/
+
+#define COLUMN(array, j) (*((PRUint32 *)((array) + (j)))) /* the PRUint32 located at array + j */
+
+SECStatus
+rijndael_encryptBlock(AESContext *cx,
+                      unsigned char *output,
+                      const unsigned char *input)
+{
+    return SECFailure; /* always fails, without touching output; nothing below is reached */
+#ifdef rijndael_large_blocks_fixed /* NOTE(review): body uses STATE_BYTE but declares no state variable */
+    unsigned int j, r, Nb;
+    unsigned int c2 = 0, c3 = 0;
+    PRUint32 *roundkeyw;
+    PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE];
+    Nb = cx->Nb;
+    roundkeyw = cx->expandedKey;
+    /* Step 1: Add Round Key 0 to initial state */
+    for (j = 0; j < 4 * Nb; j += 4) {
+        COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw++;
+    }
+    /* Step 2: Loop over rounds [1..NR-1] */
+    for (r = 1; r < cx->Nr; ++r) {
+        for (j = 0; j < Nb; ++j) {
+            COLUMN(output, j) = T0(STATE_BYTE(4 * j)) ^
+                                T1(STATE_BYTE(4 * ((j + 1) % Nb) + 1)) ^
+                                T2(STATE_BYTE(4 * ((j + c2) % Nb) + 2)) ^
+                                T3(STATE_BYTE(4 * ((j + c3) % Nb) + 3));
+        }
+        for (j = 0; j < 4 * Nb; j += 4) {
+            COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw++;
+        }
+    }
+    /* Step 3: Do the last round */
+    /* Final round does not employ MixColumn */
+    for (j = 0; j < Nb; ++j) {
+        COLUMN(output, j) = ((BYTE0WORD(T2(STATE_BYTE(4 * j)))) |
+                             (BYTE1WORD(T3(STATE_BYTE(4 * (j + 1) % Nb) + 1))) |
+                             (BYTE2WORD(T0(STATE_BYTE(4 * (j + c2) % Nb) + 2))) |
+                             (BYTE3WORD(T1(STATE_BYTE(4 * (j + c3) % Nb) + 3)))) ^
+                            *roundkeyw++;
+    }
+    return SECSuccess;
+#endif
+}
+
+SECStatus
+rijndael_decryptBlock(AESContext *cx,
+                      unsigned char *output,
+                      const unsigned char *input)
+{
+    return SECFailure; /* always fails, without touching output; nothing below is reached */
+#ifdef rijndael_large_blocks_fixed /* NOTE(review): body uses STATE_BYTE but declares no state variable */
+    int j, r, Nb;
+    int c2 = 0, c3 = 0;
+    PRUint32 *roundkeyw;
+    PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE];
+    Nb = cx->Nb;
+    roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3;
+    /* reverse key addition */
+    for (j = 4 * Nb; j >= 0; j -= 4) { /* NOTE(review): starts at 4 * Nb, one word past the last column -- confirm */
+        COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw--;
+    }
+    /* Loop over rounds in reverse [NR..1] */
+    for (r = cx->Nr; r > 1; --r) {
+        /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */
+        for (j = 0; j < Nb; ++j) {
+            COLUMN(output, 4 * j) = TInv0(STATE_BYTE(4 * j)) ^
+                                    TInv1(STATE_BYTE(4 * (j + Nb - 1) % Nb) + 1) ^
+                                    TInv2(STATE_BYTE(4 * (j + Nb - c2) % Nb) + 2) ^
+                                    TInv3(STATE_BYTE(4 * (j + Nb - c3) % Nb) + 3);
+        }
+        /* Invert the key addition step */
+        for (j = 4 * Nb; j >= 0; j -= 4) {
+            COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw--;
+        }
+    }
+    /* inverse sub */
+    for (j = 0; j < 4 * Nb; ++j) {
+        output[j] = SINV(clone[j]);
+    }
+    /* final key addition */
+    for (j = 4 * Nb; j >= 0; j -= 4) {
+        COLUMN(output, j) ^= *roundkeyw--;
+    }
+    return SECSuccess;
+#endif
+}
+
+/**************************************************************************
+ *
+ *  Rijndael modes of operation (ECB and CBC)
+ *
+ *************************************************************************/
+
+static SECStatus
+rijndael_encryptECB(AESContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen,
+                    unsigned int blocksize)
+{
+    /* ECB: each block is enciphered independently of the others.
+     * outputLen and maxOutputLen are accepted but not consulted here. */
+    AESBlockFunc *blockFn = &rijndael_encryptBlock;
+    SECStatus status;
+
+    if (blocksize == RIJNDAEL_MIN_BLOCKSIZE)
+        blockFn = &rijndael_encryptBlock128;
+    for (; inputLen > 0; inputLen -= blocksize) {
+        status = (*blockFn)(cx, output, input);
+        if (status != SECSuccess)
+            return status;
+        output += blocksize;
+        input += blocksize;
+    }
+    return SECSuccess;
+}
+
+static SECStatus
+rijndael_encryptCBC(AESContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen,
+                    unsigned int blocksize)
+{
+    /* CBC encryption: every plaintext block is XORed with the previous
+     * ciphertext block (cx->iv for the first) and then enciphered.  On
+     * success cx->iv receives the last ciphertext block produced. */
+    unsigned char xored[RIJNDAEL_MAX_STATE_SIZE * 8];
+    AESBlockFunc *blockFn = &rijndael_encryptBlock;
+    unsigned char *chain;
+    SECStatus status;
+    unsigned int k;
+
+    if (inputLen == 0)
+        return SECSuccess;
+    if (blocksize == RIJNDAEL_MIN_BLOCKSIZE)
+        blockFn = &rijndael_encryptBlock128;
+    chain = cx->iv;
+    do {
+        for (k = 0; k < blocksize; ++k)
+            xored[k] = input[k] ^ chain[k];
+        status = (*blockFn)(cx, output, xored);
+        if (status != SECSuccess)
+            return status;
+        /* the block just written chains into the next one */
+        chain = output;
+        output += blocksize;
+        input += blocksize;
+        inputLen -= blocksize;
+    } while (inputLen > 0);
+    memcpy(cx->iv, chain, blocksize);
+    return SECSuccess;
+}
+
+static SECStatus
+rijndael_decryptECB(AESContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen,
+                    unsigned int blocksize)
+{
+    /* ECB: each block is deciphered independently of the others.
+     * outputLen and maxOutputLen are accepted but not consulted here. */
+    AESBlockFunc *blockFn = &rijndael_decryptBlock;
+    SECStatus status;
+
+    if (blocksize == RIJNDAEL_MIN_BLOCKSIZE)
+        blockFn = &rijndael_decryptBlock128;
+    for (; inputLen > 0; inputLen -= blocksize) {
+        status = (*blockFn)(cx, output, input);
+        if (status != SECSuccess)
+            return status;
+        output += blocksize;
+        input += blocksize;
+    }
+    return SECSuccess;
+}
+
+static SECStatus
+rijndael_decryptCBC(AESContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen,
+                    unsigned int blocksize)
+{
+    SECStatus rv;
+    AESBlockFunc *decryptor;
+    const unsigned char *in; /* ciphertext block being processed */
+    unsigned char *out;      /* where its plaintext is written */
+    unsigned int j;
+    unsigned char newIV[RIJNDAEL_MAX_BLOCKSIZE]; /* last ciphertext block, saved before any output is written */
+
+    if (!inputLen)
+        return SECSuccess;
+    PORT_Assert(output - input >= 0 || input - output >= (int)inputLen); /* output may not start before input unless inputLen or more bytes apart */
+    decryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE)
+                    ? &rijndael_decryptBlock128
+                    : &rijndael_decryptBlock;
+    in = input + (inputLen - blocksize); /* start at the last block and walk backwards */
+    memcpy(newIV, in, blocksize);
+    out = output + (inputLen - blocksize);
+    while (inputLen > blocksize) {
+        rv = (*decryptor)(cx, out, in);
+        if (rv != SECSuccess)
+            return rv; /* cx->iv is not updated on this path */
+        for (j = 0; j < blocksize; ++j)
+            out[j] ^= in[(int)(j - blocksize)]; /* negative index: byte j of the preceding ciphertext block */
+        out -= blocksize;
+        in -= blocksize;
+        inputLen -= blocksize;
+    }
+    if (in == input) { /* the first block is chained with cx->iv instead */
+        rv = (*decryptor)(cx, out, in);
+        if (rv != SECSuccess)
+            return rv;
+        for (j = 0; j < blocksize; ++j)
+            out[j] ^= cx->iv[j];
+    }
+    memcpy(cx->iv, newIV, blocksize); /* the next call chains from this call's last ciphertext block */
+    return SECSuccess;
+}
+
+/************************************************************************
+ *
+ * BLAPI Interface functions
+ *
+ * The following functions implement the encryption routines defined in
+ * BLAPI for the AES cipher, Rijndael.
+ *
+ ***********************************************************************/
+
+AESContext *
+AES_AllocateContext(void)
+{
+    return PORT_ZNew(AESContext); /* presumably a zero-filled AESContext, or NULL on failure -- confirm against PORT_ZNew */
+}
+
+#ifdef INTEL_GCM
+/*
+ * Adapted from the example code in "How to detect New Instruction support in
+ * the 4th generation Intel Core processor family" by Max Locktyukhin.
+ *
+ * XGETBV:
+ *   Reads an extended control register (XCR) specified by ECX into EDX:EAX.
+ *
+ * check_xcr0_ymm reads XCR0 (ECX = 0), keeps only the low 32 bits (EAX) and
+ * returns true when both bits of the mask 6 (bit 1 and bit 2) are set.
+ * Three variants are provided: MSVC inline assembly for 32-bit x86, the
+ * MSVC _xgetbv() intrinsic otherwise, and GCC-style extended asm for all
+ * other compilers.
+ */
+static PRBool
+check_xcr0_ymm()
+{
+    PRUint32 xcr0;
+#if defined(_MSC_VER)
+#if defined(_M_IX86)
+    __asm {
+        mov ecx, 0
+        xgetbv
+        mov xcr0, eax
+    }
+#else /* MSVC, not 32-bit x86 */
+    xcr0 = (PRUint32)_xgetbv(0); /* Requires VS2010 SP1 or later. */
+#endif
+#else /* not MSVC */
+    __asm__("xgetbv"
+            : "=a"(xcr0)
+            : "c"(0)
+            : "%edx"); /* EDX is listed as clobbered; its value is unused */
+#endif
+    /* Check if xmm and ymm state are enabled in XCR0. */
+    return (xcr0 & 6) == 6;
+}
+#endif /* INTEL_GCM */
+
+/*
+** Initialize a new AES context suitable for AES encryption/decryption in
+** the ECB or CBC mode.
+**
+**  "cx"        the context to fill in; must not be NULL
+**  "key"       the raw key, "keysize" bytes long; must not be NULL
+**  "keysize"   key length in bytes, between RIJNDAEL_MIN_BLOCKSIZE and
+**              RIJNDAEL_MAX_BLOCKSIZE and a multiple of 4
+**  "iv"        initialization vector, "blocksize" bytes are copied from it;
+**              required for NSS_AES_CBC, not read for NSS_AES
+**  "mode"      the mode of operation, which must be NSS_AES or NSS_AES_CBC
+**  "encrypt"   non-zero to set the context up for encryption, zero for
+**              decryption
+**  "blocksize" block length in bytes, same limits as "keysize"
+**
+** Returns SECSuccess, or SECFailure if an argument is invalid
+** (SEC_ERROR_INVALID_ARGS), the expanded key would not fit in the context
+** (SEC_ERROR_LIBRARY_FAILURE), table generation fails, or key expansion
+** fails.
+*/
+static SECStatus
+aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize,
+                const unsigned char *iv, int mode, unsigned int encrypt,
+                unsigned int blocksize)
+{
+    unsigned int Nk;
+    /* According to Rijndael AES Proposal, section 12.1, block and key
+     * lengths between 128 and 256 bits are supported, as long as the
+     * length in bytes is divisible by 4.
+     */
+    if (key == NULL ||
+        keysize < RIJNDAEL_MIN_BLOCKSIZE ||
+        keysize > RIJNDAEL_MAX_BLOCKSIZE ||
+        keysize % 4 != 0 ||
+        blocksize < RIJNDAEL_MIN_BLOCKSIZE ||
+        blocksize > RIJNDAEL_MAX_BLOCKSIZE ||
+        blocksize % 4 != 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if (mode != NSS_AES && mode != NSS_AES_CBC) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if (mode == NSS_AES_CBC && iv == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if (!cx) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+#ifdef USE_HW_AES
+    /* has_intel_* are tri-state: 0 = not probed yet, 1 = available,
+     * -1 = unavailable or disabled.  Probe only on the first call. */
+    if (has_intel_aes == 0) {
+        unsigned long eax, ebx, ecx, edx;
+        char *disable_hw_aes = PR_GetEnvSecure("NSS_DISABLE_HW_AES");
+
+        if (disable_hw_aes == NULL) {
+            /* Feature flags come from ECX of CPUID leaf 1. */
+            freebl_cpuid(1, &eax, &ebx, &ecx, &edx);
+            has_intel_aes = (ecx & (1 << 25)) != 0 ? 1 : -1;
+#ifdef INTEL_GCM
+            has_intel_clmul = (ecx & (1 << 1)) != 0 ? 1 : -1;
+            /* AVX is used only if ECX bits 27 and 28 are both set and the
+             * xmm/ymm state is enabled in XCR0. */
+            if ((ecx & (1 << 27)) != 0 && (ecx & (1 << 28)) != 0 &&
+                check_xcr0_ymm()) {
+                has_intel_avx = 1;
+            } else {
+                has_intel_avx = -1;
+            }
+#endif
+        } else {
+            /* The environment variable is set: turn everything off. */
+            has_intel_aes = -1;
+#ifdef INTEL_GCM
+            has_intel_avx = -1;
+            has_intel_clmul = -1;
+#endif
+        }
+    }
+    /* The hardware path is taken only for 16-byte blocks and key sizes
+     * that are a multiple of 8 bytes. */
+    use_hw_aes = (PRBool)(has_intel_aes > 0 && (keysize % 8) == 0 && blocksize == 16);
+#ifdef INTEL_GCM
+    use_hw_gcm = (PRBool)(use_hw_aes && has_intel_avx > 0 && has_intel_clmul > 0);
+#endif
+#endif /* USE_HW_AES */
+    /* Nb = (block size in bits) / 32 */
+    cx->Nb = blocksize / 4;
+    /* Nk = (key size in bits) / 32 */
+    Nk = keysize / 4;
+    /* Obtain number of rounds from "table" */
+    cx->Nr = RIJNDAEL_NUM_ROUNDS(Nk, cx->Nb);
+    /* copy in the iv, if necessary */
+    if (mode == NSS_AES_CBC) {
+        memcpy(cx->iv, iv, blocksize);
+#ifdef USE_HW_AES
+        if (use_hw_aes) {
+            cx->worker = (freeblCipherFunc)
+                intel_aes_cbc_worker(encrypt, keysize);
+        } else
+#endif
+        {
+            cx->worker = (freeblCipherFunc)(encrypt
+                                                ? &rijndael_encryptCBC
+                                                : &rijndael_decryptCBC);
+        }
+    } else {
+#ifdef USE_HW_AES
+        if (use_hw_aes) {
+            cx->worker = (freeblCipherFunc)
+                intel_aes_ecb_worker(encrypt, keysize);
+        } else
+#endif
+        {
+            cx->worker = (freeblCipherFunc)(encrypt
+                                                ? &rijndael_encryptECB
+                                                : &rijndael_decryptECB);
+        }
+    }
+    /* The expanded key needs Nb * (Nr + 1) words in cx->expandedKey. */
+    PORT_Assert((cx->Nb * (cx->Nr + 1)) <= RIJNDAEL_MAX_EXP_KEY_SIZE);
+    if ((cx->Nb * (cx->Nr + 1)) > RIJNDAEL_MAX_EXP_KEY_SIZE) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        goto cleanup;
+    }
+#ifdef USE_HW_AES
+    if (use_hw_aes) {
+        intel_aes_init(encrypt, keysize);
+    } else
+#endif
+    {
+
+#if defined(RIJNDAEL_GENERATE_TABLES) || \
+    defined(RIJNDAEL_GENERATE_TABLES_MACRO)
+        /* Build the lookup tables once, on first use. */
+        if (rijndaelTables == NULL) {
+            if (PR_CallOnce(&coRTInit, init_rijndael_tables) != PR_SUCCESS) {
+                return SECFailure;
+            }
+        }
+#endif
+        /* Generate expanded key */
+        if (encrypt) {
+            if (rijndael_key_expansion(cx, key, Nk) != SECSuccess)
+                goto cleanup;
+        } else {
+            if (rijndael_invkey_expansion(cx, key, Nk) != SECSuccess)
+                goto cleanup;
+        }
+    }
+    /* Plain ECB/CBC: the context is its own worker context and needs no
+     * separate destroy function. */
+    cx->worker_cx = cx;
+    cx->destroy = NULL;
+    cx->isBlock = PR_TRUE;
+    return SECSuccess;
+cleanup:
+    return SECFailure;
+}
+
+/*
+ * AES_InitContext
+ *
+ * Initialize a caller-provided AES context for the requested mode.
+ *
+ * The base cipher is set up by aes_InitContext: NSS_AES_CTS is layered on
+ * CBC, while NSS_AES_GCM and NSS_AES_CTR are layered on ECB and always use
+ * the encrypt direction of the base cipher.  For those three modes a
+ * mode-specific worker context is then created, and "iv" is passed on to
+ * the corresponding *_CreateContext routine.  Any other mode value is
+ * handled entirely by aes_InitContext.
+ *
+ * Returns SECSuccess, or SECFailure if cx is NULL (SEC_ERROR_INVALID_ARGS),
+ * if aes_InitContext fails, or if the mode-specific context cannot be
+ * created.  On failure the context has been passed to AES_DestroyContext
+ * with freeit false; the caller still owns the memory.
+ */
+SECStatus
+AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize,
+                const unsigned char *iv, int mode, unsigned int encrypt,
+                unsigned int blocksize)
+{
+    int basemode = mode;
+    PRBool baseencrypt = encrypt;
+    SECStatus rv;
+
+    /* cx is written to below, before aes_InitContext gets a chance to
+     * validate it, so a NULL context has to be rejected here. */
+    if (cx == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    switch (mode) {
+        case NSS_AES_CTS:
+            basemode = NSS_AES_CBC;
+            break;
+        case NSS_AES_GCM:
+        case NSS_AES_CTR:
+            basemode = NSS_AES;
+            baseencrypt = PR_TRUE;
+            break;
+    }
+    /* make sure enough is initialized so we can safely call Destroy */
+    cx->worker_cx = NULL;
+    cx->destroy = NULL;
+    rv = aes_InitContext(cx, key, keysize, iv, basemode,
+                         baseencrypt, blocksize);
+    if (rv != SECSuccess) {
+        AES_DestroyContext(cx, PR_FALSE);
+        return rv;
+    }
+    cx->mode = mode;
+
+    /* finally, set up any mode specific contexts */
+    switch (mode) {
+        case NSS_AES_CTS:
+            cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv, blocksize);
+            cx->worker = (freeblCipherFunc)(encrypt ? CTS_EncryptUpdate : CTS_DecryptUpdate);
+            cx->destroy = (freeblDestroyFunc)CTS_DestroyContext;
+            cx->isBlock = PR_FALSE;
+            break;
+        case NSS_AES_GCM:
+#ifdef INTEL_GCM
+            if (use_hw_gcm) {
+                cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv, blocksize);
+                cx->worker = (freeblCipherFunc)(encrypt ? intel_AES_GCM_EncryptUpdate : intel_AES_GCM_DecryptUpdate);
+                cx->destroy = (freeblDestroyFunc)intel_AES_GCM_DestroyContext;
+                cx->isBlock = PR_FALSE;
+            } else
+#endif
+            {
+                cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv, blocksize);
+                cx->worker = (freeblCipherFunc)(encrypt ? GCM_EncryptUpdate : GCM_DecryptUpdate);
+                cx->destroy = (freeblDestroyFunc)GCM_DestroyContext;
+                cx->isBlock = PR_FALSE;
+            }
+            break;
+        case NSS_AES_CTR:
+            cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv, blocksize);
+#if defined(USE_HW_AES) && defined(_MSC_VER)
+            if (use_hw_aes) {
+                cx->worker = (freeblCipherFunc)CTR_Update_HW_AES;
+            } else
+#endif
+            {
+                cx->worker = (freeblCipherFunc)CTR_Update;
+            }
+            cx->destroy = (freeblDestroyFunc)CTR_DestroyContext;
+            cx->isBlock = PR_FALSE;
+            break;
+        default:
+            /* everything has already been set up by aes_InitContext, just
+             * return */
+            return SECSuccess;
+    }
+    /* check to see if we succeeded in getting the worker context */
+    if (cx->worker_cx == NULL) {
+        /* no, just destroy the existing context */
+        cx->destroy = NULL; /* paranoia: AES_DestroyContext already skips */
+                            /* the destroy call when worker_cx is NULL */
+        AES_DestroyContext(cx, PR_FALSE);
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/* AES_CreateContext
+ *
+ * Allocate an AES context with AES_AllocateContext and initialize it with
+ * AES_InitContext.  Returns the ready-to-use context, or NULL if either
+ * step fails; a context whose initialization failed is destroyed and
+ * freed before returning.
+ */
+AESContext *
+AES_CreateContext(const unsigned char *key, const unsigned char *iv,
+                  int mode, int encrypt,
+                  unsigned int keysize, unsigned int blocksize)
+{
+    AESContext *ctx = AES_AllocateContext();
+
+    if (ctx == NULL) {
+        return NULL;
+    }
+    if (AES_InitContext(ctx, key, keysize, iv, mode, encrypt,
+                        blocksize) != SECSuccess) {
+        AES_DestroyContext(ctx, PR_TRUE);
+        return NULL;
+    }
+    return ctx;
+}
+
+/*
+ * AES_DestroyContext
+ *
+ * Zero an AES cipher context.  If freeit is true, also free the pointer
+ * to the context.
+ *
+ * Any mode-specific worker context is destroyed first through cx->destroy.
+ * The whole context is then overwritten with zeros, because it holds the
+ * IV and the expanded key.  A NULL cx is ignored.
+ */
+void
+AES_DestroyContext(AESContext *cx, PRBool freeit)
+{
+    if (cx == NULL)
+        return;
+    if (cx->worker_cx && cx->destroy) {
+        (*cx->destroy)(cx->worker_cx, PR_TRUE);
+        cx->worker_cx = NULL;
+        cx->destroy = NULL;
+    }
+    /* Scrub key material so it does not linger in memory that is about to
+     * be freed or reused. */
+    memset(cx, 0, sizeof(*cx));
+    if (freeit)
+        PORT_Free(cx);
+}
+
+/*
+ * AES_Encrypt
+ *
+ * Encrypt inputLen bytes of input into output by calling the worker that
+ * was installed when the context was initialized.  The output buffer must
+ * hold at least inputLen bytes (maxOutputLen >= inputLen).  When the
+ * context is in a block mode, inputLen must be a multiple of the block
+ * size.  *outputLen is set to inputLen before the worker is called.
+ */
+SECStatus
+AES_Encrypt(AESContext *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    int blocksize;
+
+    /* A NULL input is tolerated only for a zero-length request. */
+    if (!cx || !output || (!input && inputLen != 0)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Nb is kept in 32-bit words; convert it back to bytes. */
+    blocksize = 4 * cx->Nb;
+    if (cx->isBlock && (inputLen % blocksize) != 0) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (inputLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    *outputLen = inputLen;
+
+#if UINT_MAX > MP_32BIT_MAX
+    /*
+     * GCM is guaranteed not to overflow as long as the input is limited to
+     * 2^36 bytes.  For simplicity the limit applied here is 2^32 for now.
+     *
+     * The check is made here so that it covers both the hardware and the
+     * software GCM implementations.
+     */
+    {
+        PR_STATIC_ASSERT(sizeof(unsigned int) > 4);
+    }
+    if ((cx->mode == NSS_AES_GCM) && (inputLen > MP_32BIT_MAX)) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+#else
+    /* unsigned int cannot exceed 32 bits here, so no length check is needed */
+    {
+        PR_STATIC_ASSERT(sizeof(unsigned int) <= 4);
+    }
+#endif
+
+    return cx->worker(cx->worker_cx, output, outputLen, maxOutputLen,
+                      input, inputLen, blocksize);
+}
+
+/*
+ * AES_Decrypt
+ *
+ * Decrypt inputLen bytes of input into output by calling the worker that
+ * was installed when the context was initialized.  The output buffer must
+ * hold at least inputLen bytes (maxOutputLen >= inputLen).  When the
+ * context is in a block mode, inputLen must be a multiple of the block
+ * size.  *outputLen is set to inputLen before the worker is called.
+ */
+SECStatus
+AES_Decrypt(AESContext *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    int blocksize;
+
+    /* A NULL input is tolerated only for a zero-length request. */
+    if (!cx || !output || (!input && inputLen != 0)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Nb is kept in 32-bit words; convert it back to bytes. */
+    blocksize = 4 * cx->Nb;
+    if (cx->isBlock && (inputLen % blocksize) != 0) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (inputLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    *outputLen = inputLen;
+
+    return cx->worker(cx->worker_cx, output, outputLen, maxOutputLen,
+                      input, inputLen, blocksize);
+}
diff --git a/security/nss/lib/freebl/rijndael.h b/security/nss/lib/freebl/rijndael.h
new file mode 100644
index 000000000..0e14ec2fc
--- /dev/null
+++ b/security/nss/lib/freebl/rijndael.h
@@ -0,0 +1,67 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _RIJNDAEL_H_
+#define _RIJNDAEL_H_ 1
+
+#include "blapii.h"
+
+#define RIJNDAEL_MIN_BLOCKSIZE 16 /* bytes */
+#define RIJNDAEL_MAX_BLOCKSIZE 32 /* bytes */
+
+typedef SECStatus AESBlockFunc(AESContext *cx,
+                               unsigned char *output,
+                               const unsigned char *input);
+
+/* RIJNDAEL_NUM_ROUNDS
+ *
+ * Number of rounds per execution
+ * Nk - key size in 32-bit words (key size in bytes / 4)
+ * Nb - block size in 32-bit words (block size in bytes / 4)
+ */
+#define RIJNDAEL_NUM_ROUNDS(Nk, Nb) \
+    (PR_MAX(Nk, Nb) + 6)
+
+/* RIJNDAEL_MAX_STATE_SIZE
+ *
+ * Maximum number of bytes in the state (spec includes up to 256-bit block
+ * size)
+ */
+#define RIJNDAEL_MAX_STATE_SIZE 32
+
+/*
+ * This magic number is (Nb_max * (Nr_max + 1))
+ * where Nb_max is the maximum block size in 32-bit words,
+ *       Nr_max is the maximum number of rounds, which is Nb_max + 6
+ */
+#define RIJNDAEL_MAX_EXP_KEY_SIZE (8 * 15)
+
+/* AESContextStr
+ *
+ * Values which maintain the state for Rijndael encryption/decryption.
+ *
+ * iv          - initialization vector for CBC mode
+ * Nb          - the block size in 32-bit words (block size in bytes / 4)
+ * Nr          - the number of rounds, from RIJNDAEL_NUM_ROUNDS(Nk, Nb)
+ * expandedKey - the round keys in 4-byte words, Nb * (Nr + 1) words are used
+ * worker      - the encryption/decryption function to use with worker_cx
+ * destroy     - if not NULL, the destroy function to use with worker_cx
+ * worker_cx   - the context for worker and destroy
+ * isBlock     - is the mode of operation a block cipher or a stream cipher?
+ */
+struct AESContextStr {
+    unsigned int Nb;
+    unsigned int Nr;
+    freeblCipherFunc worker;
+    /* NOTE: The offsets of iv and expandedKey are hardcoded in intel-aes.s.
+     * Don't add new members before them without updating intel-aes.s. */
+    unsigned char iv[RIJNDAEL_MAX_BLOCKSIZE];
+    PRUint32 expandedKey[RIJNDAEL_MAX_EXP_KEY_SIZE];
+    freeblDestroyFunc destroy;
+    void *worker_cx;
+    PRBool isBlock;
+    int mode; /* mode value given to AES_InitContext (NSS_AES, NSS_AES_GCM, ...) */
+};
+
+#endif /* _RIJNDAEL_H_ */
diff --git a/security/nss/lib/freebl/rijndael32.tab b/security/nss/lib/freebl/rijndael32.tab
new file mode 100644
index 000000000..59be7c2c0
--- /dev/null
+++ b/security/nss/lib/freebl/rijndael32.tab
@@ -0,0 +1,1219 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef RIJNDAEL_INCLUDE_TABLES
+static const PRUint8 _S[256] = 
+{
+ 99, 124, 119, 123, 242, 107, 111, 197,  48,   1, 103,  43, 254, 215, 171, 118,
+202, 130, 201, 125, 250,  89,  71, 240, 173, 212, 162, 175, 156, 164, 114, 192,
+183, 253, 147,  38,  54,  63, 247, 204,  52, 165, 229, 241, 113, 216,  49,  21,
+  4, 199,  35, 195,  24, 150,   5, 154,   7,  18, 128, 226, 235,  39, 178, 117,
+  9, 131,  44,  26,  27, 110,  90, 160,  82,  59, 214, 179,  41, 227,  47, 132,
+ 83, 209,   0, 237,  32, 252, 177,  91, 106, 203, 190,  57,  74,  76,  88, 207,
+208, 239, 170, 251,  67,  77,  51, 133,  69, 249,   2, 127,  80,  60, 159, 168,
+ 81, 163,  64, 143, 146, 157,  56, 245, 188, 182, 218,  33,  16, 255, 243, 210,
+205,  12,  19, 236,  95, 151,  68,  23, 196, 167, 126,  61, 100,  93,  25, 115,
+ 96, 129,  79, 220,  34,  42, 144, 136,  70, 238, 184,  20, 222,  94,  11, 219,
+224,  50,  58,  10,  73,   6,  36,  92, 194, 211, 172,  98, 145, 149, 228, 121,
+231, 200,  55, 109, 141, 213,  78, 169, 108,  86, 244, 234, 101, 122, 174,   8,
+186, 120,  37,  46,  28, 166, 180, 198, 232, 221, 116,  31,  75, 189, 139, 138,
+112,  62, 181, 102,  72,   3, 246,  14,  97,  53,  87, 185, 134, 193,  29, 158,
+225, 248, 152,  17, 105, 217, 142, 148, 155,  30, 135, 233, 206,  85,  40, 223,
+140, 161, 137,  13, 191, 230,  66, 104,  65, 153,  45,  15, 176,  84, 187,  22 
+};
+#endif /* not RIJNDAEL_INCLUDE_TABLES */
+
+static const PRUint8 _SInv[256] = 
+{
+ 82,   9, 106, 213,  48,  54, 165,  56, 191,  64, 163, 158, 129, 243, 215, 251,
+124, 227,  57, 130, 155,  47, 255, 135,  52, 142,  67,  68, 196, 222, 233, 203,
+ 84, 123, 148,  50, 166, 194,  35,  61, 238,  76, 149,  11,  66, 250, 195,  78,
+  8,  46, 161, 102,  40, 217,  36, 178, 118,  91, 162,  73, 109, 139, 209,  37,
+114, 248, 246, 100, 134, 104, 152,  22, 212, 164,  92, 204,  93, 101, 182, 146,
+108, 112,  72,  80, 253, 237, 185, 218,  94,  21,  70,  87, 167, 141, 157, 132,
+144, 216, 171,   0, 140, 188, 211,  10, 247, 228,  88,   5, 184, 179,  69,   6,
+208,  44,  30, 143, 202,  63,  15,   2, 193, 175, 189,   3,   1,  19, 138, 107,
+ 58, 145,  17,  65,  79, 103, 220, 234, 151, 242, 207, 206, 240, 180, 230, 115,
+150, 172, 116,  34, 231, 173,  53, 133, 226, 249,  55, 232,  28, 117, 223, 110,
+ 71, 241,  26, 113,  29,  41, 197, 137, 111, 183,  98,  14, 170,  24, 190,  27,
+252,  86,  62,  75, 198, 210, 121,  32, 154, 219, 192, 254, 120, 205,  90, 244,
+ 31, 221, 168,  51, 136,   7, 199,  49, 177,  18,  16,  89,  39, 128, 236,  95,
+ 96,  81, 127, 169,  25, 181,  74,  13,  45, 229, 122, 159, 147, 201, 156, 239,
+160, 224,  59,  77, 174,  42, 245, 176, 200, 235, 187,  60, 131,  83, 153,  97,
+ 23,  43,   4, 126, 186, 119, 214,  38, 225, 105,  20,  99,  85,  33,  12, 125 
+};
+
+#ifdef RIJNDAEL_INCLUDE_TABLES
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _T0[256] = 
+{
+0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6,
+0xb16f6fde, 0x54c5c591, 0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56,
+0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec, 0x45caca8f, 0x9d82821f,
+0x40c9c989, 0x877d7dfa, 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb,
+0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45, 0xbf9c9c23, 0xf7a4a453,
+0x967272e4, 0x5bc0c09b, 0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c,
+0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83, 0x5c343468, 0xf4a5a551,
+0x34e5e5d1, 0x08f1f1f9, 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a,
+0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d, 0x28181830, 0xa1969637,
+0x0f05050a, 0xb59a9a2f, 0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df,
+0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea, 0x1b090912, 0x9e83831d,
+0x742c2c58, 0x2e1a1a34, 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b,
+0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d, 0x7b292952, 0x3ee3e3dd,
+0x712f2f5e, 0x97848413, 0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1,
+0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6, 0xbe6a6ad4, 0x46cbcb8d,
+0xd9bebe67, 0x4b393972, 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85,
+0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed, 0xc5434386, 0xd74d4d9a,
+0x55333366, 0x94858511, 0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe,
+0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b, 0xf35151a2, 0xfea3a35d,
+0xc0404080, 0x8a8f8f05, 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1,
+0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142, 0x30101020, 0x1affffe5,
+0x0ef3f3fd, 0x6dd2d2bf, 0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3,
+0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e, 0x57c4c493, 0xf2a7a755,
+0x827e7efc, 0x473d3d7a, 0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6,
+0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3, 0x66222244, 0x7e2a2a54,
+0xab90903b, 0x8388880b, 0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428,
+0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad, 0x3be0e0db, 0x56323264,
+0x4e3a3a74, 0x1e0a0a14, 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8,
+0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4, 0xa8919139, 0xa4959531,
+0x37e4e4d3, 0x8b7979f2, 0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda,
+0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949, 0xb46c6cd8, 0xfa5656ac,
+0x07f4f4f3, 0x25eaeacf, 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810,
+0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c, 0x241c1c38, 0xf1a6a657,
+0xc7b4b473, 0x51c6c697, 0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e,
+0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f, 0x907070e0, 0x423e3e7c,
+0xc4b5b571, 0xaa6666cc, 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c,
+0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969, 0x91868617, 0x58c1c199,
+0x271d1d3a, 0xb99e9e27, 0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122,
+0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433, 0xb69b9b2d, 0x221e1e3c,
+0x92878715, 0x20e9e9c9, 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5,
+0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a, 0xdabfbf65, 0x31e6e6d7,
+0xc6424284, 0xb86868d0, 0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e,
+0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c  
+};
+#else
+static const PRUint32 _T0[256] = 
+{
+0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, 0xfff2f20d, 0xd66b6bbd,
+0xde6f6fb1, 0x91c5c554, 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
+0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, 0x8fcaca45, 0x1f82829d,
+0x89c9c940, 0xfa7d7d87, 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
+0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, 0x239c9cbf, 0x53a4a4f7,
+0xe4727296, 0x9bc0c05b, 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
+0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, 0x6834345c, 0x51a5a5f4,
+0xd1e5e534, 0xf9f1f108, 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
+0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, 0x30181828, 0x379696a1,
+0x0a05050f, 0x2f9a9ab5, 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
+0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, 0x1209091b, 0x1d83839e,
+0x582c2c74, 0x341a1a2e, 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
+0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, 0x5229297b, 0xdde3e33e,
+0x5e2f2f71, 0x13848497, 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
+0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, 0xd46a6abe, 0x8dcbcb46,
+0x67bebed9, 0x7239394b, 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
+0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, 0x864343c5, 0x9a4d4dd7,
+0x66333355, 0x11858594, 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
+0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, 0xa25151f3, 0x5da3a3fe,
+0x804040c0, 0x058f8f8a, 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
+0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, 0x20101030, 0xe5ffff1a,
+0xfdf3f30e, 0xbfd2d26d, 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
+0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, 0x93c4c457, 0x55a7a7f2,
+0xfc7e7e82, 0x7a3d3d47, 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
+0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, 0x44222266, 0x542a2a7e,
+0x3b9090ab, 0x0b888883, 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
+0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, 0xdbe0e03b, 0x64323256,
+0x743a3a4e, 0x140a0a1e, 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
+0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, 0x399191a8, 0x319595a4,
+0xd3e4e437, 0xf279798b, 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
+0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, 0xd86c6cb4, 0xac5656fa,
+0xf3f4f407, 0xcfeaea25, 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
+0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, 0x381c1c24, 0x57a6a6f1,
+0x73b4b4c7, 0x97c6c651, 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
+0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, 0xe0707090, 0x7c3e3e42,
+0x71b5b5c4, 0xcc6666aa, 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
+0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, 0x17868691, 0x99c1c158,
+0x3a1d1d27, 0x279e9eb9, 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
+0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, 0x2d9b9bb6, 0x3c1e1e22,
+0x15878792, 0xc9e9e920, 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
+0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, 0x65bfbfda, 0xd7e6e631,
+0x844242c6, 0xd06868b8, 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
+0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a  
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _T1[256] = 
+{
+0x6363c6a5, 0x7c7cf884, 0x7777ee99, 0x7b7bf68d, 0xf2f2ff0d, 0x6b6bd6bd,
+0x6f6fdeb1, 0xc5c59154, 0x30306050, 0x01010203, 0x6767cea9, 0x2b2b567d,
+0xfefee719, 0xd7d7b562, 0xabab4de6, 0x7676ec9a, 0xcaca8f45, 0x82821f9d,
+0xc9c98940, 0x7d7dfa87, 0xfafaef15, 0x5959b2eb, 0x47478ec9, 0xf0f0fb0b,
+0xadad41ec, 0xd4d4b367, 0xa2a25ffd, 0xafaf45ea, 0x9c9c23bf, 0xa4a453f7,
+0x7272e496, 0xc0c09b5b, 0xb7b775c2, 0xfdfde11c, 0x93933dae, 0x26264c6a,
+0x36366c5a, 0x3f3f7e41, 0xf7f7f502, 0xcccc834f, 0x3434685c, 0xa5a551f4,
+0xe5e5d134, 0xf1f1f908, 0x7171e293, 0xd8d8ab73, 0x31316253, 0x15152a3f,
+0x0404080c, 0xc7c79552, 0x23234665, 0xc3c39d5e, 0x18183028, 0x969637a1,
+0x05050a0f, 0x9a9a2fb5, 0x07070e09, 0x12122436, 0x80801b9b, 0xe2e2df3d,
+0xebebcd26, 0x27274e69, 0xb2b27fcd, 0x7575ea9f, 0x0909121b, 0x83831d9e,
+0x2c2c5874, 0x1a1a342e, 0x1b1b362d, 0x6e6edcb2, 0x5a5ab4ee, 0xa0a05bfb,
+0x5252a4f6, 0x3b3b764d, 0xd6d6b761, 0xb3b37dce, 0x2929527b, 0xe3e3dd3e,
+0x2f2f5e71, 0x84841397, 0x5353a6f5, 0xd1d1b968, 0x00000000, 0xededc12c,
+0x20204060, 0xfcfce31f, 0xb1b179c8, 0x5b5bb6ed, 0x6a6ad4be, 0xcbcb8d46,
+0xbebe67d9, 0x3939724b, 0x4a4a94de, 0x4c4c98d4, 0x5858b0e8, 0xcfcf854a,
+0xd0d0bb6b, 0xefefc52a, 0xaaaa4fe5, 0xfbfbed16, 0x434386c5, 0x4d4d9ad7,
+0x33336655, 0x85851194, 0x45458acf, 0xf9f9e910, 0x02020406, 0x7f7ffe81,
+0x5050a0f0, 0x3c3c7844, 0x9f9f25ba, 0xa8a84be3, 0x5151a2f3, 0xa3a35dfe,
+0x404080c0, 0x8f8f058a, 0x92923fad, 0x9d9d21bc, 0x38387048, 0xf5f5f104,
+0xbcbc63df, 0xb6b677c1, 0xdadaaf75, 0x21214263, 0x10102030, 0xffffe51a,
+0xf3f3fd0e, 0xd2d2bf6d, 0xcdcd814c, 0x0c0c1814, 0x13132635, 0xececc32f,
+0x5f5fbee1, 0x979735a2, 0x444488cc, 0x17172e39, 0xc4c49357, 0xa7a755f2,
+0x7e7efc82, 0x3d3d7a47, 0x6464c8ac, 0x5d5dbae7, 0x1919322b, 0x7373e695,
+0x6060c0a0, 0x81811998, 0x4f4f9ed1, 0xdcdca37f, 0x22224466, 0x2a2a547e,
+0x90903bab, 0x88880b83, 0x46468cca, 0xeeeec729, 0xb8b86bd3, 0x1414283c,
+0xdedea779, 0x5e5ebce2, 0x0b0b161d, 0xdbdbad76, 0xe0e0db3b, 0x32326456,
+0x3a3a744e, 0x0a0a141e, 0x494992db, 0x06060c0a, 0x2424486c, 0x5c5cb8e4,
+0xc2c29f5d, 0xd3d3bd6e, 0xacac43ef, 0x6262c4a6, 0x919139a8, 0x959531a4,
+0xe4e4d337, 0x7979f28b, 0xe7e7d532, 0xc8c88b43, 0x37376e59, 0x6d6ddab7,
+0x8d8d018c, 0xd5d5b164, 0x4e4e9cd2, 0xa9a949e0, 0x6c6cd8b4, 0x5656acfa,
+0xf4f4f307, 0xeaeacf25, 0x6565caaf, 0x7a7af48e, 0xaeae47e9, 0x08081018,
+0xbaba6fd5, 0x7878f088, 0x25254a6f, 0x2e2e5c72, 0x1c1c3824, 0xa6a657f1,
+0xb4b473c7, 0xc6c69751, 0xe8e8cb23, 0xdddda17c, 0x7474e89c, 0x1f1f3e21,
+0x4b4b96dd, 0xbdbd61dc, 0x8b8b0d86, 0x8a8a0f85, 0x7070e090, 0x3e3e7c42,
+0xb5b571c4, 0x6666ccaa, 0x484890d8, 0x03030605, 0xf6f6f701, 0x0e0e1c12,
+0x6161c2a3, 0x35356a5f, 0x5757aef9, 0xb9b969d0, 0x86861791, 0xc1c19958,
+0x1d1d3a27, 0x9e9e27b9, 0xe1e1d938, 0xf8f8eb13, 0x98982bb3, 0x11112233,
+0x6969d2bb, 0xd9d9a970, 0x8e8e0789, 0x949433a7, 0x9b9b2db6, 0x1e1e3c22,
+0x87871592, 0xe9e9c920, 0xcece8749, 0x5555aaff, 0x28285078, 0xdfdfa57a,
+0x8c8c038f, 0xa1a159f8, 0x89890980, 0x0d0d1a17, 0xbfbf65da, 0xe6e6d731,
+0x424284c6, 0x6868d0b8, 0x414182c3, 0x999929b0, 0x2d2d5a77, 0x0f0f1e11,
+0xb0b07bcb, 0x5454a8fc, 0xbbbb6dd6, 0x16162c3a  
+};
+#else
+static const PRUint32 _T1[256] = 
+{
+0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, 0x0dfff2f2, 0xbdd66b6b,
+0xb1de6f6f, 0x5491c5c5, 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b,
+0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, 0x458fcaca, 0x9d1f8282,
+0x4089c9c9, 0x87fa7d7d, 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
+0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, 0xbf239c9c, 0xf753a4a4,
+0x96e47272, 0x5b9bc0c0, 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626,
+0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, 0x5c683434, 0xf451a5a5,
+0x34d1e5e5, 0x08f9f1f1, 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
+0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, 0x28301818, 0xa1379696,
+0x0f0a0505, 0xb52f9a9a, 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2,
+0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, 0x1b120909, 0x9e1d8383,
+0x74582c2c, 0x2e341a1a, 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
+0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, 0x7b522929, 0x3edde3e3,
+0x715e2f2f, 0x97138484, 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded,
+0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, 0xbed46a6a, 0x468dcbcb,
+0xd967bebe, 0x4b723939, 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
+0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb, 0xc5864343, 0xd79a4d4d,
+0x55663333, 0x94118585, 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f,
+0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, 0xf3a25151, 0xfe5da3a3,
+0xc0804040, 0x8a058f8f, 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
+0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, 0x30201010, 0x1ae5ffff,
+0x0efdf3f3, 0x6dbfd2d2, 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec,
+0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717, 0x5793c4c4, 0xf255a7a7,
+0x82fc7e7e, 0x477a3d3d, 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
+0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc, 0x66442222, 0x7e542a2a,
+0xab3b9090, 0x830b8888, 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414,
+0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb, 0x3bdbe0e0, 0x56643232,
+0x4e743a3a, 0x1e140a0a, 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
+0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, 0xa8399191, 0xa4319595,
+0x37d3e4e4, 0x8bf27979, 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d,
+0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9, 0xb4d86c6c, 0xfaac5656,
+0x07f3f4f4, 0x25cfeaea, 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
+0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, 0x24381c1c, 0xf157a6a6,
+0xc773b4b4, 0x5197c6c6, 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f,
+0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a, 0x90e07070, 0x427c3e3e,
+0xc471b5b5, 0xaacc6666, 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
+0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, 0x91178686, 0x5899c1c1,
+0x273a1d1d, 0xb9279e9e, 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111,
+0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494, 0xb62d9b9b, 0x223c1e1e,
+0x92158787, 0x20c9e9e9, 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
+0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, 0xda65bfbf, 0x31d7e6e6,
+0xc6844242, 0xb8d06868, 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f,
+0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616  
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _T2[256] = 
+{
+0x63c6a563, 0x7cf8847c, 0x77ee9977, 0x7bf68d7b, 0xf2ff0df2, 0x6bd6bd6b,
+0x6fdeb16f, 0xc59154c5, 0x30605030, 0x01020301, 0x67cea967, 0x2b567d2b,
+0xfee719fe, 0xd7b562d7, 0xab4de6ab, 0x76ec9a76, 0xca8f45ca, 0x821f9d82,
+0xc98940c9, 0x7dfa877d, 0xfaef15fa, 0x59b2eb59, 0x478ec947, 0xf0fb0bf0,
+0xad41ecad, 0xd4b367d4, 0xa25ffda2, 0xaf45eaaf, 0x9c23bf9c, 0xa453f7a4,
+0x72e49672, 0xc09b5bc0, 0xb775c2b7, 0xfde11cfd, 0x933dae93, 0x264c6a26,
+0x366c5a36, 0x3f7e413f, 0xf7f502f7, 0xcc834fcc, 0x34685c34, 0xa551f4a5,
+0xe5d134e5, 0xf1f908f1, 0x71e29371, 0xd8ab73d8, 0x31625331, 0x152a3f15,
+0x04080c04, 0xc79552c7, 0x23466523, 0xc39d5ec3, 0x18302818, 0x9637a196,
+0x050a0f05, 0x9a2fb59a, 0x070e0907, 0x12243612, 0x801b9b80, 0xe2df3de2,
+0xebcd26eb, 0x274e6927, 0xb27fcdb2, 0x75ea9f75, 0x09121b09, 0x831d9e83,
+0x2c58742c, 0x1a342e1a, 0x1b362d1b, 0x6edcb26e, 0x5ab4ee5a, 0xa05bfba0,
+0x52a4f652, 0x3b764d3b, 0xd6b761d6, 0xb37dceb3, 0x29527b29, 0xe3dd3ee3,
+0x2f5e712f, 0x84139784, 0x53a6f553, 0xd1b968d1, 0x00000000, 0xedc12ced,
+0x20406020, 0xfce31ffc, 0xb179c8b1, 0x5bb6ed5b, 0x6ad4be6a, 0xcb8d46cb,
+0xbe67d9be, 0x39724b39, 0x4a94de4a, 0x4c98d44c, 0x58b0e858, 0xcf854acf,
+0xd0bb6bd0, 0xefc52aef, 0xaa4fe5aa, 0xfbed16fb, 0x4386c543, 0x4d9ad74d,
+0x33665533, 0x85119485, 0x458acf45, 0xf9e910f9, 0x02040602, 0x7ffe817f,
+0x50a0f050, 0x3c78443c, 0x9f25ba9f, 0xa84be3a8, 0x51a2f351, 0xa35dfea3,
+0x4080c040, 0x8f058a8f, 0x923fad92, 0x9d21bc9d, 0x38704838, 0xf5f104f5,
+0xbc63dfbc, 0xb677c1b6, 0xdaaf75da, 0x21426321, 0x10203010, 0xffe51aff,
+0xf3fd0ef3, 0xd2bf6dd2, 0xcd814ccd, 0x0c18140c, 0x13263513, 0xecc32fec,
+0x5fbee15f, 0x9735a297, 0x4488cc44, 0x172e3917, 0xc49357c4, 0xa755f2a7,
+0x7efc827e, 0x3d7a473d, 0x64c8ac64, 0x5dbae75d, 0x19322b19, 0x73e69573,
+0x60c0a060, 0x81199881, 0x4f9ed14f, 0xdca37fdc, 0x22446622, 0x2a547e2a,
+0x903bab90, 0x880b8388, 0x468cca46, 0xeec729ee, 0xb86bd3b8, 0x14283c14,
+0xdea779de, 0x5ebce25e, 0x0b161d0b, 0xdbad76db, 0xe0db3be0, 0x32645632,
+0x3a744e3a, 0x0a141e0a, 0x4992db49, 0x060c0a06, 0x24486c24, 0x5cb8e45c,
+0xc29f5dc2, 0xd3bd6ed3, 0xac43efac, 0x62c4a662, 0x9139a891, 0x9531a495,
+0xe4d337e4, 0x79f28b79, 0xe7d532e7, 0xc88b43c8, 0x376e5937, 0x6ddab76d,
+0x8d018c8d, 0xd5b164d5, 0x4e9cd24e, 0xa949e0a9, 0x6cd8b46c, 0x56acfa56,
+0xf4f307f4, 0xeacf25ea, 0x65caaf65, 0x7af48e7a, 0xae47e9ae, 0x08101808,
+0xba6fd5ba, 0x78f08878, 0x254a6f25, 0x2e5c722e, 0x1c38241c, 0xa657f1a6,
+0xb473c7b4, 0xc69751c6, 0xe8cb23e8, 0xdda17cdd, 0x74e89c74, 0x1f3e211f,
+0x4b96dd4b, 0xbd61dcbd, 0x8b0d868b, 0x8a0f858a, 0x70e09070, 0x3e7c423e,
+0xb571c4b5, 0x66ccaa66, 0x4890d848, 0x03060503, 0xf6f701f6, 0x0e1c120e,
+0x61c2a361, 0x356a5f35, 0x57aef957, 0xb969d0b9, 0x86179186, 0xc19958c1,
+0x1d3a271d, 0x9e27b99e, 0xe1d938e1, 0xf8eb13f8, 0x982bb398, 0x11223311,
+0x69d2bb69, 0xd9a970d9, 0x8e07898e, 0x9433a794, 0x9b2db69b, 0x1e3c221e,
+0x87159287, 0xe9c920e9, 0xce8749ce, 0x55aaff55, 0x28507828, 0xdfa57adf,
+0x8c038f8c, 0xa159f8a1, 0x89098089, 0x0d1a170d, 0xbf65dabf, 0xe6d731e6,
+0x4284c642, 0x68d0b868, 0x4182c341, 0x9929b099, 0x2d5a772d, 0x0f1e110f,
+0xb07bcbb0, 0x54a8fc54, 0xbb6dd6bb, 0x162c3a16  
+};
+#else
+static const PRUint32 _T2[256] = 
+{
+0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b, 0xf20dfff2, 0x6bbdd66b,
+0x6fb1de6f, 0xc55491c5, 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b,
+0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76, 0xca458fca, 0x829d1f82,
+0xc94089c9, 0x7d87fa7d, 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
+0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af, 0x9cbf239c, 0xa4f753a4,
+0x7296e472, 0xc05b9bc0, 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26,
+0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc, 0x345c6834, 0xa5f451a5,
+0xe534d1e5, 0xf108f9f1, 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
+0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3, 0x18283018, 0x96a13796,
+0x050f0a05, 0x9ab52f9a, 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2,
+0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75, 0x091b1209, 0x839e1d83,
+0x2c74582c, 0x1a2e341a, 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
+0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3, 0x297b5229, 0xe33edde3,
+0x2f715e2f, 0x84971384, 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed,
+0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b, 0x6abed46a, 0xcb468dcb,
+0xbed967be, 0x394b7239, 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
+0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb, 0x43c58643, 0x4dd79a4d,
+0x33556633, 0x85941185, 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f,
+0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8, 0x51f3a251, 0xa3fe5da3,
+0x40c08040, 0x8f8a058f, 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
+0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221, 0x10302010, 0xff1ae5ff,
+0xf30efdf3, 0xd26dbfd2, 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec,
+0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17, 0xc45793c4, 0xa7f255a7,
+0x7e82fc7e, 0x3d477a3d, 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
+0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc, 0x22664422, 0x2a7e542a,
+0x90ab3b90, 0x88830b88, 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814,
+0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb, 0xe03bdbe0, 0x32566432,
+0x3a4e743a, 0x0a1e140a, 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
+0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462, 0x91a83991, 0x95a43195,
+0xe437d3e4, 0x798bf279, 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d,
+0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9, 0x6cb4d86c, 0x56faac56,
+0xf407f3f4, 0xea25cfea, 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
+0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e, 0x1c24381c, 0xa6f157a6,
+0xb4c773b4, 0xc65197c6, 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f,
+0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a, 0x7090e070, 0x3e427c3e,
+0xb5c471b5, 0x66aacc66, 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
+0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9, 0x86911786, 0xc15899c1,
+0x1d273a1d, 0x9eb9279e, 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211,
+0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394, 0x9bb62d9b, 0x1e223c1e,
+0x87921587, 0xe920c9e9, 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
+0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d, 0xbfda65bf, 0xe631d7e6,
+0x42c68442, 0x68b8d068, 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f,
+0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16  
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _T3[256] = 
+{
+0xc6a56363, 0xf8847c7c, 0xee997777, 0xf68d7b7b, 0xff0df2f2, 0xd6bd6b6b,
+0xdeb16f6f, 0x9154c5c5, 0x60503030, 0x02030101, 0xcea96767, 0x567d2b2b,
+0xe719fefe, 0xb562d7d7, 0x4de6abab, 0xec9a7676, 0x8f45caca, 0x1f9d8282,
+0x8940c9c9, 0xfa877d7d, 0xef15fafa, 0xb2eb5959, 0x8ec94747, 0xfb0bf0f0,
+0x41ecadad, 0xb367d4d4, 0x5ffda2a2, 0x45eaafaf, 0x23bf9c9c, 0x53f7a4a4,
+0xe4967272, 0x9b5bc0c0, 0x75c2b7b7, 0xe11cfdfd, 0x3dae9393, 0x4c6a2626,
+0x6c5a3636, 0x7e413f3f, 0xf502f7f7, 0x834fcccc, 0x685c3434, 0x51f4a5a5,
+0xd134e5e5, 0xf908f1f1, 0xe2937171, 0xab73d8d8, 0x62533131, 0x2a3f1515,
+0x080c0404, 0x9552c7c7, 0x46652323, 0x9d5ec3c3, 0x30281818, 0x37a19696,
+0x0a0f0505, 0x2fb59a9a, 0x0e090707, 0x24361212, 0x1b9b8080, 0xdf3de2e2,
+0xcd26ebeb, 0x4e692727, 0x7fcdb2b2, 0xea9f7575, 0x121b0909, 0x1d9e8383,
+0x58742c2c, 0x342e1a1a, 0x362d1b1b, 0xdcb26e6e, 0xb4ee5a5a, 0x5bfba0a0,
+0xa4f65252, 0x764d3b3b, 0xb761d6d6, 0x7dceb3b3, 0x527b2929, 0xdd3ee3e3,
+0x5e712f2f, 0x13978484, 0xa6f55353, 0xb968d1d1, 0x00000000, 0xc12ceded,
+0x40602020, 0xe31ffcfc, 0x79c8b1b1, 0xb6ed5b5b, 0xd4be6a6a, 0x8d46cbcb,
+0x67d9bebe, 0x724b3939, 0x94de4a4a, 0x98d44c4c, 0xb0e85858, 0x854acfcf,
+0xbb6bd0d0, 0xc52aefef, 0x4fe5aaaa, 0xed16fbfb, 0x86c54343, 0x9ad74d4d,
+0x66553333, 0x11948585, 0x8acf4545, 0xe910f9f9, 0x04060202, 0xfe817f7f,
+0xa0f05050, 0x78443c3c, 0x25ba9f9f, 0x4be3a8a8, 0xa2f35151, 0x5dfea3a3,
+0x80c04040, 0x058a8f8f, 0x3fad9292, 0x21bc9d9d, 0x70483838, 0xf104f5f5,
+0x63dfbcbc, 0x77c1b6b6, 0xaf75dada, 0x42632121, 0x20301010, 0xe51affff,
+0xfd0ef3f3, 0xbf6dd2d2, 0x814ccdcd, 0x18140c0c, 0x26351313, 0xc32fecec,
+0xbee15f5f, 0x35a29797, 0x88cc4444, 0x2e391717, 0x9357c4c4, 0x55f2a7a7,
+0xfc827e7e, 0x7a473d3d, 0xc8ac6464, 0xbae75d5d, 0x322b1919, 0xe6957373,
+0xc0a06060, 0x19988181, 0x9ed14f4f, 0xa37fdcdc, 0x44662222, 0x547e2a2a,
+0x3bab9090, 0x0b838888, 0x8cca4646, 0xc729eeee, 0x6bd3b8b8, 0x283c1414,
+0xa779dede, 0xbce25e5e, 0x161d0b0b, 0xad76dbdb, 0xdb3be0e0, 0x64563232,
+0x744e3a3a, 0x141e0a0a, 0x92db4949, 0x0c0a0606, 0x486c2424, 0xb8e45c5c,
+0x9f5dc2c2, 0xbd6ed3d3, 0x43efacac, 0xc4a66262, 0x39a89191, 0x31a49595,
+0xd337e4e4, 0xf28b7979, 0xd532e7e7, 0x8b43c8c8, 0x6e593737, 0xdab76d6d,
+0x018c8d8d, 0xb164d5d5, 0x9cd24e4e, 0x49e0a9a9, 0xd8b46c6c, 0xacfa5656,
+0xf307f4f4, 0xcf25eaea, 0xcaaf6565, 0xf48e7a7a, 0x47e9aeae, 0x10180808,
+0x6fd5baba, 0xf0887878, 0x4a6f2525, 0x5c722e2e, 0x38241c1c, 0x57f1a6a6,
+0x73c7b4b4, 0x9751c6c6, 0xcb23e8e8, 0xa17cdddd, 0xe89c7474, 0x3e211f1f,
+0x96dd4b4b, 0x61dcbdbd, 0x0d868b8b, 0x0f858a8a, 0xe0907070, 0x7c423e3e,
+0x71c4b5b5, 0xccaa6666, 0x90d84848, 0x06050303, 0xf701f6f6, 0x1c120e0e,
+0xc2a36161, 0x6a5f3535, 0xaef95757, 0x69d0b9b9, 0x17918686, 0x9958c1c1,
+0x3a271d1d, 0x27b99e9e, 0xd938e1e1, 0xeb13f8f8, 0x2bb39898, 0x22331111,
+0xd2bb6969, 0xa970d9d9, 0x07898e8e, 0x33a79494, 0x2db69b9b, 0x3c221e1e,
+0x15928787, 0xc920e9e9, 0x8749cece, 0xaaff5555, 0x50782828, 0xa57adfdf,
+0x038f8c8c, 0x59f8a1a1, 0x09808989, 0x1a170d0d, 0x65dabfbf, 0xd731e6e6,
+0x84c64242, 0xd0b86868, 0x82c34141, 0x29b09999, 0x5a772d2d, 0x1e110f0f,
+0x7bcbb0b0, 0xa8fc5454, 0x6dd6bbbb, 0x2c3a1616  
+};
+#else
+static const PRUint32 _T3[256] = 
+{
+0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6, 0xf2f20dff, 0x6b6bbdd6,
+0x6f6fb1de, 0xc5c55491, 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56,
+0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec, 0xcaca458f, 0x82829d1f,
+0xc9c94089, 0x7d7d87fa, 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
+0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45, 0x9c9cbf23, 0xa4a4f753,
+0x727296e4, 0xc0c05b9b, 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c,
+0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83, 0x34345c68, 0xa5a5f451,
+0xe5e534d1, 0xf1f108f9, 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
+0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d, 0x18182830, 0x9696a137,
+0x05050f0a, 0x9a9ab52f, 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf,
+0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea, 0x09091b12, 0x83839e1d,
+0x2c2c7458, 0x1a1a2e34, 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
+0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d, 0x29297b52, 0xe3e33edd,
+0x2f2f715e, 0x84849713, 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1,
+0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6, 0x6a6abed4, 0xcbcb468d,
+0xbebed967, 0x39394b72, 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
+0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed, 0x4343c586, 0x4d4dd79a,
+0x33335566, 0x85859411, 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe,
+0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b, 0x5151f3a2, 0xa3a3fe5d,
+0x4040c080, 0x8f8f8a05, 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
+0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342, 0x10103020, 0xffff1ae5,
+0xf3f30efd, 0xd2d26dbf, 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3,
+0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e, 0xc4c45793, 0xa7a7f255,
+0x7e7e82fc, 0x3d3d477a, 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
+0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3, 0x22226644, 0x2a2a7e54,
+0x9090ab3b, 0x8888830b, 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28,
+0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad, 0xe0e03bdb, 0x32325664,
+0x3a3a4e74, 0x0a0a1e14, 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
+0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4, 0x9191a839, 0x9595a431,
+0xe4e437d3, 0x79798bf2, 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da,
+0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049, 0x6c6cb4d8, 0x5656faac,
+0xf4f407f3, 0xeaea25cf, 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
+0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c, 0x1c1c2438, 0xa6a6f157,
+0xb4b4c773, 0xc6c65197, 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e,
+0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f, 0x707090e0, 0x3e3e427c,
+0xb5b5c471, 0x6666aacc, 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
+0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069, 0x86869117, 0xc1c15899,
+0x1d1d273a, 0x9e9eb927, 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322,
+0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733, 0x9b9bb62d, 0x1e1e223c,
+0x87879215, 0xe9e920c9, 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
+0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a, 0xbfbfda65, 0xe6e631d7,
+0x4242c684, 0x6868b8d0, 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e,
+0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c  
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _TInv0[256] = 
+{
+0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f,
+0xab58faac, 0x9303e34b, 0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5,
+0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5, 0x495ab1de, 0x671bba25,
+0x980eea45, 0xe1c0fe5d, 0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b,
+0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295, 0x2d83bed4, 0xd3217458,
+0x2969e049, 0x44c8c98e, 0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927,
+0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d, 0x184adf63, 0x82311ae5,
+0x60335197, 0x457f5362, 0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9,
+0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52, 0x23d373ab, 0xe2024b72,
+0x578f1fe3, 0x2aab5566, 0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3,
+0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed, 0x2b1ccf8a, 0x92b479a7,
+0xf0f207f3, 0xa1e2694e, 0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4,
+0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4, 0x39ec830b, 0xaaef6040,
+0x069f715e, 0x51106ebd, 0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d,
+0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060, 0x24fb9819, 0x97e9bdd6,
+0xcc434089, 0x779ed967, 0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879,
+0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000, 0x83868009, 0x48ed2b32,
+0xac70111e, 0x4e725a6c, 0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36,
+0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624, 0xb1670a0c, 0x0fe75793,
+0xd296eeb4, 0x9e919b1b, 0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c,
+0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12, 0x0b0d090e, 0xadc78bf2,
+0xb9a8b62d, 0xc8a91e14, 0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3,
+0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b, 0x7629438b, 0xdcc623cb,
+0x68fcedb6, 0x63f1e4b8, 0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684,
+0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7, 0x4b2f9e1d, 0xf330b2dc,
+0xec52860d, 0xd0e3c177, 0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947,
+0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322, 0xc74e4987, 0xc1d138d9,
+0xfea2ca8c, 0x360bd498, 0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f,
+0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54, 0xc2138df6, 0xe8b8d890,
+0x5ef7392e, 0xf5afc382, 0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf,
+0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb, 0x097826cd, 0xf418596e,
+0x01b79aec, 0xa89a4f83, 0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef,
+0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029, 0xafb2a431, 0x31233f2a,
+0x3094a5c6, 0xc066a235, 0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733,
+0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117, 0x8dd64d76, 0x4db0ef43,
+0x544daacc, 0xdf0496e4, 0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546,
+0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb, 0x5a1d67b3, 0x52d2db92,
+0x335610e9, 0x1347d66d, 0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb,
+0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a, 0x59dfd29c, 0x3f73f255,
+0x79ce1418, 0xbf37c773, 0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478,
+0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2, 0x72c31d16, 0x0c25e2bc,
+0x8b493c28, 0x41950dff, 0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664,
+0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0  
+};
+#else
+static const PRUint32 _TInv0[256] = 
+{
+0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, 0x3bab6bcb, 0x1f9d45f1,
+0xacfa58ab, 0x4be30393, 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
+0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, 0xdeb15a49, 0x25ba1b67,
+0x45ea0e98, 0x5dfec0e1, 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
+0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, 0xd4be832d, 0x587421d3,
+0x49e06929, 0x8ec9c844, 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
+0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, 0x63df4a18, 0xe51a3182,
+0x97513360, 0x62537f45, 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
+0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, 0xab73d323, 0x724b02e2,
+0xe31f8f57, 0x6655ab2a, 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
+0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, 0x8acf1c2b, 0xa779b492,
+0xf307f2f0, 0x4e69e2a1, 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
+0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, 0x0b83ec39, 0x4060efaa,
+0x5e719f06, 0xbd6e1051, 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
+0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, 0x1998fb24, 0xd6bde997,
+0x894043cc, 0x67d99e77, 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
+0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, 0x09808683, 0x322bed48,
+0x1e1170ac, 0x6c5a724e, 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
+0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, 0x0c0a67b1, 0x9357e70f,
+0xb4ee96d2, 0x1b9b919e, 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
+0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, 0x0e090d0b, 0xf28bc7ad,
+0x2db6a8b9, 0x141ea9c8, 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
+0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, 0x8b432976, 0xcb23c6dc,
+0xb6edfc68, 0xb8e4f163, 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
+0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, 0x1d9e2f4b, 0xdcb230f3,
+0x0d8652ec, 0x77c1e3d0, 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
+0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, 0x87494ec7, 0xd938d1c1,
+0x8ccaa2fe, 0x98d40b36, 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
+0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, 0xf68d13c2, 0x90d8b8e8,
+0x2e39f75e, 0x82c3aff5, 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
+0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, 0xcd267809, 0x6e5918f4,
+0xec9ab701, 0x834f9aa8, 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
+0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, 0x31a4b2af, 0x2a3f2331,
+0xc6a59430, 0x35a266c0, 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
+0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, 0x764dd68d, 0x43efb04d,
+0xccaa4d54, 0xe49604df, 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
+0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, 0xb3671d5a, 0x92dbd252,
+0xe9105633, 0x6dd64713, 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
+0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, 0x9cd2df59, 0x55f2733f,
+0x1814ce79, 0x73c737bf, 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
+0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, 0x161dc372, 0xbce2250c,
+0x283c498b, 0xff0d9541, 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
+0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742  
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _TInv1[256] = 
+{
+0xa7f45150, 0x65417e53, 0xa4171ac3, 0x5e273a96, 0x6bab3bcb, 0x459d1ff1,
+0x58faacab, 0x03e34b93, 0xfa302055, 0x6d76adf6, 0x76cc8891, 0x4c02f525,
+0xd7e54ffc, 0xcb2ac5d7, 0x44352680, 0xa362b58f, 0x5ab1de49, 0x1bba2567,
+0x0eea4598, 0xc0fe5de1, 0x752fc302, 0xf04c8112, 0x97468da3, 0xf9d36bc6,
+0x5f8f03e7, 0x9c921595, 0x7a6dbfeb, 0x595295da, 0x83bed42d, 0x217458d3,
+0x69e04929, 0xc8c98e44, 0x89c2756a, 0x798ef478, 0x3e58996b, 0x71b927dd,
+0x4fe1beb6, 0xad88f017, 0xac20c966, 0x3ace7db4, 0x4adf6318, 0x311ae582,
+0x33519760, 0x7f536245, 0x7764b1e0, 0xae6bbb84, 0xa081fe1c, 0x2b08f994,
+0x68487058, 0xfd458f19, 0x6cde9487, 0xf87b52b7, 0xd373ab23, 0x024b72e2,
+0x8f1fe357, 0xab55662a, 0x28ebb207, 0xc2b52f03, 0x7bc5869a, 0x0837d3a5,
+0x872830f2, 0xa5bf23b2, 0x6a0302ba, 0x8216ed5c, 0x1ccf8a2b, 0xb479a792,
+0xf207f3f0, 0xe2694ea1, 0xf4da65cd, 0xbe0506d5, 0x6234d11f, 0xfea6c48a,
+0x532e349d, 0x55f3a2a0, 0xe18a0532, 0xebf6a475, 0xec830b39, 0xef6040aa,
+0x9f715e06, 0x106ebd51, 0x8a213ef9, 0x06dd963d, 0x053eddae, 0xbde64d46,
+0x8d5491b5, 0x5dc47105, 0xd406046f, 0x155060ff, 0xfb981924, 0xe9bdd697,
+0x434089cc, 0x9ed96777, 0x42e8b0bd, 0x8b890788, 0x5b19e738, 0xeec879db,
+0x0a7ca147, 0x0f427ce9, 0x1e84f8c9, 0x00000000, 0x86800983, 0xed2b3248,
+0x70111eac, 0x725a6c4e, 0xff0efdfb, 0x38850f56, 0xd5ae3d1e, 0x392d3627,
+0xd90f0a64, 0xa65c6821, 0x545b9bd1, 0x2e36243a, 0x670a0cb1, 0xe757930f,
+0x96eeb4d2, 0x919b1b9e, 0xc5c0804f, 0x20dc61a2, 0x4b775a69, 0x1a121c16,
+0xba93e20a, 0x2aa0c0e5, 0xe0223c43, 0x171b121d, 0x0d090e0b, 0xc78bf2ad,
+0xa8b62db9, 0xa91e14c8, 0x19f15785, 0x0775af4c, 0xdd99eebb, 0x607fa3fd,
+0x2601f79f, 0xf5725cbc, 0x3b6644c5, 0x7efb5b34, 0x29438b76, 0xc623cbdc,
+0xfcedb668, 0xf1e4b863, 0xdc31d7ca, 0x85634210, 0x22971340, 0x11c68420,
+0x244a857d, 0x3dbbd2f8, 0x32f9ae11, 0xa129c76d, 0x2f9e1d4b, 0x30b2dcf3,
+0x52860dec, 0xe3c177d0, 0x16b32b6c, 0xb970a999, 0x489411fa, 0x64e94722,
+0x8cfca8c4, 0x3ff0a01a, 0x2c7d56d8, 0x903322ef, 0x4e4987c7, 0xd138d9c1,
+0xa2ca8cfe, 0x0bd49836, 0x81f5a6cf, 0xde7aa528, 0x8eb7da26, 0xbfad3fa4,
+0x9d3a2ce4, 0x9278500d, 0xcc5f6a9b, 0x467e5462, 0x138df6c2, 0xb8d890e8,
+0xf7392e5e, 0xafc382f5, 0x805d9fbe, 0x93d0697c, 0x2dd56fa9, 0x1225cfb3,
+0x99acc83b, 0x7d1810a7, 0x639ce86e, 0xbb3bdb7b, 0x7826cd09, 0x18596ef4,
+0xb79aec01, 0x9a4f83a8, 0x6e95e665, 0xe6ffaa7e, 0xcfbc2108, 0xe815efe6,
+0x9be7bad9, 0x366f4ace, 0x099fead4, 0x7cb029d6, 0xb2a431af, 0x233f2a31,
+0x94a5c630, 0x66a235c0, 0xbc4e7437, 0xca82fca6, 0xd090e0b0, 0xd8a73315,
+0x9804f14a, 0xdaec41f7, 0x50cd7f0e, 0xf691172f, 0xd64d768d, 0xb0ef434d,
+0x4daacc54, 0x0496e4df, 0xb5d19ee3, 0x886a4c1b, 0x1f2cc1b8, 0x5165467f,
+0xea5e9d04, 0x358c015d, 0x7487fa73, 0x410bfb2e, 0x1d67b35a, 0xd2db9252,
+0x5610e933, 0x47d66d13, 0x61d79a8c, 0x0ca1377a, 0x14f8598e, 0x3c13eb89,
+0x27a9ceee, 0xc961b735, 0xe51ce1ed, 0xb1477a3c, 0xdfd29c59, 0x73f2553f,
+0xce141879, 0x37c773bf, 0xcdf753ea, 0xaafd5f5b, 0x6f3ddf14, 0xdb447886,
+0xf3afca81, 0xc468b93e, 0x3424382c, 0x40a3c25f, 0xc31d1672, 0x25e2bc0c,
+0x493c288b, 0x950dff41, 0x01a83971, 0xb30c08de, 0xe4b4d89c, 0xc1566490,
+0x84cb7b61, 0xb632d570, 0x5c6c4874, 0x57b8d042  
+};
+#else
+static const PRUint32 _TInv1[256] = 
+{
+0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, 0xcb3bab6b, 0xf11f9d45,
+0xabacfa58, 0x934be303, 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c,
+0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3, 0x49deb15a, 0x6725ba1b,
+0x9845ea0e, 0xe15dfec0, 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9,
+0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259, 0x2dd4be83, 0xd3587421,
+0x2949e069, 0x448ec9c8, 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971,
+0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a, 0x1863df4a, 0x82e51a31,
+0x60975133, 0x4562537f, 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b,
+0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8, 0x23ab73d3, 0xe2724b02,
+0x57e31f8f, 0x2a6655ab, 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708,
+0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682, 0x2b8acf1c, 0x92a779b4,
+0xf0f307f2, 0xa14e69e2, 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe,
+0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb, 0x390b83ec, 0xaa4060ef,
+0x065e719f, 0x51bd6e10, 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd,
+0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015, 0x241998fb, 0x97d6bde9,
+0xcc894043, 0x7767d99e, 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee,
+0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000, 0x83098086, 0x48322bed,
+0xac1e1170, 0x4e6c5a72, 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39,
+0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e, 0xb10c0a67, 0x0f9357e7,
+0xd2b4ee96, 0x9e1b9b91, 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a,
+0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17, 0x0b0e090d, 0xadf28bc7,
+0xb92db6a8, 0xc8141ea9, 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60,
+0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e, 0x768b4329, 0xdccb23c6,
+0x68b6edfc, 0x63b8e4f1, 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611,
+0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1, 0x4b1d9e2f, 0xf3dcb230,
+0xec0d8652, 0xd077c1e3, 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964,
+0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390, 0xc787494e, 0xc1d938d1,
+0xfe8ccaa2, 0x3698d40b, 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf,
+0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46, 0xc2f68d13, 0xe890d8b8,
+0x5e2e39f7, 0xf582c3af, 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512,
+0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb, 0x09cd2678, 0xf46e5918,
+0x01ec9ab7, 0xa8834f9a, 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8,
+0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c, 0xaf31a4b2, 0x312a3f23,
+0x30c6a594, 0xc035a266, 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8,
+0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6, 0x8d764dd6, 0x4d43efb0,
+0x54ccaa4d, 0xdfe49604, 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551,
+0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41, 0x5ab3671d, 0x5292dbd2,
+0x33e91056, 0x136dd647, 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c,
+0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1, 0x599cd2df, 0x3f55f273,
+0x791814ce, 0xbf73c737, 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db,
+0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, 0x72161dc3, 0x0cbce225,
+0x8b283c49, 0x41ff0d95, 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1,
+0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857  
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _TInv2[256] = 
+{
+0xf45150a7, 0x417e5365, 0x171ac3a4, 0x273a965e, 0xab3bcb6b, 0x9d1ff145,
+0xfaacab58, 0xe34b9303, 0x302055fa, 0x76adf66d, 0xcc889176, 0x02f5254c,
+0xe54ffcd7, 0x2ac5d7cb, 0x35268044, 0x62b58fa3, 0xb1de495a, 0xba25671b,
+0xea45980e, 0xfe5de1c0, 0x2fc30275, 0x4c8112f0, 0x468da397, 0xd36bc6f9,
+0x8f03e75f, 0x9215959c, 0x6dbfeb7a, 0x5295da59, 0xbed42d83, 0x7458d321,
+0xe0492969, 0xc98e44c8, 0xc2756a89, 0x8ef47879, 0x58996b3e, 0xb927dd71,
+0xe1beb64f, 0x88f017ad, 0x20c966ac, 0xce7db43a, 0xdf63184a, 0x1ae58231,
+0x51976033, 0x5362457f, 0x64b1e077, 0x6bbb84ae, 0x81fe1ca0, 0x08f9942b,
+0x48705868, 0x458f19fd, 0xde94876c, 0x7b52b7f8, 0x73ab23d3, 0x4b72e202,
+0x1fe3578f, 0x55662aab, 0xebb20728, 0xb52f03c2, 0xc5869a7b, 0x37d3a508,
+0x2830f287, 0xbf23b2a5, 0x0302ba6a, 0x16ed5c82, 0xcf8a2b1c, 0x79a792b4,
+0x07f3f0f2, 0x694ea1e2, 0xda65cdf4, 0x0506d5be, 0x34d11f62, 0xa6c48afe,
+0x2e349d53, 0xf3a2a055, 0x8a0532e1, 0xf6a475eb, 0x830b39ec, 0x6040aaef,
+0x715e069f, 0x6ebd5110, 0x213ef98a, 0xdd963d06, 0x3eddae05, 0xe64d46bd,
+0x5491b58d, 0xc471055d, 0x06046fd4, 0x5060ff15, 0x981924fb, 0xbdd697e9,
+0x4089cc43, 0xd967779e, 0xe8b0bd42, 0x8907888b, 0x19e7385b, 0xc879dbee,
+0x7ca1470a, 0x427ce90f, 0x84f8c91e, 0x00000000, 0x80098386, 0x2b3248ed,
+0x111eac70, 0x5a6c4e72, 0x0efdfbff, 0x850f5638, 0xae3d1ed5, 0x2d362739,
+0x0f0a64d9, 0x5c6821a6, 0x5b9bd154, 0x36243a2e, 0x0a0cb167, 0x57930fe7,
+0xeeb4d296, 0x9b1b9e91, 0xc0804fc5, 0xdc61a220, 0x775a694b, 0x121c161a,
+0x93e20aba, 0xa0c0e52a, 0x223c43e0, 0x1b121d17, 0x090e0b0d, 0x8bf2adc7,
+0xb62db9a8, 0x1e14c8a9, 0xf1578519, 0x75af4c07, 0x99eebbdd, 0x7fa3fd60,
+0x01f79f26, 0x725cbcf5, 0x6644c53b, 0xfb5b347e, 0x438b7629, 0x23cbdcc6,
+0xedb668fc, 0xe4b863f1, 0x31d7cadc, 0x63421085, 0x97134022, 0xc6842011,
+0x4a857d24, 0xbbd2f83d, 0xf9ae1132, 0x29c76da1, 0x9e1d4b2f, 0xb2dcf330,
+0x860dec52, 0xc177d0e3, 0xb32b6c16, 0x70a999b9, 0x9411fa48, 0xe9472264,
+0xfca8c48c, 0xf0a01a3f, 0x7d56d82c, 0x3322ef90, 0x4987c74e, 0x38d9c1d1,
+0xca8cfea2, 0xd498360b, 0xf5a6cf81, 0x7aa528de, 0xb7da268e, 0xad3fa4bf,
+0x3a2ce49d, 0x78500d92, 0x5f6a9bcc, 0x7e546246, 0x8df6c213, 0xd890e8b8,
+0x392e5ef7, 0xc382f5af, 0x5d9fbe80, 0xd0697c93, 0xd56fa92d, 0x25cfb312,
+0xacc83b99, 0x1810a77d, 0x9ce86e63, 0x3bdb7bbb, 0x26cd0978, 0x596ef418,
+0x9aec01b7, 0x4f83a89a, 0x95e6656e, 0xffaa7ee6, 0xbc2108cf, 0x15efe6e8,
+0xe7bad99b, 0x6f4ace36, 0x9fead409, 0xb029d67c, 0xa431afb2, 0x3f2a3123,
+0xa5c63094, 0xa235c066, 0x4e7437bc, 0x82fca6ca, 0x90e0b0d0, 0xa73315d8,
+0x04f14a98, 0xec41f7da, 0xcd7f0e50, 0x91172ff6, 0x4d768dd6, 0xef434db0,
+0xaacc544d, 0x96e4df04, 0xd19ee3b5, 0x6a4c1b88, 0x2cc1b81f, 0x65467f51,
+0x5e9d04ea, 0x8c015d35, 0x87fa7374, 0x0bfb2e41, 0x67b35a1d, 0xdb9252d2,
+0x10e93356, 0xd66d1347, 0xd79a8c61, 0xa1377a0c, 0xf8598e14, 0x13eb893c,
+0xa9ceee27, 0x61b735c9, 0x1ce1ede5, 0x477a3cb1, 0xd29c59df, 0xf2553f73,
+0x141879ce, 0xc773bf37, 0xf753eacd, 0xfd5f5baa, 0x3ddf146f, 0x447886db,
+0xafca81f3, 0x68b93ec4, 0x24382c34, 0xa3c25f40, 0x1d1672c3, 0xe2bc0c25,
+0x3c288b49, 0x0dff4195, 0xa8397101, 0x0c08deb3, 0xb4d89ce4, 0x566490c1,
+0xcb7b6184, 0x32d570b6, 0x6c48745c, 0xb8d04257  
+};
+#else
+static const PRUint32 _TInv2[256] = 
+{
+0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27, 0x6bcb3bab, 0x45f11f9d,
+0x58abacfa, 0x03934be3, 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502,
+0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562, 0x5a49deb1, 0x1b6725ba,
+0x0e9845ea, 0xc0e15dfe, 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3,
+0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552, 0x832dd4be, 0x21d35874,
+0x692949e0, 0xc8448ec9, 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9,
+0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce, 0x4a1863df, 0x3182e51a,
+0x33609751, 0x7f456253, 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908,
+0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b, 0xd323ab73, 0x02e2724b,
+0x8f57e31f, 0xab2a6655, 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337,
+0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16, 0x1c2b8acf, 0xb492a779,
+0xf2f0f307, 0xe2a14e69, 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6,
+0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6, 0xec390b83, 0xefaa4060,
+0x9f065e71, 0x1051bd6e, 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6,
+0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050, 0xfb241998, 0xe997d6bd,
+0x43cc8940, 0x9e7767d9, 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8,
+0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000, 0x86830980, 0xed48322b,
+0x70ac1e11, 0x724e6c5a, 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d,
+0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436, 0x67b10c0a, 0xe70f9357,
+0x96d2b4ee, 0x919e1b9b, 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12,
+0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b, 0x0d0b0e09, 0xc7adf28b,
+0xa8b92db6, 0xa9c8141e, 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f,
+0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb, 0x29768b43, 0xc6dccb23,
+0xfc68b6ed, 0xf163b8e4, 0xdccad731, 0x85104263, 0x22401397, 0x112084c6,
+0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729, 0x2f4b1d9e, 0x30f3dcb2,
+0x52ec0d86, 0xe3d077c1, 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9,
+0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233, 0x4ec78749, 0xd1c1d938,
+0xa2fe8cca, 0x0b3698d4, 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad,
+0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e, 0x13c2f68d, 0xb8e890d8,
+0xf75e2e39, 0xaff582c3, 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25,
+0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b, 0x7809cd26, 0x18f46e59,
+0xb701ec9a, 0x9aa8834f, 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15,
+0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0, 0xb2af31a4, 0x23312a3f,
+0x9430c6a5, 0x66c035a2, 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7,
+0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791, 0xd68d764d, 0xb04d43ef,
+0x4d54ccaa, 0x04dfe496, 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665,
+0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b, 0x1d5ab367, 0xd25292db,
+0x5633e910, 0x47136dd6, 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13,
+0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47, 0xdf599cd2, 0x733f55f2,
+0xce791814, 0x37bf73c7, 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844,
+0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3, 0xc372161d, 0x250cbce2,
+0x498b283c, 0x9541ff0d, 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456,
+0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8  
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _TInv3[256] = 
+{
+0x5150a7f4, 0x7e536541, 0x1ac3a417, 0x3a965e27, 0x3bcb6bab, 0x1ff1459d,
+0xacab58fa, 0x4b9303e3, 0x2055fa30, 0xadf66d76, 0x889176cc, 0xf5254c02,
+0x4ffcd7e5, 0xc5d7cb2a, 0x26804435, 0xb58fa362, 0xde495ab1, 0x25671bba,
+0x45980eea, 0x5de1c0fe, 0xc302752f, 0x8112f04c, 0x8da39746, 0x6bc6f9d3,
+0x03e75f8f, 0x15959c92, 0xbfeb7a6d, 0x95da5952, 0xd42d83be, 0x58d32174,
+0x492969e0, 0x8e44c8c9, 0x756a89c2, 0xf478798e, 0x996b3e58, 0x27dd71b9,
+0xbeb64fe1, 0xf017ad88, 0xc966ac20, 0x7db43ace, 0x63184adf, 0xe582311a,
+0x97603351, 0x62457f53, 0xb1e07764, 0xbb84ae6b, 0xfe1ca081, 0xf9942b08,
+0x70586848, 0x8f19fd45, 0x94876cde, 0x52b7f87b, 0xab23d373, 0x72e2024b,
+0xe3578f1f, 0x662aab55, 0xb20728eb, 0x2f03c2b5, 0x869a7bc5, 0xd3a50837,
+0x30f28728, 0x23b2a5bf, 0x02ba6a03, 0xed5c8216, 0x8a2b1ccf, 0xa792b479,
+0xf3f0f207, 0x4ea1e269, 0x65cdf4da, 0x06d5be05, 0xd11f6234, 0xc48afea6,
+0x349d532e, 0xa2a055f3, 0x0532e18a, 0xa475ebf6, 0x0b39ec83, 0x40aaef60,
+0x5e069f71, 0xbd51106e, 0x3ef98a21, 0x963d06dd, 0xddae053e, 0x4d46bde6,
+0x91b58d54, 0x71055dc4, 0x046fd406, 0x60ff1550, 0x1924fb98, 0xd697e9bd,
+0x89cc4340, 0x67779ed9, 0xb0bd42e8, 0x07888b89, 0xe7385b19, 0x79dbeec8,
+0xa1470a7c, 0x7ce90f42, 0xf8c91e84, 0x00000000, 0x09838680, 0x3248ed2b,
+0x1eac7011, 0x6c4e725a, 0xfdfbff0e, 0x0f563885, 0x3d1ed5ae, 0x3627392d,
+0x0a64d90f, 0x6821a65c, 0x9bd1545b, 0x243a2e36, 0x0cb1670a, 0x930fe757,
+0xb4d296ee, 0x1b9e919b, 0x804fc5c0, 0x61a220dc, 0x5a694b77, 0x1c161a12,
+0xe20aba93, 0xc0e52aa0, 0x3c43e022, 0x121d171b, 0x0e0b0d09, 0xf2adc78b,
+0x2db9a8b6, 0x14c8a91e, 0x578519f1, 0xaf4c0775, 0xeebbdd99, 0xa3fd607f,
+0xf79f2601, 0x5cbcf572, 0x44c53b66, 0x5b347efb, 0x8b762943, 0xcbdcc623,
+0xb668fced, 0xb863f1e4, 0xd7cadc31, 0x42108563, 0x13402297, 0x842011c6,
+0x857d244a, 0xd2f83dbb, 0xae1132f9, 0xc76da129, 0x1d4b2f9e, 0xdcf330b2,
+0x0dec5286, 0x77d0e3c1, 0x2b6c16b3, 0xa999b970, 0x11fa4894, 0x472264e9,
+0xa8c48cfc, 0xa01a3ff0, 0x56d82c7d, 0x22ef9033, 0x87c74e49, 0xd9c1d138,
+0x8cfea2ca, 0x98360bd4, 0xa6cf81f5, 0xa528de7a, 0xda268eb7, 0x3fa4bfad,
+0x2ce49d3a, 0x500d9278, 0x6a9bcc5f, 0x5462467e, 0xf6c2138d, 0x90e8b8d8,
+0x2e5ef739, 0x82f5afc3, 0x9fbe805d, 0x697c93d0, 0x6fa92dd5, 0xcfb31225,
+0xc83b99ac, 0x10a77d18, 0xe86e639c, 0xdb7bbb3b, 0xcd097826, 0x6ef41859,
+0xec01b79a, 0x83a89a4f, 0xe6656e95, 0xaa7ee6ff, 0x2108cfbc, 0xefe6e815,
+0xbad99be7, 0x4ace366f, 0xead4099f, 0x29d67cb0, 0x31afb2a4, 0x2a31233f,
+0xc63094a5, 0x35c066a2, 0x7437bc4e, 0xfca6ca82, 0xe0b0d090, 0x3315d8a7,
+0xf14a9804, 0x41f7daec, 0x7f0e50cd, 0x172ff691, 0x768dd64d, 0x434db0ef,
+0xcc544daa, 0xe4df0496, 0x9ee3b5d1, 0x4c1b886a, 0xc1b81f2c, 0x467f5165,
+0x9d04ea5e, 0x015d358c, 0xfa737487, 0xfb2e410b, 0xb35a1d67, 0x9252d2db,
+0xe9335610, 0x6d1347d6, 0x9a8c61d7, 0x377a0ca1, 0x598e14f8, 0xeb893c13,
+0xceee27a9, 0xb735c961, 0xe1ede51c, 0x7a3cb147, 0x9c59dfd2, 0x553f73f2,
+0x1879ce14, 0x73bf37c7, 0x53eacdf7, 0x5f5baafd, 0xdf146f3d, 0x7886db44,
+0xca81f3af, 0xb93ec468, 0x382c3424, 0xc25f40a3, 0x1672c31d, 0xbc0c25e2,
+0x288b493c, 0xff41950d, 0x397101a8, 0x08deb30c, 0xd89ce4b4, 0x6490c156,
+0x7b6184cb, 0xd570b632, 0x48745c6c, 0xd04257b8  
+};
+#else
+static const PRUint32 _TInv3[256] = 
+{
+0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a, 0xab6bcb3b, 0x9d45f11f,
+0xfa58abac, 0xe303934b, 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5,
+0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5, 0xb15a49de, 0xba1b6725,
+0xea0e9845, 0xfec0e15d, 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b,
+0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95, 0xbe832dd4, 0x7421d358,
+0xe0692949, 0xc9c8448e, 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27,
+0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d, 0xdf4a1863, 0x1a3182e5,
+0x51336097, 0x537f4562, 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9,
+0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752, 0x73d323ab, 0x4b02e272,
+0x1f8f57e3, 0x55ab2a66, 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3,
+0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced, 0xcf1c2b8a, 0x79b492a7,
+0x07f2f0f3, 0x69e2a14e, 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4,
+0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4, 0x83ec390b, 0x60efaa40,
+0x719f065e, 0x6e1051bd, 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d,
+0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60, 0x98fb2419, 0xbde997d6,
+0x4043cc89, 0xd99e7767, 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79,
+0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000, 0x80868309, 0x2bed4832,
+0x1170ac1e, 0x5a724e6c, 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736,
+0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24, 0x0a67b10c, 0x57e70f93,
+0xee96d2b4, 0x9b919e1b, 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c,
+0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12, 0x090d0b0e, 0x8bc7adf2,
+0xb6a8b92d, 0x1ea9c814, 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3,
+0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b, 0x4329768b, 0x23c6dccb,
+0xedfc68b6, 0xe4f163b8, 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084,
+0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7, 0x9e2f4b1d, 0xb230f3dc,
+0x8652ec0d, 0xc1e3d077, 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247,
+0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22, 0x494ec787, 0x38d1c1d9,
+0xcaa2fe8c, 0xd40b3698, 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f,
+0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254, 0x8d13c2f6, 0xd8b8e890,
+0x39f75e2e, 0xc3aff582, 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf,
+0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb, 0x267809cd, 0x5918f46e,
+0x9ab701ec, 0x4f9aa883, 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef,
+0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629, 0xa4b2af31, 0x3f23312a,
+0xa59430c6, 0xa266c035, 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533,
+0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17, 0x4dd68d76, 0xefb04d43,
+0xaa4d54cc, 0x9604dfe4, 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46,
+0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb, 0x671d5ab3, 0xdbd25292,
+0x105633e9, 0xd647136d, 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb,
+0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a, 0xd2df599c, 0xf2733f55,
+0x14ce7918, 0xc737bf73, 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678,
+0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2, 0x1dc37216, 0xe2250cbc,
+0x3c498b28, 0x0d9541ff, 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064,
+0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0  
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _IMXC0[256] = 
+{
+0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12, 0x2c342438, 0x27392d36,
+0x3a2e3624, 0x31233f2a, 0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
+0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a, 0xb0d090e0, 0xbbdd99ee,
+0xa6ca82fc, 0xadc78bf2, 0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
+0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382, 0xc48cfca8, 0xcf81f5a6,
+0xd296eeb4, 0xd99be7ba, 0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
+0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1, 0x23d373ab, 0x28de7aa5,
+0x35c961b7, 0x3ec468b9, 0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
+0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029, 0xe75f8f03, 0xec52860d,
+0xf1459d1f, 0xfa489411, 0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
+0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61, 0xf66d76ad, 0xfd607fa3,
+0xe07764b1, 0xeb7a6dbf, 0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
+0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf, 0x82311ae5, 0x893c13eb,
+0x942b08f9, 0x9f2601f7, 0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
+0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967, 0x1ed5ae3d, 0x15d8a733,
+0x08cfbc21, 0x03c2b52f, 0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
+0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664, 0xa1e2694e, 0xaaef6040,
+0xb7f87b52, 0xbcf5725c, 0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
+0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c, 0x3d06dd96, 0x360bd498,
+0x2b1ccf8a, 0x2011c684, 0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
+0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4, 0x495ab1de, 0x4257b8d0,
+0x5f40a3c2, 0x544daacc, 0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
+0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b, 0xafb2a431, 0xa4bfad3f,
+0xb9a8b62d, 0xb2a5bf23, 0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
+0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3, 0x6b3e5899, 0x60335197,
+0x7d244a85, 0x7629438b, 0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
+0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb, 0x8c61d79a, 0x876cde94,
+0x9a7bc586, 0x9176cc88, 0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
+0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8, 0xf83dbbd2, 0xf330b2dc,
+0xee27a9ce, 0xe52aa0c0, 0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
+0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850, 0x64d90f0a, 0x6fd40604,
+0x72c31d16, 0x79ce1418, 0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
+0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe, 0x2d83bed4, 0x268eb7da,
+0x3b99acc8, 0x3094a5c6, 0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
+0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6, 0xb1670a0c, 0xba6a0302,
+0xa77d1810, 0xac70111e, 0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
+0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e, 0xc53b6644, 0xce366f4a,
+0xd3217458, 0xd82c7d56, 0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
+0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d, 0x2264e947, 0x2969e049,
+0x347efb5b, 0x3f73f255, 0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
+0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5, 0xe6e815ef, 0xede51ce1,
+0xf0f207f3, 0xfbff0efd, 0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
+0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d  
+};
+#else
+static const PRUint32 _IMXC0[256] = 
+{
+0x00000000, 0x0e090d0b, 0x1c121a16, 0x121b171d, 0x3824342c, 0x362d3927,
+0x24362e3a, 0x2a3f2331, 0x70486858, 0x7e416553, 0x6c5a724e, 0x62537f45,
+0x486c5c74, 0x4665517f, 0x547e4662, 0x5a774b69, 0xe090d0b0, 0xee99ddbb,
+0xfc82caa6, 0xf28bc7ad, 0xd8b4e49c, 0xd6bde997, 0xc4a6fe8a, 0xcaaff381,
+0x90d8b8e8, 0x9ed1b5e3, 0x8ccaa2fe, 0x82c3aff5, 0xa8fc8cc4, 0xa6f581cf,
+0xb4ee96d2, 0xbae79bd9, 0xdb3bbb7b, 0xd532b670, 0xc729a16d, 0xc920ac66,
+0xe31f8f57, 0xed16825c, 0xff0d9541, 0xf104984a, 0xab73d323, 0xa57ade28,
+0xb761c935, 0xb968c43e, 0x9357e70f, 0x9d5eea04, 0x8f45fd19, 0x814cf012,
+0x3bab6bcb, 0x35a266c0, 0x27b971dd, 0x29b07cd6, 0x038f5fe7, 0x0d8652ec,
+0x1f9d45f1, 0x119448fa, 0x4be30393, 0x45ea0e98, 0x57f11985, 0x59f8148e,
+0x73c737bf, 0x7dce3ab4, 0x6fd52da9, 0x61dc20a2, 0xad766df6, 0xa37f60fd,
+0xb16477e0, 0xbf6d7aeb, 0x955259da, 0x9b5b54d1, 0x894043cc, 0x87494ec7,
+0xdd3e05ae, 0xd33708a5, 0xc12c1fb8, 0xcf2512b3, 0xe51a3182, 0xeb133c89,
+0xf9082b94, 0xf701269f, 0x4de6bd46, 0x43efb04d, 0x51f4a750, 0x5ffdaa5b,
+0x75c2896a, 0x7bcb8461, 0x69d0937c, 0x67d99e77, 0x3daed51e, 0x33a7d815,
+0x21bccf08, 0x2fb5c203, 0x058ae132, 0x0b83ec39, 0x1998fb24, 0x1791f62f,
+0x764dd68d, 0x7844db86, 0x6a5fcc9b, 0x6456c190, 0x4e69e2a1, 0x4060efaa,
+0x527bf8b7, 0x5c72f5bc, 0x0605bed5, 0x080cb3de, 0x1a17a4c3, 0x141ea9c8,
+0x3e218af9, 0x302887f2, 0x223390ef, 0x2c3a9de4, 0x96dd063d, 0x98d40b36,
+0x8acf1c2b, 0x84c61120, 0xaef93211, 0xa0f03f1a, 0xb2eb2807, 0xbce2250c,
+0xe6956e65, 0xe89c636e, 0xfa877473, 0xf48e7978, 0xdeb15a49, 0xd0b85742,
+0xc2a3405f, 0xccaa4d54, 0x41ecdaf7, 0x4fe5d7fc, 0x5dfec0e1, 0x53f7cdea,
+0x79c8eedb, 0x77c1e3d0, 0x65daf4cd, 0x6bd3f9c6, 0x31a4b2af, 0x3fadbfa4,
+0x2db6a8b9, 0x23bfa5b2, 0x09808683, 0x07898b88, 0x15929c95, 0x1b9b919e,
+0xa17c0a47, 0xaf75074c, 0xbd6e1051, 0xb3671d5a, 0x99583e6b, 0x97513360,
+0x854a247d, 0x8b432976, 0xd134621f, 0xdf3d6f14, 0xcd267809, 0xc32f7502,
+0xe9105633, 0xe7195b38, 0xf5024c25, 0xfb0b412e, 0x9ad7618c, 0x94de6c87,
+0x86c57b9a, 0x88cc7691, 0xa2f355a0, 0xacfa58ab, 0xbee14fb6, 0xb0e842bd,
+0xea9f09d4, 0xe49604df, 0xf68d13c2, 0xf8841ec9, 0xd2bb3df8, 0xdcb230f3,
+0xcea927ee, 0xc0a02ae5, 0x7a47b13c, 0x744ebc37, 0x6655ab2a, 0x685ca621,
+0x42638510, 0x4c6a881b, 0x5e719f06, 0x5078920d, 0x0a0fd964, 0x0406d46f,
+0x161dc372, 0x1814ce79, 0x322bed48, 0x3c22e043, 0x2e39f75e, 0x2030fa55,
+0xec9ab701, 0xe293ba0a, 0xf088ad17, 0xfe81a01c, 0xd4be832d, 0xdab78e26,
+0xc8ac993b, 0xc6a59430, 0x9cd2df59, 0x92dbd252, 0x80c0c54f, 0x8ec9c844,
+0xa4f6eb75, 0xaaffe67e, 0xb8e4f163, 0xb6edfc68, 0x0c0a67b1, 0x02036aba,
+0x10187da7, 0x1e1170ac, 0x342e539d, 0x3a275e96, 0x283c498b, 0x26354480,
+0x7c420fe9, 0x724b02e2, 0x605015ff, 0x6e5918f4, 0x44663bc5, 0x4a6f36ce,
+0x587421d3, 0x567d2cd8, 0x37a10c7a, 0x39a80171, 0x2bb3166c, 0x25ba1b67,
+0x0f853856, 0x018c355d, 0x13972240, 0x1d9e2f4b, 0x47e96422, 0x49e06929,
+0x5bfb7e34, 0x55f2733f, 0x7fcd500e, 0x71c45d05, 0x63df4a18, 0x6dd64713,
+0xd731dcca, 0xd938d1c1, 0xcb23c6dc, 0xc52acbd7, 0xef15e8e6, 0xe11ce5ed,
+0xf307f2f0, 0xfd0efffb, 0xa779b492, 0xa970b999, 0xbb6bae84, 0xb562a38f,
+0x9f5d80be, 0x91548db5, 0x834f9aa8, 0x8d4697a3  
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _IMXC1[256] = 
+{
+0x00000000, 0x0d090e0b, 0x1a121c16, 0x171b121d, 0x3424382c, 0x392d3627,
+0x2e36243a, 0x233f2a31, 0x68487058, 0x65417e53, 0x725a6c4e, 0x7f536245,
+0x5c6c4874, 0x5165467f, 0x467e5462, 0x4b775a69, 0xd090e0b0, 0xdd99eebb,
+0xca82fca6, 0xc78bf2ad, 0xe4b4d89c, 0xe9bdd697, 0xfea6c48a, 0xf3afca81,
+0xb8d890e8, 0xb5d19ee3, 0xa2ca8cfe, 0xafc382f5, 0x8cfca8c4, 0x81f5a6cf,
+0x96eeb4d2, 0x9be7bad9, 0xbb3bdb7b, 0xb632d570, 0xa129c76d, 0xac20c966,
+0x8f1fe357, 0x8216ed5c, 0x950dff41, 0x9804f14a, 0xd373ab23, 0xde7aa528,
+0xc961b735, 0xc468b93e, 0xe757930f, 0xea5e9d04, 0xfd458f19, 0xf04c8112,
+0x6bab3bcb, 0x66a235c0, 0x71b927dd, 0x7cb029d6, 0x5f8f03e7, 0x52860dec,
+0x459d1ff1, 0x489411fa, 0x03e34b93, 0x0eea4598, 0x19f15785, 0x14f8598e,
+0x37c773bf, 0x3ace7db4, 0x2dd56fa9, 0x20dc61a2, 0x6d76adf6, 0x607fa3fd,
+0x7764b1e0, 0x7a6dbfeb, 0x595295da, 0x545b9bd1, 0x434089cc, 0x4e4987c7,
+0x053eddae, 0x0837d3a5, 0x1f2cc1b8, 0x1225cfb3, 0x311ae582, 0x3c13eb89,
+0x2b08f994, 0x2601f79f, 0xbde64d46, 0xb0ef434d, 0xa7f45150, 0xaafd5f5b,
+0x89c2756a, 0x84cb7b61, 0x93d0697c, 0x9ed96777, 0xd5ae3d1e, 0xd8a73315,
+0xcfbc2108, 0xc2b52f03, 0xe18a0532, 0xec830b39, 0xfb981924, 0xf691172f,
+0xd64d768d, 0xdb447886, 0xcc5f6a9b, 0xc1566490, 0xe2694ea1, 0xef6040aa,
+0xf87b52b7, 0xf5725cbc, 0xbe0506d5, 0xb30c08de, 0xa4171ac3, 0xa91e14c8,
+0x8a213ef9, 0x872830f2, 0x903322ef, 0x9d3a2ce4, 0x06dd963d, 0x0bd49836,
+0x1ccf8a2b, 0x11c68420, 0x32f9ae11, 0x3ff0a01a, 0x28ebb207, 0x25e2bc0c,
+0x6e95e665, 0x639ce86e, 0x7487fa73, 0x798ef478, 0x5ab1de49, 0x57b8d042,
+0x40a3c25f, 0x4daacc54, 0xdaec41f7, 0xd7e54ffc, 0xc0fe5de1, 0xcdf753ea,
+0xeec879db, 0xe3c177d0, 0xf4da65cd, 0xf9d36bc6, 0xb2a431af, 0xbfad3fa4,
+0xa8b62db9, 0xa5bf23b2, 0x86800983, 0x8b890788, 0x9c921595, 0x919b1b9e,
+0x0a7ca147, 0x0775af4c, 0x106ebd51, 0x1d67b35a, 0x3e58996b, 0x33519760,
+0x244a857d, 0x29438b76, 0x6234d11f, 0x6f3ddf14, 0x7826cd09, 0x752fc302,
+0x5610e933, 0x5b19e738, 0x4c02f525, 0x410bfb2e, 0x61d79a8c, 0x6cde9487,
+0x7bc5869a, 0x76cc8891, 0x55f3a2a0, 0x58faacab, 0x4fe1beb6, 0x42e8b0bd,
+0x099fead4, 0x0496e4df, 0x138df6c2, 0x1e84f8c9, 0x3dbbd2f8, 0x30b2dcf3,
+0x27a9ceee, 0x2aa0c0e5, 0xb1477a3c, 0xbc4e7437, 0xab55662a, 0xa65c6821,
+0x85634210, 0x886a4c1b, 0x9f715e06, 0x9278500d, 0xd90f0a64, 0xd406046f,
+0xc31d1672, 0xce141879, 0xed2b3248, 0xe0223c43, 0xf7392e5e, 0xfa302055,
+0xb79aec01, 0xba93e20a, 0xad88f017, 0xa081fe1c, 0x83bed42d, 0x8eb7da26,
+0x99acc83b, 0x94a5c630, 0xdfd29c59, 0xd2db9252, 0xc5c0804f, 0xc8c98e44,
+0xebf6a475, 0xe6ffaa7e, 0xf1e4b863, 0xfcedb668, 0x670a0cb1, 0x6a0302ba,
+0x7d1810a7, 0x70111eac, 0x532e349d, 0x5e273a96, 0x493c288b, 0x44352680,
+0x0f427ce9, 0x024b72e2, 0x155060ff, 0x18596ef4, 0x3b6644c5, 0x366f4ace,
+0x217458d3, 0x2c7d56d8, 0x0ca1377a, 0x01a83971, 0x16b32b6c, 0x1bba2567,
+0x38850f56, 0x358c015d, 0x22971340, 0x2f9e1d4b, 0x64e94722, 0x69e04929,
+0x7efb5b34, 0x73f2553f, 0x50cd7f0e, 0x5dc47105, 0x4adf6318, 0x47d66d13,
+0xdc31d7ca, 0xd138d9c1, 0xc623cbdc, 0xcb2ac5d7, 0xe815efe6, 0xe51ce1ed,
+0xf207f3f0, 0xff0efdfb, 0xb479a792, 0xb970a999, 0xae6bbb84, 0xa362b58f,
+0x805d9fbe, 0x8d5491b5, 0x9a4f83a8, 0x97468da3  
+};
+#else
+static const PRUint32 _IMXC1[256] = 
+{
+0x00000000, 0x0b0e090d, 0x161c121a, 0x1d121b17, 0x2c382434, 0x27362d39,
+0x3a24362e, 0x312a3f23, 0x58704868, 0x537e4165, 0x4e6c5a72, 0x4562537f,
+0x74486c5c, 0x7f466551, 0x62547e46, 0x695a774b, 0xb0e090d0, 0xbbee99dd,
+0xa6fc82ca, 0xadf28bc7, 0x9cd8b4e4, 0x97d6bde9, 0x8ac4a6fe, 0x81caaff3,
+0xe890d8b8, 0xe39ed1b5, 0xfe8ccaa2, 0xf582c3af, 0xc4a8fc8c, 0xcfa6f581,
+0xd2b4ee96, 0xd9bae79b, 0x7bdb3bbb, 0x70d532b6, 0x6dc729a1, 0x66c920ac,
+0x57e31f8f, 0x5ced1682, 0x41ff0d95, 0x4af10498, 0x23ab73d3, 0x28a57ade,
+0x35b761c9, 0x3eb968c4, 0x0f9357e7, 0x049d5eea, 0x198f45fd, 0x12814cf0,
+0xcb3bab6b, 0xc035a266, 0xdd27b971, 0xd629b07c, 0xe7038f5f, 0xec0d8652,
+0xf11f9d45, 0xfa119448, 0x934be303, 0x9845ea0e, 0x8557f119, 0x8e59f814,
+0xbf73c737, 0xb47dce3a, 0xa96fd52d, 0xa261dc20, 0xf6ad766d, 0xfda37f60,
+0xe0b16477, 0xebbf6d7a, 0xda955259, 0xd19b5b54, 0xcc894043, 0xc787494e,
+0xaedd3e05, 0xa5d33708, 0xb8c12c1f, 0xb3cf2512, 0x82e51a31, 0x89eb133c,
+0x94f9082b, 0x9ff70126, 0x464de6bd, 0x4d43efb0, 0x5051f4a7, 0x5b5ffdaa,
+0x6a75c289, 0x617bcb84, 0x7c69d093, 0x7767d99e, 0x1e3daed5, 0x1533a7d8,
+0x0821bccf, 0x032fb5c2, 0x32058ae1, 0x390b83ec, 0x241998fb, 0x2f1791f6,
+0x8d764dd6, 0x867844db, 0x9b6a5fcc, 0x906456c1, 0xa14e69e2, 0xaa4060ef,
+0xb7527bf8, 0xbc5c72f5, 0xd50605be, 0xde080cb3, 0xc31a17a4, 0xc8141ea9,
+0xf93e218a, 0xf2302887, 0xef223390, 0xe42c3a9d, 0x3d96dd06, 0x3698d40b,
+0x2b8acf1c, 0x2084c611, 0x11aef932, 0x1aa0f03f, 0x07b2eb28, 0x0cbce225,
+0x65e6956e, 0x6ee89c63, 0x73fa8774, 0x78f48e79, 0x49deb15a, 0x42d0b857,
+0x5fc2a340, 0x54ccaa4d, 0xf741ecda, 0xfc4fe5d7, 0xe15dfec0, 0xea53f7cd,
+0xdb79c8ee, 0xd077c1e3, 0xcd65daf4, 0xc66bd3f9, 0xaf31a4b2, 0xa43fadbf,
+0xb92db6a8, 0xb223bfa5, 0x83098086, 0x8807898b, 0x9515929c, 0x9e1b9b91,
+0x47a17c0a, 0x4caf7507, 0x51bd6e10, 0x5ab3671d, 0x6b99583e, 0x60975133,
+0x7d854a24, 0x768b4329, 0x1fd13462, 0x14df3d6f, 0x09cd2678, 0x02c32f75,
+0x33e91056, 0x38e7195b, 0x25f5024c, 0x2efb0b41, 0x8c9ad761, 0x8794de6c,
+0x9a86c57b, 0x9188cc76, 0xa0a2f355, 0xabacfa58, 0xb6bee14f, 0xbdb0e842,
+0xd4ea9f09, 0xdfe49604, 0xc2f68d13, 0xc9f8841e, 0xf8d2bb3d, 0xf3dcb230,
+0xeecea927, 0xe5c0a02a, 0x3c7a47b1, 0x37744ebc, 0x2a6655ab, 0x21685ca6,
+0x10426385, 0x1b4c6a88, 0x065e719f, 0x0d507892, 0x640a0fd9, 0x6f0406d4,
+0x72161dc3, 0x791814ce, 0x48322bed, 0x433c22e0, 0x5e2e39f7, 0x552030fa,
+0x01ec9ab7, 0x0ae293ba, 0x17f088ad, 0x1cfe81a0, 0x2dd4be83, 0x26dab78e,
+0x3bc8ac99, 0x30c6a594, 0x599cd2df, 0x5292dbd2, 0x4f80c0c5, 0x448ec9c8,
+0x75a4f6eb, 0x7eaaffe6, 0x63b8e4f1, 0x68b6edfc, 0xb10c0a67, 0xba02036a,
+0xa710187d, 0xac1e1170, 0x9d342e53, 0x963a275e, 0x8b283c49, 0x80263544,
+0xe97c420f, 0xe2724b02, 0xff605015, 0xf46e5918, 0xc544663b, 0xce4a6f36,
+0xd3587421, 0xd8567d2c, 0x7a37a10c, 0x7139a801, 0x6c2bb316, 0x6725ba1b,
+0x560f8538, 0x5d018c35, 0x40139722, 0x4b1d9e2f, 0x2247e964, 0x2949e069,
+0x345bfb7e, 0x3f55f273, 0x0e7fcd50, 0x0571c45d, 0x1863df4a, 0x136dd647,
+0xcad731dc, 0xc1d938d1, 0xdccb23c6, 0xd7c52acb, 0xe6ef15e8, 0xede11ce5,
+0xf0f307f2, 0xfbfd0eff, 0x92a779b4, 0x99a970b9, 0x84bb6bae, 0x8fb562a3,
+0xbe9f5d80, 0xb591548d, 0xa8834f9a, 0xa38d4697  
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _IMXC2[256] = 
+{
+0x00000000, 0x090e0b0d, 0x121c161a, 0x1b121d17, 0x24382c34, 0x2d362739,
+0x36243a2e, 0x3f2a3123, 0x48705868, 0x417e5365, 0x5a6c4e72, 0x5362457f,
+0x6c48745c, 0x65467f51, 0x7e546246, 0x775a694b, 0x90e0b0d0, 0x99eebbdd,
+0x82fca6ca, 0x8bf2adc7, 0xb4d89ce4, 0xbdd697e9, 0xa6c48afe, 0xafca81f3,
+0xd890e8b8, 0xd19ee3b5, 0xca8cfea2, 0xc382f5af, 0xfca8c48c, 0xf5a6cf81,
+0xeeb4d296, 0xe7bad99b, 0x3bdb7bbb, 0x32d570b6, 0x29c76da1, 0x20c966ac,
+0x1fe3578f, 0x16ed5c82, 0x0dff4195, 0x04f14a98, 0x73ab23d3, 0x7aa528de,
+0x61b735c9, 0x68b93ec4, 0x57930fe7, 0x5e9d04ea, 0x458f19fd, 0x4c8112f0,
+0xab3bcb6b, 0xa235c066, 0xb927dd71, 0xb029d67c, 0x8f03e75f, 0x860dec52,
+0x9d1ff145, 0x9411fa48, 0xe34b9303, 0xea45980e, 0xf1578519, 0xf8598e14,
+0xc773bf37, 0xce7db43a, 0xd56fa92d, 0xdc61a220, 0x76adf66d, 0x7fa3fd60,
+0x64b1e077, 0x6dbfeb7a, 0x5295da59, 0x5b9bd154, 0x4089cc43, 0x4987c74e,
+0x3eddae05, 0x37d3a508, 0x2cc1b81f, 0x25cfb312, 0x1ae58231, 0x13eb893c,
+0x08f9942b, 0x01f79f26, 0xe64d46bd, 0xef434db0, 0xf45150a7, 0xfd5f5baa,
+0xc2756a89, 0xcb7b6184, 0xd0697c93, 0xd967779e, 0xae3d1ed5, 0xa73315d8,
+0xbc2108cf, 0xb52f03c2, 0x8a0532e1, 0x830b39ec, 0x981924fb, 0x91172ff6,
+0x4d768dd6, 0x447886db, 0x5f6a9bcc, 0x566490c1, 0x694ea1e2, 0x6040aaef,
+0x7b52b7f8, 0x725cbcf5, 0x0506d5be, 0x0c08deb3, 0x171ac3a4, 0x1e14c8a9,
+0x213ef98a, 0x2830f287, 0x3322ef90, 0x3a2ce49d, 0xdd963d06, 0xd498360b,
+0xcf8a2b1c, 0xc6842011, 0xf9ae1132, 0xf0a01a3f, 0xebb20728, 0xe2bc0c25,
+0x95e6656e, 0x9ce86e63, 0x87fa7374, 0x8ef47879, 0xb1de495a, 0xb8d04257,
+0xa3c25f40, 0xaacc544d, 0xec41f7da, 0xe54ffcd7, 0xfe5de1c0, 0xf753eacd,
+0xc879dbee, 0xc177d0e3, 0xda65cdf4, 0xd36bc6f9, 0xa431afb2, 0xad3fa4bf,
+0xb62db9a8, 0xbf23b2a5, 0x80098386, 0x8907888b, 0x9215959c, 0x9b1b9e91,
+0x7ca1470a, 0x75af4c07, 0x6ebd5110, 0x67b35a1d, 0x58996b3e, 0x51976033,
+0x4a857d24, 0x438b7629, 0x34d11f62, 0x3ddf146f, 0x26cd0978, 0x2fc30275,
+0x10e93356, 0x19e7385b, 0x02f5254c, 0x0bfb2e41, 0xd79a8c61, 0xde94876c,
+0xc5869a7b, 0xcc889176, 0xf3a2a055, 0xfaacab58, 0xe1beb64f, 0xe8b0bd42,
+0x9fead409, 0x96e4df04, 0x8df6c213, 0x84f8c91e, 0xbbd2f83d, 0xb2dcf330,
+0xa9ceee27, 0xa0c0e52a, 0x477a3cb1, 0x4e7437bc, 0x55662aab, 0x5c6821a6,
+0x63421085, 0x6a4c1b88, 0x715e069f, 0x78500d92, 0x0f0a64d9, 0x06046fd4,
+0x1d1672c3, 0x141879ce, 0x2b3248ed, 0x223c43e0, 0x392e5ef7, 0x302055fa,
+0x9aec01b7, 0x93e20aba, 0x88f017ad, 0x81fe1ca0, 0xbed42d83, 0xb7da268e,
+0xacc83b99, 0xa5c63094, 0xd29c59df, 0xdb9252d2, 0xc0804fc5, 0xc98e44c8,
+0xf6a475eb, 0xffaa7ee6, 0xe4b863f1, 0xedb668fc, 0x0a0cb167, 0x0302ba6a,
+0x1810a77d, 0x111eac70, 0x2e349d53, 0x273a965e, 0x3c288b49, 0x35268044,
+0x427ce90f, 0x4b72e202, 0x5060ff15, 0x596ef418, 0x6644c53b, 0x6f4ace36,
+0x7458d321, 0x7d56d82c, 0xa1377a0c, 0xa8397101, 0xb32b6c16, 0xba25671b,
+0x850f5638, 0x8c015d35, 0x97134022, 0x9e1d4b2f, 0xe9472264, 0xe0492969,
+0xfb5b347e, 0xf2553f73, 0xcd7f0e50, 0xc471055d, 0xdf63184a, 0xd66d1347,
+0x31d7cadc, 0x38d9c1d1, 0x23cbdcc6, 0x2ac5d7cb, 0x15efe6e8, 0x1ce1ede5,
+0x07f3f0f2, 0x0efdfbff, 0x79a792b4, 0x70a999b9, 0x6bbb84ae, 0x62b58fa3,
+0x5d9fbe80, 0x5491b58d, 0x4f83a89a, 0x468da397  
+};
+#else
+static const PRUint32 _IMXC2[256] = 
+{
+0x00000000, 0x0d0b0e09, 0x1a161c12, 0x171d121b, 0x342c3824, 0x3927362d,
+0x2e3a2436, 0x23312a3f, 0x68587048, 0x65537e41, 0x724e6c5a, 0x7f456253,
+0x5c74486c, 0x517f4665, 0x4662547e, 0x4b695a77, 0xd0b0e090, 0xddbbee99,
+0xcaa6fc82, 0xc7adf28b, 0xe49cd8b4, 0xe997d6bd, 0xfe8ac4a6, 0xf381caaf,
+0xb8e890d8, 0xb5e39ed1, 0xa2fe8cca, 0xaff582c3, 0x8cc4a8fc, 0x81cfa6f5,
+0x96d2b4ee, 0x9bd9bae7, 0xbb7bdb3b, 0xb670d532, 0xa16dc729, 0xac66c920,
+0x8f57e31f, 0x825ced16, 0x9541ff0d, 0x984af104, 0xd323ab73, 0xde28a57a,
+0xc935b761, 0xc43eb968, 0xe70f9357, 0xea049d5e, 0xfd198f45, 0xf012814c,
+0x6bcb3bab, 0x66c035a2, 0x71dd27b9, 0x7cd629b0, 0x5fe7038f, 0x52ec0d86,
+0x45f11f9d, 0x48fa1194, 0x03934be3, 0x0e9845ea, 0x198557f1, 0x148e59f8,
+0x37bf73c7, 0x3ab47dce, 0x2da96fd5, 0x20a261dc, 0x6df6ad76, 0x60fda37f,
+0x77e0b164, 0x7aebbf6d, 0x59da9552, 0x54d19b5b, 0x43cc8940, 0x4ec78749,
+0x05aedd3e, 0x08a5d337, 0x1fb8c12c, 0x12b3cf25, 0x3182e51a, 0x3c89eb13,
+0x2b94f908, 0x269ff701, 0xbd464de6, 0xb04d43ef, 0xa75051f4, 0xaa5b5ffd,
+0x896a75c2, 0x84617bcb, 0x937c69d0, 0x9e7767d9, 0xd51e3dae, 0xd81533a7,
+0xcf0821bc, 0xc2032fb5, 0xe132058a, 0xec390b83, 0xfb241998, 0xf62f1791,
+0xd68d764d, 0xdb867844, 0xcc9b6a5f, 0xc1906456, 0xe2a14e69, 0xefaa4060,
+0xf8b7527b, 0xf5bc5c72, 0xbed50605, 0xb3de080c, 0xa4c31a17, 0xa9c8141e,
+0x8af93e21, 0x87f23028, 0x90ef2233, 0x9de42c3a, 0x063d96dd, 0x0b3698d4,
+0x1c2b8acf, 0x112084c6, 0x3211aef9, 0x3f1aa0f0, 0x2807b2eb, 0x250cbce2,
+0x6e65e695, 0x636ee89c, 0x7473fa87, 0x7978f48e, 0x5a49deb1, 0x5742d0b8,
+0x405fc2a3, 0x4d54ccaa, 0xdaf741ec, 0xd7fc4fe5, 0xc0e15dfe, 0xcdea53f7,
+0xeedb79c8, 0xe3d077c1, 0xf4cd65da, 0xf9c66bd3, 0xb2af31a4, 0xbfa43fad,
+0xa8b92db6, 0xa5b223bf, 0x86830980, 0x8b880789, 0x9c951592, 0x919e1b9b,
+0x0a47a17c, 0x074caf75, 0x1051bd6e, 0x1d5ab367, 0x3e6b9958, 0x33609751,
+0x247d854a, 0x29768b43, 0x621fd134, 0x6f14df3d, 0x7809cd26, 0x7502c32f,
+0x5633e910, 0x5b38e719, 0x4c25f502, 0x412efb0b, 0x618c9ad7, 0x6c8794de,
+0x7b9a86c5, 0x769188cc, 0x55a0a2f3, 0x58abacfa, 0x4fb6bee1, 0x42bdb0e8,
+0x09d4ea9f, 0x04dfe496, 0x13c2f68d, 0x1ec9f884, 0x3df8d2bb, 0x30f3dcb2,
+0x27eecea9, 0x2ae5c0a0, 0xb13c7a47, 0xbc37744e, 0xab2a6655, 0xa621685c,
+0x85104263, 0x881b4c6a, 0x9f065e71, 0x920d5078, 0xd9640a0f, 0xd46f0406,
+0xc372161d, 0xce791814, 0xed48322b, 0xe0433c22, 0xf75e2e39, 0xfa552030,
+0xb701ec9a, 0xba0ae293, 0xad17f088, 0xa01cfe81, 0x832dd4be, 0x8e26dab7,
+0x993bc8ac, 0x9430c6a5, 0xdf599cd2, 0xd25292db, 0xc54f80c0, 0xc8448ec9,
+0xeb75a4f6, 0xe67eaaff, 0xf163b8e4, 0xfc68b6ed, 0x67b10c0a, 0x6aba0203,
+0x7da71018, 0x70ac1e11, 0x539d342e, 0x5e963a27, 0x498b283c, 0x44802635,
+0x0fe97c42, 0x02e2724b, 0x15ff6050, 0x18f46e59, 0x3bc54466, 0x36ce4a6f,
+0x21d35874, 0x2cd8567d, 0x0c7a37a1, 0x017139a8, 0x166c2bb3, 0x1b6725ba,
+0x38560f85, 0x355d018c, 0x22401397, 0x2f4b1d9e, 0x642247e9, 0x692949e0,
+0x7e345bfb, 0x733f55f2, 0x500e7fcd, 0x5d0571c4, 0x4a1863df, 0x47136dd6,
+0xdccad731, 0xd1c1d938, 0xc6dccb23, 0xcbd7c52a, 0xe8e6ef15, 0xe5ede11c,
+0xf2f0f307, 0xfffbfd0e, 0xb492a779, 0xb999a970, 0xae84bb6b, 0xa38fb562,
+0x80be9f5d, 0x8db59154, 0x9aa8834f, 0x97a38d46  
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _IMXC3[256] = 
+{
+0x00000000, 0x0e0b0d09, 0x1c161a12, 0x121d171b, 0x382c3424, 0x3627392d,
+0x243a2e36, 0x2a31233f, 0x70586848, 0x7e536541, 0x6c4e725a, 0x62457f53,
+0x48745c6c, 0x467f5165, 0x5462467e, 0x5a694b77, 0xe0b0d090, 0xeebbdd99,
+0xfca6ca82, 0xf2adc78b, 0xd89ce4b4, 0xd697e9bd, 0xc48afea6, 0xca81f3af,
+0x90e8b8d8, 0x9ee3b5d1, 0x8cfea2ca, 0x82f5afc3, 0xa8c48cfc, 0xa6cf81f5,
+0xb4d296ee, 0xbad99be7, 0xdb7bbb3b, 0xd570b632, 0xc76da129, 0xc966ac20,
+0xe3578f1f, 0xed5c8216, 0xff41950d, 0xf14a9804, 0xab23d373, 0xa528de7a,
+0xb735c961, 0xb93ec468, 0x930fe757, 0x9d04ea5e, 0x8f19fd45, 0x8112f04c,
+0x3bcb6bab, 0x35c066a2, 0x27dd71b9, 0x29d67cb0, 0x03e75f8f, 0x0dec5286,
+0x1ff1459d, 0x11fa4894, 0x4b9303e3, 0x45980eea, 0x578519f1, 0x598e14f8,
+0x73bf37c7, 0x7db43ace, 0x6fa92dd5, 0x61a220dc, 0xadf66d76, 0xa3fd607f,
+0xb1e07764, 0xbfeb7a6d, 0x95da5952, 0x9bd1545b, 0x89cc4340, 0x87c74e49,
+0xddae053e, 0xd3a50837, 0xc1b81f2c, 0xcfb31225, 0xe582311a, 0xeb893c13,
+0xf9942b08, 0xf79f2601, 0x4d46bde6, 0x434db0ef, 0x5150a7f4, 0x5f5baafd,
+0x756a89c2, 0x7b6184cb, 0x697c93d0, 0x67779ed9, 0x3d1ed5ae, 0x3315d8a7,
+0x2108cfbc, 0x2f03c2b5, 0x0532e18a, 0x0b39ec83, 0x1924fb98, 0x172ff691,
+0x768dd64d, 0x7886db44, 0x6a9bcc5f, 0x6490c156, 0x4ea1e269, 0x40aaef60,
+0x52b7f87b, 0x5cbcf572, 0x06d5be05, 0x08deb30c, 0x1ac3a417, 0x14c8a91e,
+0x3ef98a21, 0x30f28728, 0x22ef9033, 0x2ce49d3a, 0x963d06dd, 0x98360bd4,
+0x8a2b1ccf, 0x842011c6, 0xae1132f9, 0xa01a3ff0, 0xb20728eb, 0xbc0c25e2,
+0xe6656e95, 0xe86e639c, 0xfa737487, 0xf478798e, 0xde495ab1, 0xd04257b8,
+0xc25f40a3, 0xcc544daa, 0x41f7daec, 0x4ffcd7e5, 0x5de1c0fe, 0x53eacdf7,
+0x79dbeec8, 0x77d0e3c1, 0x65cdf4da, 0x6bc6f9d3, 0x31afb2a4, 0x3fa4bfad,
+0x2db9a8b6, 0x23b2a5bf, 0x09838680, 0x07888b89, 0x15959c92, 0x1b9e919b,
+0xa1470a7c, 0xaf4c0775, 0xbd51106e, 0xb35a1d67, 0x996b3e58, 0x97603351,
+0x857d244a, 0x8b762943, 0xd11f6234, 0xdf146f3d, 0xcd097826, 0xc302752f,
+0xe9335610, 0xe7385b19, 0xf5254c02, 0xfb2e410b, 0x9a8c61d7, 0x94876cde,
+0x869a7bc5, 0x889176cc, 0xa2a055f3, 0xacab58fa, 0xbeb64fe1, 0xb0bd42e8,
+0xead4099f, 0xe4df0496, 0xf6c2138d, 0xf8c91e84, 0xd2f83dbb, 0xdcf330b2,
+0xceee27a9, 0xc0e52aa0, 0x7a3cb147, 0x7437bc4e, 0x662aab55, 0x6821a65c,
+0x42108563, 0x4c1b886a, 0x5e069f71, 0x500d9278, 0x0a64d90f, 0x046fd406,
+0x1672c31d, 0x1879ce14, 0x3248ed2b, 0x3c43e022, 0x2e5ef739, 0x2055fa30,
+0xec01b79a, 0xe20aba93, 0xf017ad88, 0xfe1ca081, 0xd42d83be, 0xda268eb7,
+0xc83b99ac, 0xc63094a5, 0x9c59dfd2, 0x9252d2db, 0x804fc5c0, 0x8e44c8c9,
+0xa475ebf6, 0xaa7ee6ff, 0xb863f1e4, 0xb668fced, 0x0cb1670a, 0x02ba6a03,
+0x10a77d18, 0x1eac7011, 0x349d532e, 0x3a965e27, 0x288b493c, 0x26804435,
+0x7ce90f42, 0x72e2024b, 0x60ff1550, 0x6ef41859, 0x44c53b66, 0x4ace366f,
+0x58d32174, 0x56d82c7d, 0x377a0ca1, 0x397101a8, 0x2b6c16b3, 0x25671bba,
+0x0f563885, 0x015d358c, 0x13402297, 0x1d4b2f9e, 0x472264e9, 0x492969e0,
+0x5b347efb, 0x553f73f2, 0x7f0e50cd, 0x71055dc4, 0x63184adf, 0x6d1347d6,
+0xd7cadc31, 0xd9c1d138, 0xcbdcc623, 0xc5d7cb2a, 0xefe6e815, 0xe1ede51c,
+0xf3f0f207, 0xfdfbff0e, 0xa792b479, 0xa999b970, 0xbb84ae6b, 0xb58fa362,
+0x9fbe805d, 0x91b58d54, 0x83a89a4f, 0x8da39746  
+};
+#else
+static const PRUint32 _IMXC3[256] = 
+{
+0x00000000, 0x090d0b0e, 0x121a161c, 0x1b171d12, 0x24342c38, 0x2d392736,
+0x362e3a24, 0x3f23312a, 0x48685870, 0x4165537e, 0x5a724e6c, 0x537f4562,
+0x6c5c7448, 0x65517f46, 0x7e466254, 0x774b695a, 0x90d0b0e0, 0x99ddbbee,
+0x82caa6fc, 0x8bc7adf2, 0xb4e49cd8, 0xbde997d6, 0xa6fe8ac4, 0xaff381ca,
+0xd8b8e890, 0xd1b5e39e, 0xcaa2fe8c, 0xc3aff582, 0xfc8cc4a8, 0xf581cfa6,
+0xee96d2b4, 0xe79bd9ba, 0x3bbb7bdb, 0x32b670d5, 0x29a16dc7, 0x20ac66c9,
+0x1f8f57e3, 0x16825ced, 0x0d9541ff, 0x04984af1, 0x73d323ab, 0x7ade28a5,
+0x61c935b7, 0x68c43eb9, 0x57e70f93, 0x5eea049d, 0x45fd198f, 0x4cf01281,
+0xab6bcb3b, 0xa266c035, 0xb971dd27, 0xb07cd629, 0x8f5fe703, 0x8652ec0d,
+0x9d45f11f, 0x9448fa11, 0xe303934b, 0xea0e9845, 0xf1198557, 0xf8148e59,
+0xc737bf73, 0xce3ab47d, 0xd52da96f, 0xdc20a261, 0x766df6ad, 0x7f60fda3,
+0x6477e0b1, 0x6d7aebbf, 0x5259da95, 0x5b54d19b, 0x4043cc89, 0x494ec787,
+0x3e05aedd, 0x3708a5d3, 0x2c1fb8c1, 0x2512b3cf, 0x1a3182e5, 0x133c89eb,
+0x082b94f9, 0x01269ff7, 0xe6bd464d, 0xefb04d43, 0xf4a75051, 0xfdaa5b5f,
+0xc2896a75, 0xcb84617b, 0xd0937c69, 0xd99e7767, 0xaed51e3d, 0xa7d81533,
+0xbccf0821, 0xb5c2032f, 0x8ae13205, 0x83ec390b, 0x98fb2419, 0x91f62f17,
+0x4dd68d76, 0x44db8678, 0x5fcc9b6a, 0x56c19064, 0x69e2a14e, 0x60efaa40,
+0x7bf8b752, 0x72f5bc5c, 0x05bed506, 0x0cb3de08, 0x17a4c31a, 0x1ea9c814,
+0x218af93e, 0x2887f230, 0x3390ef22, 0x3a9de42c, 0xdd063d96, 0xd40b3698,
+0xcf1c2b8a, 0xc6112084, 0xf93211ae, 0xf03f1aa0, 0xeb2807b2, 0xe2250cbc,
+0x956e65e6, 0x9c636ee8, 0x877473fa, 0x8e7978f4, 0xb15a49de, 0xb85742d0,
+0xa3405fc2, 0xaa4d54cc, 0xecdaf741, 0xe5d7fc4f, 0xfec0e15d, 0xf7cdea53,
+0xc8eedb79, 0xc1e3d077, 0xdaf4cd65, 0xd3f9c66b, 0xa4b2af31, 0xadbfa43f,
+0xb6a8b92d, 0xbfa5b223, 0x80868309, 0x898b8807, 0x929c9515, 0x9b919e1b,
+0x7c0a47a1, 0x75074caf, 0x6e1051bd, 0x671d5ab3, 0x583e6b99, 0x51336097,
+0x4a247d85, 0x4329768b, 0x34621fd1, 0x3d6f14df, 0x267809cd, 0x2f7502c3,
+0x105633e9, 0x195b38e7, 0x024c25f5, 0x0b412efb, 0xd7618c9a, 0xde6c8794,
+0xc57b9a86, 0xcc769188, 0xf355a0a2, 0xfa58abac, 0xe14fb6be, 0xe842bdb0,
+0x9f09d4ea, 0x9604dfe4, 0x8d13c2f6, 0x841ec9f8, 0xbb3df8d2, 0xb230f3dc,
+0xa927eece, 0xa02ae5c0, 0x47b13c7a, 0x4ebc3774, 0x55ab2a66, 0x5ca62168,
+0x63851042, 0x6a881b4c, 0x719f065e, 0x78920d50, 0x0fd9640a, 0x06d46f04,
+0x1dc37216, 0x14ce7918, 0x2bed4832, 0x22e0433c, 0x39f75e2e, 0x30fa5520,
+0x9ab701ec, 0x93ba0ae2, 0x88ad17f0, 0x81a01cfe, 0xbe832dd4, 0xb78e26da,
+0xac993bc8, 0xa59430c6, 0xd2df599c, 0xdbd25292, 0xc0c54f80, 0xc9c8448e,
+0xf6eb75a4, 0xffe67eaa, 0xe4f163b8, 0xedfc68b6, 0x0a67b10c, 0x036aba02,
+0x187da710, 0x1170ac1e, 0x2e539d34, 0x275e963a, 0x3c498b28, 0x35448026,
+0x420fe97c, 0x4b02e272, 0x5015ff60, 0x5918f46e, 0x663bc544, 0x6f36ce4a,
+0x7421d358, 0x7d2cd856, 0xa10c7a37, 0xa8017139, 0xb3166c2b, 0xba1b6725,
+0x8538560f, 0x8c355d01, 0x97224013, 0x9e2f4b1d, 0xe9642247, 0xe0692949,
+0xfb7e345b, 0xf2733f55, 0xcd500e7f, 0xc45d0571, 0xdf4a1863, 0xd647136d,
+0x31dccad7, 0x38d1c1d9, 0x23c6dccb, 0x2acbd7c5, 0x15e8e6ef, 0x1ce5ede1,
+0x07f2f0f3, 0x0efffbfd, 0x79b492a7, 0x70b999a9, 0x6bae84bb, 0x62a38fb5,
+0x5d80be9f, 0x548db591, 0x4f9aa883, 0x4697a38d  
+};
+#endif
+
+#endif /* RIJNDAEL_INCLUDE_TABLES */
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 Rcon[30] = { /* 1, 2, 4, ...: each entry is the previous one doubled in GF(2^8) (reduced by 0x11b), held in the low-order byte; presumably the AES key-schedule round constants -- confirm at the use site */
+0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020,
+0x00000040, 0x00000080, 0x0000001b, 0x00000036, 0x0000006c, 0x000000d8,
+0x000000ab, 0x0000004d, 0x0000009a, 0x0000002f, 0x0000005e, 0x000000bc,
+0x00000063, 0x000000c6, 0x00000097, 0x00000035, 0x0000006a, 0x000000d4,
+0x000000b3, 0x0000007d, 0x000000fa, 0x000000ef, 0x000000c5, 0x00000091 
+};
+#else
+static const PRUint32 Rcon[30] = { /* same 30 values as the IS_LITTLE_ENDIAN variant, but held in the high-order byte of each word */
+0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000,
+0x40000000, 0x80000000, 0x1b000000, 0x36000000, 0x6c000000, 0xd8000000,
+0xab000000, 0x4d000000, 0x9a000000, 0x2f000000, 0x5e000000, 0xbc000000,
+0x63000000, 0xc6000000, 0x97000000, 0x35000000, 0x6a000000, 0xd4000000,
+0xb3000000, 0x7d000000, 0xfa000000, 0xef000000, 0xc5000000, 0x91000000 
+};
+#endif
+
diff --git a/security/nss/lib/freebl/rijndael_tables.c b/security/nss/lib/freebl/rijndael_tables.c
new file mode 100644
index 000000000..78dd85a96
--- /dev/null
+++ b/security/nss/lib/freebl/rijndael_tables.c
@@ -0,0 +1,215 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "stdio.h"
+#include "prtypes.h"
+#include "blapi.h"
+
+/*
+ * what follows is code thrown together to generate the myriad of tables
+ * used by Rijndael, the AES cipher.
+ */
+
+/* Pack four bytes into a 32-bit word: b0 lowest for _LE, highest for _BE.
+ * Each byte is widened to PRUint32 so << 24 never overflows a signed int. */
+#define WORD_LE(b0, b1, b2, b3) WORD_BE(b3, b2, b1, b0)
+#define WORD_BE(b0, b1, b2, b3) \
+    (((PRUint32)(b0) << 24) | ((PRUint32)(b1) << 16) | ((PRUint32)(b2) << 8) | (PRUint32)(b3))
+
+static const PRUint8 __S[256] = /* S-box: written out as _S and used to build _T0.._T3 */
+    {
+      99, 124, 119, 123, 242, 107, 111, 197, 48, 1, 103, 43, 254, 215, 171, 118,
+      202, 130, 201, 125, 250, 89, 71, 240, 173, 212, 162, 175, 156, 164, 114, 192,
+      183, 253, 147, 38, 54, 63, 247, 204, 52, 165, 229, 241, 113, 216, 49, 21,
+      4, 199, 35, 195, 24, 150, 5, 154, 7, 18, 128, 226, 235, 39, 178, 117,
+      9, 131, 44, 26, 27, 110, 90, 160, 82, 59, 214, 179, 41, 227, 47, 132,
+      83, 209, 0, 237, 32, 252, 177, 91, 106, 203, 190, 57, 74, 76, 88, 207,
+      208, 239, 170, 251, 67, 77, 51, 133, 69, 249, 2, 127, 80, 60, 159, 168,
+      81, 163, 64, 143, 146, 157, 56, 245, 188, 182, 218, 33, 16, 255, 243, 210,
+      205, 12, 19, 236, 95, 151, 68, 23, 196, 167, 126, 61, 100, 93, 25, 115,
+      96, 129, 79, 220, 34, 42, 144, 136, 70, 238, 184, 20, 222, 94, 11, 219,
+      224, 50, 58, 10, 73, 6, 36, 92, 194, 211, 172, 98, 145, 149, 228, 121,
+      231, 200, 55, 109, 141, 213, 78, 169, 108, 86, 244, 234, 101, 122, 174, 8,
+      186, 120, 37, 46, 28, 166, 180, 198, 232, 221, 116, 31, 75, 189, 139, 138,
+      112, 62, 181, 102, 72, 3, 246, 14, 97, 53, 87, 185, 134, 193, 29, 158,
+      225, 248, 152, 17, 105, 217, 142, 148, 155, 30, 135, 233, 206, 85, 40, 223,
+      140, 161, 137, 13, 191, 230, 66, 104, 65, 153, 45, 15, 176, 84, 187, 22,
+    };
+
+static const PRUint8 __SInv[256] = /* S**-1: written out as _SInv and used to build _TInv0.._TInv3 */
+    {
+      82, 9, 106, 213, 48, 54, 165, 56, 191, 64, 163, 158, 129, 243, 215, 251,
+      124, 227, 57, 130, 155, 47, 255, 135, 52, 142, 67, 68, 196, 222, 233, 203,
+      84, 123, 148, 50, 166, 194, 35, 61, 238, 76, 149, 11, 66, 250, 195, 78,
+      8, 46, 161, 102, 40, 217, 36, 178, 118, 91, 162, 73, 109, 139, 209, 37,
+      114, 248, 246, 100, 134, 104, 152, 22, 212, 164, 92, 204, 93, 101, 182, 146,
+      108, 112, 72, 80, 253, 237, 185, 218, 94, 21, 70, 87, 167, 141, 157, 132,
+      144, 216, 171, 0, 140, 188, 211, 10, 247, 228, 88, 5, 184, 179, 69, 6,
+      208, 44, 30, 143, 202, 63, 15, 2, 193, 175, 189, 3, 1, 19, 138, 107,
+      58, 145, 17, 65, 79, 103, 220, 234, 151, 242, 207, 206, 240, 180, 230, 115,
+      150, 172, 116, 34, 231, 173, 53, 133, 226, 249, 55, 232, 28, 117, 223, 110,
+      71, 241, 26, 113, 29, 41, 197, 137, 111, 183, 98, 14, 170, 24, 190, 27,
+      252, 86, 62, 75, 198, 210, 121, 32, 154, 219, 192, 254, 120, 205, 90, 244,
+      31, 221, 168, 51, 136, 7, 199, 49, 177, 18, 16, 89, 39, 128, 236, 95,
+      96, 81, 127, 169, 25, 181, 74, 13, 45, 229, 122, 159, 147, 201, 156, 239,
+      160, 224, 59, 77, 174, 42, 245, 176, 200, 235, 187, 60, 131, 83, 153, 97,
+      23, 43, 4, 126, 186, 119, 214, 38, 225, 105, 20, 99, 85, 33, 12, 125
+    };
+
+/* GF_MULTIPLY
+ *
+ * multiply two bytes as polynomials over GF(2), i.e. in GF(2**8), reducing
+ * with 0x1b (x**4 + x**3 + x + 1) whenever x**8 overflows out of the byte */
+PRUint8
+gf_multiply(PRUint8 a, PRUint8 b)
+{
+    PRUint8 product = 0;
+    for (; b != 0; b >>= 1) {
+        if (b & 0x01)
+            product ^= a;
+        a = (PRUint8)((a << 1) ^ ((a & 0x80) ? 0x1b : 0x00));
+    }
+    return product;
+}
+
+/* Write the 256-entry word table _T<table> to file as C source, first in a
+ * little-endian and then in a big-endian layout, selected at compile time by
+ * IS_LITTLE_ENDIAN.  Entry i packs the four GF(2**8) products Sx[i]*m0 ..
+ * Sx[i]*m3 with WORD_LE or WORD_BE respectively. */
+void
+make_T_Table(char *table, const PRUint8 Sx[256], FILE *file,
+             unsigned char m0, unsigned char m1,
+             unsigned char m2, unsigned char m3)
+{
+    int bigEndian; /* 0: little-endian pass, 1: big-endian pass */
+    int idx;
+
+    fprintf(file, "#ifdef IS_LITTLE_ENDIAN\n");
+    for (bigEndian = 0; bigEndian <= 1; bigEndian++) {
+        fprintf(file, "static const PRUint32 _T%s[256] = \n{\n", table);
+        for (idx = 0; idx < 256; idx++) {
+            PRUint8 p0 = gf_multiply(Sx[idx], m0);
+            PRUint8 p1 = gf_multiply(Sx[idx], m1);
+            PRUint8 p2 = gf_multiply(Sx[idx], m2);
+            PRUint8 p3 = gf_multiply(Sx[idx], m3);
+            PRUint32 word = bigEndian ? WORD_BE(p0, p1, p2, p3)
+                                      : WORD_LE(p0, p1, p2, p3);
+            /* no comma after the last entry; six entries per output line */
+            char sep = (idx == 255) ? ' ' : ',';
+            char eol = (idx % 6 == 5) ? '\n' : ' ';
+
+            /* "%#.8x" omits the "0x" prefix for zero, so spell zero out */
+            if (word == 0)
+                fprintf(file, "0x00000000%c%c", sep, eol);
+            else
+                fprintf(file, "%#.8x%c%c", word, sep, eol);
+        }
+        fprintf(file, "\n};\n");
+        if (!bigEndian)
+            fprintf(file, "#else\n");
+        else
+            fprintf(file, "#endif\n\n");
+    }
+}
+
+/* Write the 256-entry word table _IMXC<num> to file for both byte orders.
+ * Entry i is built from the GF(2**8) products i*m0 .. i*m3, printed as one
+ * 8-digit hex word: i*m3 leads for little-endian, i*m0 for big-endian. */
+void
+make_InvMixCol_Table(int num, FILE *file, PRUint8 m0, PRUint8 m1, PRUint8 m2, PRUint8 m3)
+{
+    int bigEndian, idx;
+    fprintf(file, "#ifdef IS_LITTLE_ENDIAN\n");
+    for (bigEndian = 0; bigEndian <= 1; bigEndian++) {
+        fprintf(file, "static const PRUint32 _IMXC%d[256] = \n{\n", num);
+        for (idx = 0; idx < 256; idx++) {
+            PRUint8 p0 = gf_multiply(idx, m0);
+            PRUint8 p1 = gf_multiply(idx, m1);
+            PRUint8 p2 = gf_multiply(idx, m2);
+            PRUint8 p3 = gf_multiply(idx, m3);
+            char sep = (idx == 255) ? ' ' : ',';
+            char eol = (idx % 6 == 5) ? '\n' : ' ';
+            if (bigEndian)
+                fprintf(file, "0x%.2x%.2x%.2x%.2x%c%c", p0, p1, p2, p3, sep, eol);
+            else
+                fprintf(file, "0x%.2x%.2x%.2x%.2x%c%c", p3, p2, p1, p0, sep, eol);
+        }
+        fprintf(file, "\n};\n");
+        /* "#else" ends the little-endian table, "#endif" the big-endian one */
+        fputs(bigEndian ? "#endif\n\n" : "#else\n", file);
+    }
+}
+
+/* Generate rijndael32.tab in the current directory: the S-box tables, the T
+ * and InvMixCol word tables and the key-expansion round constants.  Returns
+ * 0 on success, 1 if the file cannot be opened or does not close cleanly. */
+int
+main(void)
+{
+    int i;
+    PRUint8 cur;
+    FILE *optfile = fopen("rijndael32.tab", "w");
+    if (optfile == NULL) {
+        perror("rijndael32.tab");
+        return 1;
+    }
+    /* output S, if there are no T tables */
+    fprintf(optfile, "#ifndef RIJNDAEL_INCLUDE_TABLES\n");
+    fprintf(optfile, "static const PRUint8 _S[256] = \n{\n");
+    for (i = 0; i < 256; i++) {
+        fprintf(optfile, "%3d%c%c", __S[i], (i == 255) ? ' ' : ',',
+                (i % 16 == 15) ? '\n' : ' ');
+    }
+    fprintf(optfile, "};\n#endif /* not RIJNDAEL_INCLUDE_TABLES */\n\n");
+    /* output S**-1 */
+    fprintf(optfile, "static const PRUint8 _SInv[256] = \n{\n");
+    for (i = 0; i < 256; i++) {
+        fprintf(optfile, "%3d%c%c", __SInv[i], (i == 255) ? ' ' : ',',
+                (i % 16 == 15) ? '\n' : ' ');
+    }
+    fprintf(optfile, "};\n\n");
+    fprintf(optfile, "#ifdef RIJNDAEL_INCLUDE_TABLES\n");
+    /* The 32-bit word tables for optimized implementation */
+    /* T0 = [ S[a] * 02, S[a], S[a], S[a] * 03 ] */
+    make_T_Table("0", __S, optfile, 0x02, 0x01, 0x01, 0x03);
+    /* T1 = [ S[a] * 03, S[a] * 02, S[a], S[a] ] */
+    make_T_Table("1", __S, optfile, 0x03, 0x02, 0x01, 0x01);
+    /* T2 = [ S[a], S[a] * 03, S[a] * 02, S[a] ] */
+    make_T_Table("2", __S, optfile, 0x01, 0x03, 0x02, 0x01);
+    /* T3 = [ S[a], S[a], S[a] * 03, S[a] * 02 ] */
+    make_T_Table("3", __S, optfile, 0x01, 0x01, 0x03, 0x02);
+    /* TInv0 = [ Si[a] * 0E, Si[a] * 09, Si[a] * 0D, Si[a] * 0B ] */
+    make_T_Table("Inv0", __SInv, optfile, 0x0e, 0x09, 0x0d, 0x0b);
+    /* TInv1 = [ Si[a] * 0B, Si[a] * 0E, Si[a] * 09, Si[a] * 0D ] */
+    make_T_Table("Inv1", __SInv, optfile, 0x0b, 0x0e, 0x09, 0x0d);
+    /* TInv2 = [ Si[a] * 0D, Si[a] * 0B, Si[a] * 0E, Si[a] * 09 ] */
+    make_T_Table("Inv2", __SInv, optfile, 0x0d, 0x0b, 0x0e, 0x09);
+    /* TInv3 = [ Si[a] * 09, Si[a] * 0D, Si[a] * 0B, Si[a] * 0E ] */
+    make_T_Table("Inv3", __SInv, optfile, 0x09, 0x0d, 0x0b, 0x0e);
+    /* byte multiply tables for inverse key expansion (mimics InvMixColumn) */
+    make_InvMixCol_Table(0, optfile, 0x0e, 0x09, 0x0d, 0x0b);
+    make_InvMixCol_Table(1, optfile, 0x0b, 0x0E, 0x09, 0x0d);
+    make_InvMixCol_Table(2, optfile, 0x0d, 0x0b, 0x0e, 0x09);
+    make_InvMixCol_Table(3, optfile, 0x09, 0x0d, 0x0b, 0x0e);
+    fprintf(optfile, "#endif /* RIJNDAEL_INCLUDE_TABLES */\n\n");
+    /* round constants for key expansion */
+    fprintf(optfile, "#ifdef IS_LITTLE_ENDIAN\n");
+    fprintf(optfile, "static const PRUint32 Rcon[30] = {\n");
+    cur = 0x01;
+    for (i = 0; i < 30; i++) {
+        fprintf(optfile, "%#.8x%c%c", WORD_LE(cur, 0, 0, 0), (i == 29) ? ' ' : ',', (i % 6 == 5) ? '\n' : ' ');
+        cur = gf_multiply(cur, 0x02);
+    }
+    fprintf(optfile, "};\n");
+    fprintf(optfile, "#else\n");
+    fprintf(optfile, "static const PRUint32 Rcon[30] = {\n");
+    cur = 0x01;
+    for (i = 0; i < 30; i++) {
+        fprintf(optfile, "%#.8x%c%c", WORD_BE(cur, 0, 0, 0), (i == 29) ? ' ' : ',', (i % 6 == 5) ? '\n' : ' ');
+        cur = gf_multiply(cur, 0x02);
+    }
+    fprintf(optfile, "};\n");
+    fprintf(optfile, "#endif\n\n");
+    return (fclose(optfile) == 0) ? 0 : 1;
+}
diff --git a/security/nss/lib/freebl/rsa.c b/security/nss/lib/freebl/rsa.c
new file mode 100644
index 000000000..ff8c40ed9
--- /dev/null
+++ b/security/nss/lib/freebl/rsa.c
@@ -0,0 +1,1625 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * RSA key generation, public key op, private key op.
+ */
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "secerr.h"
+
+#include "prclist.h"
+#include "nssilock.h"
+#include "prinit.h"
+#include "blapi.h"
+#include "mpi.h"
+#include "mpprime.h"
+#include "mplogic.h"
+#include "secmpi.h"
+#include "secitem.h"
+#include "blapii.h"
+
+/*
+** Number of times to attempt to generate a prime (p or q) from a random
+** seed (the seed changes for each iteration).
+*/
+#define MAX_PRIME_GEN_ATTEMPTS 10
+/*
+** Number of times to attempt to generate a key.  The primes p and q change
+** for each attempt.
+*/
+#define MAX_KEY_GEN_ATTEMPTS 10
+
+/* Blinding parameters max cache size (length of RSABlindingParams.array) */
+#define RSA_BLINDING_PARAMS_MAX_CACHE_SIZE 20
+
+/* true if expLen > modLen or a byte length exceeds its RSA_MAX_*_BITS / 8 */
+#define BAD_RSA_KEY_SIZE(modLen, expLen)                           \
+    ((expLen) > (modLen) || (modLen) > RSA_MAX_MODULUS_BITS / 8 || \
+     (expLen) > RSA_MAX_EXPONENT_BITS / 8)
+
+struct blindingParamsStr;
+typedef struct blindingParamsStr blindingParams;
+
+struct blindingParamsStr {
+    blindingParams *next; /* next entry in a singly linked chain */
+    mp_int f, g; /* blinding parameter pair            */
+    int counter; /* number of remaining uses of (f, g) */
+};
+
+/*
+** RSABlindingParamsStr
+**
+** For discussion of Paul Kocher's timing attack against an RSA private key
+** operation, see http://www.cryptography.com/timingattack/paper.html.  The
+** countermeasure to this attack, known as blinding, is also discussed in
+** the Handbook of Applied Cryptography, 11.118-11.119.
+*/
+struct RSABlindingParamsStr {
+    /* Blinding-specific parameters */
+    PRCList link;              /* link to list of structs            */
+    SECItem modulus;           /* list element "key"                 */
+    blindingParams *free, *bp; /* Blinding parameters queue          */
+    blindingParams array[RSA_BLINDING_PARAMS_MAX_CACHE_SIZE]; /* NOTE(review): presumably the storage behind free/bp */
+};
+typedef struct RSABlindingParamsStr RSABlindingParams;
+
+/*
+** RSABlindingParamsListStr
+**
+** List of key-specific blinding params.  The lock is for list operations,
+** in this case insertions and iterations, as well as control of the counter
+** for each set of blinding parameters.  NOTE(review): the arena this comment
+** used to mention is not a member of this struct -- confirm where it lives.
+*/
+struct RSABlindingParamsListStr {
+    PZLock *lock;    /* Lock for the list   */
+    PRCondVar *cVar; /* Condition Variable  */
+    int waitCount;   /* Number of threads waiting on cVar */
+    PRCList head;    /* Head of the list    */
+};
+
+/*
+** The master blinding params list.  It starts out with every member zeroed.
+*/
+static struct RSABlindingParamsListStr blindingParamsList = { 0 };
+
+/* Number of times to reuse (f, g).  Suggested by Paul Kocher */
+#define RSA_BLINDING_PARAMS_MAX_REUSE 50
+
+/* Global, allows optional use of blinding.  On by default. */
+/* Cannot be changed at the moment, due to thread-safety issues. */
+static PRBool nssRSAUseBlinding = PR_TRUE;
+
+/* Fill key's modulus, exponents, primes, CRT exponents and coefficient from
+ * primes p and q.  At most one of e and d may be missing (flagged by
+ * needPublicExponent / needPrivateExponent); it is computed as the inverse of
+ * the other.  Fails with SEC_ERROR_NEED_RANDOM if p == q, if n does not have
+ * keySizeInBits significant bits, or if mp_invmod reports MP_UNDEF. */
+static SECStatus
+rsa_build_from_primes(const mp_int *p, const mp_int *q,
+                      mp_int *e, PRBool needPublicExponent,
+                      mp_int *d, PRBool needPrivateExponent,
+                      RSAPrivateKey *key, unsigned int keySizeInBits)
+{
+    mp_int n, phi;
+    mp_int psub1, qsub1, tmp;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    MP_DIGITS(&n) = 0;
+    MP_DIGITS(&phi) = 0;
+    MP_DIGITS(&psub1) = 0;
+    MP_DIGITS(&qsub1) = 0;
+    MP_DIGITS(&tmp) = 0;
+    CHECK_MPI_OK(mp_init(&n));
+    CHECK_MPI_OK(mp_init(&phi));
+    CHECK_MPI_OK(mp_init(&psub1));
+    CHECK_MPI_OK(mp_init(&qsub1));
+    CHECK_MPI_OK(mp_init(&tmp));
+    /* p and q must be distinct. */
+    if (mp_cmp(p, q) == 0) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    /* 1.  Compute n = p*q and verify it has the desired number of bits */
+    CHECK_MPI_OK(mp_mul(p, q, &n));
+    if ((unsigned)mpl_significant_bits(&n) != keySizeInBits) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    /* at least one exponent must be given, i.e. at most one is computed here */
+    PORT_Assert(!(needPublicExponent && needPrivateExponent));
+    /* 2.  Compute phi = lcm(p-1, q-1) (note: not the product (p-1)*(q-1)) */
+    CHECK_MPI_OK(mp_sub_d(p, 1, &psub1));
+    CHECK_MPI_OK(mp_sub_d(q, 1, &qsub1));
+    if (needPublicExponent || needPrivateExponent) {
+        CHECK_MPI_OK(mp_lcm(&psub1, &qsub1, &phi));
+        /* 3.  Compute d = e**-1 mod(phi) or e = d**-1 mod(phi) as necessary */
+        if (needPublicExponent) {
+            err = mp_invmod(d, &phi, e);
+        } else {
+            err = mp_invmod(e, &phi, d);
+        }
+    } else {
+        err = MP_OKAY;
+    }
+    /*     Verify that phi and the given exponent have no common divisors */
+    if (err != MP_OKAY) {
+        if (err == MP_UNDEF) {
+            PORT_SetError(SEC_ERROR_NEED_RANDOM);
+            err = MP_OKAY; /* to keep PORT_SetError from being called again */
+            rv = SECFailure;
+        }
+        goto cleanup;
+    }
+
+    /* 4.  Compute exponent1 = d mod (p-1) */
+    CHECK_MPI_OK(mp_mod(d, &psub1, &tmp));
+    MPINT_TO_SECITEM(&tmp, &key->exponent1, key->arena);
+    /* 5.  Compute exponent2 = d mod (q-1) */
+    CHECK_MPI_OK(mp_mod(d, &qsub1, &tmp));
+    MPINT_TO_SECITEM(&tmp, &key->exponent2, key->arena);
+    /* 6.  Compute coefficient = q**-1 mod p */
+    CHECK_MPI_OK(mp_invmod(q, p, &tmp));
+    MPINT_TO_SECITEM(&tmp, &key->coefficient, key->arena);
+    /* copy our calculated results, overwrite what is there */
+    key->modulus.data = NULL;
+    MPINT_TO_SECITEM(&n, &key->modulus, key->arena);
+    key->privateExponent.data = NULL;
+    MPINT_TO_SECITEM(d, &key->privateExponent, key->arena);
+    key->publicExponent.data = NULL;
+    MPINT_TO_SECITEM(e, &key->publicExponent, key->arena);
+    key->prime1.data = NULL;
+    MPINT_TO_SECITEM(p, &key->prime1, key->arena);
+    key->prime2.data = NULL;
+    MPINT_TO_SECITEM(q, &key->prime2, key->arena);
+cleanup:
+    mp_clear(&n);
+    mp_clear(&phi);
+    mp_clear(&psub1);
+    mp_clear(&qsub1);
+    mp_clear(&tmp);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+/* Try to put a prime of primeLen bytes in *prime: up to MAX_PRIME_GEN_ATTEMPTS
+ * times, draw fresh random bytes and pass them to mpp_make_prime.  Returns
+ * SECFailure on any error, including failure to allocate the byte buffer. */
+static SECStatus
+generate_prime(mp_int *prime, int primeLen)
+{
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    unsigned long counter = 0;
+    int piter;
+    unsigned char *pb = PORT_Alloc(primeLen);
+    if (!pb) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure; /* nothing to free; rv/err would still say success */
+    }
+    for (piter = 0; piter < MAX_PRIME_GEN_ATTEMPTS; piter++) {
+        CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(pb, primeLen));
+        pb[0] |= 0xC0;            /* set two high-order bits */
+        pb[primeLen - 1] |= 0x01; /* set low-order bit       */
+        CHECK_MPI_OK(mp_read_unsigned_octets(prime, pb, primeLen));
+        err = mpp_make_prime(prime, primeLen * 8, PR_FALSE, &counter);
+        if (err != MP_NO) /* MP_NO: try again with new random bytes */
+            goto cleanup;
+    }
+cleanup:
+    PORT_ZFree(pb, primeLen); /* pb is never NULL here */
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/*
+ *  make sure the key components meet the fips186 requirements tested here:
+ *  p - q and d must each have enough significant bits.  Returns PR_TRUE if
+ *  they do (or if the key is too short to be a FIPS key), else PR_FALSE.
+ */
+static PRBool
+rsa_fips186_verify(mp_int *p, mp_int *q, mp_int *d, int keySizeInBits)
+{
+    mp_int delta; /* p - q */
+    mp_err err = MP_OKAY;
+    PRBool acceptable = PR_FALSE;
+
+    /* below 250 bits is not a valid FIPS length, so the other tests are
+     * pointless; anyone here in FIPS mode is outside the security policy */
+    if (keySizeInBits < 250) {
+        return PR_TRUE;
+    }
+
+    /* Already known at this point:
+     *   p & q are greater than sqrt(2)*2^(keySize/2-1)
+     *   gcd(p-1,e) = 1 and gcd(q-1,e) = 1, because otherwise the
+     *   mp_invmod() function will fail. */
+    MP_DIGITS(&delta) = 0;
+    CHECK_MPI_OK(mp_init(&delta));
+    /* NSS always has p > q, so we know the difference is positive */
+    CHECK_MPI_OK(mp_sub(p, q, &delta));
+
+    /* accept only if p-q > 2^(keysize/2-100) and d is large enough, both
+     * measured by their count of significant bits */
+    if ((unsigned)mpl_significant_bits(&delta) >= (keySizeInBits / 2 - 100) &&
+        (unsigned)mpl_significant_bits(d) >= (keySizeInBits / 2)) {
+        acceptable = PR_TRUE;
+    }
+
+cleanup:
+    mp_clear(&delta);
+    return acceptable;
+}
+
+/*
+** Generate and return a new RSA public and private key.
+**  Both keys are encoded in a single RSAPrivateKey structure.
+**  "keySizeInBits" is the size of the key to be generated, in bits
+**     (512, 1024, etc.); it must be a multiple of 16.
+**  "publicExponent" is required and points to a byte encoded integer,
+**     in "big endian" order, that is used as the public exponent.
+**  Returns NULL, with the key's arena freed, on bad arguments or failure.
+*/
+RSAPrivateKey *
+RSA_NewKey(int keySizeInBits, SECItem *publicExponent)
+{
+    unsigned int primeLen;
+    mp_int p, q, e, d;
+    int kiter = 0;
+    int max_attempts;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    int prerr = 0;
+    RSAPrivateKey *key = NULL;
+    PLArenaPool *arena = NULL;
+    /* Require key size to be a multiple of 16 bits. */
+    if (!publicExponent || keySizeInBits % 16 != 0 ||
+        BAD_RSA_KEY_SIZE((unsigned int)keySizeInBits / 8, publicExponent->len)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return NULL;
+    }
+    /* 1. Allocate arena & key */
+    arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE);
+    if (!arena) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return NULL;
+    }
+    key = PORT_ArenaZNew(arena, RSAPrivateKey);
+    if (!key) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        PORT_FreeArena(arena, PR_TRUE);
+        return NULL;
+    }
+    key->arena = arena;
+    /* length of primes p and q (in bytes) */
+    primeLen = keySizeInBits / (2 * PR_BITS_PER_BYTE);
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&e) = 0;
+    MP_DIGITS(&d) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&e));
+    CHECK_MPI_OK(mp_init(&d));
+    /* 2.  Set the version number (PKCS1 v1.5 says it should be zero) */
+    if (!SECITEM_AllocItem(arena, &key->version, 1)) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    key->version.data[0] = 0;
+    /* 3.  Set the public exponent */
+    SECITEM_TO_MPINT(*publicExponent, &e);
+    max_attempts = 5 * (keySizeInBits / 2); /* FIPS 186-4 B.3.3 steps 4.7 and 5.8 */
+    do {
+        prerr = 0;
+        PORT_SetError(0);
+        CHECK_SEC_OK(generate_prime(&p, primeLen));
+        CHECK_SEC_OK(generate_prime(&q, primeLen));
+        /* Assure p > q (not required by PKCS #1 or by NSS; could be removed). */
+        if (mp_cmp(&p, &q) < 0)
+            mp_exch(&p, &q);
+        /* Attempt to use these primes to generate a key */
+        rv = rsa_build_from_primes(&p, &q,
+                                   &e, PR_FALSE, /* needPublicExponent=false */
+                                   &d, PR_TRUE,  /* needPrivateExponent=true */
+                                   key, keySizeInBits);
+        if (rv == SECSuccess) {
+            if (rsa_fips186_verify(&p, &q, &d, keySizeInBits)) {
+                break;
+            }
+            prerr = SEC_ERROR_NEED_RANDOM; /* retry with different values */
+        } else {
+            prerr = PORT_GetError();
+        }
+        kiter++;
+    } while (prerr == SEC_ERROR_NEED_RANDOM && kiter < max_attempts);
+    if (prerr) {
+        /* also hit when attempts run out after a failed FIPS check (rv OK) */
+        PORT_SetError(prerr);
+        rv = SECFailure;
+    }
+cleanup:
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&e);
+    mp_clear(&d);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv && arena) {
+        PORT_FreeArena(arena, PR_TRUE);
+        key = NULL;
+    }
+    return key;
+}
+
+/* Primality check for p: run mpp_fermat(p, 2) and, only when that returns
+ * MP_OKAY, mpp_pprime(p, 2).  The result of the last test that ran is
+ * returned unchanged. */
+mp_err
+rsa_is_prime(mp_int *p)
+{
+    /* run a Fermat test first */
+    int status = mpp_fermat(p, 2);
+
+    if (status == MP_OKAY) {
+        /* it passed, so run some Miller-Rabin tests as well */
+        status = mpp_pprime(p, 2);
+    }
+    return status;
+}
+
+/*
+ * Factorize a RSA modulus n into p and q by using the exponents e and d.
+ *
+ * In: e, d, n
+ * Out: p, q (the returned p might be smaller than q)
+ * Returns MP_OKAY once a factor is found, MP_RANGE if none of the bases
+ * yields one, or (presumably via CHECK_MPI_OK) the error of a failing call.
+ *
+ * See Handbook of Applied Cryptography, 8.2.2(i).
+ *
+ * The algorithm is probabilistic: the 64 even bases 2, 4, ..., 128 are tried
+ * in turn, each with a 50% chance of succeeding in O(log(e*d)) time.
+ */
+static mp_err
+rsa_factorize_n_from_exponents(mp_int *e, mp_int *d, mp_int *p, mp_int *q,
+                               mp_int *n)
+{
+    /* lambda is the private modulus: e*d = 1 mod lambda */
+    /* so: e*d - 1 = k*lambda = t*2^s where t is odd */
+    mp_int klambda;
+    mp_int t, onetwentyeight;
+    unsigned long s = 0;
+    unsigned long i;
+
+    /* cand = a^(t * 2^i) mod n, next_cand = a^(t * 2^(i+1)) mod n */
+    mp_int a;
+    mp_int cand;
+    mp_int next_cand;
+
+    mp_int n_minus_one;
+    mp_err err = MP_OKAY;
+
+    MP_DIGITS(&klambda) = 0;
+    MP_DIGITS(&t) = 0;
+    MP_DIGITS(&a) = 0;
+    MP_DIGITS(&cand) = 0;
+    MP_DIGITS(&n_minus_one) = 0;
+    MP_DIGITS(&next_cand) = 0;
+    MP_DIGITS(&onetwentyeight) = 0;
+    CHECK_MPI_OK(mp_init(&klambda));
+    CHECK_MPI_OK(mp_init(&t));
+    CHECK_MPI_OK(mp_init(&a));
+    CHECK_MPI_OK(mp_init(&cand));
+    CHECK_MPI_OK(mp_init(&n_minus_one));
+    CHECK_MPI_OK(mp_init(&next_cand));
+    CHECK_MPI_OK(mp_init(&onetwentyeight));
+
+    mp_set_int(&onetwentyeight, 128);
+
+    /* calculate k*lambda = e*d - 1 */
+    CHECK_MPI_OK(mp_mul(e, d, &klambda));
+    CHECK_MPI_OK(mp_sub_d(&klambda, 1, &klambda));
+
+    /* factorize klambda into t*2^s */
+    CHECK_MPI_OK(mp_copy(&klambda, &t));
+    while (mpp_divis_d(&t, 2) == MP_YES) {
+        CHECK_MPI_OK(mp_div_2(&t, &t));
+        s += 1;
+    }
+
+    /* precompute n_minus_one = n - 1 */
+    CHECK_MPI_OK(mp_copy(n, &n_minus_one));
+    CHECK_MPI_OK(mp_sub_d(&n_minus_one, 1, &n_minus_one));
+
+    /* try fixed bases a; each has a 50% chance of leading to a factorization */
+    CHECK_MPI_OK(mp_set_int(&a, 2));
+    /* The following is equivalent to for (a=2, a <= 128, a+=2) */
+    while (mp_cmp(&a, &onetwentyeight) <= 0) {
+        /* compute the base cand = a^(t * 2^0) [i = 0] */
+        CHECK_MPI_OK(mp_exptmod(&a, &t, n, &cand));
+
+        for (i = 0; i < s; i++) {
+            /* condition 1: skip the base if we hit a trivial factor of n */
+            if (mp_cmp(&cand, &n_minus_one) == 0 || mp_cmp_d(&cand, 1) == 0) {
+                break;
+            }
+
+            /* increase i in a^(t * 2^i) by squaring the number */
+            CHECK_MPI_OK(mp_exptmod_d(&cand, 2, n, &next_cand));
+
+            /* condition 2: a^(t * 2^(i+1)) = 1 mod n */
+            if (mp_cmp_d(&next_cand, 1) == 0) {
+                /* conditions verified, gcd(a^(t * 2^i) - 1, n) is a factor */
+                CHECK_MPI_OK(mp_sub_d(&cand, 1, &cand));
+                CHECK_MPI_OK(mp_gcd(&cand, n, p));
+                if (mp_cmp_d(p, 1) == 0) {
+                    CHECK_MPI_OK(mp_add_d(&cand, 1, &cand));
+                    break;
+                }
+                CHECK_MPI_OK(mp_div(n, p, q, NULL));
+                goto cleanup;
+            }
+            CHECK_MPI_OK(mp_copy(&next_cand, &cand));
+        }
+
+        CHECK_MPI_OK(mp_add_d(&a, 2, &a));
+    }
+
+    /* if we reach here it's likely ((2^64 - 1) / 2^64) that d is wrong */
+    err = MP_RANGE;
+
+cleanup:
+    mp_clear(&klambda);
+    mp_clear(&t);
+    mp_clear(&a);
+    mp_clear(&cand);
+    mp_clear(&n_minus_one);
+    mp_clear(&next_cand);
+    mp_clear(&onetwentyeight);
+    return err;
+}
+
+/*
+ * Try to find the two primes based on 2 exponents plus a prime.
+ *
+ * In: e, d and p.
+ * Out: p,q.
+ *
+ * Step 1, Since d = e**-1 mod phi, we know that d*e == 1 mod phi, or
+ *  d*e = 1+k*phi, or d*e-1 = k*phi. since d is less than phi and e is
+ *  usually less than d, then k must be an integer between e-1 and 1
+ *  (probably on the order of e).
+ * Step 1a, We can divide k*phi by prime-1 and get k*(q-1). This will reduce
+ *      the size of our division through the rest of the loop.
+ * Step 2, Loop through the values k=e-1 to 1 looking for k. k should be on
+ *  the order or e, and e is typically small. This may take a while for
+ *  a large random e. We are looking for a k that divides kphi
+ *  evenly. Once we find a k that divides kphi evenly, we assume it
+ *  is the true k. It's possible this k is not the 'true' k but has
+ *  swapped factors of p-1 and/or q-1. Because of this, we
+ *  tentatively continue Steps 3-6 inside this loop, and may return looking
+ *  for another k on failure.
+ * Step 3, Calculate our tentative phi=kphi/k. Note: real phi is (p-1)*(q-1).
+ * Step 4a, kphi is k*(q-1), so phi is our tenative q-1. q = phi+1.
+ *      If k is correct, q should be the right length and prime.
+ * Step 4b, It's possible q-1 and k could have swapped factors. We now have a
+ *  possible solution that meets our criteria. It may not be the only
+ *      solution, however, so we keep looking. If we find more than one,
+ *      we will fail since we cannot determine which is the correct
+ *      solution, and returning the wrong modulus will compromise both
+ *      moduli. If no other solution is found, we return the unique solution.
+ *
+ * This will return p & q. q may be larger than p in the case that p was given
+ * and it was the smaller prime.
+ */
+static mp_err
+rsa_get_prime_from_exponents(mp_int *e, mp_int *d, mp_int *p, mp_int *q,
+                             mp_int *n, unsigned int keySizeInBits)
+{
+    mp_int kphi; /* k*phi */
+    mp_int k;    /* current guess at 'k' */
+    mp_int phi;  /* (p-1)(q-1) */
+    mp_int r;    /* remainder */
+    mp_int tmp;  /* p-1 if p is given */
+    mp_err err = MP_OKAY;
+    unsigned int order_k;
+
+    MP_DIGITS(&kphi) = 0;
+    MP_DIGITS(&phi) = 0;
+    MP_DIGITS(&k) = 0;
+    MP_DIGITS(&r) = 0;
+    MP_DIGITS(&tmp) = 0;
+    CHECK_MPI_OK(mp_init(&kphi));
+    CHECK_MPI_OK(mp_init(&phi));
+    CHECK_MPI_OK(mp_init(&k));
+    CHECK_MPI_OK(mp_init(&r));
+    CHECK_MPI_OK(mp_init(&tmp));
+
+    /* our algorithm looks for a factor k whose maximum size is dependent
+     * on the size of our smallest exponent, which had better be the public
+     * exponent (if it's the private, the key is vulnerable to a brute force
+     * attack).
+     *
+     * since our factor search is linear, we need to limit the maximum
+     * size of the public key. this should not be a problem normally, since
+     * public keys are usually small.
+     *
+     * if we want to handle larger public key sizes, we should have
+     * a version which tries to 'completely' factor k*phi (where completely
+     * means 'factor into primes, or composites with which are products of
+     * large primes). Once we have all the factors, we can sort them out and
+     * try different combinations to form our phi. The risk is if (p-1)/2,
+     * (q-1)/2, and k are all large primes. In any case if the public key
+     * is small (order of 20 some bits), then a linear search for k is
+     * manageable.
+     */
+    if (mpl_significant_bits(e) > 23) {
+        err = MP_RANGE;
+        goto cleanup;
+    }
+
+    /* calculate k*phi = e*d - 1 */
+    CHECK_MPI_OK(mp_mul(e, d, &kphi));
+    CHECK_MPI_OK(mp_sub_d(&kphi, 1, &kphi));
+
+    /* kphi is (e*d)-1, which is the same as k*(p-1)(q-1)
+     * d < (p-1)(q-1), therefor k must be less than e-1
+     * We can narrow down k even more, though. Since p and q are odd and both
+     * have their high bit set, then we know that phi must be on order of
+     * keySizeBits.
+     */
+    order_k = (unsigned)mpl_significant_bits(&kphi) - keySizeInBits;
+
+    /* for (k=kinit; order(k) >= order_k; k--) { */
+    /* k=kinit: k can't be bigger than  kphi/2^(keySizeInBits -1) */
+    CHECK_MPI_OK(mp_2expt(&k, keySizeInBits - 1));
+    CHECK_MPI_OK(mp_div(&kphi, &k, &k, NULL));
+    if (mp_cmp(&k, e) >= 0) {
+        /* also can't be bigger then e-1 */
+        CHECK_MPI_OK(mp_sub_d(e, 1, &k));
+    }
+
+    /* calculate our temp value */
+    /* This saves recalculating this value when the k guess is wrong, which
+     * is reasonably frequent. */
+    /* tmp = p-1 (used to calculate q-1= phi/tmp) */
+    CHECK_MPI_OK(mp_sub_d(p, 1, &tmp));
+    CHECK_MPI_OK(mp_div(&kphi, &tmp, &kphi, &r));
+    if (mp_cmp_z(&r) != 0) {
+        /* p-1 doesn't divide kphi, some parameter wasn't correct */
+        err = MP_RANGE;
+        goto cleanup;
+    }
+    mp_zero(q);
+    /* kphi is now k*(q-1) */
+
+    /* rest of the for loop */
+    for (; (err == MP_OKAY) && (mpl_significant_bits(&k) >= order_k);
+         err = mp_sub_d(&k, 1, &k)) {
+        CHECK_MPI_OK(err);
+        /* looking for k as a factor of kphi */
+        CHECK_MPI_OK(mp_div(&kphi, &k, &phi, &r));
+        if (mp_cmp_z(&r) != 0) {
+            /* not a factor, try the next one */
+            continue;
+        }
+        /* we have a possible phi, see if it works */
+        if ((unsigned)mpl_significant_bits(&phi) != keySizeInBits / 2) {
+            /* phi is not the right size */
+            continue;
+        }
+        /* phi should be divisible by 2, since
+         * q is odd and phi=(q-1). */
+        if (mpp_divis_d(&phi, 2) == MP_NO) {
+            /* phi is not divisible by 4 */
+            continue;
+        }
+        /* we now have a candidate for the second prime */
+        CHECK_MPI_OK(mp_add_d(&phi, 1, &tmp));
+
+        /* check to make sure it is prime */
+        err = rsa_is_prime(&tmp);
+        if (err != MP_OKAY) {
+            if (err == MP_NO) {
+                /* No, then we still have the wrong phi */
+                continue;
+            }
+            goto cleanup;
+        }
+        /*
+         * It is possible that we have the wrong phi if
+         * k_guess*(q_guess-1) = k*(q-1) (k and q-1 have swapped factors).
+         * Since our q_guess is prime, however, we have found a valid
+         * rsa key because:
+         *   q is the correct order of magnitude.
+         *   phi = (p-1)(q-1) where p and q are both primes.
+         *   e*d mod phi = 1.
+         * There is no way to know from the info given if this is the
+         * original key. We never want to return the wrong key because if
+         * two moduli with the same factor is known, then euclid's gcd
+         * algorithm can be used to find that factor. Even though the
+         * caller didn't pass the original modulus, it doesn't mean the
+         * modulus wasn't known or isn't available somewhere. So to be safe
+         * if we can't be sure we have the right q, we don't return any.
+         *
+         * So to make sure we continue looking for other valid q's. If none
+         * are found, then we can safely return this one, otherwise we just
+         * fail */
+        if (mp_cmp_z(q) != 0) {
+            /* this is the second valid q, don't return either,
+             * just fail */
+            err = MP_RANGE;
+            break;
+        }
+        /* we only have one q so far, save it and if no others are found,
+         * it's safe to return it */
+        CHECK_MPI_OK(mp_copy(&tmp, q));
+        continue;
+    }
+    if ((unsigned)mpl_significant_bits(&k) < order_k) {
+        if (mp_cmp_z(q) == 0) {
+            /* If we get here, something was wrong with the parameters we
+             * were given */
+            err = MP_RANGE;
+        }
+    }
+cleanup:
+    mp_clear(&kphi);
+    mp_clear(&phi);
+    mp_clear(&k);
+    mp_clear(&r);
+    mp_clear(&tmp);
+    return err;
+}
+
+/*
+ * take a private key with only a few elements and fill out the missing pieces.
+ *
+ * All the entries will be overwritten with data allocated out of the arena
+ * If no arena is supplied, one will be created.
+ *
+ * The following fields must be supplied in order for this function
+ * to succeed:
+ *   one of either publicExponent or privateExponent
+ *   two more of the following 5 parameters.
+ *      modulus (n)
+ *      prime1  (p)
+ *      prime2  (q)
+ *      publicExponent (e)
+ *      privateExponent (d)
+ *
+ * NOTE: if only the publicExponent, privateExponent, and one prime is given,
+ * then there may be more than one RSA key that matches that combination.
+ *
+ * All parameters will be replaced in the key structure with new parameters
+ * Allocated out of the arena. There is no attempt to free the old structures.
+ * Prime1 will always be greater than prime2 (even if the caller supplies the
+ * smaller prime as prime1 or the larger prime as prime2). The parameters are
+ * not overwritten on failure.
+ *
+ *  How it works:
+ *     We can generate all the parameters from one of the exponents, plus the
+ *        two primes. (rsa_build_key_from_primes)
+ *     If we are given one of the exponents and both primes, we are done.
+ *     If we are given one of the exponents, the modulus and one prime, we
+ *        calculate the second prime by dividing the modulus by the given
+ *        prime, giving us an exponent and 2 primes.
+ *     If we are given 2 exponents and one of the primes we calculate
+ *        k*phi = d*e-1, where k is an integer less than d which
+ *        divides d*e-1. We find factor k so we can isolate phi.
+ *            phi = (p-1)(q-1)
+ *        We can use phi to find the other prime as follows:
+ *        q = (phi/(p-1)) + 1. We now have 2 primes and an exponent.
+ *        (NOTE: if more than one prime meets this condition, the operation
+ *        will fail. See comments elsewhere in this file about this).
+ *        (rsa_get_prime_from_exponents)
+ *     If we are given 2 exponents and the modulus we factor the modulus to
+ *        get the 2 missing primes (rsa_factorize_n_from_exponents)
+ *
+ */
+SECStatus
+RSA_PopulatePrivateKey(RSAPrivateKey *key)
+{
+    PLArenaPool *arena = NULL; /* set only if this call creates the arena */
+    PRBool needPublicExponent = PR_TRUE;
+    PRBool needPrivateExponent = PR_TRUE;
+    PRBool hasModulus = PR_FALSE;
+    unsigned int keySizeInBits = 0; /* derived from the input lengths below */
+    int prime_count = 0;
+    /* standard RSA nomenclature */
+    mp_int p, q, e, d, n;
+    /* remainder */
+    mp_int r;
+    mp_err err = 0;
+    SECStatus rv = SECFailure;
+
+    MP_DIGITS(&p) = 0; /* cleanup calls mp_clear on all six, on every path */
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&e) = 0;
+    MP_DIGITS(&d) = 0;
+    MP_DIGITS(&n) = 0;
+    MP_DIGITS(&r) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&e));
+    CHECK_MPI_OK(mp_init(&d));
+    CHECK_MPI_OK(mp_init(&n));
+    CHECK_MPI_OK(mp_init(&r));
+
+    /* if the key didn't already have an arena, create one. */
+    if (key->arena == NULL) {
+        arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE);
+        if (!arena) {
+            goto cleanup; /* err is 0 here; rv is still SECFailure */
+        }
+        key->arena = arena;
+    }
+
+    /* load up the known exponents */
+    if (key->publicExponent.data) {
+        SECITEM_TO_MPINT(key->publicExponent, &e);
+        needPublicExponent = PR_FALSE;
+    }
+    if (key->privateExponent.data) {
+        SECITEM_TO_MPINT(key->privateExponent, &d);
+        needPrivateExponent = PR_FALSE;
+    }
+    if (needPrivateExponent && needPublicExponent) {
+        /* Not enough information, we need at least one exponent */
+        err = MP_BADARG;
+        goto cleanup;
+    }
+
+    /* load up the known primes. If only one prime is given, it will be
+     * assigned 'p'. Once we have both primes, we'll make sure p is the larger.
+     * The value prime_count tells us how many we have acquired.
+     */
+    if (key->prime1.data) {
+        int primeLen = key->prime1.len;
+        if (key->prime1.data[0] == 0) {
+            primeLen--; /* a leading zero octet is not counted */
+        }
+        keySizeInBits = primeLen * 2 * PR_BITS_PER_BYTE;
+        SECITEM_TO_MPINT(key->prime1, &p);
+        prime_count++;
+    }
+    if (key->prime2.data) {
+        int primeLen = key->prime2.len;
+        if (key->prime2.data[0] == 0) {
+            primeLen--;
+        }
+        keySizeInBits = primeLen * 2 * PR_BITS_PER_BYTE;
+        SECITEM_TO_MPINT(key->prime2, prime_count ? &q : &p);
+        prime_count++;
+    }
+    /* load up the modulus; its length overrides the size taken from a prime */
+    if (key->modulus.data) {
+        int modLen = key->modulus.len;
+        if (key->modulus.data[0] == 0) {
+            modLen--;
+        }
+        keySizeInBits = modLen * PR_BITS_PER_BYTE;
+        SECITEM_TO_MPINT(key->modulus, &n);
+        hasModulus = PR_TRUE;
+    }
+    /* if we have the modulus and one prime, calculate the second. */
+    if ((prime_count == 1) && (hasModulus)) {
+        if (mp_div(&n, &p, &q, &r) != MP_OKAY || mp_cmp_z(&r) != 0) {
+            /* p is not a factor of n, fail */
+            err = MP_BADARG;
+            goto cleanup;
+        }
+        prime_count++;
+    }
+
+    /* If we didn't have enough primes try to calculate the primes from
+     * the exponents */
+    if (prime_count < 2) {
+        /* if we don't have at least 2 primes at this point, then we need both
+         * exponents and one prime or a modulus*/
+        if (!needPublicExponent && !needPrivateExponent &&
+            (prime_count > 0)) {
+            CHECK_MPI_OK(rsa_get_prime_from_exponents(&e, &d, &p, &q, &n,
+                                                      keySizeInBits));
+        } else if (!needPublicExponent && !needPrivateExponent && hasModulus) {
+            CHECK_MPI_OK(rsa_factorize_n_from_exponents(&e, &d, &p, &q, &n));
+        } else {
+            /* not enough given parameters to get both primes */
+            err = MP_BADARG;
+            goto cleanup;
+        }
+    }
+
+    /* Assure p > q */
+    /* NOTE: PKCS #1 does not require p > q, and NSS doesn't use any
+      * implementation optimization that requires p > q. We can remove
+      * this code in the future.
+      */
+    if (mp_cmp(&p, &q) < 0)
+        mp_exch(&p, &q);
+
+    /* we now have our 2 primes and at least one exponent, we can fill
+      * in the key */
+    rv = rsa_build_from_primes(&p, &q,
+                               &e, needPublicExponent,
+                               &d, needPrivateExponent,
+                               key, keySizeInBits);
+cleanup:
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&e);
+    mp_clear(&d);
+    mp_clear(&n);
+    mp_clear(&r);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv && arena) { /* free only an arena created here, never the caller's */
+        PORT_FreeArena(arena, PR_TRUE);
+        key->arena = NULL;
+    }
+    return rv;
+}
+
+static unsigned int
+rsa_modulusLen(SECItem *modulus)
+{
+    if (modulus->len == 0 || modulus->data == NULL)
+        return 0; /* empty item: there is no data[0] to inspect */
+    return modulus->len - !modulus->data[0]; /* drop one leading zero octet */
+}
+
+/*
+** Perform a raw public-key operation: output = input**e mod n.
+**  Length of input and output buffers are equal to key's modulus len.
+*/
+SECStatus
+RSA_PublicKeyOp(RSAPublicKey *key,
+                unsigned char *output,
+                const unsigned char *input)
+{
+    unsigned int modLen, expLen, offset;
+    mp_int n, e, m, c; /* modulus, public exponent, input, result */
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    if (!key || !output || !input) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    MP_DIGITS(&n) = 0;
+    MP_DIGITS(&e) = 0;
+    MP_DIGITS(&m) = 0;
+    MP_DIGITS(&c) = 0;
+    CHECK_MPI_OK(mp_init(&n));
+    CHECK_MPI_OK(mp_init(&e));
+    CHECK_MPI_OK(mp_init(&m));
+    CHECK_MPI_OK(mp_init(&c));
+    modLen = rsa_modulusLen(&key->modulus);
+    expLen = rsa_modulusLen(&key->publicExponent); /* same length rule as n */
+    /* 1.  Obtain public key (n, e) */
+    if (BAD_RSA_KEY_SIZE(modLen, expLen)) {
+        PORT_SetError(SEC_ERROR_INVALID_KEY);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    SECITEM_TO_MPINT(key->modulus, &n);
+    SECITEM_TO_MPINT(key->publicExponent, &e);
+    if (e.used > n.used) {
+        /* exponent should not use more digits than the modulus */
+        PORT_SetError(SEC_ERROR_INVALID_KEY);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    /* 2. check input out of range (needs to be in range [0..n-1]) */
+    offset = (key->modulus.data[0] == 0) ? 1 : 0; /* may be leading 0 */
+    if (memcmp(input, key->modulus.data + offset, modLen) >= 0) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN); /* input >= n, octet-wise */
+        rv = SECFailure;
+        goto cleanup;
+    }
+    /* 2 bis.  Represent message as integer in range [0..n-1] */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&m, input, modLen));
+/* 3.  Compute c = m**e mod n */
+#ifdef USE_MPI_EXPT_D
+    /* XXX see which is faster */
+    if (MP_USED(&e) == 1) {
+        CHECK_MPI_OK(mp_exptmod_d(&m, MP_DIGIT(&e, 0), &n, &c));
+    } else
+#endif
+        CHECK_MPI_OK(mp_exptmod(&m, &e, &n, &c));
+    /* 4.  result c is ciphertext, written as exactly modLen octets */
+    err = mp_to_fixlen_octets(&c, output, modLen);
+    if (err >= 0) /* any non-negative return is treated as success */
+        err = MP_OKAY;
+cleanup:
+    mp_clear(&n);
+    mp_clear(&e);
+    mp_clear(&m);
+    mp_clear(&c);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/*
+**  RSA Private key operation (no CRT): m = c**d mod n. modLen is not used.
+*/
+static SECStatus
+rsa_PrivateKeyOpNoCRT(RSAPrivateKey *key, mp_int *m, mp_int *c, mp_int *n,
+                      unsigned int modLen)
+{
+    mp_int d; /* private exponent, loaded from key->privateExponent */
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    MP_DIGITS(&d) = 0;
+    CHECK_MPI_OK(mp_init(&d));
+    SECITEM_TO_MPINT(key->privateExponent, &d);
+    /* 1. m = c**d mod n */
+    CHECK_MPI_OK(mp_exptmod(c, &d, n, m));
+cleanup:
+    mp_clear(&d);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/*
+**  RSA Private key operation using CRT; the result in m is not verified.
+*/
+static SECStatus
+rsa_PrivateKeyOpCRTNoCheck(RSAPrivateKey *key, mp_int *m, mp_int *c)
+{
+    mp_int p, q, d_p, d_q, qInv; /* CRT key components read from key */
+    mp_int m1, m2, h, ctmp;      /* intermediates; ctmp holds c reduced mod p or q */
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&d_p) = 0;
+    MP_DIGITS(&d_q) = 0;
+    MP_DIGITS(&qInv) = 0;
+    MP_DIGITS(&m1) = 0;
+    MP_DIGITS(&m2) = 0;
+    MP_DIGITS(&h) = 0;
+    MP_DIGITS(&ctmp) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&d_p));
+    CHECK_MPI_OK(mp_init(&d_q));
+    CHECK_MPI_OK(mp_init(&qInv));
+    CHECK_MPI_OK(mp_init(&m1));
+    CHECK_MPI_OK(mp_init(&m2));
+    CHECK_MPI_OK(mp_init(&h));
+    CHECK_MPI_OK(mp_init(&ctmp));
+    /* copy private key parameters into mp integers */
+    SECITEM_TO_MPINT(key->prime1, &p);         /* p */
+    SECITEM_TO_MPINT(key->prime2, &q);         /* q */
+    SECITEM_TO_MPINT(key->exponent1, &d_p);    /* d_p  = d mod (p-1) */
+    SECITEM_TO_MPINT(key->exponent2, &d_q);    /* d_q  = d mod (q-1) */
+    SECITEM_TO_MPINT(key->coefficient, &qInv); /* qInv = q**-1 mod p */
+    /* 1. m1 = c**d_p mod p (c is first reduced mod p) */
+    CHECK_MPI_OK(mp_mod(c, &p, &ctmp));
+    CHECK_MPI_OK(mp_exptmod(&ctmp, &d_p, &p, &m1));
+    /* 2. m2 = c**d_q mod q (c is first reduced mod q) */
+    CHECK_MPI_OK(mp_mod(c, &q, &ctmp));
+    CHECK_MPI_OK(mp_exptmod(&ctmp, &d_q, &q, &m2));
+    /* 3.  h = (m1 - m2) * qInv mod p */
+    CHECK_MPI_OK(mp_submod(&m1, &m2, &p, &h));
+    CHECK_MPI_OK(mp_mulmod(&h, &qInv, &p, &h));
+    /* 4.  m = m2 + h * q */
+    CHECK_MPI_OK(mp_mul(&h, &q, m));
+    CHECK_MPI_OK(mp_add(m, &m2, m));
+cleanup:
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&d_p);
+    mp_clear(&d_q);
+    mp_clear(&qInv);
+    mp_clear(&m1);
+    mp_clear(&m2);
+    mp_clear(&h);
+    mp_clear(&ctmp);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/*
+** An attack against RSA CRT was described by Boneh, DeMillo, and Lipton in:
+** "On the Importance of Eliminating Errors in Cryptographic Computations",
+** http://theory.stanford.edu/~dabo/papers/faults.ps.gz
+**
+** As a defense against the attack, carry out the private key operation,
+** followed up with a public key operation to invert the result.
+** Verify that result against the input; a mismatch returns SECFailure.
+*/
+static SECStatus
+rsa_PrivateKeyOpCRTCheckedPubKey(RSAPrivateKey *key, mp_int *m, mp_int *c)
+{
+    mp_int n, e, v; /* modulus, public exponent, re-encrypted result */
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    MP_DIGITS(&n) = 0;
+    MP_DIGITS(&e) = 0;
+    MP_DIGITS(&v) = 0;
+    CHECK_MPI_OK(mp_init(&n));
+    CHECK_MPI_OK(mp_init(&e));
+    CHECK_MPI_OK(mp_init(&v));
+    CHECK_SEC_OK(rsa_PrivateKeyOpCRTNoCheck(key, m, c));
+    SECITEM_TO_MPINT(key->modulus, &n);
+    SECITEM_TO_MPINT(key->publicExponent, &e);
+    /* Perform a public key operation v = m ** e mod n */
+    CHECK_MPI_OK(mp_exptmod(m, &e, &n, &v));
+    if (mp_cmp(&v, c) != 0) {
+        rv = SECFailure; /* v != c; no PORT_SetError call on this path */
+    }
+cleanup:
+    mp_clear(&n);
+    mp_clear(&e);
+    mp_clear(&v);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+static PRCallOnceType coBPInit = { 0, 0, 0 }; /* passed to PR_CallOnce in RSA_Init */
+static PRStatus
+init_blinding_params_list(void)
+{
+    blindingParamsList.waitCount = 0;
+    PR_INIT_CLIST(&blindingParamsList.head); /* valid empty list even if we fail */
+    blindingParamsList.lock = PZ_NewLock(nssILockOther);
+    if (!blindingParamsList.lock) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return PR_FAILURE;
+    }
+    blindingParamsList.cVar = PR_NewCondVar(blindingParamsList.lock);
+    if (!blindingParamsList.cVar) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return PR_FAILURE; /* lock stays set; RSA_Cleanup releases it if run */
+    }
+    return PR_SUCCESS;
+}
+
+static SECStatus
+generate_blinding_params(RSAPrivateKey *key, mp_int *f, mp_int *g, mp_int *n,
+                         unsigned int modLen)
+{
+    SECStatus rv = SECSuccess;
+    mp_int e, k;
+    mp_err err = MP_OKAY;
+    unsigned char *kb = NULL; /* modLen random octets, the raw material for k */
+
+    MP_DIGITS(&e) = 0;
+    MP_DIGITS(&k) = 0;
+    CHECK_MPI_OK(mp_init(&e));
+    CHECK_MPI_OK(mp_init(&k));
+    SECITEM_TO_MPINT(key->publicExponent, &e);
+    /* generate random k < n; outputs are f = k**e mod n and g = k**-1 mod n */
+    kb = PORT_Alloc(modLen);
+    if (!kb) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        rv = SECFailure; /* err is still MP_OKAY, so cleanup would report success */
+        goto cleanup;
+    }
+    CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(kb, modLen));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&k, kb, modLen));
+    CHECK_MPI_OK(mp_mod(&k, n, &k)); /* k < n */
+    /* f = k**e mod n */
+    CHECK_MPI_OK(mp_exptmod(&k, &e, n, f));
+    /* g = k**-1 mod n */
+    CHECK_MPI_OK(mp_invmod(&k, n, g));
+cleanup:
+    if (kb)
+        PORT_ZFree(kb, modLen);
+    mp_clear(&k);
+    mp_clear(&e);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+static SECStatus
+init_blinding_params(RSABlindingParams *rsabp, RSAPrivateKey *key,
+                     mp_int *n, unsigned int modLen)
+{
+    blindingParams *bp = rsabp->array; /* n and modLen are not used here */
+    int i = 0;
+
+    /* Initialize the list pointer for the element */
+    PR_INIT_CLIST(&rsabp->link);
+    for (i = 0; i < RSA_BLINDING_PARAMS_MAX_CACHE_SIZE; ++i, ++bp) {
+        bp->next = bp + 1; /* chain every array slot to the following one */
+        MP_DIGITS(&bp->f) = 0;
+        MP_DIGITS(&bp->g) = 0;
+        bp->counter = 0;
+    }
+    /* The last bp->next value was initialized with a pointer one past
+     * the end of rsabp->array and must be set to NULL
+     */
+    rsabp->array[RSA_BLINDING_PARAMS_MAX_CACHE_SIZE - 1].next = NULL;
+
+    bp = rsabp->array;
+    rsabp->bp = NULL;  /* no ready-to-use parameters yet */
+    rsabp->free = bp;  /* every slot starts on the free list */
+
+    /* List elements are keyed using the modulus */
+    return SECITEM_CopyItem(NULL, &rsabp->modulus, &key->modulus);
+}
+
+static SECStatus
+get_blinding_params(RSAPrivateKey *key, mp_int *n, unsigned int modLen,
+                    mp_int *f, mp_int *g)
+{
+    RSABlindingParams *rsabp = NULL; /* per-modulus cache entry for this key */
+    blindingParams *bpUnlinked = NULL; /* slot taken off the free list, if any */
+    blindingParams *bp;
+    PRCList *el;
+    SECStatus rv = SECSuccess;
+    mp_err err = MP_OKAY;
+    int cmp = -1; /* stays nonzero if the list is empty, forcing a new entry */
+    PRBool holdingLock = PR_FALSE; /* tells cleanup whether to lock/unlock */
+
+    do {
+        if (blindingParamsList.lock == NULL) {
+            PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+            return SECFailure;
+        }
+        /* Acquire the list lock */
+        PZ_Lock(blindingParamsList.lock);
+        holdingLock = PR_TRUE;
+
+        /* Walk the list looking for the private key */
+        for (el = PR_NEXT_LINK(&blindingParamsList.head);
+             el != &blindingParamsList.head;
+             el = PR_NEXT_LINK(el)) {
+            rsabp = (RSABlindingParams *)el;
+            cmp = SECITEM_CompareItem(&rsabp->modulus, &key->modulus);
+            if (cmp >= 0) {
+                /* The key is found or not in the list. */
+                break;
+            }
+        }
+
+        if (cmp) {
+            /* At this point, the key is not in the list.  el should point to
+            ** the list element before which this key should be inserted.
+            */
+            rsabp = PORT_ZNew(RSABlindingParams);
+            if (!rsabp) {
+                PORT_SetError(SEC_ERROR_NO_MEMORY);
+                goto cleanup;
+            }
+
+            rv = init_blinding_params(rsabp, key, n, modLen);
+            if (rv != SECSuccess) {
+                PORT_ZFree(rsabp, sizeof(RSABlindingParams));
+                goto cleanup;
+            }
+
+            /* Insert the new element into the list
+            ** If inserting in the middle of the list, el points to the link
+            ** to insert before.  Otherwise, the link needs to be appended to
+            ** the end of the list, which is the same as inserting before the
+            ** head (since el would have looped back to the head).
+            */
+            PR_INSERT_BEFORE(&rsabp->link, el);
+        }
+
+        /* We've found (or created) the RSAblindingParams struct for this key.
+         * Now, search its list of ready blinding params for a usable one.
+         * (The loop body always leaves the function, so it runs at most once.)
+         */
+        while (0 != (bp = rsabp->bp)) {
+            if (--(bp->counter) > 0) {
+                /* Found a match and there are still remaining uses left */
+                /* Return the parameters */
+                CHECK_MPI_OK(mp_copy(&bp->f, f));
+                CHECK_MPI_OK(mp_copy(&bp->g, g));
+
+                PZ_Unlock(blindingParamsList.lock);
+                return SECSuccess;
+            }
+            /* exhausted this one, give its values to caller, and
+             * then retire it.
+             */
+            mp_exch(&bp->f, f);
+            mp_exch(&bp->g, g);
+            mp_clear(&bp->f);
+            mp_clear(&bp->g);
+            bp->counter = 0;
+            /* Move to free list */
+            rsabp->bp = bp->next;
+            bp->next = rsabp->free;
+            rsabp->free = bp;
+            /* In case there're threads waiting for new blinding
+             * value - notify 1 thread the value is ready
+             */
+            if (blindingParamsList.waitCount > 0) {
+                PR_NotifyCondVar(blindingParamsList.cVar);
+                blindingParamsList.waitCount--;
+            }
+            PZ_Unlock(blindingParamsList.lock);
+            return SECSuccess;
+        }
+        /* We did not find a usable set of blinding params.  Can we make one? */
+        /* Find a free bp struct. */
+        if ((bp = rsabp->free) != NULL) {
+            /* unlink this bp */
+            rsabp->free = bp->next;
+            bp->next = NULL;
+            bpUnlinked = bp; /* In case we fail */
+
+            PZ_Unlock(blindingParamsList.lock); /* generate without the lock */
+            holdingLock = PR_FALSE;
+            /* generate blinding parameter values for the current thread */
+            CHECK_SEC_OK(generate_blinding_params(key, f, g, n, modLen));
+
+            /* put the blinding parameter values into cache */
+            CHECK_MPI_OK(mp_init(&bp->f));
+            CHECK_MPI_OK(mp_init(&bp->g));
+            CHECK_MPI_OK(mp_copy(f, &bp->f));
+            CHECK_MPI_OK(mp_copy(g, &bp->g));
+
+            /* Put this at head of queue of usable params. */
+            PZ_Lock(blindingParamsList.lock);
+            holdingLock = PR_TRUE;
+            (void)holdingLock;
+            /* initialize RSABlindingParamsStr */
+            bp->counter = RSA_BLINDING_PARAMS_MAX_REUSE;
+            bp->next = rsabp->bp;
+            rsabp->bp = bp;
+            bpUnlinked = NULL; /* now owned by the ready list again */
+            /* In case there're threads waiting for new blinding value
+             * just notify them the value is ready
+             */
+            if (blindingParamsList.waitCount > 0) {
+                PR_NotifyAllCondVar(blindingParamsList.cVar);
+                blindingParamsList.waitCount = 0;
+            }
+            PZ_Unlock(blindingParamsList.lock);
+            return SECSuccess;
+        }
+        /* Here, there are no usable blinding parameters available,
+         * and no free bp blocks, presumably because they're all
+         * actively having parameters generated for them.
+         * So, we need to wait here and not eat up CPU until some
+         * change happens. After waking, the lock is dropped and the
+         * whole lookup is retried from the top of the loop.
+         */
+        blindingParamsList.waitCount++;
+        PR_WaitCondVar(blindingParamsList.cVar, PR_INTERVAL_NO_TIMEOUT);
+        PZ_Unlock(blindingParamsList.lock);
+        holdingLock = PR_FALSE;
+        (void)holdingLock;
+    } while (1);
+
+cleanup:
+    /* It is possible to reach this after the lock is already released.  */
+    if (bpUnlinked) {
+        if (!holdingLock) {
+            PZ_Lock(blindingParamsList.lock);
+            holdingLock = PR_TRUE;
+        }
+        bp = bpUnlinked;
+        mp_clear(&bp->f);
+        mp_clear(&bp->g);
+        bp->counter = 0;
+        /* Must put the unlinked bp back on the free list */
+        bp->next = rsabp->free;
+        rsabp->free = bp;
+    }
+    if (holdingLock) {
+        PZ_Unlock(blindingParamsList.lock);
+    }
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+    }
+    return SECFailure; /* every path through cleanup is a failure */
+}
+
+/*
+** Perform a raw private-key operation; check selects the verified CRT path.
+**  Length of input and output buffers are equal to key's modulus len.
+*/
+static SECStatus
+rsa_PrivateKeyOp(RSAPrivateKey *key,
+                 unsigned char *output,
+                 const unsigned char *input,
+                 PRBool check)
+{
+    unsigned int modLen;
+    unsigned int offset;
+    SECStatus rv = SECSuccess;
+    mp_err err = MP_OKAY; /* initialized like every other err in this file */
+    mp_int n, c, m; /* modulus, input, result */
+    mp_int f, g;    /* blinding factor and its inverse (see below) */
+    if (!key || !output || !input) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* check input out of range (needs to be in range [0..n-1]) */
+    modLen = rsa_modulusLen(&key->modulus);
+    offset = (key->modulus.data[0] == 0) ? 1 : 0; /* may be leading 0 */
+    if (memcmp(input, key->modulus.data + offset, modLen) >= 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    MP_DIGITS(&n) = 0;
+    MP_DIGITS(&c) = 0;
+    MP_DIGITS(&m) = 0;
+    MP_DIGITS(&f) = 0;
+    MP_DIGITS(&g) = 0;
+    CHECK_MPI_OK(mp_init(&n));
+    CHECK_MPI_OK(mp_init(&c));
+    CHECK_MPI_OK(mp_init(&m));
+    CHECK_MPI_OK(mp_init(&f));
+    CHECK_MPI_OK(mp_init(&g));
+    SECITEM_TO_MPINT(key->modulus, &n);
+    OCTETS_TO_MPINT(input, &c, modLen);
+    /* If blinding, compute pre-image of ciphertext by multiplying by
+    ** blinding factor
+    */
+    if (nssRSAUseBlinding) {
+        CHECK_SEC_OK(get_blinding_params(key, &n, modLen, &f, &g));
+        /* c' = c*f mod n */
+        CHECK_MPI_OK(mp_mulmod(&c, &f, &n, &c));
+    }
+    /* Do the private key operation m = c**d mod n; CRT is used only when
+     * all five CRT components of the key are present */
+    if (key->prime1.len == 0 ||
+        key->prime2.len == 0 ||
+        key->exponent1.len == 0 ||
+        key->exponent2.len == 0 || key->coefficient.len == 0) {
+        CHECK_SEC_OK(rsa_PrivateKeyOpNoCRT(key, &m, &c, &n, modLen));
+    } else if (check) {
+        CHECK_SEC_OK(rsa_PrivateKeyOpCRTCheckedPubKey(key, &m, &c));
+    } else {
+        CHECK_SEC_OK(rsa_PrivateKeyOpCRTNoCheck(key, &m, &c));
+    }
+    /* If blinding, compute post-image of plaintext by multiplying by
+    ** blinding factor
+    */
+    if (nssRSAUseBlinding) {
+        /* m = m'*g mod n */
+        CHECK_MPI_OK(mp_mulmod(&m, &g, &n, &m));
+    }
+    err = mp_to_fixlen_octets(&m, output, modLen);
+    if (err >= 0) /* any non-negative return is treated as success */
+        err = MP_OKAY;
+cleanup:
+    mp_clear(&n);
+    mp_clear(&c);
+    mp_clear(&m);
+    mp_clear(&f);
+    mp_clear(&g);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+SECStatus
+RSA_PrivateKeyOp(RSAPrivateKey *key,
+                 unsigned char *output,
+                 const unsigned char *input)
+{
+    return rsa_PrivateKeyOp(key, output, input, PR_FALSE); /* check = off */
+}
+
+SECStatus
+RSA_PrivateKeyOpDoubleChecked(RSAPrivateKey *key,
+                              unsigned char *output,
+                              const unsigned char *input)
+{
+    return rsa_PrivateKeyOp(key, output, input, PR_TRUE); /* check = on */
+}
+
+SECStatus
+RSA_PrivateKeyCheck(const RSAPrivateKey *key)
+{
+    mp_int p, q, n, psub1, qsub1, e, d, d_p, d_q, qInv, res; /* res: scratch */
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&n) = 0;
+    MP_DIGITS(&psub1) = 0;
+    MP_DIGITS(&qsub1) = 0;
+    MP_DIGITS(&e) = 0;
+    MP_DIGITS(&d) = 0;
+    MP_DIGITS(&d_p) = 0;
+    MP_DIGITS(&d_q) = 0;
+    MP_DIGITS(&qInv) = 0;
+    MP_DIGITS(&res) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&n));
+    CHECK_MPI_OK(mp_init(&psub1));
+    CHECK_MPI_OK(mp_init(&qsub1));
+    CHECK_MPI_OK(mp_init(&e));
+    CHECK_MPI_OK(mp_init(&d));
+    CHECK_MPI_OK(mp_init(&d_p));
+    CHECK_MPI_OK(mp_init(&d_q));
+    CHECK_MPI_OK(mp_init(&qInv));
+    CHECK_MPI_OK(mp_init(&res));
+
+    if (!key->modulus.data || !key->prime1.data || !key->prime2.data ||
+        !key->publicExponent.data || !key->privateExponent.data ||
+        !key->exponent1.data || !key->exponent2.data ||
+        !key->coefficient.data) {
+        /* call RSA_PopulatePrivateKey first, if the application wishes to
+         * recover these parameters */
+        err = MP_BADARG;
+        goto cleanup;
+    }
+
+    SECITEM_TO_MPINT(key->modulus, &n);
+    SECITEM_TO_MPINT(key->prime1, &p);
+    SECITEM_TO_MPINT(key->prime2, &q);
+    SECITEM_TO_MPINT(key->publicExponent, &e);
+    SECITEM_TO_MPINT(key->privateExponent, &d);
+    SECITEM_TO_MPINT(key->exponent1, &d_p);
+    SECITEM_TO_MPINT(key->exponent2, &d_q);
+    SECITEM_TO_MPINT(key->coefficient, &qInv);
+    /* p and q must be distinct. */
+    if (mp_cmp(&p, &q) == 0) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+#define VERIFY_MPI_EQUAL(m1, m2) \
+    if (mp_cmp(m1, m2) != 0) {   \
+        rv = SECFailure;         \
+        goto cleanup;            \
+    }
+#define VERIFY_MPI_EQUAL_1(m)  \
+    if (mp_cmp_d(m, 1) != 0) { \
+        rv = SECFailure;       \
+        goto cleanup;          \
+    }
+    /* n == p * q (a failed VERIFY_* sets rv but calls no PORT_SetError) */
+    CHECK_MPI_OK(mp_mul(&p, &q, &res));
+    VERIFY_MPI_EQUAL(&res, &n);
+    /* gcd(e, p-1) == 1 */
+    CHECK_MPI_OK(mp_sub_d(&p, 1, &psub1));
+    CHECK_MPI_OK(mp_gcd(&e, &psub1, &res));
+    VERIFY_MPI_EQUAL_1(&res);
+    /* gcd(e, q-1) == 1 */
+    CHECK_MPI_OK(mp_sub_d(&q, 1, &qsub1));
+    CHECK_MPI_OK(mp_gcd(&e, &qsub1, &res));
+    VERIFY_MPI_EQUAL_1(&res);
+    /* d*e == 1 mod p-1 */
+    CHECK_MPI_OK(mp_mulmod(&d, &e, &psub1, &res));
+    VERIFY_MPI_EQUAL_1(&res);
+    /* d*e == 1 mod q-1 */
+    CHECK_MPI_OK(mp_mulmod(&d, &e, &qsub1, &res));
+    VERIFY_MPI_EQUAL_1(&res);
+    /* d_p == d mod p-1 */
+    CHECK_MPI_OK(mp_mod(&d, &psub1, &res));
+    VERIFY_MPI_EQUAL(&res, &d_p);
+    /* d_q == d mod q-1 */
+    CHECK_MPI_OK(mp_mod(&d, &qsub1, &res));
+    VERIFY_MPI_EQUAL(&res, &d_q);
+    /* q * q**-1 == 1 mod p */
+    CHECK_MPI_OK(mp_mulmod(&q, &qInv, &p, &res));
+    VERIFY_MPI_EQUAL_1(&res);
+
+cleanup:
+    mp_clear(&n);
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&psub1);
+    mp_clear(&qsub1);
+    mp_clear(&e);
+    mp_clear(&d);
+    mp_clear(&d_p);
+    mp_clear(&d_q);
+    mp_clear(&qInv);
+    mp_clear(&res);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+static SECStatus
+RSA_Init(void)
+{
+    if (PR_CallOnce(&coBPInit, init_blinding_params_list) != PR_SUCCESS) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); /* replaces any earlier code */
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+SECStatus
+BL_Init(void)
+{
+    return RSA_Init(); /* the only initialization step performed here */
+}
+
+/* cleanup at shutdown: frees the blinding-parameter cache, lock and condvar */
+void
+RSA_Cleanup(void)
+{
+    blindingParams *bp = NULL;
+    if (!coBPInit.initialized) /* the call-once initializer never ran */
+        return;
+
+    while (!PR_CLIST_IS_EMPTY(&blindingParamsList.head)) {
+        RSABlindingParams *rsabp =
+            (RSABlindingParams *)PR_LIST_HEAD(&blindingParamsList.head);
+        PR_REMOVE_LINK(&rsabp->link);
+        /* clear parameters cache (only the ready list is walked) */
+        while (rsabp->bp != NULL) {
+            bp = rsabp->bp;
+            rsabp->bp = rsabp->bp->next;
+            mp_clear(&bp->f);
+            mp_clear(&bp->g);
+        }
+        SECITEM_FreeItem(&rsabp->modulus, PR_FALSE);
+        PORT_Free(rsabp);
+    }
+
+    if (blindingParamsList.cVar) {
+        PR_DestroyCondVar(blindingParamsList.cVar);
+        blindingParamsList.cVar = NULL;
+    }
+
+    if (blindingParamsList.lock) {
+        SKIP_AFTER_FORK(PZ_DestroyLock(blindingParamsList.lock));
+        blindingParamsList.lock = NULL;
+    }
+
+    coBPInit.initialized = 0; /* presumably lets a later RSA_Init run again */
+    coBPInit.inProgress = 0;
+    coBPInit.status = 0;
+}
+
+/*
+ * need a central place for this function to free up all the memory that
+ * freebl may have allocated along the way. Currently only RSA does this,
+ * so I've put it here for now.
+ */
+void
+BL_Cleanup(void)
+{
+    RSA_Cleanup();
+}
+
+PRBool bl_parentForkedAfterC_Initialize; /* written only by BL_SetForkState here */
+
+/*
+ * Set fork flag so it can be tested in SKIP_AFTER_FORK on relevant platforms.
+ */
+void
+BL_SetForkState(PRBool forked)
+{
+    bl_parentForkedAfterC_Initialize = forked;
+}
diff --git a/security/nss/lib/freebl/rsapkcs.c b/security/nss/lib/freebl/rsapkcs.c
new file mode 100644
index 000000000..577fe1f61
--- /dev/null
+++ b/security/nss/lib/freebl/rsapkcs.c
@@ -0,0 +1,1385 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * RSA PKCS#1 v2.1 (RFC 3447) operations
+ */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "secerr.h"
+
+#include "blapi.h"
+#include "secitem.h"
+#include "blapii.h"
+
+#define RSA_BLOCK_MIN_PAD_LEN 8 /* smallest padLen rsa_FormatOneBlock accepts */
+#define RSA_BLOCK_FIRST_OCTET 0x00 /* first octet of every formatted block */
+#define RSA_BLOCK_PRIVATE_PAD_OCTET 0xff /* Pad fill for RSA_BlockPrivate */
+#define RSA_BLOCK_AFTER_PAD_OCTET 0x00 /* separates Pad from ActualData */
+
+/*
+ * RSA block types
+ * The values of RSA_BlockPrivate and RSA_BlockPublic are fixed: they are
+ * written into the formatted block as the BT octet.
+ * The value of RSA_BlockRaw isn't fixed by definition, but we are keeping
+ * the value that NSS has been using in the past.
+ */
+typedef enum {
+    RSA_BlockPrivate = 1, /* pad for a private-key operation */
+    RSA_BlockPublic = 2,  /* pad for a public-key operation */
+    RSA_BlockRaw = 4      /* simply justify the block appropriately */
+} RSA_BlockType;
+
+/* Eight zero octets hashed ahead of mHash by the RSA-PSS functions */
+static const unsigned char eightZeros[] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+
+/* Constant time comparison of a single byte.
+ * Returns 1 iff a == b, otherwise returns 0.
+ * Note: For ranges of bytes, use constantTimeCompare.
+ */
+static unsigned char
+constantTimeEQ8(unsigned char a, unsigned char b)
+{
+    unsigned char c = ~((a - b) | (b - a)); /* bit 7 ends up set iff a == b */
+    c >>= 7;                                /* move that bit down to 0 or 1 */
+    return c;
+}
+
+/* Constant time comparison of a range of bytes.
+ * Returns 1 iff len bytes of a are identical to len bytes of b, otherwise
+ * returns 0. The loop always visits all len bytes (no early exit).
+ */
+static unsigned char
+constantTimeCompare(const unsigned char *a,
+                    const unsigned char *b,
+                    unsigned int len)
+{
+    unsigned char tmp = 0;
+    unsigned int i;
+    for (i = 0; i < len; ++i, ++a, ++b)
+        tmp |= *a ^ *b; /* accumulate every differing bit */
+    return constantTimeEQ8(0x00, tmp); /* 1 only if no bit ever differed */
+}
+
+/* Constant time conditional.
+ * Returns a if c is 1, or b if c is 0. The result is undefined if c is
+ * not 0 or 1.
+ */
+static unsigned int
+constantTimeCondition(unsigned int c,
+                      unsigned int a,
+                      unsigned int b)
+{
+    return (~(c - 1) & a) | ((c - 1) & b); /* c - 1: 0 picks a, ~0 picks b */
+}
+
+static unsigned int
+rsa_modulusLen(SECItem *modulus)
+{
+    if (modulus->len == 0) /* empty item: don't read data[0] or wrap len - 1 */
+        return 0;
+    return modulus->len - !modulus->data[0]; /* skip one leading 0x00 octet */
+}
+
+/*
+ * Format one modulusLen-byte block for public/private key encryption using
+ * the rules defined in PKCS #1. Returns a PORT_Alloc'd block, or NULL.
+ */
+static unsigned char *
+rsa_FormatOneBlock(unsigned modulusLen,
+                   RSA_BlockType blockType,
+                   SECItem *data)
+{
+    unsigned char *block;
+    unsigned char *bp;
+    int padLen;
+    int i, j;
+    SECStatus rv;
+
+    block = (unsigned char *)PORT_Alloc(modulusLen);
+    if (block == NULL)
+        return NULL;
+
+    bp = block;
+
+    /*
+     * All RSA blocks start with two octets:
+     *  0x00 || BlockType
+     */
+    *bp++ = RSA_BLOCK_FIRST_OCTET;
+    *bp++ = (unsigned char)blockType;
+
+    switch (blockType) {
+
+        /*
+         * Blocks intended for private-key operation.
+         */
+        case RSA_BlockPrivate: /* preferred method */
+            /*
+             * 0x00 || BT || Pad || 0x00 || ActualData
+             *   1      1   padLen    1      data->len
+             * Pad is all 0xff bytes (RSA_BLOCK_PRIVATE_PAD_OCTET).
+             */
+            padLen = modulusLen - data->len - 3;
+            PORT_Assert(padLen >= RSA_BLOCK_MIN_PAD_LEN);
+            if (padLen < RSA_BLOCK_MIN_PAD_LEN) {
+                PORT_Free(block);
+                return NULL;
+            }
+            PORT_Memset(bp, RSA_BLOCK_PRIVATE_PAD_OCTET, padLen);
+            bp += padLen;
+            *bp++ = RSA_BLOCK_AFTER_PAD_OCTET;
+            PORT_Memcpy(bp, data->data, data->len);
+            break;
+
+        /*
+         * Blocks intended for public-key operation.
+         */
+        case RSA_BlockPublic:
+            /*
+             * 0x00 || BT || Pad || 0x00 || ActualData
+             *   1      1   padLen    1      data->len
+             * Pad is all non-zero random bytes.
+             *
+             * Build the block left to right.
+             * Fill the entire block from Pad to the end with random bytes.
+             * Use the bytes after Pad as a supply of extra random bytes from
+             * which to find replacements for the zero bytes in Pad.
+             * If we need more than that, refill the bytes after Pad with
+             * new random bytes as necessary.
+             */
+            padLen = modulusLen - (data->len + 3);
+            PORT_Assert(padLen >= RSA_BLOCK_MIN_PAD_LEN);
+            if (padLen < RSA_BLOCK_MIN_PAD_LEN) {
+                PORT_Free(block);
+                return NULL;
+            }
+            j = modulusLen - 2; /* number of bytes from bp to the block's end */
+            rv = RNG_GenerateGlobalRandomBytes(bp, j);
+            if (rv == SECSuccess) {
+                for (i = 0; i < padLen;) {
+                    unsigned char repl;
+                    /* Pad with non-zero random data. */
+                    if (bp[i] != RSA_BLOCK_AFTER_PAD_OCTET) {
+                        ++i;
+                        continue;
+                    }
+                    if (j <= padLen) { /* spare bytes after Pad are used up */
+                        rv = RNG_GenerateGlobalRandomBytes(bp + padLen,
+                                                           modulusLen - (2 + padLen));
+                        if (rv != SECSuccess)
+                            break;
+                        j = modulusLen - 2;
+                    }
+                    do { /* take spare bytes from the end, skipping zeros */
+                        repl = bp[--j];
+                    } while (repl == RSA_BLOCK_AFTER_PAD_OCTET && j > padLen);
+                    if (repl != RSA_BLOCK_AFTER_PAD_OCTET) {
+                        bp[i++] = repl;
+                    }
+                }
+            }
+            if (rv != SECSuccess) {
+                PORT_Free(block);
+                PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+                return NULL;
+            }
+            bp += padLen;
+            *bp++ = RSA_BLOCK_AFTER_PAD_OCTET;
+            PORT_Memcpy(bp, data->data, data->len); /* overwrites spare bytes */
+            break;
+
+        default:
+            PORT_Assert(0);
+            PORT_Free(block);
+            return NULL;
+    }
+
+    return block;
+}
+
+/* Build a modulusLen-byte block from data; result->data is allocated here. */
+static SECStatus
+rsa_FormatBlock(SECItem *result,
+                unsigned modulusLen,
+                RSA_BlockType blockType,
+                SECItem *data)
+{
+    switch (blockType) {
+        case RSA_BlockPrivate:
+        case RSA_BlockPublic:
+            /*
+             * 0x00 || BT || Pad || 0x00 || ActualData
+             * The "3" below is the first octet + the second octet + the 0x00
+             * octet that always comes just before the ActualData.
+             */
+            PORT_Assert(data->len <= (modulusLen - (3 + RSA_BLOCK_MIN_PAD_LEN)));
+            result->data = rsa_FormatOneBlock(modulusLen, blockType, data);
+            if (result->data == NULL) {
+                result->len = 0;
+                return SECFailure;
+            }
+            result->len = modulusLen;
+            break;
+
+        case RSA_BlockRaw:
+            /*
+             * Pad || ActualData, where Pad is zeros. The application is
+             * responsible for recovering the actual data.
+             */
+            if (data->len > modulusLen) {
+                return SECFailure;
+            }
+            result->data = (unsigned char *)PORT_ZAlloc(modulusLen);
+            if (result->data == NULL) { /* don't copy into a failed allocation */
+                result->len = 0;
+                return SECFailure;
+            }
+            result->len = modulusLen;
+            PORT_Memcpy(result->data + (modulusLen - data->len),
+                        data->data, data->len);
+            break;
+
+        default:
+            PORT_Assert(0);
+            result->data = NULL;
+            result->len = 0;
+            return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+ * MGF1 from PKCS #1 v2.1 / RFC 3447: fills mask[0..maskLen) from mgfSeed.
+ */
+static SECStatus
+MGF1(HASH_HashType hashAlg,
+     unsigned char *mask,
+     unsigned int maskLen,
+     const unsigned char *mgfSeed,
+     unsigned int mgfSeedLen)
+{
+    unsigned int digestLen;
+    PRUint32 counter, rounds;
+    unsigned char *tempHash;
+    unsigned char temp[HASH_LENGTH_MAX]; /* full digest of the last round */
+    const SECHashObject *hash;
+    void *hashContext;
+    unsigned char C[4];
+
+    hash = HASH_GetRawHashObject(hashAlg);
+    if (hash == NULL) /* no hash object for hashAlg */
+        return SECFailure;
+
+    hashContext = (*hash->create)();
+    if (hashContext == NULL) { /* previously used without being checked */
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    rounds = (maskLen + hash->length - 1) / hash->length;
+    for (counter = 0; counter < rounds; counter++) {
+        C[0] = (unsigned char)((counter >> 24) & 0xff);
+        C[1] = (unsigned char)((counter >> 16) & 0xff);
+        C[2] = (unsigned char)((counter >> 8) & 0xff);
+        C[3] = (unsigned char)(counter & 0xff);
+
+        /* Could be optimized once rawhash.c implements the clone functions. */
+        (*hash->begin)(hashContext);
+        (*hash->update)(hashContext, mgfSeed, mgfSeedLen);
+        (*hash->update)(hashContext, C, sizeof C);
+
+        tempHash = mask + counter * hash->length;
+        if (counter != (rounds - 1)) {
+            (*hash->end)(hashContext, tempHash, &digestLen, hash->length);
+        } else { /* we're in the last round and need to cut the hash */
+            (*hash->end)(hashContext, temp, &digestLen, hash->length);
+            PORT_Memcpy(tempHash, temp, maskLen - counter * hash->length);
+        }
+    }
+    (*hash->destroy)(hashContext, PR_TRUE);
+
+    return SECSuccess;
+}
+
+/* Raw RSA signature: left-pad data with zeros to the modulus length, sign.
+ * XXX Doesn't set error code */
+SECStatus
+RSA_SignRaw(RSAPrivateKey *key,
+            unsigned char *output,
+            unsigned int *outputLen,
+            unsigned int maxOutputLen,
+            const unsigned char *data,
+            unsigned int dataLen)
+{
+    SECStatus status;
+    unsigned int modLen = rsa_modulusLen(&key->modulus);
+    SECItem padded;
+    SECItem plain;
+
+    if (maxOutputLen < modLen)
+        return SECFailure;
+
+    plain.data = (unsigned char *)data;
+    plain.len = dataLen;
+    padded.data = NULL;
+    status = rsa_FormatBlock(&padded, modLen, RSA_BlockRaw, &plain);
+    if (status == SECSuccess) {
+        status = RSA_PrivateKeyOpDoubleChecked(key, output, padded.data);
+        *outputLen = modLen; /* written whatever the key operation returned */
+    }
+
+    /* Wipe the padded copy of the caller's data before releasing it. */
+    if (padded.data != NULL)
+        PORT_ZFree(padded.data, modLen);
+    return status;
+}
+
+/*
+ * Verify a raw RSA signature: apply the public key to sig and compare the
+ * last hashLen bytes of the result with hash.
+ * sigLen must equal the modulus length and hashLen must not exceed it.
+ * Returns SECSuccess on a match, SECFailure otherwise.
+ * XXX Doesn't set error code
+ */
+SECStatus
+RSA_CheckSignRaw(RSAPublicKey *key,
+                 const unsigned char *sig,
+                 unsigned int sigLen,
+                 const unsigned char *hash,
+                 unsigned int hashLen)
+{
+    SECStatus result = SECFailure;
+    unsigned int modLen = rsa_modulusLen(&key->modulus);
+    unsigned char *recovered;
+
+    if (sigLen != modLen)
+        return SECFailure;
+    if (hashLen > modLen)
+        return SECFailure;
+
+    recovered = (unsigned char *)PORT_Alloc(modLen + 1);
+    if (!recovered)
+        return SECFailure;
+
+    /*
+     * The signature is good when the public-key operation succeeds and the
+     * tail of its output is the same as hash.
+     */
+    /* XXX(rsleevi): Constant time */
+    /* NOTE: should we verify the leading zeros? */
+    if (RSA_PublicKeyOp(key, recovered, sig) == SECSuccess &&
+        PORT_Memcmp(recovered + (modLen - hashLen), hash, hashLen) == 0) {
+        result = SECSuccess;
+    }
+
+    PORT_Free(recovered);
+    return result;
+}
+
+/*
+ * Write the raw result of the public-key operation on sig into data.
+ * sigLen must equal the modulus length and maxDataLen must be at least
+ * that large; *dataLen is set to the modulus length on success only.
+ * XXX Doesn't set error code
+ */
+SECStatus
+RSA_CheckSignRecoverRaw(RSAPublicKey *key,
+                        unsigned char *data,
+                        unsigned int *dataLen,
+                        unsigned int maxDataLen,
+                        const unsigned char *sig,
+                        unsigned int sigLen)
+{
+    unsigned int modLen = rsa_modulusLen(&key->modulus);
+
+    if (sigLen != modLen)
+        return SECFailure;
+    if (maxDataLen < modLen)
+        return SECFailure;
+
+    if (RSA_PublicKeyOp(key, data, sig) != SECSuccess)
+        return SECFailure;
+
+    *dataLen = modLen;
+    return SECSuccess;
+}
+
+/*
+ * Raw RSA encryption: left-pad input with zeros to the modulus length and
+ * apply the public key. maxOutputLen must be at least the modulus length.
+ * *outputLen is set to the modulus length on success.
+ * XXX Doesn't set error code
+ */
+SECStatus
+RSA_EncryptRaw(RSAPublicKey *key,
+               unsigned char *output,
+               unsigned int *outputLen,
+               unsigned int maxOutputLen,
+               const unsigned char *input,
+               unsigned int inputLen)
+{
+    SECStatus status = SECFailure;
+    unsigned int modLen = rsa_modulusLen(&key->modulus);
+    SECItem padded;
+    SECItem plain;
+
+    if (maxOutputLen < modLen)
+        return SECFailure;
+
+    plain.data = (unsigned char *)input;
+    plain.len = inputLen;
+    padded.data = NULL;
+
+    /* Format as Pad || input, then run the public-key operation on it. */
+    if (rsa_FormatBlock(&padded, modLen, RSA_BlockRaw, &plain) == SECSuccess &&
+        RSA_PublicKeyOp(key, output, padded.data) == SECSuccess) {
+        *outputLen = modLen;
+        status = SECSuccess;
+    }
+
+    /* Wipe the padded copy of the input on every path that allocated it. */
+    if (padded.data != NULL)
+        PORT_ZFree(padded.data, modLen);
+    return status;
+}
+
+/*
+ * Raw RSA decryption: apply the private key to input and write the
+ * result to output. inputLen must equal the modulus length and output
+ * must have room for it; *outputLen is set to that length on success.
+ * XXX Doesn't set error code
+ */
+SECStatus
+RSA_DecryptRaw(RSAPrivateKey *key,
+               unsigned char *output,
+               unsigned int *outputLen,
+               unsigned int maxOutputLen,
+               const unsigned char *input,
+               unsigned int inputLen)
+{
+    unsigned int modLen = rsa_modulusLen(&key->modulus);
+
+    if (modLen > maxOutputLen)
+        return SECFailure;
+    if (inputLen != modLen)
+        return SECFailure;
+
+    if (RSA_PrivateKeyOp(key, output, input) != SECSuccess)
+        return SECFailure;
+
+    *outputLen = modLen;
+    return SECSuccess;
+}
+
+/*
+ * Decodes an EME-OAEP encoded block, validating the encoding in constant
+ * time. Described in RFC 3447, section 7.1.2.
+ * input contains the encoded block, after decryption.
+ * label is the optional value L that was associated with the message.
+ * On success, the original message and message length will be stored in
+ * output and outputLen.
+ */
+static SECStatus
+eme_oaep_decode(unsigned char *output,
+                unsigned int *outputLen,
+                unsigned int maxOutputLen,
+                const unsigned char *input,
+                unsigned int inputLen,
+                HASH_HashType hashAlg,
+                HASH_HashType maskHashAlg,
+                const unsigned char *label,
+                unsigned int labelLen)
+{
+    const SECHashObject *hash;
+    void *hashContext;
+    SECStatus rv = SECFailure;
+    unsigned char labelHash[HASH_LENGTH_MAX];
+    unsigned int i, maskLen, paddingOffset;
+    unsigned char *mask = NULL;
+    unsigned char *tmpOutput = NULL;
+    unsigned char isGood, foundPaddingEnd;
+
+    hash = HASH_GetRawHashObject(hashAlg);
+    if (hash == NULL) { /* checked before hash->length is read below */
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    /* 1.c */
+    if (inputLen < (hash->length * 2) + 2) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    /* Step 3.a - Generate lHash */
+    hashContext = (*hash->create)();
+    if (hashContext == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    (*hash->begin)(hashContext);
+    if (labelLen > 0)
+        (*hash->update)(hashContext, label, labelLen);
+    (*hash->end)(hashContext, labelHash, &i, sizeof(labelHash));
+    (*hash->destroy)(hashContext, PR_TRUE);
+
+    tmpOutput = (unsigned char *)PORT_Alloc(inputLen);
+    if (tmpOutput == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        goto done;
+    }
+
+    maskLen = inputLen - hash->length - 1;
+    mask = (unsigned char *)PORT_Alloc(maskLen);
+    if (mask == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        goto done;
+    }
+
+    PORT_Memcpy(tmpOutput, input, inputLen);
+
+    /* 3.c - Generate seedMask */
+    MGF1(maskHashAlg, mask, hash->length, &tmpOutput[1 + hash->length],
+         inputLen - hash->length - 1);
+    /* 3.d - Unmask seed */
+    for (i = 0; i < hash->length; ++i)
+        tmpOutput[1 + i] ^= mask[i];
+
+    /* 3.e - Generate dbMask */
+    MGF1(maskHashAlg, mask, maskLen, &tmpOutput[1], hash->length);
+    /* 3.f - Unmask DB */
+    for (i = 0; i < maskLen; ++i)
+        tmpOutput[1 + hash->length + i] ^= mask[i];
+
+    /* 3.g - Compare Y, lHash, and PS in constant time
+     * Warning: This code is timing dependent and must not disclose which of
+     * these were invalid.
+     */
+    paddingOffset = 0;
+    isGood = 1;
+    foundPaddingEnd = 0;
+
+    /* Compare Y */
+    isGood &= constantTimeEQ8(0x00, tmpOutput[0]);
+
+    /* Compare lHash and lHash' */
+    isGood &= constantTimeCompare(&labelHash[0],
+                                  &tmpOutput[1 + hash->length],
+                                  hash->length);
+
+    /* Compare that the padding is zero or more zero octets, followed by a
+     * 0x01 octet */
+    for (i = 1 + (hash->length * 2); i < inputLen; ++i) {
+        unsigned char isZero = constantTimeEQ8(0x00, tmpOutput[i]);
+        unsigned char isOne = constantTimeEQ8(0x01, tmpOutput[i]);
+        /* non-constant time equivalent:
+         * if (tmpOutput[i] == 0x01 && !foundPaddingEnd)
+         *     paddingOffset = i;
+         */
+        paddingOffset = constantTimeCondition(isOne & ~foundPaddingEnd, i,
+                                              paddingOffset);
+        /* non-constant time equivalent:
+         * if (tmpOutput[i] == 0x01)
+         *    foundPaddingEnd = true;
+         *
+         * Note: This may yield false positives, as it will be set whenever
+         * a 0x01 byte is encountered. If there was bad padding (eg:
+         * 0x03 0x02 0x01), foundPaddingEnd will still be set to true, and
+         * paddingOffset will still be set to 2.
+         */
+        foundPaddingEnd = constantTimeCondition(isOne, 1, foundPaddingEnd);
+        /* non-constant time equivalent:
+         * if (tmpOutput[i] != 0x00 && tmpOutput[i] != 0x01 &&
+         *     !foundPaddingEnd) {
+         *    isGood = false;
+         * }
+         *
+         * Note: This may yield false positives, as a message (and padding)
+         * that is entirely zeros will result in isGood still being true. Thus
+         * it's necessary to check foundPaddingEnd is positive below.
+         */
+        isGood = constantTimeCondition(~foundPaddingEnd & ~isZero, 0, isGood);
+    }
+
+    /* While both isGood and foundPaddingEnd may have false positives, they
+     * cannot BOTH have false positives. If both are not true, then an invalid
+     * message was received. Note, this comparison must still be done in constant
+     * time so as not to leak either condition.
+     */
+    if (!(isGood & foundPaddingEnd)) {
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        goto done;
+    }
+
+    /* End timing dependent code */
+
+    ++paddingOffset; /* Skip the 0x01 following the end of PS */
+
+    *outputLen = inputLen - paddingOffset;
+    if (*outputLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        goto done;
+    }
+
+    if (*outputLen)
+        PORT_Memcpy(output, &tmpOutput[paddingOffset], *outputLen);
+    rv = SECSuccess;
+
+done:
+    if (mask)
+        PORT_ZFree(mask, maskLen);
+    if (tmpOutput)
+        PORT_ZFree(tmpOutput, inputLen);
+    return rv;
+}
+
+/*
+ * Generate an EME-OAEP encoded block for encryption
+ * Described in RFC 3447, section 7.1.1
+ * We use input instead of M for the message to be encrypted
+ * label is the optional value L to be associated with the message.
+ */
+static SECStatus
+eme_oaep_encode(unsigned char *em,
+                unsigned int emLen,
+                const unsigned char *input,
+                unsigned int inputLen,
+                HASH_HashType hashAlg,
+                HASH_HashType maskHashAlg,
+                const unsigned char *label,
+                unsigned int labelLen,
+                const unsigned char *seed,
+                unsigned int seedLen)
+{
+    const SECHashObject *hash;
+    void *hashContext;
+    SECStatus rv;
+    unsigned char *mask;
+    unsigned int reservedLen, dbMaskLen, i;
+
+    hash = HASH_GetRawHashObject(hashAlg);
+    if (hash == NULL) { /* checked before hash->length is read below */
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+    if (seed != NULL && seedLen != hash->length) { /* seed is copied into em */
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Step 1.b */
+    reservedLen = (2 * hash->length) + 2;
+    if (emLen < reservedLen || inputLen > (emLen - reservedLen)) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    /*
+     * From RFC 3447, Section 7.1
+     *                      +----------+---------+-------+
+     *                 DB = |  lHash   |    PS   |   M   |
+     *                      +----------+---------+-------+
+     *                                     |
+     *           +----------+              V
+     *           |   seed   |--> MGF ---> xor
+     *           +----------+              |
+     *                 |                   |
+     *        +--+     V                   |
+     *        |00|    xor <----- MGF <-----|
+     *        +--+     |                   |
+     *          |      |                   |
+     *          V      V                   V
+     *        +--+----------+----------------------------+
+     *  EM =  |00|maskedSeed|          maskedDB          |
+     *        +--+----------+----------------------------+
+     *
+     * We use mask to hold the result of the MGF functions, and all other
+     * values are generated in their final resting place.
+     */
+    *em = 0x00;
+    /* Step 2.a - Generate lHash */
+    hashContext = (*hash->create)();
+    if (hashContext == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    (*hash->begin)(hashContext);
+    if (labelLen > 0)
+        (*hash->update)(hashContext, label, labelLen);
+    (*hash->end)(hashContext, &em[1 + hash->length], &i, hash->length);
+    (*hash->destroy)(hashContext, PR_TRUE);
+
+    /* Step 2.b - Generate PS */
+    if (emLen - reservedLen - inputLen > 0) {
+        PORT_Memset(em + 1 + (hash->length * 2), 0x00,
+                    emLen - reservedLen - inputLen);
+    }
+
+    /* Step 2.c. - Generate DB = lHash || PS || 0x01 || M
+     * lHash and PS are already in em at their offsets; this adds 0x01 and M.
+     */
+    em[emLen - inputLen - 1] = 0x01;
+    if (inputLen)
+        PORT_Memcpy(em + emLen - inputLen, input, inputLen);
+
+    if (seed == NULL) {
+        /* Step 2.d - Generate seed */
+        rv = RNG_GenerateGlobalRandomBytes(em + 1, hash->length);
+        if (rv != SECSuccess)
+            return rv;
+    } else {
+        /* For Known Answer Tests, copy the supplied seed. */
+        PORT_Memcpy(em + 1, seed, seedLen);
+    }
+
+    /* Step 2.e - Generate dbMask*/
+    dbMaskLen = emLen - hash->length - 1;
+    mask = (unsigned char *)PORT_Alloc(dbMaskLen);
+    if (mask == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    MGF1(maskHashAlg, mask, dbMaskLen, em + 1, hash->length);
+    /* Step 2.f - Compute maskedDB*/
+    for (i = 0; i < dbMaskLen; ++i)
+        em[1 + hash->length + i] ^= mask[i];
+
+    /* Step 2.g - Generate seedMask */
+    MGF1(maskHashAlg, mask, hash->length, &em[1 + hash->length], dbMaskLen);
+    /* Step 2.h - Compute maskedSeed */
+    for (i = 0; i < hash->length; ++i)
+        em[1 + i] ^= mask[i];
+    PORT_ZFree(mask, dbMaskLen);
+    return SECSuccess;
+}
+
+/*
+ * RSA-OAEP encryption: EME-OAEP encode input, then apply the public key.
+ */
+SECStatus
+RSA_EncryptOAEP(RSAPublicKey *key,
+                HASH_HashType hashAlg,
+                HASH_HashType maskHashAlg,
+                const unsigned char *label,
+                unsigned int labelLen,
+                const unsigned char *seed,
+                unsigned int seedLen,
+                unsigned char *output,
+                unsigned int *outputLen,
+                unsigned int maxOutputLen,
+                const unsigned char *input,
+                unsigned int inputLen)
+{
+    SECStatus rv = SECFailure;
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    unsigned char *oaepEncoded = NULL;
+
+    if (maxOutputLen < modulusLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    if ((hashAlg == HASH_AlgNULL) || (maskHashAlg == HASH_AlgNULL)) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    if ((labelLen == 0 && label != NULL) ||
+        (labelLen > 0 && label == NULL)) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    oaepEncoded = (unsigned char *)PORT_Alloc(modulusLen);
+    if (oaepEncoded == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    rv = eme_oaep_encode(oaepEncoded, modulusLen, input, inputLen,
+                         hashAlg, maskHashAlg, label, labelLen, seed, seedLen);
+    if (rv == SECSuccess)
+        rv = RSA_PublicKeyOp(key, output, oaepEncoded);
+    if (rv == SECSuccess)
+        *outputLen = modulusLen;
+
+    /* The encoded block can be decoded without the key, so wipe it. */
+    PORT_ZFree(oaepEncoded, modulusLen);
+    return rv;
+}
+
+/*
+ * RSA-OAEP decryption: run the private-key operation on input, then
+ * eme_oaep_decode() the recovered block into output.
+ */
+SECStatus
+RSA_DecryptOAEP(RSAPrivateKey *key,
+                HASH_HashType hashAlg,
+                HASH_HashType maskHashAlg,
+                const unsigned char *label,
+                unsigned int labelLen,
+                unsigned char *output,
+                unsigned int *outputLen,
+                unsigned int maxOutputLen,
+                const unsigned char *input,
+                unsigned int inputLen)
+{
+    SECStatus status;
+    unsigned int modLen = rsa_modulusLen(&key->modulus);
+    unsigned char *block;
+
+    if (hashAlg == HASH_AlgNULL || maskHashAlg == HASH_AlgNULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    if (inputLen != modLen) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    /* label and labelLen must be both empty or both present */
+    if ((labelLen == 0) != (label == NULL)) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    block = (unsigned char *)PORT_Alloc(modLen);
+    if (block == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+
+    status = RSA_PrivateKeyOpDoubleChecked(key, block, input);
+    if (status == SECSuccess) {
+        status = eme_oaep_decode(output, outputLen, maxOutputLen, block, modLen,
+                                 hashAlg, maskHashAlg, label, labelLen);
+    }
+
+    /* The buffer held the decrypted block; zeroize it before release. */
+    PORT_ZFree(block, modLen);
+    return status;
+}
+
+/*
+ * PKCS #1 public-key block encryption: format input as an RSA_BlockPublic
+ * block of the modulus length and apply the public key to it.
+ * *outputLen is set to the modulus length on success.
+ * XXX Doesn't set error code
+ */
+SECStatus
+RSA_EncryptBlock(RSAPublicKey *key,
+                 unsigned char *output,
+                 unsigned int *outputLen,
+                 unsigned int maxOutputLen,
+                 const unsigned char *input,
+                 unsigned int inputLen)
+{
+    SECStatus status = SECFailure;
+    unsigned int modLen = rsa_modulusLen(&key->modulus);
+    SECItem padded;
+    SECItem plain;
+
+    if (maxOutputLen < modLen)
+        return SECFailure;
+
+    plain.data = (unsigned char *)input;
+    plain.len = inputLen;
+    padded.data = NULL;
+
+    /* Build the padded block, then run the public-key operation on it. */
+    if (rsa_FormatBlock(&padded, modLen, RSA_BlockPublic,
+                        &plain) == SECSuccess &&
+        RSA_PublicKeyOp(key, output, padded.data) == SECSuccess) {
+        *outputLen = modLen;
+        status = SECSuccess;
+    }
+
+    /* Wipe the padded plaintext on every path that allocated it. */
+    if (padded.data != NULL)
+        PORT_ZFree(padded.data, modLen);
+    return status;
+}
+
+/* Decrypt an RSA_BlockPublic formatted block. XXX Doesn't set error code */
+SECStatus
+RSA_DecryptBlock(RSAPrivateKey *key,
+                 unsigned char *output,
+                 unsigned int *outputLen,
+                 unsigned int maxOutputLen,
+                 const unsigned char *input,
+                 unsigned int inputLen)
+{
+    SECStatus rv;
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    unsigned int i;
+    unsigned char *buffer;
+
+    if (inputLen != modulusLen)
+        goto failure;
+
+    buffer = (unsigned char *)PORT_Alloc(modulusLen + 1);
+    if (!buffer)
+        goto failure;
+
+    rv = RSA_PrivateKeyOp(key, buffer, input);
+    if (rv != SECSuccess)
+        goto loser;
+
+    /* XXX(rsleevi): Constant time */
+    if (buffer[0] != RSA_BLOCK_FIRST_OCTET ||
+        buffer[1] != (unsigned char)RSA_BlockPublic) {
+        goto loser;
+    }
+    *outputLen = 0;
+    for (i = 2; i < modulusLen; i++) { /* first 0x00 after BT ends the Pad */
+        if (buffer[i] == RSA_BLOCK_AFTER_PAD_OCTET) {
+            *outputLen = modulusLen - i - 1;
+            break;
+        }
+    }
+    if (*outputLen == 0) /* no separator found, or nothing follows it */
+        goto loser;
+    if (*outputLen > maxOutputLen)
+        goto loser;
+
+    PORT_Memcpy(output, buffer + modulusLen - *outputLen, *outputLen);
+
+    PORT_ZFree(buffer, modulusLen + 1); /* buffer held the decrypted block */
+    return SECSuccess;
+
+loser:
+    PORT_ZFree(buffer, modulusLen + 1); /* wipe on the failure path as well */
+failure:
+    return SECFailure;
+}
+
+/*
+ * Encode a RSA-PSS signature.
+ * Described in RFC 3447, section 9.1.1.
+ * We use mHash instead of M as input.
+ * emBits from the RFC is just modBits - 1, see section 8.1.1.
+ * We only support MGF1 as the MGF.
+ *
+ * NOTE: this code assumes modBits is a multiple of 8.
+ */
+static SECStatus
+emsa_pss_encode(unsigned char *em,
+                unsigned int emLen,
+                const unsigned char *mHash,
+                HASH_HashType hashAlg,
+                HASH_HashType maskHashAlg,
+                const unsigned char *salt,
+                unsigned int saltLen)
+{
+    const SECHashObject *hash;
+    void *hash_context;
+    unsigned char *dbMask;
+    unsigned int dbMaskLen, i;
+    SECStatus rv;
+
+    hash = HASH_GetRawHashObject(hashAlg);
+    if (hash == NULL) { /* checked before hash->length is read below */
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+    dbMaskLen = emLen - hash->length - 1;
+
+    /* Step 3 */
+    if (emLen < hash->length + saltLen + 2) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    /* Step 4 */
+    if (salt == NULL) {
+        rv = RNG_GenerateGlobalRandomBytes(&em[dbMaskLen - saltLen], saltLen);
+        if (rv != SECSuccess)
+            return rv;
+    } else {
+        PORT_Memcpy(&em[dbMaskLen - saltLen], salt, saltLen);
+    }
+
+    /* Step 5 + 6: compute H, stored at its final location &em[dbMaskLen]. */
+    hash_context = (*hash->create)();
+    if (hash_context == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    (*hash->begin)(hash_context);
+    (*hash->update)(hash_context, eightZeros, 8);
+    (*hash->update)(hash_context, mHash, hash->length);
+    (*hash->update)(hash_context, &em[dbMaskLen - saltLen], saltLen);
+    (*hash->end)(hash_context, &em[dbMaskLen], &i, hash->length);
+    (*hash->destroy)(hash_context, PR_TRUE);
+
+    /* Step 7 + 8 */
+    PORT_Memset(em, 0, dbMaskLen - saltLen - 1);
+    em[dbMaskLen - saltLen - 1] = 0x01;
+
+    /* Step 9 */
+    dbMask = (unsigned char *)PORT_Alloc(dbMaskLen);
+    if (dbMask == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    MGF1(maskHashAlg, dbMask, dbMaskLen, &em[dbMaskLen], hash->length);
+
+    /* Step 10 */
+    for (i = 0; i < dbMaskLen; i++)
+        em[i] ^= dbMask[i];
+    PORT_Free(dbMask);
+
+    /* Step 11 */
+    em[0] &= 0x7f;
+    /* Step 12 */
+    em[emLen - 1] = 0xbc;
+
+    return SECSuccess;
+}
+
+/*
+ * Verify a RSA-PSS signature.
+ * Described in RFC 3447, section 9.1.2.
+ * We use mHash instead of M as input.
+ * emBits from the RFC is just modBits - 1, see section 8.1.2.
+ * We only support MGF1 as the MGF.
+ *
+ * mHash       - message hash; hash->length bytes are read from it.
+ * em          - encoded message recovered from the signature.
+ * emLen       - length of em in bytes.
+ * hashAlg     - hash used to compute H.
+ * maskHashAlg - hash used by MGF1.
+ * saltLen     - expected salt length in bytes.
+ *
+ * Returns SECSuccess when em is a consistent encoding of mHash.
+ * Otherwise returns SECFailure with SEC_ERROR_BAD_SIGNATURE,
+ * SEC_ERROR_INVALID_ALGORITHM or SEC_ERROR_NO_MEMORY set, or passes
+ * through the failure status reported by MGF1.
+ *
+ * NOTE: this code assumes modBits is a multiple of 8.
+ */
+static SECStatus
+emsa_pss_verify(const unsigned char *mHash,
+                const unsigned char *em,
+                unsigned int emLen,
+                HASH_HashType hashAlg,
+                HASH_HashType maskHashAlg,
+                unsigned int saltLen)
+{
+    const SECHashObject *hash;
+    void *hash_context;
+    unsigned char *db;
+    unsigned char *H_; /* H' from the RFC */
+    unsigned int i;
+    unsigned int dbMaskLen;
+    SECStatus rv;
+
+    /* Guard against a missing hash object (presumably what is returned
+     * for an unsupported hashAlg) instead of dereferencing NULL below. */
+    hash = HASH_GetRawHashObject(hashAlg);
+    if (hash == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    /* Step 3 + 4 + 6 */
+    /* The first two tests are emLen < hash->length + saltLen + 2 written
+     * so that a very large saltLen cannot wrap the unsigned sum. They
+     * must stay first: they make the em[] reads that follow in-bounds. */
+    if ((emLen < hash->length + 2) ||
+        (saltLen > emLen - hash->length - 2) ||
+        (em[emLen - 1] != 0xbc) ||
+        ((em[0] & 0x80) != 0)) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        return SECFailure;
+    }
+
+    /* em is laid out as maskedDB (dbMaskLen bytes) || H || 0xbc.
+     * The check above guarantees dbMaskLen >= saltLen + 1. */
+    dbMaskLen = emLen - hash->length - 1;
+
+    /* Step 7 */
+    db = (unsigned char *)PORT_Alloc(dbMaskLen);
+    if (db == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    /* &em[dbMaskLen] points to H, used as mgfSeed */
+    /* A failed mask generation must not be ignored: db would be
+     * uninitialized and the checks below meaningless. */
+    rv = MGF1(maskHashAlg, db, dbMaskLen, &em[dbMaskLen], hash->length);
+    if (rv != SECSuccess) {
+        PORT_Free(db);
+        return rv;
+    }
+
+    /* Step 8 */
+    for (i = 0; i < dbMaskLen; i++) {
+        db[i] ^= em[i];
+    }
+
+    /* Step 9 */
+    db[0] &= 0x7f;
+
+    /* Step 10 */
+    /* DB must be zero padding || 0x01 || salt. */
+    for (i = 0; i < (dbMaskLen - saltLen - 1); i++) {
+        if (db[i] != 0) {
+            PORT_Free(db);
+            PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+            return SECFailure;
+        }
+    }
+    if (db[dbMaskLen - saltLen - 1] != 0x01) {
+        PORT_Free(db);
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        return SECFailure;
+    }
+
+    /* Step 12 + 13 */
+    /* Recompute H' = Hash(eight zero bytes || mHash || salt). */
+    H_ = (unsigned char *)PORT_Alloc(hash->length);
+    if (H_ == NULL) {
+        PORT_Free(db);
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    hash_context = (*hash->create)();
+    if (hash_context == NULL) {
+        PORT_Free(db);
+        PORT_Free(H_);
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    (*hash->begin)(hash_context);
+    (*hash->update)(hash_context, eightZeros, 8);
+    (*hash->update)(hash_context, mHash, hash->length);
+    (*hash->update)(hash_context, &db[dbMaskLen - saltLen], saltLen);
+    (*hash->end)(hash_context, H_, &i, hash->length);
+    (*hash->destroy)(hash_context, PR_TRUE);
+
+    PORT_Free(db);
+
+    /* Step 14 */
+    if (PORT_Memcmp(H_, &em[dbMaskLen], hash->length) != 0) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        rv = SECFailure;
+    } else {
+        rv = SECSuccess;
+    }
+
+    PORT_Free(H_);
+    return rv;
+}
+
+/*
+ * Create an RSA-PSS signature.
+ *
+ * input is handed to emsa_pss_encode() as the message hash (mHash);
+ * inputLen is not consulted here. salt and saltLength are passed
+ * through unchanged. The encoded block is modulusLen bytes long and is
+ * signed into output, which must have room for at least modulusLen
+ * bytes (maxOutputLen).
+ *
+ * Fails with SEC_ERROR_OUTPUT_LEN when output is too small, with
+ * SEC_ERROR_INVALID_ALGORITHM when either hash is HASH_AlgNULL, and
+ * with SEC_ERROR_NO_MEMORY when the scratch buffer cannot be allocated.
+ * Failures of the encoding or private-key step are returned as-is.
+ */
+SECStatus
+RSA_SignPSS(RSAPrivateKey *key,
+            HASH_HashType hashAlg,
+            HASH_HashType maskHashAlg,
+            const unsigned char *salt,
+            unsigned int saltLength,
+            unsigned char *output,
+            unsigned int *outputLen,
+            unsigned int maxOutputLen,
+            const unsigned char *input,
+            unsigned int inputLen)
+{
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    unsigned char *em;
+    SECStatus rv;
+
+    if (maxOutputLen < modulusLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    if ((hashAlg == HASH_AlgNULL) || (maskHashAlg == HASH_AlgNULL)) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    em = (unsigned char *)PORT_Alloc(modulusLen);
+    if (em == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+
+    rv = emsa_pss_encode(em, modulusLen, input, hashAlg,
+                         maskHashAlg, salt, saltLength);
+    if (rv == SECSuccess) {
+        /* *outputLen is written whenever the private-key operation was
+         * attempted, regardless of its result. */
+        rv = RSA_PrivateKeyOpDoubleChecked(key, output, em);
+        *outputLen = modulusLen;
+    }
+
+    PORT_Free(em);
+    return rv;
+}
+
+/*
+ * Verify an RSA-PSS signature.
+ *
+ * sig must be exactly modulusLen bytes. hash is handed to
+ * emsa_pss_verify() as the message hash (mHash); hashLen is not
+ * consulted here.
+ *
+ * Fails with SEC_ERROR_BAD_SIGNATURE when sigLen is wrong or the
+ * public-key operation fails, with SEC_ERROR_INVALID_ALGORITHM when
+ * either hash is HASH_AlgNULL, and with SEC_ERROR_NO_MEMORY when the
+ * scratch buffer cannot be allocated. Otherwise the result of
+ * emsa_pss_verify() is returned.
+ */
+SECStatus
+RSA_CheckSignPSS(RSAPublicKey *key,
+                 HASH_HashType hashAlg,
+                 HASH_HashType maskHashAlg,
+                 unsigned int saltLength,
+                 const unsigned char *sig,
+                 unsigned int sigLen,
+                 const unsigned char *hash,
+                 unsigned int hashLen)
+{
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    unsigned char *em;
+    SECStatus verifyStatus;
+
+    if (sigLen != modulusLen) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        return SECFailure;
+    }
+
+    if ((hashAlg == HASH_AlgNULL) || (maskHashAlg == HASH_AlgNULL)) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    em = (unsigned char *)PORT_Alloc(modulusLen);
+    if (em == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+
+    /* Recover the encoded message, then check its PSS structure. */
+    if (RSA_PublicKeyOp(key, em, sig) == SECSuccess) {
+        verifyStatus = emsa_pss_verify(hash, em, modulusLen, hashAlg,
+                                       maskHashAlg, saltLength);
+        PORT_Free(em);
+        return verifyStatus;
+    }
+
+    PORT_Free(em);
+    PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+    return SECFailure;
+}
+
+/*
+ * Sign input with the private key: the input bytes are wrapped into an
+ * RSA_BlockPrivate block of modulusLen bytes by rsa_FormatBlock() and
+ * the block is then run through the private-key operation.
+ *
+ * output must have room for at least modulusLen bytes (maxOutputLen);
+ * otherwise SEC_ERROR_OUTPUT_LEN is set and SECFailure returned.
+ * *outputLen is written whenever the private-key operation was
+ * attempted, regardless of its result.
+ *
+ * XXX Failures reported by rsa_FormatBlock() or
+ * RSA_PrivateKeyOpDoubleChecked() are returned as-is; this function
+ * does not set an error code for them itself.
+ */
+SECStatus
+RSA_Sign(RSAPrivateKey *key,
+         unsigned char *output,
+         unsigned int *outputLen,
+         unsigned int maxOutputLen,
+         const unsigned char *input,
+         unsigned int inputLen)
+{
+    SECStatus rv = SECSuccess;
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    SECItem formatted;
+    SECItem unformatted;
+
+    /* Report the short buffer the same way RSA_SignPSS does. */
+    if (maxOutputLen < modulusLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    unformatted.len = inputLen;
+    unformatted.data = (unsigned char *)input;
+    formatted.data = NULL;
+    rv = rsa_FormatBlock(&formatted, modulusLen, RSA_BlockPrivate,
+                         &unformatted);
+    if (rv != SECSuccess)
+        goto done;
+
+    rv = RSA_PrivateKeyOpDoubleChecked(key, output, formatted.data);
+    *outputLen = modulusLen;
+
+done:
+    /* The formatted block is wiped before it is released. */
+    if (formatted.data != NULL)
+        PORT_ZFree(formatted.data, modulusLen);
+    return rv;
+}
+
+/*
+ * Verify a signature over data made with an RSA_BlockPrivate block.
+ *
+ * The signature is run through the public-key operation and the result
+ * must have the form
+ *
+ *   0x00 || BT || Pad || 0x00 || ActualData
+ *
+ * with BT == RSA_BlockPrivate, every Pad octet equal to
+ * RSA_BLOCK_PRIVATE_PAD_OCTET, and ActualData identical to data.
+ *
+ * Returns SECSuccess on a match. Otherwise returns SECFailure with
+ * SEC_ERROR_BAD_DATA (data cannot fit into a block for this modulus),
+ * SEC_ERROR_NO_MEMORY, or SEC_ERROR_BAD_SIGNATURE set.
+ */
+SECStatus
+RSA_CheckSign(RSAPublicKey *key,
+              const unsigned char *sig,
+              unsigned int sigLen,
+              const unsigned char *data,
+              unsigned int dataLen)
+{
+    SECStatus rv;
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    unsigned int i;
+    unsigned char *buffer;
+
+    if (sigLen != modulusLen) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        return SECFailure;
+    }
+    /*
+     * 0x00 || BT || Pad || 0x00 || ActualData
+     *
+     * The "3" below is the first octet + the second octet + the 0x00
+     * octet that always comes just before the ActualData.
+     *
+     * The modulus itself is checked first: for a modulus shorter than
+     * the fixed overhead the unsigned subtraction would wrap, the
+     * dataLen bound would be ineffective, and the padding loop below
+     * could run past the end of buffer.
+     */
+    if (modulusLen < (3 + RSA_BLOCK_MIN_PAD_LEN) ||
+        dataLen > modulusLen - (3 + RSA_BLOCK_MIN_PAD_LEN)) {
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        return SECFailure;
+    }
+
+    buffer = (unsigned char *)PORT_Alloc(modulusLen + 1);
+    if (!buffer) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+
+    rv = RSA_PublicKeyOp(key, buffer, sig);
+    if (rv != SECSuccess)
+        goto loser;
+
+    /*
+     * check the padding that was used
+     */
+    if (buffer[0] != RSA_BLOCK_FIRST_OCTET ||
+        buffer[1] != (unsigned char)RSA_BlockPrivate) {
+        goto loser;
+    }
+    for (i = 2; i < modulusLen - dataLen - 1; i++) {
+        if (buffer[i] != RSA_BLOCK_PRIVATE_PAD_OCTET)
+            goto loser;
+    }
+    /* i now indexes the separator octet just before ActualData. */
+    if (buffer[i] != RSA_BLOCK_AFTER_PAD_OCTET)
+        goto loser;
+
+    /*
+     * make sure we get the same results
+     */
+    if (PORT_Memcmp(buffer + modulusLen - dataLen, data, dataLen) != 0)
+        goto loser;
+
+    PORT_Free(buffer);
+    return SECSuccess;
+
+loser:
+    /* Every failure after the allocation means the signature is bad. */
+    PORT_Free(buffer);
+    PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+    return SECFailure;
+}
+
+/*
+ * Verify an RSA_BlockPrivate signature and recover the signed data.
+ *
+ * The signature is run through the public-key operation and the result
+ * must have the form
+ *
+ *   0x00 || BT || Pad || 0x00 || ActualData
+ *
+ * with BT == RSA_BlockPrivate and every Pad octet equal to
+ * RSA_BLOCK_PRIVATE_PAD_OCTET. ActualData is copied to output and its
+ * length stored in *outputLen. Empty ActualData is rejected.
+ *
+ * Returns SECSuccess on success. Otherwise returns SECFailure with
+ * SEC_ERROR_BAD_SIGNATURE, SEC_ERROR_NO_MEMORY, or SEC_ERROR_OUTPUT_LEN
+ * (recovered data longer than maxOutputLen) set. *outputLen is only
+ * written once the public-key operation has succeeded.
+ */
+SECStatus
+RSA_CheckSignRecover(RSAPublicKey *key,
+                     unsigned char *output,
+                     unsigned int *outputLen,
+                     unsigned int maxOutputLen,
+                     const unsigned char *sig,
+                     unsigned int sigLen)
+{
+    SECStatus rv;
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    unsigned int i;
+    unsigned char *buffer;
+
+    if (sigLen != modulusLen) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        return SECFailure;
+    }
+
+    buffer = (unsigned char *)PORT_Alloc(modulusLen + 1);
+    if (!buffer) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+
+    rv = RSA_PublicKeyOp(key, buffer, sig);
+    if (rv != SECSuccess)
+        goto loser;
+    *outputLen = 0;
+
+    /*
+     * check the padding that was used
+     */
+    if (buffer[0] != RSA_BLOCK_FIRST_OCTET ||
+        buffer[1] != (unsigned char)RSA_BlockPrivate) {
+        goto loser;
+    }
+    /* Scan the padding for the separator; what follows it is the data. */
+    for (i = 2; i < modulusLen; i++) {
+        if (buffer[i] == RSA_BLOCK_AFTER_PAD_OCTET) {
+            *outputLen = modulusLen - i - 1;
+            break;
+        }
+        if (buffer[i] != RSA_BLOCK_PRIVATE_PAD_OCTET)
+            goto loser;
+    }
+    /* Zero means no separator was found or no data follows it. */
+    if (*outputLen == 0)
+        goto loser;
+    if (*outputLen > maxOutputLen) {
+        PORT_Free(buffer);
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    PORT_Memcpy(output, buffer + modulusLen - *outputLen, *outputLen);
+
+    PORT_Free(buffer);
+    return SECSuccess;
+
+loser:
+    /* Every failure routed here means the signature is bad. */
+    PORT_Free(buffer);
+    PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+    return SECFailure;
+}
diff --git a/security/nss/lib/freebl/secmpi.h b/security/nss/lib/freebl/secmpi.h
new file mode 100644
index 000000000..5e8fd1105
--- /dev/null
+++ b/security/nss/lib/freebl/secmpi.h
@@ -0,0 +1,54 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+
+#define CHECK_SEC_OK(func)         \
+    if (SECSuccess != (rv = func)) \
+    goto cleanup
+
+#define CHECK_MPI_OK(func)      \
+    if (MP_OKAY > (err = func)) \
+    goto cleanup
+
+/*
+ * Read len octets starting at oc into the mp_int mp via
+ * mp_read_unsigned_octets(). Goes through CHECK_MPI_OK, so the caller
+ * needs a local `err` and a `cleanup` label.
+ */
+#define OCTETS_TO_MPINT(oc, mp, len) \
+    CHECK_MPI_OK(mp_read_unsigned_octets((mp), oc, len))
+
+/*
+ * Same as OCTETS_TO_MPINT, taking the octets from a SECItem. Note that
+ * `it` is the item itself (accessed with '.'), not a pointer to it.
+ */
+#define SECITEM_TO_MPINT(it, mp) \
+    CHECK_MPI_OK(mp_read_unsigned_octets((mp), (it).data, (it).len))
+
+/*
+ * Store the mp_int mp into the SECItem pointed to by `it`, allocating
+ * the item's buffer with SECITEM_AllocItem(arena, ...).
+ *
+ * Requires a local `err` and a `cleanup` label in the caller. On exit
+ * through cleanup, err is MP_RANGE (octet size <= 0), MP_MEM
+ * (allocation failed) or the negative result of mp_to_unsigned_octets();
+ * on success err is set to MP_OKAY.
+ *
+ * Unlike SECITEM_TO_MPINT, `it` is a pointer here. `mp` is evaluated
+ * more than once, so it must be free of side effects.
+ */
+#define MPINT_TO_SECITEM(mp, it, arena)                         \
+    do {                                                        \
+        int mpintLen = mp_unsigned_octet_size(mp);              \
+        if (mpintLen <= 0) {                                    \
+            err = MP_RANGE;                                     \
+            goto cleanup;                                       \
+        }                                                       \
+        SECITEM_AllocItem(arena, (it), mpintLen);               \
+        if ((it)->data == NULL) {                               \
+            err = MP_MEM;                                       \
+            goto cleanup;                                       \
+        }                                                       \
+        err = mp_to_unsigned_octets(mp, (it)->data, (it)->len); \
+        if (err < 0)                                            \
+            goto cleanup;                                       \
+        else                                                    \
+            err = MP_OKAY;                                      \
+    } while (0)
+
+/*
+ * Translate an MPI error code into an NSS error via PORT_SetError():
+ * MP_MEM -> SEC_ERROR_NO_MEMORY, MP_RANGE -> SEC_ERROR_BAD_DATA,
+ * MP_BADARG -> SEC_ERROR_INVALID_ARGS, anything else ->
+ * SEC_ERROR_LIBRARY_FAILURE.
+ *
+ * NOTE: this expands to a bare switch statement (no do/while(0)
+ * wrapper), so "MP_TO_SEC_ERROR(err);" inside an unbraced if that has
+ * an else branch will not compile; put it in a braced block.
+ */
+#define MP_TO_SEC_ERROR(err)                          \
+    switch (err) {                                    \
+        case MP_MEM:                                  \
+            PORT_SetError(SEC_ERROR_NO_MEMORY);       \
+            break;                                    \
+        case MP_RANGE:                                \
+            PORT_SetError(SEC_ERROR_BAD_DATA);        \
+            break;                                    \
+        case MP_BADARG:                               \
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);    \
+            break;                                    \
+        default:                                      \
+            PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); \
+            break;                                    \
+    }
diff --git a/security/nss/lib/freebl/secrng.h b/security/nss/lib/freebl/secrng.h
new file mode 100644
index 000000000..19eae4833
--- /dev/null
+++ b/security/nss/lib/freebl/secrng.h
@@ -0,0 +1,65 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _SECRNG_H_
+#define _SECRNG_H_
+/*
+ * secrng.h - public data structures and prototypes for the secure random
+ *	      number generator
+ */
+
+/******************************************/
+/*
+** Random number generation. A cryptographically strong random number
+** generator.
+*/
+
+#include "blapi.h"
+
+/* the number of bytes to read from the system random number generator */
+#define SYSTEM_RNG_SEED_COUNT 1024
+
+SEC_BEGIN_PROTOS
+
+/*
+** The following functions are provided by the security library,
+** but are implemented differently for the UNIX, Windows, and OS/2
+** versions.
+*/
+
+/*
+** Get the "noisiest" information available on the system.
+** The amount of data returned depends on the system implementation.
+** It will not exceed maxbytes, but may be (much) less.
+** Returns number of noise bytes copied into buf, or zero if error.
+*/
+extern size_t RNG_GetNoise(void *buf, size_t maxbytes);
+
+/*
+** RNG_SystemInfoForRNG should be called before any use of SSL. It
+** gathers up the system specific information to help seed the
+** state of the global random number generator.
+*/
+extern void RNG_SystemInfoForRNG(void);
+
+/*
+** Use the contents (and stat) of a file to help seed the
+** global random number generator.
+*/
+extern void RNG_FileForRNG(const char *filename);
+
+/*
+** Get maxbytes bytes of random data from the system random number
+** generator.
+** Returns the number of bytes copied into buf -- maxbytes if success
+** or zero if error.
+** Errors:
+**   PR_NOT_IMPLEMENTED_ERROR   There is no system RNG on the platform.
+**   SEC_ERROR_NEED_RANDOM      The system RNG failed.
+*/
+extern size_t RNG_SystemRNG(void *buf, size_t maxbytes);
+
+SEC_END_PROTOS
+
+#endif /* _SECRNG_H_ */
diff --git a/security/nss/lib/freebl/seed.c b/security/nss/lib/freebl/seed.c
new file mode 100644
index 000000000..f198cce45
--- /dev/null
+++ b/security/nss/lib/freebl/seed.c
@@ -0,0 +1,641 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+#ifdef WIN32
+#include <memory.h>
+#endif
+
+#include "seed.h"
+#include "secerr.h"
+
+static const seed_word SS[4][256] = {
+    { 0x2989a1a8, 0x05858184, 0x16c6d2d4, 0x13c3d3d0,
+      0x14445054, 0x1d0d111c, 0x2c8ca0ac, 0x25052124,
+      0x1d4d515c, 0x03434340, 0x18081018, 0x1e0e121c,
+      0x11415150, 0x3cccf0fc, 0x0acac2c8, 0x23436360,
+      0x28082028, 0x04444044, 0x20002020, 0x1d8d919c,
+      0x20c0e0e0, 0x22c2e2e0, 0x08c8c0c8, 0x17071314,
+      0x2585a1a4, 0x0f8f838c, 0x03030300, 0x3b4b7378,
+      0x3b8bb3b8, 0x13031310, 0x12c2d2d0, 0x2ecee2ec,
+      0x30407070, 0x0c8c808c, 0x3f0f333c, 0x2888a0a8,
+      0x32023230, 0x1dcdd1dc, 0x36c6f2f4, 0x34447074,
+      0x2ccce0ec, 0x15859194, 0x0b0b0308, 0x17475354,
+      0x1c4c505c, 0x1b4b5358, 0x3d8db1bc, 0x01010100,
+      0x24042024, 0x1c0c101c, 0x33437370, 0x18889098,
+      0x10001010, 0x0cccc0cc, 0x32c2f2f0, 0x19c9d1d8,
+      0x2c0c202c, 0x27c7e3e4, 0x32427270, 0x03838380,
+      0x1b8b9398, 0x11c1d1d0, 0x06868284, 0x09c9c1c8,
+      0x20406060, 0x10405050, 0x2383a3a0, 0x2bcbe3e8,
+      0x0d0d010c, 0x3686b2b4, 0x1e8e929c, 0x0f4f434c,
+      0x3787b3b4, 0x1a4a5258, 0x06c6c2c4, 0x38487078,
+      0x2686a2a4, 0x12021210, 0x2f8fa3ac, 0x15c5d1d4,
+      0x21416160, 0x03c3c3c0, 0x3484b0b4, 0x01414140,
+      0x12425250, 0x3d4d717c, 0x0d8d818c, 0x08080008,
+      0x1f0f131c, 0x19899198, 0x00000000, 0x19091118,
+      0x04040004, 0x13435350, 0x37c7f3f4, 0x21c1e1e0,
+      0x3dcdf1fc, 0x36467274, 0x2f0f232c, 0x27072324,
+      0x3080b0b0, 0x0b8b8388, 0x0e0e020c, 0x2b8ba3a8,
+      0x2282a2a0, 0x2e4e626c, 0x13839390, 0x0d4d414c,
+      0x29496168, 0x3c4c707c, 0x09090108, 0x0a0a0208,
+      0x3f8fb3bc, 0x2fcfe3ec, 0x33c3f3f0, 0x05c5c1c4,
+      0x07878384, 0x14041014, 0x3ecef2fc, 0x24446064,
+      0x1eced2dc, 0x2e0e222c, 0x0b4b4348, 0x1a0a1218,
+      0x06060204, 0x21012120, 0x2b4b6368, 0x26466264,
+      0x02020200, 0x35c5f1f4, 0x12829290, 0x0a8a8288,
+      0x0c0c000c, 0x3383b3b0, 0x3e4e727c, 0x10c0d0d0,
+      0x3a4a7278, 0x07474344, 0x16869294, 0x25c5e1e4,
+      0x26062224, 0x00808080, 0x2d8da1ac, 0x1fcfd3dc,
+      0x2181a1a0, 0x30003030, 0x37073334, 0x2e8ea2ac,
+      0x36063234, 0x15051114, 0x22022220, 0x38083038,
+      0x34c4f0f4, 0x2787a3a4, 0x05454144, 0x0c4c404c,
+      0x01818180, 0x29c9e1e8, 0x04848084, 0x17879394,
+      0x35053134, 0x0bcbc3c8, 0x0ecec2cc, 0x3c0c303c,
+      0x31417170, 0x11011110, 0x07c7c3c4, 0x09898188,
+      0x35457174, 0x3bcbf3f8, 0x1acad2d8, 0x38c8f0f8,
+      0x14849094, 0x19495158, 0x02828280, 0x04c4c0c4,
+      0x3fcff3fc, 0x09494148, 0x39093138, 0x27476364,
+      0x00c0c0c0, 0x0fcfc3cc, 0x17c7d3d4, 0x3888b0b8,
+      0x0f0f030c, 0x0e8e828c, 0x02424240, 0x23032320,
+      0x11819190, 0x2c4c606c, 0x1bcbd3d8, 0x2484a0a4,
+      0x34043034, 0x31c1f1f0, 0x08484048, 0x02c2c2c0,
+      0x2f4f636c, 0x3d0d313c, 0x2d0d212c, 0x00404040,
+      0x3e8eb2bc, 0x3e0e323c, 0x3c8cb0bc, 0x01c1c1c0,
+      0x2a8aa2a8, 0x3a8ab2b8, 0x0e4e424c, 0x15455154,
+      0x3b0b3338, 0x1cccd0dc, 0x28486068, 0x3f4f737c,
+      0x1c8c909c, 0x18c8d0d8, 0x0a4a4248, 0x16465254,
+      0x37477374, 0x2080a0a0, 0x2dcde1ec, 0x06464244,
+      0x3585b1b4, 0x2b0b2328, 0x25456164, 0x3acaf2f8,
+      0x23c3e3e0, 0x3989b1b8, 0x3181b1b0, 0x1f8f939c,
+      0x1e4e525c, 0x39c9f1f8, 0x26c6e2e4, 0x3282b2b0,
+      0x31013130, 0x2acae2e8, 0x2d4d616c, 0x1f4f535c,
+      0x24c4e0e4, 0x30c0f0f0, 0x0dcdc1cc, 0x08888088,
+      0x16061214, 0x3a0a3238, 0x18485058, 0x14c4d0d4,
+      0x22426260, 0x29092128, 0x07070304, 0x33033330,
+      0x28c8e0e8, 0x1b0b1318, 0x05050104, 0x39497178,
+      0x10809090, 0x2a4a6268, 0x2a0a2228, 0x1a8a9298 },
+    { 0x38380830, 0xe828c8e0, 0x2c2d0d21, 0xa42686a2,
+      0xcc0fcfc3, 0xdc1eced2, 0xb03383b3, 0xb83888b0,
+      0xac2f8fa3, 0x60204060, 0x54154551, 0xc407c7c3,
+      0x44044440, 0x6c2f4f63, 0x682b4b63, 0x581b4b53,
+      0xc003c3c3, 0x60224262, 0x30330333, 0xb43585b1,
+      0x28290921, 0xa02080a0, 0xe022c2e2, 0xa42787a3,
+      0xd013c3d3, 0x90118191, 0x10110111, 0x04060602,
+      0x1c1c0c10, 0xbc3c8cb0, 0x34360632, 0x480b4b43,
+      0xec2fcfe3, 0x88088880, 0x6c2c4c60, 0xa82888a0,
+      0x14170713, 0xc404c4c0, 0x14160612, 0xf434c4f0,
+      0xc002c2c2, 0x44054541, 0xe021c1e1, 0xd416c6d2,
+      0x3c3f0f33, 0x3c3d0d31, 0x8c0e8e82, 0x98188890,
+      0x28280820, 0x4c0e4e42, 0xf436c6f2, 0x3c3e0e32,
+      0xa42585a1, 0xf839c9f1, 0x0c0d0d01, 0xdc1fcfd3,
+      0xd818c8d0, 0x282b0b23, 0x64264662, 0x783a4a72,
+      0x24270723, 0x2c2f0f23, 0xf031c1f1, 0x70324272,
+      0x40024242, 0xd414c4d0, 0x40014141, 0xc000c0c0,
+      0x70334373, 0x64274763, 0xac2c8ca0, 0x880b8b83,
+      0xf437c7f3, 0xac2d8da1, 0x80008080, 0x1c1f0f13,
+      0xc80acac2, 0x2c2c0c20, 0xa82a8aa2, 0x34340430,
+      0xd012c2d2, 0x080b0b03, 0xec2ecee2, 0xe829c9e1,
+      0x5c1d4d51, 0x94148490, 0x18180810, 0xf838c8f0,
+      0x54174753, 0xac2e8ea2, 0x08080800, 0xc405c5c1,
+      0x10130313, 0xcc0dcdc1, 0x84068682, 0xb83989b1,
+      0xfc3fcff3, 0x7c3d4d71, 0xc001c1c1, 0x30310131,
+      0xf435c5f1, 0x880a8a82, 0x682a4a62, 0xb03181b1,
+      0xd011c1d1, 0x20200020, 0xd417c7d3, 0x00020202,
+      0x20220222, 0x04040400, 0x68284860, 0x70314171,
+      0x04070703, 0xd81bcbd3, 0x9c1d8d91, 0x98198991,
+      0x60214161, 0xbc3e8eb2, 0xe426c6e2, 0x58194951,
+      0xdc1dcdd1, 0x50114151, 0x90108090, 0xdc1cccd0,
+      0x981a8a92, 0xa02383a3, 0xa82b8ba3, 0xd010c0d0,
+      0x80018181, 0x0c0f0f03, 0x44074743, 0x181a0a12,
+      0xe023c3e3, 0xec2ccce0, 0x8c0d8d81, 0xbc3f8fb3,
+      0x94168692, 0x783b4b73, 0x5c1c4c50, 0xa02282a2,
+      0xa02181a1, 0x60234363, 0x20230323, 0x4c0d4d41,
+      0xc808c8c0, 0x9c1e8e92, 0x9c1c8c90, 0x383a0a32,
+      0x0c0c0c00, 0x2c2e0e22, 0xb83a8ab2, 0x6c2e4e62,
+      0x9c1f8f93, 0x581a4a52, 0xf032c2f2, 0x90128292,
+      0xf033c3f3, 0x48094941, 0x78384870, 0xcc0cccc0,
+      0x14150511, 0xf83bcbf3, 0x70304070, 0x74354571,
+      0x7c3f4f73, 0x34350531, 0x10100010, 0x00030303,
+      0x64244460, 0x6c2d4d61, 0xc406c6c2, 0x74344470,
+      0xd415c5d1, 0xb43484b0, 0xe82acae2, 0x08090901,
+      0x74364672, 0x18190911, 0xfc3ecef2, 0x40004040,
+      0x10120212, 0xe020c0e0, 0xbc3d8db1, 0x04050501,
+      0xf83acaf2, 0x00010101, 0xf030c0f0, 0x282a0a22,
+      0x5c1e4e52, 0xa82989a1, 0x54164652, 0x40034343,
+      0x84058581, 0x14140410, 0x88098981, 0x981b8b93,
+      0xb03080b0, 0xe425c5e1, 0x48084840, 0x78394971,
+      0x94178793, 0xfc3cccf0, 0x1c1e0e12, 0x80028282,
+      0x20210121, 0x8c0c8c80, 0x181b0b13, 0x5c1f4f53,
+      0x74374773, 0x54144450, 0xb03282b2, 0x1c1d0d11,
+      0x24250521, 0x4c0f4f43, 0x00000000, 0x44064642,
+      0xec2dcde1, 0x58184850, 0x50124252, 0xe82bcbe3,
+      0x7c3e4e72, 0xd81acad2, 0xc809c9c1, 0xfc3dcdf1,
+      0x30300030, 0x94158591, 0x64254561, 0x3c3c0c30,
+      0xb43686b2, 0xe424c4e0, 0xb83b8bb3, 0x7c3c4c70,
+      0x0c0e0e02, 0x50104050, 0x38390931, 0x24260622,
+      0x30320232, 0x84048480, 0x68294961, 0x90138393,
+      0x34370733, 0xe427c7e3, 0x24240420, 0xa42484a0,
+      0xc80bcbc3, 0x50134353, 0x080a0a02, 0x84078783,
+      0xd819c9d1, 0x4c0c4c40, 0x80038383, 0x8c0f8f83,
+      0xcc0ecec2, 0x383b0b33, 0x480a4a42, 0xb43787b3 },
+    { 0xa1a82989, 0x81840585, 0xd2d416c6, 0xd3d013c3,
+      0x50541444, 0x111c1d0d, 0xa0ac2c8c, 0x21242505,
+      0x515c1d4d, 0x43400343, 0x10181808, 0x121c1e0e,
+      0x51501141, 0xf0fc3ccc, 0xc2c80aca, 0x63602343,
+      0x20282808, 0x40440444, 0x20202000, 0x919c1d8d,
+      0xe0e020c0, 0xe2e022c2, 0xc0c808c8, 0x13141707,
+      0xa1a42585, 0x838c0f8f, 0x03000303, 0x73783b4b,
+      0xb3b83b8b, 0x13101303, 0xd2d012c2, 0xe2ec2ece,
+      0x70703040, 0x808c0c8c, 0x333c3f0f, 0xa0a82888,
+      0x32303202, 0xd1dc1dcd, 0xf2f436c6, 0x70743444,
+      0xe0ec2ccc, 0x91941585, 0x03080b0b, 0x53541747,
+      0x505c1c4c, 0x53581b4b, 0xb1bc3d8d, 0x01000101,
+      0x20242404, 0x101c1c0c, 0x73703343, 0x90981888,
+      0x10101000, 0xc0cc0ccc, 0xf2f032c2, 0xd1d819c9,
+      0x202c2c0c, 0xe3e427c7, 0x72703242, 0x83800383,
+      0x93981b8b, 0xd1d011c1, 0x82840686, 0xc1c809c9,
+      0x60602040, 0x50501040, 0xa3a02383, 0xe3e82bcb,
+      0x010c0d0d, 0xb2b43686, 0x929c1e8e, 0x434c0f4f,
+      0xb3b43787, 0x52581a4a, 0xc2c406c6, 0x70783848,
+      0xa2a42686, 0x12101202, 0xa3ac2f8f, 0xd1d415c5,
+      0x61602141, 0xc3c003c3, 0xb0b43484, 0x41400141,
+      0x52501242, 0x717c3d4d, 0x818c0d8d, 0x00080808,
+      0x131c1f0f, 0x91981989, 0x00000000, 0x11181909,
+      0x00040404, 0x53501343, 0xf3f437c7, 0xe1e021c1,
+      0xf1fc3dcd, 0x72743646, 0x232c2f0f, 0x23242707,
+      0xb0b03080, 0x83880b8b, 0x020c0e0e, 0xa3a82b8b,
+      0xa2a02282, 0x626c2e4e, 0x93901383, 0x414c0d4d,
+      0x61682949, 0x707c3c4c, 0x01080909, 0x02080a0a,
+      0xb3bc3f8f, 0xe3ec2fcf, 0xf3f033c3, 0xc1c405c5,
+      0x83840787, 0x10141404, 0xf2fc3ece, 0x60642444,
+      0xd2dc1ece, 0x222c2e0e, 0x43480b4b, 0x12181a0a,
+      0x02040606, 0x21202101, 0x63682b4b, 0x62642646,
+      0x02000202, 0xf1f435c5, 0x92901282, 0x82880a8a,
+      0x000c0c0c, 0xb3b03383, 0x727c3e4e, 0xd0d010c0,
+      0x72783a4a, 0x43440747, 0x92941686, 0xe1e425c5,
+      0x22242606, 0x80800080, 0xa1ac2d8d, 0xd3dc1fcf,
+      0xa1a02181, 0x30303000, 0x33343707, 0xa2ac2e8e,
+      0x32343606, 0x11141505, 0x22202202, 0x30383808,
+      0xf0f434c4, 0xa3a42787, 0x41440545, 0x404c0c4c,
+      0x81800181, 0xe1e829c9, 0x80840484, 0x93941787,
+      0x31343505, 0xc3c80bcb, 0xc2cc0ece, 0x303c3c0c,
+      0x71703141, 0x11101101, 0xc3c407c7, 0x81880989,
+      0x71743545, 0xf3f83bcb, 0xd2d81aca, 0xf0f838c8,
+      0x90941484, 0x51581949, 0x82800282, 0xc0c404c4,
+      0xf3fc3fcf, 0x41480949, 0x31383909, 0x63642747,
+      0xc0c000c0, 0xc3cc0fcf, 0xd3d417c7, 0xb0b83888,
+      0x030c0f0f, 0x828c0e8e, 0x42400242, 0x23202303,
+      0x91901181, 0x606c2c4c, 0xd3d81bcb, 0xa0a42484,
+      0x30343404, 0xf1f031c1, 0x40480848, 0xc2c002c2,
+      0x636c2f4f, 0x313c3d0d, 0x212c2d0d, 0x40400040,
+      0xb2bc3e8e, 0x323c3e0e, 0xb0bc3c8c, 0xc1c001c1,
+      0xa2a82a8a, 0xb2b83a8a, 0x424c0e4e, 0x51541545,
+      0x33383b0b, 0xd0dc1ccc, 0x60682848, 0x737c3f4f,
+      0x909c1c8c, 0xd0d818c8, 0x42480a4a, 0x52541646,
+      0x73743747, 0xa0a02080, 0xe1ec2dcd, 0x42440646,
+      0xb1b43585, 0x23282b0b, 0x61642545, 0xf2f83aca,
+      0xe3e023c3, 0xb1b83989, 0xb1b03181, 0x939c1f8f,
+      0x525c1e4e, 0xf1f839c9, 0xe2e426c6, 0xb2b03282,
+      0x31303101, 0xe2e82aca, 0x616c2d4d, 0x535c1f4f,
+      0xe0e424c4, 0xf0f030c0, 0xc1cc0dcd, 0x80880888,
+      0x12141606, 0x32383a0a, 0x50581848, 0xd0d414c4,
+      0x62602242, 0x21282909, 0x03040707, 0x33303303,
+      0xe0e828c8, 0x13181b0b, 0x01040505, 0x71783949,
+      0x90901080, 0x62682a4a, 0x22282a0a, 0x92981a8a },
+    { 0x08303838, 0xc8e0e828, 0x0d212c2d, 0x86a2a426,
+      0xcfc3cc0f, 0xced2dc1e, 0x83b3b033, 0x88b0b838,
+      0x8fa3ac2f, 0x40606020, 0x45515415, 0xc7c3c407,
+      0x44404404, 0x4f636c2f, 0x4b63682b, 0x4b53581b,
+      0xc3c3c003, 0x42626022, 0x03333033, 0x85b1b435,
+      0x09212829, 0x80a0a020, 0xc2e2e022, 0x87a3a427,
+      0xc3d3d013, 0x81919011, 0x01111011, 0x06020406,
+      0x0c101c1c, 0x8cb0bc3c, 0x06323436, 0x4b43480b,
+      0xcfe3ec2f, 0x88808808, 0x4c606c2c, 0x88a0a828,
+      0x07131417, 0xc4c0c404, 0x06121416, 0xc4f0f434,
+      0xc2c2c002, 0x45414405, 0xc1e1e021, 0xc6d2d416,
+      0x0f333c3f, 0x0d313c3d, 0x8e828c0e, 0x88909818,
+      0x08202828, 0x4e424c0e, 0xc6f2f436, 0x0e323c3e,
+      0x85a1a425, 0xc9f1f839, 0x0d010c0d, 0xcfd3dc1f,
+      0xc8d0d818, 0x0b23282b, 0x46626426, 0x4a72783a,
+      0x07232427, 0x0f232c2f, 0xc1f1f031, 0x42727032,
+      0x42424002, 0xc4d0d414, 0x41414001, 0xc0c0c000,
+      0x43737033, 0x47636427, 0x8ca0ac2c, 0x8b83880b,
+      0xc7f3f437, 0x8da1ac2d, 0x80808000, 0x0f131c1f,
+      0xcac2c80a, 0x0c202c2c, 0x8aa2a82a, 0x04303434,
+      0xc2d2d012, 0x0b03080b, 0xcee2ec2e, 0xc9e1e829,
+      0x4d515c1d, 0x84909414, 0x08101818, 0xc8f0f838,
+      0x47535417, 0x8ea2ac2e, 0x08000808, 0xc5c1c405,
+      0x03131013, 0xcdc1cc0d, 0x86828406, 0x89b1b839,
+      0xcff3fc3f, 0x4d717c3d, 0xc1c1c001, 0x01313031,
+      0xc5f1f435, 0x8a82880a, 0x4a62682a, 0x81b1b031,
+      0xc1d1d011, 0x00202020, 0xc7d3d417, 0x02020002,
+      0x02222022, 0x04000404, 0x48606828, 0x41717031,
+      0x07030407, 0xcbd3d81b, 0x8d919c1d, 0x89919819,
+      0x41616021, 0x8eb2bc3e, 0xc6e2e426, 0x49515819,
+      0xcdd1dc1d, 0x41515011, 0x80909010, 0xccd0dc1c,
+      0x8a92981a, 0x83a3a023, 0x8ba3a82b, 0xc0d0d010,
+      0x81818001, 0x0f030c0f, 0x47434407, 0x0a12181a,
+      0xc3e3e023, 0xcce0ec2c, 0x8d818c0d, 0x8fb3bc3f,
+      0x86929416, 0x4b73783b, 0x4c505c1c, 0x82a2a022,
+      0x81a1a021, 0x43636023, 0x03232023, 0x4d414c0d,
+      0xc8c0c808, 0x8e929c1e, 0x8c909c1c, 0x0a32383a,
+      0x0c000c0c, 0x0e222c2e, 0x8ab2b83a, 0x4e626c2e,
+      0x8f939c1f, 0x4a52581a, 0xc2f2f032, 0x82929012,
+      0xc3f3f033, 0x49414809, 0x48707838, 0xccc0cc0c,
+      0x05111415, 0xcbf3f83b, 0x40707030, 0x45717435,
+      0x4f737c3f, 0x05313435, 0x00101010, 0x03030003,
+      0x44606424, 0x4d616c2d, 0xc6c2c406, 0x44707434,
+      0xc5d1d415, 0x84b0b434, 0xcae2e82a, 0x09010809,
+      0x46727436, 0x09111819, 0xcef2fc3e, 0x40404000,
+      0x02121012, 0xc0e0e020, 0x8db1bc3d, 0x05010405,
+      0xcaf2f83a, 0x01010001, 0xc0f0f030, 0x0a22282a,
+      0x4e525c1e, 0x89a1a829, 0x46525416, 0x43434003,
+      0x85818405, 0x04101414, 0x89818809, 0x8b93981b,
+      0x80b0b030, 0xc5e1e425, 0x48404808, 0x49717839,
+      0x87939417, 0xccf0fc3c, 0x0e121c1e, 0x82828002,
+      0x01212021, 0x8c808c0c, 0x0b13181b, 0x4f535c1f,
+      0x47737437, 0x44505414, 0x82b2b032, 0x0d111c1d,
+      0x05212425, 0x4f434c0f, 0x00000000, 0x46424406,
+      0xcde1ec2d, 0x48505818, 0x42525012, 0xcbe3e82b,
+      0x4e727c3e, 0xcad2d81a, 0xc9c1c809, 0xcdf1fc3d,
+      0x00303030, 0x85919415, 0x45616425, 0x0c303c3c,
+      0x86b2b436, 0xc4e0e424, 0x8bb3b83b, 0x4c707c3c,
+      0x0e020c0e, 0x40505010, 0x09313839, 0x06222426,
+      0x02323032, 0x84808404, 0x49616829, 0x83939013,
+      0x07333437, 0xc7e3e427, 0x04202424, 0x84a0a424,
+      0xcbc3c80b, 0x43535013, 0x0a02080a, 0x87838407,
+      0xc9d1d819, 0x4c404c0c, 0x83838003, 0x8f838c0f,
+      0xcec2cc0e, 0x0b33383b, 0x4a42480a, 0x87b3b437 }
+};
+
+/*
+ * key schedule constants - golden ratio
+ *
+ * KC0 is the base constant; each following KCi is KC0 rotated left by
+ * i bits within 32 bits (e.g. KC1 == rotl32(KC0, 1)). SEED_set_key
+ * consumes them in order, one per pair of round-key words.
+ */
+#define KC0 0x9e3779b9
+#define KC1 0x3c6ef373
+#define KC2 0x78dde6e6
+#define KC3 0xf1bbcdcc
+#define KC4 0xe3779b99
+#define KC5 0xc6ef3733
+#define KC6 0x8dde6e67
+#define KC7 0x1bbcdccf
+#define KC8 0x3779b99e
+#define KC9 0x6ef3733c
+#define KC10 0xdde6e678
+#define KC11 0xbbcdccf1
+#define KC12 0x779b99e3
+#define KC13 0xef3733c6
+#define KC14 0xde6e678d
+#define KC15 0xbcdccf1b
+
+/*
+ * Expand a raw SEED key into the key schedule ks.
+ *
+ * The key is loaded as four words K0..K3 from rawkey offsets 0, 4, 8
+ * and 12. Sixteen steps follow, each storing into ks->data[] at an
+ * even index (0, 2, ..., 30) through KEYUPDATE_TEMP; between stores the
+ * key words and temporaries are advanced with KEYSCHEDULE_UPDATE1 and
+ * KEYSCHEDULE_UPDATE0 in strict alternation, using KC1..KC15 in order.
+ *
+ * NOTE(review): char2word, KEYUPDATE_TEMP and KEYSCHEDULE_UPDATE0/1 are
+ * macros defined outside this file section (presumably seed.h); the
+ * statement order below is significant and must not be rearranged.
+ */
+void
+SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH],
+             SEED_KEY_SCHEDULE *ks)
+{
+    seed_word K0, K1, K2, K3;
+    seed_word t0, t1;
+
+    char2word(rawkey, K0);
+    char2word(rawkey + 4, K1);
+    char2word(rawkey + 8, K2);
+    char2word(rawkey + 12, K3);
+
+    /* First pair is derived directly from the raw key words and KC0. */
+    t0 = (K0 + K2 - KC0);
+    t1 = (K1 - K3 + KC0);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[0]);
+    KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC1);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[2]);
+    KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC2);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[4]);
+    KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC3);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[6]);
+    KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC4);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[8]);
+    KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC5);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[10]);
+    KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC6);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[12]);
+    KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC7);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[14]);
+    KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC8);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[16]);
+    KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC9);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[18]);
+    KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC10);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[20]);
+    KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC11);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[22]);
+    KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC12);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[24]);
+    KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC13);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[26]);
+    KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC14);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[28]);
+    KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC15);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[30]);
+}
+
+/*
+ * Encrypt one SEED block from s into d using the key schedule ks.
+ *
+ * The block is loaded as two word pairs (L0,L1) and (R0,R1). Sixteen
+ * E_SEED rounds are applied with round-key indices 0, 2, ..., 30, the
+ * two pairs trading places in the argument list on every round. The
+ * result is written with the pairs swapped: R first, then L.
+ *
+ * NOTE(review): E_SEED, char2word and word2char are macros defined
+ * outside this file section (presumably seed.h). E_SEED is not passed
+ * ks explicitly, so it presumably refers to `ks` by name - confirm
+ * before renaming the parameter.
+ */
+void
+SEED_encrypt(const unsigned char s[SEED_BLOCK_SIZE],
+             unsigned char d[SEED_BLOCK_SIZE],
+             const SEED_KEY_SCHEDULE *ks)
+{
+    seed_word L0, L1, R0, R1;
+    seed_word t0, t1;
+
+    char2word(s, L0);
+    char2word(s + 4, L1);
+    char2word(s + 8, R0);
+    char2word(s + 12, R1);
+
+    /* Rounds run in ascending round-key order. */
+    E_SEED(t0, t1, L0, L1, R0, R1, 0);
+    E_SEED(t0, t1, R0, R1, L0, L1, 2);
+    E_SEED(t0, t1, L0, L1, R0, R1, 4);
+    E_SEED(t0, t1, R0, R1, L0, L1, 6);
+    E_SEED(t0, t1, L0, L1, R0, R1, 8);
+    E_SEED(t0, t1, R0, R1, L0, L1, 10);
+    E_SEED(t0, t1, L0, L1, R0, R1, 12);
+    E_SEED(t0, t1, R0, R1, L0, L1, 14);
+    E_SEED(t0, t1, L0, L1, R0, R1, 16);
+    E_SEED(t0, t1, R0, R1, L0, L1, 18);
+    E_SEED(t0, t1, L0, L1, R0, R1, 20);
+    E_SEED(t0, t1, R0, R1, L0, L1, 22);
+    E_SEED(t0, t1, L0, L1, R0, R1, 24);
+    E_SEED(t0, t1, R0, R1, L0, L1, 26);
+    E_SEED(t0, t1, L0, L1, R0, R1, 28);
+    E_SEED(t0, t1, R0, R1, L0, L1, 30);
+
+    /* Output the halves in swapped order. */
+    word2char(R0, d);
+    word2char(R1, d + 4);
+    word2char(L0, d + 8);
+    word2char(L1, d + 12);
+}
+
+/*
+ * Decrypt the single SEED_BLOCK_SIZE-byte block at s into d with the round
+ * keys in ks.  This is the same sixteen-round E_SEED network as encryption,
+ * but the round-key pairs are consumed in descending order, starting with
+ * ks->data[30..31] and finishing with ks->data[0..1].
+ * s and d may refer to the same buffer: s is fully read before d is written.
+ */
+void
+SEED_decrypt(const unsigned char s[SEED_BLOCK_SIZE],
+             unsigned char d[SEED_BLOCK_SIZE],
+             const SEED_KEY_SCHEDULE *ks)
+{
+    seed_word left0, left1, right0, right1;
+    seed_word g0, g1;
+    int rk;
+
+    char2word(s, left0);
+    char2word(s + 4, left1);
+    char2word(s + 8, right0);
+    char2word(s + 12, right1);
+
+    /*
+     * Two rounds per pass; rk takes the values 30, 26, ..., 2, so the second
+     * round of the final pass uses round-key index 0.
+     */
+    for (rk = 30; rk > 0; rk -= 4) {
+        E_SEED(g0, g1, left0, left1, right0, right1, rk);
+        E_SEED(g0, g1, right0, right1, left0, left1, rk - 2);
+    }
+
+    word2char(right0, d);
+    word2char(right1, d + 4);
+    word2char(left0, d + 8);
+    word2char(left1, d + 12);
+}
+
+/* Process exactly one block: encrypt when enc is non-zero, else decrypt. */
+void
+SEED_ecb_encrypt(const unsigned char *in, unsigned char *out,
+                 const SEED_KEY_SCHEDULE *ks, int enc)
+{
+    if (!enc) {
+        SEED_decrypt(in, out, ks);
+        return;
+    }
+    SEED_encrypt(in, out, ks);
+}
+
+/*
+ * CBC-mode SEED over len bytes of in, written to out; enc selects encrypt
+ * (non-zero) or decrypt (zero).  ivec is the chaining block on entry and is
+ * updated before returning.  Decryption takes a separate path when in == out.
+ * NOTE(review): a trailing partial block (len % SEED_BLOCK_SIZE != 0) keeps
+ * its original handling: encryption still writes a whole block to out and
+ * decryption still reads a whole block from in, so callers presumably pass
+ * only multiples of SEED_BLOCK_SIZE - confirm against the callers.
+ */
+void
+SEED_cbc_encrypt(const unsigned char *in, unsigned char *out,
+                 size_t len, const SEED_KEY_SCHEDULE *ks,
+                 unsigned char ivec[SEED_BLOCK_SIZE], int enc)
+{
+    size_t n;
+    unsigned char tmp[SEED_BLOCK_SIZE];
+    const unsigned char *iv = ivec;
+
+    if (enc) {
+        while (len >= SEED_BLOCK_SIZE) {
+            for (n = 0; n < SEED_BLOCK_SIZE; ++n)
+                out[n] = in[n] ^ iv[n];
+            SEED_encrypt(out, out, ks);
+            iv = out; /* this ciphertext block chains into the next one */
+            len -= SEED_BLOCK_SIZE;
+            in += SEED_BLOCK_SIZE;
+            out += SEED_BLOCK_SIZE;
+        }
+        if (len) {
+            for (n = 0; n < len; ++n)
+                out[n] = in[n] ^ iv[n];
+            /* Missing plaintext bytes are treated as zero: 0 ^ iv[n]. */
+            for (n = len; n < SEED_BLOCK_SIZE; ++n)
+                out[n] = iv[n];
+            SEED_encrypt(out, out, ks);
+            iv = out;
+        }
+        /* iv still equals ivec when nothing was processed; memcpy must not
+         * be handed overlapping objects, so skip that self-copy. */
+        if (iv != ivec)
+            memcpy(ivec, iv, SEED_BLOCK_SIZE);
+    } else if (in != out) {
+        while (len >= SEED_BLOCK_SIZE) {
+            SEED_decrypt(in, out, ks);
+            for (n = 0; n < SEED_BLOCK_SIZE; ++n)
+                out[n] ^= iv[n];
+            iv = in; /* this ciphertext block is the next chaining value */
+            len -= SEED_BLOCK_SIZE;
+            in += SEED_BLOCK_SIZE;
+            out += SEED_BLOCK_SIZE;
+        }
+        if (len) {
+            SEED_decrypt(in, tmp, ks);
+            for (n = 0; n < len; ++n)
+                out[n] = tmp[n] ^ iv[n];
+            iv = in;
+        }
+        if (iv != ivec) /* same self-copy guard as the encrypt path */
+            memcpy(ivec, iv, SEED_BLOCK_SIZE);
+    } else {
+        /* In place: save each ciphertext block before it is overwritten. */
+        while (len >= SEED_BLOCK_SIZE) {
+            memcpy(tmp, in, SEED_BLOCK_SIZE);
+            SEED_decrypt(in, out, ks);
+            for (n = 0; n < SEED_BLOCK_SIZE; ++n)
+                out[n] ^= ivec[n];
+            memcpy(ivec, tmp, SEED_BLOCK_SIZE);
+            len -= SEED_BLOCK_SIZE;
+            in += SEED_BLOCK_SIZE;
+            out += SEED_BLOCK_SIZE;
+        }
+        if (len) {
+            memcpy(tmp, in, SEED_BLOCK_SIZE);
+            SEED_decrypt(tmp, tmp, ks);
+            for (n = 0; n < len; ++n)
+                out[n] = tmp[n] ^ ivec[n];
+            /* NOTE(review): tmp holds decrypted bytes here, not ciphertext. */
+            memcpy(ivec, tmp, SEED_BLOCK_SIZE);
+        }
+    }
+}
+/* Allocate a SEEDContext with PORT_ZNew; no key or IV is set up here. */
+SEEDContext *
+SEED_AllocateContext(void)
+{
+    return PORT_ZNew(SEEDContext);
+}
+
+/*
+ * Set up cx for mode NSS_SEED (ECB) or NSS_SEED_CBC.  key is expanded into
+ * cx->ks; keylen and the final argument are not consulted.
+ * iv is used only for NSS_SEED_CBC, where it is copied into cx->iv.
+ * Returns SECSuccess, or SECFailure with SEC_ERROR_INVALID_ARGS when cx or
+ * key is NULL, iv is NULL in CBC mode, or mode is not one of the two above.
+ */
+SECStatus
+SEED_InitContext(SEEDContext *cx, const unsigned char *key,
+                 unsigned int keylen, const unsigned char *iv,
+                 int mode, unsigned int encrypt, unsigned int unused)
+{
+    /* Validate everything first so a failed call leaves cx untouched and
+     * NULL key/iv pointers are reported instead of being dereferenced. */
+    if (!cx || !key || (mode != NSS_SEED && mode != NSS_SEED_CBC) ||
+        (mode == NSS_SEED_CBC && !iv)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (mode == NSS_SEED_CBC) {
+        /* sizeof cx->iv keeps the copy tied to the destination's size. */
+        memcpy(cx->iv, iv, sizeof cx->iv);
+    }
+
+    SEED_set_key(key, &cx->ks);
+    cx->mode = mode;
+    cx->encrypt = encrypt;
+
+    return SECSuccess;
+}
+
+/* Allocate a SEEDContext and initialize it; returns NULL on any failure. */
+SEEDContext *
+SEED_CreateContext(const unsigned char *key, const unsigned char *iv,
+                   int mode, PRBool encrypt)
+{
+    SEEDContext *cx = PORT_ZNew(SEEDContext);
+    /* Skip initialization when allocation failed, so that failure is not
+     * re-reported by SEED_InitContext as SEC_ERROR_INVALID_ARGS. */
+    if (cx && SEED_InitContext(cx, key, SEED_KEY_LENGTH, iv, mode, encrypt,
+                               0) != SECSuccess) {
+        PORT_ZFree(cx, sizeof *cx);
+        cx = NULL;
+    }
+    return cx;
+}
+
+/* Zero every byte of cx, then free it when freeit is set; NULL is ignored. */
+void
+SEED_DestroyContext(SEEDContext *cx, PRBool freeit)
+{
+    if (!cx)
+        return;
+    memset(cx, 0, sizeof *cx);
+    if (freeit)
+        PORT_Free(cx);
+}
+
+/*
+ * Encrypt inLen bytes (a multiple of SEED_BLOCK_SIZE, at most maxOutLen) from
+ * in into out and set *outLen = inLen.  Bad lengths, a NULL cx or outLen, a
+ * decrypt-only context, or an unknown mode give SEC_ERROR_INVALID_ARGS.
+ */
+SECStatus
+SEED_Encrypt(SEEDContext *cx, unsigned char *out, unsigned int *outLen,
+             unsigned int maxOutLen, const unsigned char *in,
+             unsigned int inLen)
+{
+    unsigned int off;
+
+    if (!cx || !cx->encrypt || !outLen ||
+        (inLen % SEED_BLOCK_SIZE) != 0 || maxOutLen < inLen) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    switch (cx->mode) {
+        case NSS_SEED:
+            /* ECB blocks are independent; process every one of them. */
+            for (off = 0; off < inLen; off += SEED_BLOCK_SIZE)
+                SEED_ecb_encrypt(in + off, out + off, &cx->ks, 1);
+            break;
+        case NSS_SEED_CBC:
+            SEED_cbc_encrypt(in, out, inLen, &cx->ks, cx->iv, 1);
+            break;
+        default:
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+    }
+    *outLen = inLen;
+    return SECSuccess;
+}
+
+/*
+ * Decrypt inLen bytes (a multiple of SEED_BLOCK_SIZE, at most maxOutLen) from
+ * in into out and set *outLen = inLen.  Bad lengths, a NULL cx or outLen, an
+ * encrypt-only context, or an unknown mode give SEC_ERROR_INVALID_ARGS.
+ */
+SECStatus
+SEED_Decrypt(SEEDContext *cx, unsigned char *out, unsigned int *outLen,
+             unsigned int maxOutLen, const unsigned char *in,
+             unsigned int inLen)
+{
+    unsigned int off;
+
+    if (!cx || cx->encrypt || !outLen ||
+        (inLen % SEED_BLOCK_SIZE) != 0 || maxOutLen < inLen) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    switch (cx->mode) {
+        case NSS_SEED:
+            /* ECB blocks are independent; process every one of them. */
+            for (off = 0; off < inLen; off += SEED_BLOCK_SIZE)
+                SEED_ecb_encrypt(in + off, out + off, &cx->ks, 0);
+            break;
+        case NSS_SEED_CBC:
+            SEED_cbc_encrypt(in, out, inLen, &cx->ks, cx->iv, 0);
+            break;
+        default:
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+    }
+    *outLen = inLen;
+    return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/seed.h b/security/nss/lib/freebl/seed.h
new file mode 100644
index 000000000..f527165b7
--- /dev/null
+++ b/security/nss/lib/freebl/seed.h
@@ -0,0 +1,125 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef HEADER_SEED_H
+#define HEADER_SEED_H
+
+#include <string.h>
+#include "blapi.h"
+
+#if !defined(NO_SYS_TYPES_H)
+#include <sys/types.h>
+#endif
+/* Word type used by the SEED macros and routines (an alias of PRUint32). */
+typedef PRUint32 seed_word;
+/* XOR of four SS table lookups, one per byte of v; parenthesized as a whole. */
+#define G_FUNC(v)                    \
+    (SS[0][((v)&0xff)] ^             \
+     SS[1][((v) >> 8 & 0xff)] ^      \
+     SS[2][((v) >> 16 & 0xff)] ^     \
+     SS[3][((v) >> 24 & 0xff)])
+/* Load the four bytes c[0..3] into i as one big-endian word (c[0] on top). */
+#define char2word(c, i)                    \
+    (i) = (((seed_word)((c)[3])) |         \
+           (((seed_word)((c)[2])) << 8) |  \
+           (((seed_word)((c)[1])) << 16) | \
+           (((seed_word)((c)[0])) << 24))
+/* Store l at c[0..3], most significant byte first; one comma expression. */
+#define word2char(l, c)                       \
+    (*((c) + 0) = (unsigned char)((l) >> 24), \
+     *((c) + 1) = (unsigned char)((l) >> 16), \
+     *((c) + 2) = (unsigned char)((l) >> 8),  \
+     *((c) + 3) = (unsigned char)((l)))
+/* Rotate the word pair K2:K3 left by 8 bits, then recompute T0/T1 from KC. */
+#define KEYSCHEDULE_UPDATE0(T0, T1, K0, K1, K2, K3, KC) \
+    ((T0) = (K2),                                       \
+     (K2) = (((K2) << 8) ^ ((K3) >> 24)),               \
+     (K3) = (((K3) << 8) ^ ((T0) >> 24)),               \
+     (T0) = ((K0) + (K2) - (KC)),                       \
+     (T1) = ((K1) + (KC) - (K3)))
+/* Rotate the word pair K0:K1 right by 8 bits, then recompute T0/T1 from KC. */
+#define KEYSCHEDULE_UPDATE1(T0, T1, K0, K1, K2, K3, KC) \
+    ((T0) = (K0),                                       \
+     (K0) = (((K0) >> 8) ^ ((K1) << 24)),               \
+     (K1) = (((K1) >> 8) ^ ((T0) << 24)),               \
+     (T0) = ((K0) + (K2) - (KC)),                       \
+     (T1) = ((K1) + (KC) - (K3)))
+/* Store G_FUNC(T0) and G_FUNC(T1) into K[0] and K[1]; a single expression. */
+#define KEYUPDATE_TEMP(T0, T1, K) \
+    ((K)[0] = G_FUNC((T0)),       \
+     (K)[1] = G_FUNC((T1)))
+/* XOR the four elements SRC[0..3] into DST[0..3]; a single expression. */
+#define XOR_SEEDBLOCK(DST, SRC) \
+    ((DST)[0] ^= (SRC)[0],      \
+     (DST)[1] ^= (SRC)[1],      \
+     (DST)[2] ^= (SRC)[2],      \
+     (DST)[3] ^= (SRC)[3])
+/* Copy the four elements SRC[0..3] to DST[0..3]; a single expression. */
+#define MOV_SEEDBLOCK(DST, SRC) \
+    ((DST)[0] = (SRC)[0],       \
+     (DST)[1] = (SRC)[1],       \
+     (DST)[2] = (SRC)[2],       \
+     (DST)[3] = (SRC)[3])
+/* Load the 16 bytes at C into I[0..3] via char2word; a single expression. */
+#define CHAR2WORD(C, I)          \
+    (char2word((C), (I)[0]),     \
+     char2word((C) + 4, (I)[1]), \
+     char2word((C) + 8, (I)[2]), \
+     char2word((C) + 12, (I)[3]))
+/* Store I[0..3] at C via word2char; several statements, use inside braces. */
+#define WORD2CHAR(I, C)         \
+    word2char((I)[0], (C));     \
+    word2char((I)[1], (C) + 4); \
+    word2char((I)[2], (C) + 8); \
+    word2char((I)[3], (C) + 12)
+/* One round: X1:X2 ^= F(X3:X4, ks->data[rbase..rbase+1]); needs ks in scope. */
+#define E_SEED(T0, T1, X1, X2, X3, X4, rbase) \
+    ((T0) = (X3) ^ (ks->data)[(rbase)],       \
+     (T1) = (X4) ^ (ks->data)[(rbase) + 1],   \
+     (T1) ^= (T0),                            \
+     (T1) = G_FUNC(T1),                       \
+     (T0) += (T1),                            \
+     (T0) = G_FUNC(T0),                       \
+     (T1) += (T0),                            \
+     (T1) = G_FUNC(T1),                       \
+     (T0) += (T1),                            \
+     (X1) ^= (T0),                            \
+     (X2) ^= (T1))
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Expanded key: 32 words, consumed two at a time by each E_SEED round. */
+typedef struct seed_key_st {
+    PRUint32 data[32];
+} SEED_KEY_SCHEDULE;
+/* Cipher state filled in by SEED_InitContext and used by SEED_Encrypt/Decrypt. */
+struct SEEDContextStr {
+    unsigned char iv[SEED_BLOCK_SIZE]; /* CBC chaining block */
+    SEED_KEY_SCHEDULE ks;              /* round keys from SEED_set_key */
+    int mode;                          /* NSS_SEED or NSS_SEED_CBC */
+    unsigned int encrypt;              /* non-zero: encrypt; zero: decrypt */
+};
+/* Expand the SEED_KEY_LENGTH-byte rawkey into the round keys held in ks. */
+void SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH],
+                  SEED_KEY_SCHEDULE *ks);
+/* Single-block primitives: transform the block at s into d using ks. */
+void SEED_encrypt(const unsigned char s[SEED_BLOCK_SIZE],
+                  unsigned char d[SEED_BLOCK_SIZE],
+                  const SEED_KEY_SCHEDULE *ks);
+void SEED_decrypt(const unsigned char s[SEED_BLOCK_SIZE],
+                  unsigned char d[SEED_BLOCK_SIZE],
+                  const SEED_KEY_SCHEDULE *ks);
+/* Mode helpers: one ECB block, or CBC over len bytes while updating ivec. */
+void SEED_ecb_encrypt(const unsigned char *in, unsigned char *out,
+                      const SEED_KEY_SCHEDULE *ks, int enc);
+void SEED_cbc_encrypt(const unsigned char *in, unsigned char *out,
+                      size_t len, const SEED_KEY_SCHEDULE *ks,
+                      unsigned char ivec[SEED_BLOCK_SIZE], int enc);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HEADER_SEED_H */
diff --git a/security/nss/lib/freebl/sha-fast-amd64-sun.s b/security/nss/lib/freebl/sha-fast-amd64-sun.s
new file mode 100644
index 000000000..6430469a4
--- /dev/null
+++ b/security/nss/lib/freebl/sha-fast-amd64-sun.s
@@ -0,0 +1,2151 @@
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+	.file	"sha_fast.c"
+	.text
+	.align 16
+.globl SHA1_Begin
+	.type	SHA1_Begin, @function
+SHA1_Begin:				/ in: %rdi = address of the state to reset
+.LFB4:
+	movl	$4023233417, %ecx	/ 0xEFCDAB89 - movl zero-extends into %rcx
+	movl	$2562383102, %edx	/ 0x98BADCFE - too large for a sign-extended movq immediate
+	movl	$3285377520, %eax	/ 0xC3D2E1F0 - likewise loaded through a register
+	movq	$0, 64(%rdi)		/ clear the 64-bit field at offset 64
+	movq	$1732584193, 72(%rdi)	/ 0x67452301, first SHA-1 initial hash word
+	movq	%rcx, 80(%rdi)		/ second initial hash word
+	movq	%rdx, 88(%rdi)		/ third initial hash word
+	movq	$271733878, 96(%rdi)	/ 0x10325476, fourth initial hash word
+	movq	%rax, 104(%rdi)		/ fifth initial hash word
+	ret
+.LFE4:
+	.size	SHA1_Begin, .-SHA1_Begin
+	.align 16
+	.type	shaCompress, @function
+shaCompress:
+.LFB7:
+	pushq	%r15
+.LCFI0:
+	pushq	%r14
+.LCFI1:
+	pushq	%r13
+.LCFI2:
+	pushq	%r12
+.LCFI3:
+	movq	-88(%rdi), %r12
+	movq	-80(%rdi), %r10
+	movq	-72(%rdi), %r13
+	movq	-64(%rdi), %r8
+	pushq	%rbx
+.LCFI4:
+	movq	-56(%rdi), %rcx
+	movl	(%rsi), %eax
+	movl	%r12d, %edx
+	movq	%r13, %r9
+	roll	$5, %edx
+	movl	4(%rsi), %ebx
+	xorq	%r8, %r9 
+/APP
+	bswap %eax
+/NO_APP
+	andq	%r10, %r9
+	mov	%eax, %r15d
+	roll	$30, %r10d
+	movq	%r15, -48(%rdi)
+	xorq	%r8, %r9 
+	movq	-48(%rdi), %r14
+	addq	%r9, %rdx
+	movq	%r10, %rax
+	movl	%r12d, %r15d
+	addq	%rcx, %rdx
+	xorq	%r13, %rax 
+	roll	$30, %r15d
+	leaq	1518500249(%rdx,%r14), %rdx
+	andq	%r12, %rax
+	movq	%r15, %r12
+/APP
+	bswap %ebx
+/NO_APP
+	movl	%edx, %ecx
+	mov	%ebx, %r11d
+	xorq	%r13, %rax 
+	movq	%r11, -40(%rdi)
+	roll	$5, %ecx
+	movq	-40(%rdi), %r9
+	addq	%rax, %rcx
+	xorq	%r10, %r12 
+	movl	8(%rsi), %r14d
+	addq	%r8, %rcx
+	andq	%rdx, %r12
+	movl	%edx, %r11d
+	leaq	1518500249(%rcx,%r9), %rcx
+	xorq	%r10, %r12 
+	roll	$30, %r11d
+/APP
+	bswap %r14d
+/NO_APP
+	movl	%ecx, %r8d
+	mov	%r14d, %ebx
+	movl	12(%rsi), %r9d
+	movq	%rbx, -32(%rdi)
+	roll	$5, %r8d
+	movq	-32(%rdi), %rax
+	addq	%r12, %r8
+	movq	%r11, %r12
+	movl	%ecx, %ebx
+	addq	%r13, %r8
+	xorq	%r15, %r12 
+	roll	$30, %ebx
+	leaq	1518500249(%r8,%rax), %r8
+	andq	%rcx, %r12
+	movl	16(%rsi), %eax
+/APP
+	bswap %r9d
+/NO_APP
+	movl	%r8d, %edx
+	mov	%r9d, %r14d
+	xorq	%r15, %r12 
+	movq	%r14, -24(%rdi)
+	roll	$5, %edx
+	movq	-24(%rdi), %r13
+	addq	%r12, %rdx
+	movq	%rbx, %r12
+	movl	%r8d, %r14d
+	addq	%r10, %rdx
+	leaq	1518500249(%rdx,%r13), %rdx
+	movl	20(%rsi), %r13d
+/APP
+	bswap %eax
+/NO_APP
+	movl	%edx, %ecx
+	mov	%eax, %r9d
+	roll	$5, %ecx
+	xorq	%r11, %r12 
+	movq	%r9, -16(%rdi)
+	andq	%r8, %r12
+	movq	-16(%rdi), %r10
+	roll	$30, %r14d
+	xorq	%r11, %r12 
+	movq	%r14, %rax
+	movl	%edx, %r9d
+	addq	%r12, %rcx
+	xorq	%rbx, %rax 
+	roll	$30, %r9d
+	addq	%r15, %rcx
+	andq	%rdx, %rax
+	leaq	1518500249(%rcx,%r10), %rcx
+	xorq	%rbx, %rax 
+	movl	24(%rsi), %r10d
+/APP
+	bswap %r13d
+/NO_APP
+	movl	%ecx, %r8d
+	mov	%r13d, %r15d
+	movq	%r15, -8(%rdi)
+	roll	$5, %r8d
+	movq	-8(%rdi), %r12
+	addq	%rax, %r8
+	movl	%ecx, %r15d
+	addq	%r11, %r8
+	movq	%r9, %r11
+	roll	$30, %r15d
+	leaq	1518500249(%r8,%r12), %r8
+	xorq	%r14, %r11 
+	movl	28(%rsi), %r12d
+/APP
+	bswap %r10d
+/NO_APP
+	andq	%rcx, %r11
+	mov	%r10d, %r13d
+	movl	%r8d, %edx
+	movq	%r13, (%rdi)
+	xorq	%r14, %r11 
+	movq	(%rdi), %rax
+	roll	$5, %edx
+	movq	%r15, %r10
+	movl	%r8d, %r13d
+	addq	%r11, %rdx
+	xorq	%r9, %r10 
+	roll	$30, %r13d
+	addq	%rbx, %rdx
+	andq	%r8, %r10
+	leaq	1518500249(%rdx,%rax), %rdx
+	xorq	%r9, %r10 
+	movl	32(%rsi), %eax
+/APP
+	bswap %r12d
+/NO_APP
+	movl	%edx, %ecx
+	mov	%r12d, %ebx
+	movq	%rbx, 8(%rdi)
+	roll	$5, %ecx
+	movq	8(%rdi), %r11
+	addq	%r10, %rcx
+	movq	%r13, %r10
+	movl	%edx, %ebx
+	addq	%r14, %rcx
+	leaq	1518500249(%rcx,%r11), %rcx
+/APP
+	bswap %eax
+/NO_APP
+	movl	%ecx, %r8d
+	mov	%eax, %r12d
+	roll	$5, %r8d
+	xorq	%r15, %r10 
+	movq	%r12, 16(%rdi)
+	andq	%rdx, %r10
+	movq	16(%rdi), %r14
+	roll	$30, %ebx
+	xorq	%r15, %r10 
+	movq	%rbx, %rax
+	movl	36(%rsi), %r11d
+	addq	%r10, %r8
+	xorq	%r13, %rax 
+	movl	%ecx, %r12d
+	addq	%r9, %r8
+	andq	%rcx, %rax
+	roll	$30, %r12d
+	leaq	1518500249(%r8,%r14), %r8
+	xorq	%r13, %rax 
+	movl	40(%rsi), %r14d
+/APP
+	bswap %r11d
+/NO_APP
+	movl	%r8d, %edx
+	mov	%r11d, %r9d
+	movq	%r12, %r11
+	movq	%r9, 24(%rdi)
+	roll	$5, %edx
+	movq	24(%rdi), %r10
+	addq	%rax, %rdx
+	xorq	%rbx, %r11 
+	movl	%r8d, %r9d
+	addq	%r15, %rdx
+	andq	%r8, %r11
+	roll	$30, %r9d
+	leaq	1518500249(%rdx,%r10), %rdx
+	xorq	%rbx, %r11 
+	movl	44(%rsi), %r10d
+/APP
+	bswap %r14d
+/NO_APP
+	movl	%edx, %ecx
+	mov	%r14d, %r15d
+	movq	%r15, 32(%rdi)
+	roll	$5, %ecx
+	movq	32(%rdi), %rax
+	addq	%r11, %rcx
+	movq	%r9, %r11
+	movl	%edx, %r15d
+	addq	%r13, %rcx
+	xorq	%r12, %r11 
+	roll	$30, %r15d
+	leaq	1518500249(%rcx,%rax), %rcx
+	andq	%rdx, %r11
+	movl	48(%rsi), %eax
+/APP
+	bswap %r10d
+/NO_APP
+	movl	%ecx, %r8d
+	mov	%r10d, %r14d
+	xorq	%r12, %r11 
+	movq	%r14, 40(%rdi)
+	roll	$5, %r8d
+	movq	40(%rdi), %r13
+	addq	%r11, %r8
+	movq	%r15, %r10
+	movl	%ecx, %r14d
+	addq	%rbx, %r8
+	xorq	%r9, %r10 
+	leaq	1518500249(%r8,%r13), %r8
+	movl	52(%rsi), %r13d
+/APP
+	bswap %eax
+/NO_APP
+	movl	%r8d, %edx
+	mov	%eax, %ebx
+	roll	$5, %edx
+	andq	%rcx, %r10
+	movq	%rbx, 48(%rdi)
+	xorq	%r9, %r10 
+	movq	48(%rdi), %r11
+	roll	$30, %r14d
+	addq	%r10, %rdx
+	movq	%r14, %rax
+	movl	%r8d, %ebx
+	addq	%r12, %rdx
+	xorq	%r15, %rax 
+	roll	$30, %ebx
+	leaq	1518500249(%rdx,%r11), %rdx
+	andq	%r8, %rax
+	movl	56(%rsi), %r11d
+/APP
+	bswap %r13d
+/NO_APP
+	movl	%edx, %ecx
+	mov	%r13d, %r12d
+	xorq	%r15, %rax 
+	movq	%r12, 56(%rdi)
+	roll	$5, %ecx
+	movq	56(%rdi), %r10
+	addq	%rax, %rcx
+	movl	%edx, %r12d
+	addq	%r9, %rcx
+	movq	%rbx, %r9
+	roll	$30, %r12d
+	leaq	1518500249(%rcx,%r10), %rcx
+	xorq	%r14, %r9 
+	movl	60(%rsi), %r10d
+/APP
+	bswap %r11d
+/NO_APP
+	andq	%rdx, %r9
+	mov	%r11d, %r13d
+	movl	%ecx, %r8d
+	movq	%r13, 64(%rdi)
+	xorq	%r14, %r9 
+	movq	64(%rdi), %rax
+	roll	$5, %r8d
+	movq	%r12, %r11
+	movl	%ecx, %r13d
+	addq	%r9, %r8
+	xorq	%rbx, %r11 
+	roll	$30, %r13d
+	addq	%r15, %r8
+	andq	%rcx, %r11
+	leaq	1518500249(%r8,%rax), %r8
+	xorq	%rbx, %r11 
+/APP
+	bswap %r10d
+/NO_APP
+	movl	%r8d, %esi
+	mov	%r10d, %r15d
+	movq	%r15, 72(%rdi)
+	roll	$5, %esi
+	movq	72(%rdi), %r9
+	movq	56(%rdi), %r10
+	movq	16(%rdi), %rcx
+	addq	%r11, %rsi
+	movq	-32(%rdi), %rdx
+	addq	%r14, %rsi
+	movq	-48(%rdi), %rax
+	leaq	1518500249(%rsi,%r9), %r14
+	movq	%r13, %r11
+	movl	%r8d, %r15d
+	xorq	%rcx, %r10 
+	xorq	%rdx, %r10 
+	movl	%r14d, %ecx
+	xorl	%eax, %r10d
+	roll	%r10d
+	roll	$5, %ecx
+	xorq	%r12, %r11 
+	andq	%r8, %r11
+	movq	%r10, -48(%rdi)
+	movq	-48(%rdi), %r9
+	xorq	%r12, %r11 
+	roll	$30, %r15d
+	movl	%r14d, %r10d
+	addq	%r11, %rcx
+	movq	64(%rdi), %r11
+	movq	24(%rdi), %rdx
+	addq	%rbx, %rcx
+	movq	-24(%rdi), %rbx
+	movq	-40(%rdi), %rax
+	leaq	1518500249(%rcx,%r9), %rcx
+	movq	%r15, %r8
+	roll	$30, %r10d
+	xorq	%rdx, %r11 
+	xorq	%r13, %r8 
+	xorq	%rbx, %r11 
+	andq	%r14, %r8
+	movl	%ecx, %r9d
+	xorl	%eax, %r11d
+	xorq	%r13, %r8 
+	roll	$5, %r9d
+	roll	%r11d
+	addq	%r8, %r9
+	movq	%r10, %rax
+	movq	%r11, -40(%rdi)
+	movq	-40(%rdi), %rsi
+	addq	%r12, %r9
+	movq	72(%rdi), %rbx
+	movq	32(%rdi), %rdx
+	xorq	%r15, %rax 
+	movq	-16(%rdi), %r14
+	movq	-32(%rdi), %r12
+	andq	%rcx, %rax
+	leaq	1518500249(%r9,%rsi), %r9
+	xorq	%r15, %rax 
+	movl	%ecx, %r11d
+	xorq	%rdx, %rbx 
+	roll	$30, %r11d
+	xorq	%r14, %rbx 
+	movl	%r9d, %esi
+	xorl	%r12d, %ebx
+	roll	$5, %esi
+	roll	%ebx
+	addq	%rax, %rsi
+	movq	%rbx, -32(%rdi)
+	movq	-32(%rdi), %r8
+	addq	%r13, %rsi
+	movq	-48(%rdi), %r12
+	movq	40(%rdi), %rdx
+	movq	%r11, %r13
+	movq	-8(%rdi), %r14
+	movq	-24(%rdi), %rcx
+	movl	%r9d, %ebx
+	leaq	1518500249(%rsi,%r8), %rsi
+	xorq	%rdx, %r12 
+	xorq	%r14, %r12 
+	movl	%esi, %r8d
+	xorl	%ecx, %r12d
+	roll	%r12d
+	roll	$5, %r8d
+	xorq	%r10, %r13 
+	andq	%r9, %r13
+	movq	%r12, -24(%rdi)
+	movq	-24(%rdi), %rax
+	xorq	%r10, %r13 
+	roll	$30, %ebx
+	movl	%esi, %r12d
+	addq	%r13, %r8
+	xorq	%rbx, %rsi 
+	roll	$30, %r12d
+	addq	%r15, %r8
+	movq	-40(%rdi), %r15
+	movq	48(%rdi), %rdx
+	movq	(%rdi), %r14
+	movq	-16(%rdi), %r9
+	leaq	1518500249(%r8,%rax), %r13
+	xorq	%r11, %rsi 
+	xorq	%rdx, %r15 
+	movl	%r13d, %ecx
+	xorq	%r14, %r15 
+	roll	$5, %ecx
+	xorl	%r9d, %r15d
+	addq	%rsi, %rcx
+	roll	%r15d
+	addq	%r10, %rcx
+	movq	%r15, -16(%rdi)
+	movq	-16(%rdi), %rsi
+	movl	%r13d, %r15d
+	movq	-32(%rdi), %r14
+	movq	56(%rdi), %rax
+	xorq	%r12, %r13 
+	movq	8(%rdi), %rdx
+	movq	-8(%rdi), %r10
+	xorq	%rbx, %r13 
+	leaq	1859775393(%rcx,%rsi), %r9
+	roll	$30, %r15d
+	xorq	%rax, %r14 
+	xorq	%rdx, %r14 
+	movl	%r9d, %esi
+	xorl	%r10d, %r14d
+	roll	$5, %esi
+	roll	%r14d
+	addq	%r13, %rsi
+	movq	%r14, -8(%rdi)
+	movq	-8(%rdi), %r8
+	addq	%r11, %rsi
+	movq	-24(%rdi), %r13
+	movq	64(%rdi), %rax
+	movl	%r9d, %r14d
+	movq	16(%rdi), %rdx
+	movq	(%rdi), %r11
+	xorq	%r15, %r9 
+	leaq	1859775393(%rsi,%r8), %r10
+	xorq	%rax, %r13 
+	xorq	%rdx, %r13 
+	movl	%r10d, %r8d
+	xorl	%r11d, %r13d
+	roll	$5, %r8d
+	roll	%r13d
+	xorq	%r12, %r9 
+	roll	$30, %r14d
+	addq	%r9, %r8
+	movq	%r13, (%rdi)
+	movq	(%rdi), %rcx
+	addq	%rbx, %r8
+	movq	-16(%rdi), %rbx
+	movq	72(%rdi), %rax
+	movq	24(%rdi), %rdx
+	movq	8(%rdi), %r9
+	movl	%r10d, %r13d
+	leaq	1859775393(%r8,%rcx), %r11
+	xorq	%r14, %r10 
+	roll	$30, %r13d
+	xorq	%rax, %rbx 
+	xorq	%r15, %r10 
+	xorq	%rdx, %rbx 
+	movl	%r11d, %ecx
+	xorl	%r9d, %ebx
+	roll	$5, %ecx
+	roll	%ebx
+	addq	%r10, %rcx
+	movq	%rbx, 8(%rdi)
+	movq	8(%rdi), %rsi
+	addq	%r12, %rcx
+	movq	-8(%rdi), %r12
+	movq	-48(%rdi), %rax
+	movl	%r11d, %ebx
+	movq	32(%rdi), %rdx
+	movq	16(%rdi), %r9
+	xorq	%r13, %r11 
+	leaq	1859775393(%rcx,%rsi), %r10
+	xorq	%r14, %r11 
+	roll	$30, %ebx
+	xorq	%rax, %r12 
+	xorq	%rdx, %r12 
+	movl	%r10d, %esi
+	xorl	%r9d, %r12d
+	roll	$5, %esi
+	roll	%r12d
+	addq	%r11, %rsi
+	movq	%r12, 16(%rdi)
+	addq	%r15, %rsi
+	movq	16(%rdi), %r8
+	movq	(%rdi), %r15
+	movq	-40(%rdi), %rax
+	movl	%r10d, %r12d
+	movq	40(%rdi), %rdx
+	movq	24(%rdi), %r9
+	xorq	%rbx, %r10 
+	leaq	1859775393(%rsi,%r8), %r11
+	xorq	%r13, %r10 
+	xorq	%rax, %r15 
+	xorq	%rdx, %r15 
+	movl	%r11d, %r8d
+	xorl	%r9d, %r15d
+	roll	$5, %r8d
+	roll	%r15d
+	addq	%r10, %r8
+	movq	%r15, 24(%rdi)
+	movq	24(%rdi), %rcx
+	addq	%r14, %r8
+	movq	8(%rdi), %r14
+	movq	-32(%rdi), %rax
+	roll	$30, %r12d
+	movq	48(%rdi), %rdx
+	movq	32(%rdi), %r10
+	movl	%r11d, %r15d
+	leaq	1859775393(%r8,%rcx), %r9
+	xorq	%r12, %r11 
+	roll	$30, %r15d
+	xorq	%rax, %r14 
+	xorq	%rbx, %r11 
+	xorq	%rdx, %r14 
+	movl	%r9d, %ecx
+	xorl	%r10d, %r14d
+	roll	$5, %ecx
+	roll	%r14d
+	addq	%r11, %rcx
+	movq	%r14, 32(%rdi)
+	addq	%r13, %rcx
+	movq	32(%rdi), %rsi
+	movq	16(%rdi), %r13
+	movq	-24(%rdi), %rax
+	movl	%r9d, %r14d
+	movq	56(%rdi), %rdx
+	movq	40(%rdi), %r11
+	xorq	%r15, %r9 
+	leaq	1859775393(%rcx,%rsi), %r10
+	xorq	%r12, %r9 
+	roll	$30, %r14d
+	xorq	%rax, %r13 
+	xorq	%rdx, %r13 
+	movl	%r10d, %esi
+	xorl	%r11d, %r13d
+	roll	$5, %esi
+	roll	%r13d
+	addq	%r9, %rsi
+	movq	%r13, 40(%rdi)
+	movq	40(%rdi), %r8
+	addq	%rbx, %rsi
+	movq	24(%rdi), %rbx
+	movq	-16(%rdi), %rax
+	movl	%r10d, %r13d
+	movq	64(%rdi), %rdx
+	movq	48(%rdi), %r9
+	xorq	%r14, %r10 
+	leaq	1859775393(%rsi,%r8), %r11
+	xorq	%r15, %r10 
+	roll	$30, %r13d
+	xorq	%rax, %rbx 
+	xorq	%rdx, %rbx 
+	movl	%r11d, %r8d
+	xorl	%r9d, %ebx
+	roll	$5, %r8d
+	roll	%ebx
+	addq	%r10, %r8
+	movq	%rbx, 48(%rdi)
+	addq	%r12, %r8
+	movq	48(%rdi), %rcx
+	movq	32(%rdi), %r12
+	movq	-8(%rdi), %rax
+	movl	%r11d, %ebx
+	movq	72(%rdi), %rdx
+	movq	56(%rdi), %r9
+	leaq	1859775393(%r8,%rcx), %r10
+	xorq	%rax, %r12 
+	xorq	%rdx, %r12 
+	movl	%r10d, %ecx
+	xorl	%r9d, %r12d
+	xorq	%r13, %r11 
+	roll	$5, %ecx
+	xorq	%r14, %r11 
+	roll	%r12d
+	roll	$30, %ebx
+	addq	%r11, %rcx
+	movq	%r12, 56(%rdi)
+	movq	56(%rdi), %rsi
+	addq	%r15, %rcx
+	movq	40(%rdi), %r15
+	movq	(%rdi), %rax
+	movq	-48(%rdi), %rdx
+	movq	64(%rdi), %r9
+	movl	%r10d, %r12d
+	leaq	1859775393(%rcx,%rsi), %r11
+	xorq	%rbx, %r10 
+	roll	$30, %r12d
+	xorq	%rax, %r15 
+	xorq	%r13, %r10 
+	xorq	%rdx, %r15 
+	movl	%r11d, %esi
+	xorl	%r9d, %r15d
+	roll	$5, %esi
+	roll	%r15d
+	addq	%r10, %rsi
+	movq	%r15, 64(%rdi)
+	movq	64(%rdi), %r8
+	addq	%r14, %rsi
+	movq	48(%rdi), %r14
+	movq	8(%rdi), %rax
+	movl	%r11d, %r15d
+	movq	-40(%rdi), %rdx
+	movq	72(%rdi), %r10
+	xorq	%r12, %r11 
+	leaq	1859775393(%rsi,%r8), %r9
+	xorq	%rbx, %r11 
+	roll	$30, %r15d
+	xorq	%rax, %r14 
+	xorq	%rdx, %r14 
+	movl	%r9d, %r8d
+	xorl	%r10d, %r14d
+	roll	$5, %r8d
+	roll	%r14d
+	addq	%r11, %r8
+	movq	%r14, 72(%rdi)
+	addq	%r13, %r8
+	movq	72(%rdi), %rcx
+	movq	56(%rdi), %r13
+	movq	16(%rdi), %rax
+	movl	%r9d, %r14d
+	movq	-32(%rdi), %rdx
+	movq	-48(%rdi), %r11
+	leaq	1859775393(%r8,%rcx), %r10
+	xorq	%rax, %r13 
+	xorq	%rdx, %r13 
+	movl	%r10d, %ecx
+	xorl	%r11d, %r13d
+	roll	$5, %ecx
+	roll	%r13d
+	xorq	%r15, %r9 
+	roll	$30, %r14d
+	xorq	%r12, %r9 
+	movq	%r13, -48(%rdi)
+	movq	-48(%rdi), %rsi
+	addq	%r9, %rcx
+	movl	%r10d, %r13d
+	xorq	%r14, %r10 
+	addq	%rbx, %rcx
+	movq	64(%rdi), %rbx
+	movq	24(%rdi), %rax
+	movq	-24(%rdi), %rdx
+	leaq	1859775393(%rcx,%rsi), %r11
+	movq	-40(%rdi), %r9
+	xorq	%r15, %r10 
+	roll	$30, %r13d
+	xorq	%rax, %rbx 
+	movl	%r11d, %esi
+	xorq	%rdx, %rbx 
+	roll	$5, %esi
+	xorl	%r9d, %ebx
+	addq	%r10, %rsi
+	roll	%ebx
+	addq	%r12, %rsi
+	movq	%rbx, -40(%rdi)
+	movq	-40(%rdi), %r8
+	movl	%r11d, %ebx
+	movq	72(%rdi), %r12
+	movq	32(%rdi), %rax
+	xorq	%r13, %r11 
+	movq	-16(%rdi), %rdx
+	movq	-32(%rdi), %r9
+	xorq	%r14, %r11 
+	leaq	1859775393(%rsi,%r8), %r10
+	roll	$30, %ebx
+	xorq	%rax, %r12 
+	xorq	%rdx, %r12 
+	movl	%r10d, %r8d
+	xorl	%r9d, %r12d
+	roll	$5, %r8d
+	roll	%r12d
+	addq	%r11, %r8
+	movq	%r12, -32(%rdi)
+	movq	-32(%rdi), %rcx
+	addq	%r15, %r8
+	movq	-48(%rdi), %r15
+	movq	40(%rdi), %rax
+	movl	%r10d, %r12d
+	movq	-8(%rdi), %rdx
+	movq	-24(%rdi), %r9
+	xorq	%rbx, %r10 
+	leaq	1859775393(%r8,%rcx), %r11
+	xorq	%r13, %r10 
+	xorq	%rax, %r15 
+	xorq	%rdx, %r15 
+	movl	%r11d, %ecx
+	xorl	%r9d, %r15d
+	roll	$5, %ecx
+	roll	%r15d
+	addq	%r10, %rcx
+	addq	%r14, %rcx
+	movq	%r15, -24(%rdi)
+	movq	-24(%rdi), %rsi
+	movq	-40(%rdi), %r14
+	movq	48(%rdi), %rax
+	roll	$30, %r12d
+	movq	(%rdi), %rdx
+	movq	-16(%rdi), %r10
+	movl	%r11d, %r15d
+	leaq	1859775393(%rcx,%rsi), %r9
+	xorq	%r12, %r11 
+	roll	$30, %r15d
+	xorq	%rax, %r14 
+	xorq	%rbx, %r11 
+	xorq	%rdx, %r14 
+	movl	%r9d, %esi
+	xorl	%r10d, %r14d
+	roll	$5, %esi
+	roll	%r14d
+	addq	%r11, %rsi
+	movq	%r14, -16(%rdi)
+	movq	-16(%rdi), %r8
+	addq	%r13, %rsi
+	movq	-32(%rdi), %r11
+	movq	56(%rdi), %rax
+	movl	%r9d, %r14d
+	movq	8(%rdi), %rdx
+	movq	-8(%rdi), %r10
+	xorq	%r15, %r9 
+	leaq	1859775393(%rsi,%r8), %r13
+	xorq	%r12, %r9 
+	roll	$30, %r14d
+	xorq	%rax, %r11 
+	xorq	%rdx, %r11 
+	movl	%r13d, %r8d
+	xorl	%r10d, %r11d
+	roll	$5, %r8d
+	movl	%r13d, %r10d
+	roll	%r11d
+	addq	%r9, %r8
+	xorq	%r14, %r13 
+	movq	%r11, -8(%rdi)
+	addq	%rbx, %r8
+	movq	-8(%rdi), %rbx
+	movq	-24(%rdi), %r9
+	movq	64(%rdi), %rax
+	xorq	%r15, %r13 
+	movq	16(%rdi), %rdx
+	movq	(%rdi), %rcx
+	leaq	1859775393(%r8,%rbx), %r11
+	xorq	%rax, %r9 
+	xorq	%rdx, %r9 
+	movl	%r11d, %ebx
+	xorl	%ecx, %r9d
+	roll	$5, %ebx
+	roll	%r9d
+	addq	%r13, %rbx
+	movq	%r9, (%rdi)
+	movq	(%rdi), %rsi
+	addq	%r12, %rbx
+	movq	-16(%rdi), %r12
+	movq	72(%rdi), %r13
+	movl	%r11d, %r9d
+	leaq	1859775393(%rbx,%rsi), %rcx
+	movl	%r10d, %ebx
+	movq	24(%rdi), %r10
+	movq	8(%rdi), %rax
+	xorq	%r13, %r12 
+	roll	$30, %ebx
+	movl	%ecx, %esi
+	xorq	%r10, %r12 
+	xorq	%rbx, %r11 
+	roll	$5, %esi
+	xorl	%eax, %r12d
+	xorq	%r14, %r11 
+	roll	$30, %r9d
+	roll	%r12d
+	addq	%r11, %rsi
+	movq	%rcx, %rax
+	movq	%r12, 8(%rdi)
+	movq	8(%rdi), %rdx
+	addq	%r15, %rsi
+	movq	-8(%rdi), %r11
+	movq	-48(%rdi), %r13
+	movl	%ecx, %r12d
+	movq	32(%rdi), %r10
+	movq	16(%rdi), %r8
+	orq	%r9, %rcx
+	leaq	1859775393(%rsi,%rdx), %rsi
+	andq	%rbx, %rcx
+	andq	%r9, %rax
+	xorq	%r13, %r11 
+	orq	%rcx, %rax
+	roll	$30, %r12d
+	xorq	%r10, %r11 
+	movq	%rsi, %r10
+	xorl	%r8d, %r11d
+	movl	%esi, %r8d
+	andq	%r12, %r10
+	roll	%r11d
+	roll	$5, %r8d
+	movq	%r11, 16(%rdi)
+	addq	%rax, %r8
+	movq	16(%rdi), %r15
+	movq	(%rdi), %r13
+	movq	-40(%rdi), %rdx
+	addq	%r14, %r8
+	movq	40(%rdi), %r14
+	movq	24(%rdi), %rcx
+	movl	%esi, %r11d
+	addq	%r15, %r8
+	movl	$2400959708, %r15d
+	orq	%r12, %rsi
+	xorq	%rdx, %r13 
+	addq	%r15, %r8
+	andq	%r9, %rsi
+	xorq	%r14, %r13 
+	orq	%rsi, %r10
+	xorl	%ecx, %r13d
+	movl	%r8d, %ecx
+	roll	%r13d
+	roll	$5, %ecx
+	movq	%r13, 24(%rdi)
+	addq	%r10, %rcx
+	movq	24(%rdi), %rax
+	movq	8(%rdi), %r14
+	movq	-32(%rdi), %rdx
+	addq	%rbx, %rcx
+	movq	48(%rdi), %rbx
+	movq	32(%rdi), %rsi
+	roll	$30, %r11d
+	addq	%rax, %rcx
+	movl	%r8d, %r13d
+	movq	%r8, %r10
+	xorq	%rdx, %r14 
+	addq	%r15, %rcx
+	orq	%r11, %r8
+	xorq	%rbx, %r14 
+	andq	%r12, %r8
+	andq	%r11, %r10
+	xorl	%esi, %r14d
+	movl	%ecx, %esi
+	orq	%r8, %r10
+	roll	$5, %esi
+	roll	%r14d
+	roll	$30, %r13d
+	addq	%r10, %rsi
+	movq	%r14, 32(%rdi)
+	movq	32(%rdi), %rax
+	addq	%r9, %rsi
+	movq	16(%rdi), %r9
+	movq	-24(%rdi), %rdx
+	movq	56(%rdi), %rbx
+	movq	40(%rdi), %r8
+	movl	%ecx, %r14d
+	addq	%rax, %rsi
+	movq	%rcx, %r10
+	orq	%r13, %rcx
+	xorq	%rdx, %r9 
+	addq	%r15, %rsi
+	andq	%r11, %rcx
+	xorq	%rbx, %r9 
+	andq	%r13, %r10
+	roll	$30, %r14d
+	xorl	%r8d, %r9d
+	movl	%esi, %r8d
+	orq	%rcx, %r10
+	roll	%r9d
+	roll	$5, %r8d
+	movq	%r9, 40(%rdi)
+	addq	%r10, %r8
+	movq	40(%rdi), %rax
+	movq	24(%rdi), %r10
+	movq	-16(%rdi), %rdx
+	addq	%r12, %r8
+	movq	64(%rdi), %rbx
+	movq	48(%rdi), %rcx
+	movl	%esi, %r9d
+	addq	%rax, %r8
+	movq	%rsi, %r12
+	xorq	%rdx, %r10 
+	addq	%r15, %r8
+	xorq	%rbx, %r10 
+	orq	%r14, %rsi
+	andq	%r14, %r12
+	andq	%r13, %rsi
+	xorl	%ecx, %r10d
+	movl	%r8d, %ecx
+	orq	%rsi, %r12
+	roll	%r10d
+	roll	$5, %ecx
+	movq	%r10, 48(%rdi)
+	addq	%r12, %rcx
+	movq	48(%rdi), %rax
+	movq	32(%rdi), %r12
+	movq	-8(%rdi), %rdx
+	addq	%r11, %rcx
+	movq	72(%rdi), %rbx
+	movq	56(%rdi), %rsi
+	roll	$30, %r9d
+	addq	%rax, %rcx
+	movl	%r8d, %r10d
+	movq	%r8, %r11
+	xorq	%rdx, %r12 
+	addq	%r15, %rcx
+	orq	%r9, %r8
+	xorq	%rbx, %r12 
+	andq	%r14, %r8
+	andq	%r9, %r11
+	xorl	%esi, %r12d
+	movl	%ecx, %esi
+	orq	%r8, %r11
+	roll	%r12d
+	roll	$5, %esi
+	roll	$30, %r10d
+	movq	%r12, 56(%rdi)
+	addq	%r11, %rsi
+	movq	56(%rdi), %rax
+	movq	40(%rdi), %r11
+	movq	(%rdi), %rdx
+	addq	%r13, %rsi
+	movq	-48(%rdi), %rbx
+	movq	64(%rdi), %r8
+	movq	%rcx, %r13
+	addq	%rax, %rsi
+	andq	%r10, %r13
+	movl	%ecx, %r12d
+	xorq	%rdx, %r11 
+	addq	%r15, %rsi
+	xorq	%rbx, %r11 
+	xorl	%r8d, %r11d
+	movl	%esi, %r8d
+	roll	%r11d
+	roll	$5, %r8d
+	orq	%r10, %rcx
+	andq	%r9, %rcx
+	movq	%r11, 64(%rdi)
+	movq	64(%rdi), %rax
+	orq	%rcx, %r13
+	roll	$30, %r12d
+	movl	%esi, %r11d
+	addq	%r13, %r8
+	movq	48(%rdi), %r13
+	movq	8(%rdi), %rdx
+	movq	-40(%rdi), %rbx
+	addq	%r14, %r8
+	movq	72(%rdi), %rcx
+	addq	%rax, %r8
+	movq	%rsi, %r14
+	orq	%r12, %rsi
+	xorq	%rdx, %r13 
+	addq	%r15, %r8
+	andq	%r10, %rsi
+	xorq	%rbx, %r13 
+	andq	%r12, %r14
+	roll	$30, %r11d
+	xorl	%ecx, %r13d
+	movl	%r8d, %ecx
+	orq	%rsi, %r14
+	roll	%r13d
+	roll	$5, %ecx
+	movq	%r13, 72(%rdi)
+	addq	%r14, %rcx
+	movq	72(%rdi), %rax
+	movq	56(%rdi), %r14
+	movq	16(%rdi), %rdx
+	addq	%r9, %rcx
+	movq	-32(%rdi), %rbx
+	movq	-48(%rdi), %rsi
+	movl	%r8d, %r13d
+	addq	%rax, %rcx
+	movq	%r8, %r9
+	orq	%r11, %r8
+	xorq	%rdx, %r14 
+	addq	%r15, %rcx
+	andq	%r12, %r8
+	xorq	%rbx, %r14 
+	andq	%r11, %r9
+	xorl	%esi, %r14d
+	movl	%ecx, %esi
+	orq	%r8, %r9
+	roll	$5, %esi
+	roll	%r14d
+	addq	%r9, %rsi
+	movq	%r14, -48(%rdi)
+	movq	-48(%rdi), %rax
+	addq	%r10, %rsi
+	movq	64(%rdi), %r10
+	movq	24(%rdi), %rdx
+	movq	-24(%rdi), %rbx
+	movq	-40(%rdi), %r8
+	movl	%ecx, %r14d
+	addq	%rax, %rsi
+	roll	$30, %r13d
+	movq	%rcx, %r9
+	xorq	%rdx, %r10 
+	addq	%r15, %rsi
+	orq	%r13, %rcx
+	xorq	%rbx, %r10 
+	andq	%r11, %rcx
+	andq	%r13, %r9
+	xorl	%r8d, %r10d
+	movl	%esi, %r8d
+	orq	%rcx, %r9
+	roll	$5, %r8d
+	roll	%r10d
+	roll	$30, %r14d
+	addq	%r9, %r8
+	movq	%r10, -40(%rdi)
+	movq	-40(%rdi), %rax
+	addq	%r12, %r8
+	movq	72(%rdi), %r12
+	movq	32(%rdi), %rdx
+	movq	-16(%rdi), %rbx
+	movq	-32(%rdi), %rcx
+	movl	%esi, %r10d
+	addq	%rax, %r8
+	movq	%rsi, %r9
+	orq	%r14, %rsi
+	xorq	%rdx, %r12 
+	addq	%r15, %r8
+	andq	%r13, %rsi
+	xorq	%rbx, %r12 
+	andq	%r14, %r9
+	roll	$30, %r10d
+	xorl	%ecx, %r12d
+	movl	%r8d, %ecx
+	orq	%rsi, %r9
+	roll	$5, %ecx
+	roll	%r12d
+	addq	%r9, %rcx
+	movq	%r12, -32(%rdi)
+	movq	-32(%rdi), %rax
+	addq	%r11, %rcx
+	movq	-48(%rdi), %r11
+	movq	40(%rdi), %rdx
+	movq	-8(%rdi), %rbx
+	movq	-24(%rdi), %rsi
+	movl	%r8d, %r12d
+	addq	%rax, %rcx
+	movq	%r8, %r9
+	xorq	%rdx, %r11 
+	addq	%r15, %rcx
+	xorq	%rbx, %r11 
+	xorl	%esi, %r11d
+	orq	%r10, %r8
+	andq	%r10, %r9
+	andq	%r14, %r8
+	movl	%ecx, %esi
+	roll	%r11d
+	orq	%r8, %r9
+	roll	$5, %esi
+	movq	%r11, -24(%rdi)
+	addq	%r9, %rsi
+	movq	-24(%rdi), %rax
+	roll	$30, %r12d
+	addq	%r13, %rsi
+	movq	-40(%rdi), %r13
+	movq	48(%rdi), %rdx
+	movq	(%rdi), %rbx
+	movq	-16(%rdi), %r8
+	movl	%ecx, %r11d
+	addq	%rax, %rsi
+	movq	%rcx, %r9
+	orq	%r12, %rcx
+	xorq	%rdx, %r13 
+	addq	%r15, %rsi
+	andq	%r10, %rcx
+	xorq	%rbx, %r13 
+	andq	%r12, %r9
+	roll	$30, %r11d
+	xorl	%r8d, %r13d
+	movl	%esi, %r8d
+	orq	%rcx, %r9
+	roll	%r13d
+	roll	$5, %r8d
+	movq	%r13, -16(%rdi)
+	addq	%r9, %r8
+	movq	-16(%rdi), %rax
+	movq	-32(%rdi), %r9
+	movq	56(%rdi), %rdx
+	addq	%r14, %r8
+	movq	8(%rdi), %rcx
+	movq	-8(%rdi), %rbx
+	movl	%esi, %r13d
+	addq	%rax, %r8
+	movq	%rsi, %r14
+	orq	%r11, %rsi
+	xorq	%rdx, %r9 
+	addq	%r15, %r8
+	andq	%r11, %r14
+	xorq	%rcx, %r9 
+	xorl	%ebx, %r9d
+	movl	%r8d, %ebx
+	roll	%r9d
+	roll	$5, %ebx
+	andq	%r12, %rsi
+	orq	%rsi, %r14
+	movq	%r9, -8(%rdi)
+	movq	-8(%rdi), %rax
+	addq	%r14, %rbx
+	movq	-24(%rdi), %r14
+	movq	64(%rdi), %rdx
+	movq	16(%rdi), %rcx
+	addq	%r10, %rbx
+	movq	(%rdi), %rsi
+	roll	$30, %r13d
+	addq	%rax, %rbx
+	movl	%r8d, %r9d
+	xorq	%rdx, %r14 
+	addq	%r15, %rbx
+	movq	%r8, %r10
+	xorq	%rcx, %r14 
+	orq	%r13, %r8
+	andq	%r13, %r10
+	andq	%r11, %r8
+	xorl	%esi, %r14d
+	movl	%ebx, %esi
+	orq	%r8, %r10
+	roll	$5, %esi
+	roll	%r14d
+	addq	%r10, %rsi
+	movq	%r14, (%rdi)
+	movq	(%rdi), %rax
+	addq	%r12, %rsi
+	movq	-16(%rdi), %r12
+	movq	72(%rdi), %rdx
+	movq	24(%rdi), %rcx
+	movq	8(%rdi), %r8
+	roll	$30, %r9d
+	addq	%rax, %rsi
+	movl	%ebx, %r14d
+	movq	%rbx, %r10
+	xorq	%rdx, %r12 
+	addq	%r15, %rsi
+	orq	%r9, %rbx
+	xorq	%rcx, %r12 
+	andq	%r13, %rbx
+	andq	%r9, %r10
+	xorl	%r8d, %r12d
+	movl	%esi, %r8d
+	orq	%rbx, %r10
+	roll	%r12d
+	roll	$5, %r8d
+	movq	%r12, 8(%rdi)
+	movq	8(%rdi), %rax
+	addq	%r10, %r8
+	movq	-8(%rdi), %rbx
+	movq	-48(%rdi), %rdx
+	addq	%r11, %r8
+	movq	32(%rdi), %r11
+	movq	16(%rdi), %rcx
+	movl	%esi, %r12d
+	addq	%rax, %r8
+	movq	%rsi, %r10
+	addq	%r15, %r8
+	xorq	%rdx, %rbx 
+	roll	$30, %r14d
+	xorq	%r11, %rbx 
+	orq	%r14, %rsi
+	andq	%r14, %r10
+	xorl	%ecx, %ebx
+	andq	%r9, %rsi
+	movl	%r8d, %ecx
+	roll	%ebx
+	orq	%rsi, %r10
+	roll	$5, %ecx
+	movq	%rbx, 16(%rdi)
+	movq	16(%rdi), %rsi
+	addq	%r10, %rcx
+	movq	(%rdi), %r11
+	movq	-40(%rdi), %rax
+	addq	%r13, %rcx
+	movq	40(%rdi), %rdx
+	movq	24(%rdi), %r13
+	roll	$30, %r12d
+	addq	%rsi, %rcx
+	movl	%r8d, %ebx
+	movq	%r8, %r10
+	xorq	%rax, %r11 
+	addq	%r15, %rcx
+	orq	%r12, %r8
+	xorq	%rdx, %r11 
+	andq	%r14, %r8
+	andq	%r12, %r10
+	xorl	%r13d, %r11d
+	movl	%ecx, %r13d
+	orq	%r8, %r10
+	roll	%r11d
+	roll	$5, %r13d
+	roll	$30, %ebx
+	movq	%r11, 24(%rdi)
+	addq	%r10, %r13
+	movq	24(%rdi), %rsi
+	movq	8(%rdi), %r10
+	movq	-32(%rdi), %rax
+	addq	%r9, %r13
+	movq	48(%rdi), %rdx
+	movq	32(%rdi), %r8
+	movl	%ecx, %r11d
+	addq	%rsi, %r13
+	movq	%rcx, %r9
+	xorq	%rax, %r10 
+	addq	%r15, %r13
+	xorq	%rdx, %r10 
+	xorl	%r8d, %r10d
+	movl	%r13d, %r8d
+	roll	%r10d
+	orq	%rbx, %rcx
+	andq	%rbx, %r9
+	movq	%r10, 32(%rdi)
+	andq	%r12, %rcx
+	movl	%r13d, %r10d
+	orq	%rcx, %r9
+	roll	$5, %r10d
+	movq	32(%rdi), %rsi
+	addq	%r9, %r10
+	roll	$30, %r11d
+	movq	%r13, %rcx
+	addq	%r14, %r10
+	movq	16(%rdi), %r14
+	movq	-24(%rdi), %rax
+	movq	56(%rdi), %rdx
+	movq	40(%rdi), %r9
+	addq	%rsi, %r10
+	addq	%r15, %r10
+	orq	%r11, %r13
+	andq	%r11, %rcx
+	xorq	%rax, %r14 
+	andq	%rbx, %r13
+	xorq	%rdx, %r14 
+	orq	%r13, %rcx
+	xorl	%r9d, %r14d
+	movl	%r10d, %r9d
+	roll	%r14d
+	roll	$5, %r9d
+	movq	%r14, 40(%rdi)
+	movq	40(%rdi), %rsi
+	addq	%rcx, %r9
+	movq	24(%rdi), %r13
+	addq	%r12, %r9
+	movq	-16(%rdi), %r12
+	movq	64(%rdi), %rax
+	movl	%r10d, %r14d
+	addq	%rsi, %r9
+	movl	%r8d, %esi
+	addq	%r15, %r9
+	movq	48(%rdi), %r15
+	xorq	%r12, %r13 
+	roll	$30, %esi
+	xorq	%rax, %r13 
+	xorq	%rsi, %r10 
+	xorl	%r15d, %r13d
+	movl	%r9d, %r15d
+	xorq	%r11, %r10 
+	roll	$5, %r15d
+	roll	%r13d
+	addq	%r10, %r15
+	movq	%r13, 48(%rdi)
+	movq	48(%rdi), %r10
+	addq	%rbx, %r15
+	movq	32(%rdi), %rbx
+	movq	-8(%rdi), %r8
+	movq	72(%rdi), %rdx
+	movq	56(%rdi), %rcx
+	roll	$30, %r14d
+	addq	%r10, %r15
+	movl	$3395469782, %r10d
+	movl	%r9d, %r13d
+	xorq	%r8, %rbx 
+	addq	%r10, %r15
+	xorq	%r14, %r9 
+	xorq	%rdx, %rbx 
+	xorq	%rsi, %r9 
+	roll	$30, %r13d
+	xorl	%ecx, %ebx
+	movl	%r15d, %ecx
+	roll	%ebx
+	roll	$5, %ecx
+	movq	%rbx, 56(%rdi)
+	addq	%r9, %rcx
+	movq	56(%rdi), %r12
+	movq	40(%rdi), %r9
+	movq	(%rdi), %rax
+	addq	%r11, %rcx
+	movq	-48(%rdi), %r8
+	movq	64(%rdi), %r11
+	movl	%r15d, %ebx
+	addq	%r12, %rcx
+	xorq	%r13, %r15 
+	roll	$30, %ebx
+	xorq	%rax, %r9 
+	addq	%r10, %rcx
+	xorq	%r14, %r15 
+	xorq	%r8, %r9 
+	xorl	%r11d, %r9d
+	movl	%ecx, %r11d
+	roll	%r9d
+	roll	$5, %r11d
+	movq	%r9, 64(%rdi)
+	addq	%r15, %r11
+	movq	64(%rdi), %rdx
+	movq	48(%rdi), %r15
+	movq	8(%rdi), %r12
+	addq	%rsi, %r11
+	movq	-40(%rdi), %rax
+	movq	72(%rdi), %r8
+	movl	%ecx, %r9d
+	addq	%rdx, %r11
+	xorq	%r12, %r15 
+	addq	%r10, %r11
+	xorq	%rax, %r15 
+	xorl	%r8d, %r15d
+	movl	%r11d, %r8d
+	roll	%r15d
+	roll	$5, %r8d
+	xorq	%rbx, %rcx 
+	xorq	%r13, %rcx 
+	movq	%r15, 72(%rdi)
+	movq	72(%rdi), %rsi
+	addq	%rcx, %r8
+	movq	56(%rdi), %r12
+	movq	16(%rdi), %rcx
+	movq	-32(%rdi), %rdx
+	addq	%r14, %r8
+	movq	-48(%rdi), %r14
+	addq	%rsi, %r8
+	roll	$30, %r9d
+	movl	%r11d, %r15d
+	xorq	%rcx, %r12 
+	addq	%r10, %r8
+	xorq	%r9, %r11 
+	xorq	%rdx, %r12 
+	xorq	%rbx, %r11 
+	roll	$30, %r15d
+	xorl	%r14d, %r12d
+	movl	%r8d, %r14d
+	roll	$5, %r14d
+	roll	%r12d
+	addq	%r11, %r14
+	movq	%r12, -48(%rdi)
+	movq	-48(%rdi), %rax
+	addq	%r13, %r14
+	movq	64(%rdi), %r13
+	movq	24(%rdi), %rsi
+	movq	-24(%rdi), %rcx
+	movq	-40(%rdi), %r11
+	movl	%r8d, %r12d
+	addq	%rax, %r14
+	xorq	%r15, %r8 
+	roll	$30, %r12d
+	xorq	%rsi, %r13 
+	addq	%r10, %r14
+	xorq	%r9, %r8 
+	xorq	%rcx, %r13 
+	xorl	%r11d, %r13d
+	movl	%r14d, %r11d
+	roll	$5, %r11d
+	roll	%r13d
+	addq	%r8, %r11
+	movq	%r13, -40(%rdi)
+	movq	-40(%rdi), %rdx
+	addq	%rbx, %r11
+	movq	72(%rdi), %rbx
+	movq	32(%rdi), %rax
+	movq	-16(%rdi), %rsi
+	movq	-32(%rdi), %r8
+	movl	%r14d, %r13d
+	addq	%rdx, %r11
+	xorq	%rax, %rbx 
+	addq	%r10, %r11
+	xorq	%rsi, %rbx 
+	xorl	%r8d, %ebx
+	xorq	%r12, %r14 
+	movl	%r11d, %r8d
+	xorq	%r15, %r14 
+	roll	%ebx
+	roll	$5, %r8d
+	movq	%rbx, -32(%rdi)
+	addq	%r14, %r8
+	movq	-32(%rdi), %rcx
+	movq	-48(%rdi), %r14
+	movq	40(%rdi), %rdx
+	addq	%r9, %r8
+	movq	-8(%rdi), %rax
+	movq	-24(%rdi), %r9
+	roll	$30, %r13d
+	addq	%rcx, %r8
+	movl	%r11d, %ebx
+	xorq	%r13, %r11 
+	xorq	%rdx, %r14 
+	addq	%r10, %r8
+	xorq	%r12, %r11 
+	xorq	%rax, %r14 
+	roll	$30, %ebx
+	xorl	%r9d, %r14d
+	movl	%r8d, %r9d
+	roll	$5, %r9d
+	roll	%r14d
+	addq	%r11, %r9
+	movq	%r14, -24(%rdi)
+	movq	-24(%rdi), %rsi
+	addq	%r15, %r9
+	movq	-40(%rdi), %r15
+	movq	48(%rdi), %rcx
+	movq	(%rdi), %rdx
+	movq	-16(%rdi), %r11
+	movl	%r8d, %r14d
+	addq	%rsi, %r9
+	xorq	%rbx, %r8 
+	xorq	%rcx, %r15 
+	addq	%r10, %r9
+	xorq	%r13, %r8 
+	xorq	%rdx, %r15 
+	xorl	%r11d, %r15d
+	movl	%r9d, %r11d
+	roll	%r15d
+	roll	$5, %r11d
+	movq	%r15, -16(%rdi)
+	addq	%r8, %r11
+	movq	-16(%rdi), %rax
+	addq	%r12, %r11
+	movq	-32(%rdi), %r12
+	movq	56(%rdi), %rsi
+	movq	8(%rdi), %rcx
+	movq	-8(%rdi), %r8
+	movl	%r9d, %r15d
+	addq	%rax, %r11
+	addq	%r10, %r11
+	roll	$30, %r14d
+	xorq	%rsi, %r12 
+	xorq	%rcx, %r12 
+	xorq	%r14, %r9 
+	roll	$30, %r15d
+	xorl	%r8d, %r12d
+	movl	%r11d, %r8d
+	xorq	%rbx, %r9 
+	roll	$5, %r8d
+	roll	%r12d
+	addq	%r9, %r8
+	movq	%r12, -8(%rdi)
+	movq	-8(%rdi), %rdx
+	addq	%r13, %r8
+	movq	-24(%rdi), %r13
+	movq	64(%rdi), %rax
+	movq	16(%rdi), %rsi
+	movq	(%rdi), %rcx
+	movl	%r11d, %r12d
+	addq	%rdx, %r8
+	xorq	%r15, %r11 
+	roll	$30, %r12d
+	xorq	%rax, %r13 
+	addq	%r10, %r8
+	xorq	%r14, %r11 
+	xorq	%rsi, %r13 
+	xorl	%ecx, %r13d
+	movl	%r8d, %ecx
+	roll	$5, %ecx
+	roll	%r13d
+	addq	%r11, %rcx
+	movq	%r13, (%rdi)
+	movq	(%rdi), %r9
+	addq	%rbx, %rcx
+	movq	-16(%rdi), %rbx
+	movq	72(%rdi), %rdx
+	movq	24(%rdi), %rax
+	movq	8(%rdi), %rsi
+	movl	%r8d, %r13d
+	addq	%r9, %rcx
+	xorq	%r12, %r8 
+	xorq	%rdx, %rbx 
+	addq	%r10, %rcx
+	xorq	%r15, %r8 
+	xorq	%rax, %rbx 
+	xorl	%esi, %ebx
+	movl	%ecx, %esi
+	roll	$5, %esi
+	roll	%ebx
+	addq	%r8, %rsi
+	movq	%rbx, 8(%rdi)
+	movq	8(%rdi), %r11
+	addq	%r14, %rsi
+	movq	-8(%rdi), %r14
+	movq	-48(%rdi), %r9
+	movq	32(%rdi), %rdx
+	movq	16(%rdi), %r8
+	roll	$30, %r13d
+	addq	%r11, %rsi
+	movl	%ecx, %ebx
+	xorq	%r13, %rcx 
+	xorq	%r9, %r14 
+	addq	%r10, %rsi
+	xorq	%r12, %rcx 
+	xorq	%rdx, %r14 
+	roll	$30, %ebx
+	xorl	%r8d, %r14d
+	movl	%esi, %r8d
+	roll	$5, %r8d
+	roll	%r14d
+	addq	%rcx, %r8
+	movq	%r14, 16(%rdi)
+	movq	16(%rdi), %rax
+	addq	%r15, %r8
+	movq	(%rdi), %r15
+	movq	-40(%rdi), %r11
+	movq	40(%rdi), %r9
+	movq	24(%rdi), %rcx
+	movl	%esi, %r14d
+	addq	%rax, %r8
+	xorq	%rbx, %rsi 
+	roll	$30, %r14d
+	xorq	%r11, %r15 
+	addq	%r10, %r8
+	xorq	%r13, %rsi 
+	xorq	%r9, %r15 
+	xorl	%ecx, %r15d
+	movl	%r8d, %ecx
+	roll	%r15d
+	roll	$5, %ecx
+	movq	%r15, 24(%rdi)
+	addq	%rsi, %rcx
+	movq	24(%rdi), %rdx
+	movq	8(%rdi), %r11
+	movq	-32(%rdi), %rax
+	addq	%r12, %rcx
+	movq	48(%rdi), %r12
+	movq	32(%rdi), %rsi
+	movl	%r8d, %r15d
+	addq	%rdx, %rcx
+	xorq	%rax, %r11 
+	addq	%r10, %rcx
+	xorq	%r12, %r11 
+	xorl	%esi, %r11d
+	movl	%ecx, %esi
+	roll	%r11d
+	movq	%r11, 32(%rdi)
+	movl	%ecx, %r11d
+	movq	32(%rdi), %r9
+	roll	$5, %r11d
+	xorq	%r14, %r8 
+	movq	16(%rdi), %r12
+	xorq	%rbx, %r8 
+	movq	-24(%rdi), %rdx
+	movq	56(%rdi), %rax
+	addq	%r8, %r11
+	movq	40(%rdi), %r8
+	roll	$30, %r15d
+	addq	%r13, %r11
+	xorq	%r15, %rcx 
+	addq	%r9, %r11
+	xorq	%rdx, %r12 
+	xorq	%r14, %rcx 
+	addq	%r10, %r11
+	xorq	%rax, %r12 
+	xorl	%r8d, %r12d
+	movl	%r11d, %r8d
+	roll	$5, %r8d
+	roll	%r12d
+	addq	%rcx, %r8
+	movq	%r12, 40(%rdi)
+	movq	40(%rdi), %r13
+	addq	%rbx, %r8
+	movq	24(%rdi), %rbx
+	movq	-16(%rdi), %r9
+	movq	64(%rdi), %rdx
+	movq	48(%rdi), %rcx
+	movl	%r11d, %r12d
+	addq	%r13, %r8
+	movl	%esi, %r13d
+	roll	$30, %r12d
+	xorq	%r9, %rbx 
+	addq	%r10, %r8
+	roll	$30, %r13d
+	xorq	%rdx, %rbx 
+	xorq	%r13, %r11 
+	xorl	%ecx, %ebx
+	movl	%r8d, %ecx
+	xorq	%r15, %r11 
+	roll	%ebx
+	roll	$5, %ecx
+	movq	%rbx, 48(%rdi)
+	addq	%r11, %rcx
+	movq	48(%rdi), %rax
+	movq	32(%rdi), %r11
+	movq	-8(%rdi), %rsi
+	addq	%r14, %rcx
+	movq	72(%rdi), %r9
+	movq	56(%rdi), %r14
+	movl	%r8d, %ebx
+	addq	%rax, %rcx
+	xorq	%rsi, %r11 
+	addq	%r10, %rcx
+	xorq	%r9, %r11 
+	xorl	%r14d, %r11d
+	xorq	%r12, %r8 
+	movl	%ecx, %r14d
+	xorq	%r13, %r8 
+	roll	%r11d
+	roll	$5, %r14d
+	movq	%r11, 56(%rdi)
+	addq	%r8, %r14
+	movq	56(%rdi), %rdx
+	movq	40(%rdi), %r8
+	movq	(%rdi), %rax
+	addq	%r15, %r14
+	movq	-48(%rdi), %r15
+	movq	64(%rdi), %rsi
+	roll	$30, %ebx
+	addq	%rdx, %r14
+	movl	%ecx, %r11d
+	xorq	%rbx, %rcx 
+	xorq	%rax, %r8 
+	addq	%r10, %r14
+	xorq	%r12, %rcx 
+	xorq	%r15, %r8 
+	roll	$30, %r11d
+	xorl	%esi, %r8d
+	movl	%r14d, %esi
+	roll	%r8d
+	roll	$5, %esi
+	movq	%r8, 64(%rdi)
+	movq	64(%rdi), %r9
+	addq	%rcx, %rsi
+	movq	48(%rdi), %r15
+	movq	8(%rdi), %rcx
+	addq	%r13, %rsi
+	movq	-40(%rdi), %rdx
+	movq	72(%rdi), %rax
+	movl	%r14d, %r8d
+	addq	%r9, %rsi
+	xorq	%r11, %r14 
+	addq	%r10, %rsi
+	xorq	%rcx, %r15 
+	xorq	%rbx, %r14 
+	xorq	%rdx, %r15 
+	movl	%esi, %r13d
+	xorl	%eax, %r15d
+	roll	$5, %r13d
+	roll	%r15d
+	addq	%r14, %r13
+	movq	%r15, 72(%rdi)
+	addq	%r12, %r13
+	movq	72(%rdi), %r12
+	addq	%r12, %r13
+	addq	%r10, %r13
+	movq	-88(%rdi), %r10
+	roll	$30, %r8d
+	addq	%r13, %r10
+	movq	%r10, -88(%rdi)
+	movq	-80(%rdi), %r9
+	addq	%rsi, %r9
+	movq	%r9, -80(%rdi)
+	movq	-72(%rdi), %rcx
+	addq	%r8, %rcx
+	movq	%rcx, -72(%rdi)
+	movq	-64(%rdi), %rdx
+	addq	%r11, %rdx
+	movq	%rdx, -64(%rdi)
+	movq	-56(%rdi), %rax
+	addq	%rbx, %rax
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	movq	%rax, -56(%rdi)
+	ret
+.LFE7:
+	.size	shaCompress, .-shaCompress
+	.align 16
+.globl SHA1_Update
+	.type	SHA1_Update, @function
+SHA1_Update: /* rdi = context, rsi = input, edx = input length; buffers partial data and compresses each full 64-byte block */
+.LFB5:
+	pushq	%rbp
+.LCFI5:
+	movq	%rsp, %rbp
+.LCFI6:
+	movq	%r13, -24(%rbp)
+.LCFI7:
+	movq	%r14, -16(%rbp)
+.LCFI8:
+	movl	%edx, %r13d /* r13d = bytes still to consume */
+	movq	%r15, -8(%rbp)
+.LCFI9:
+	movq	%rbx, -40(%rbp)
+.LCFI10:
+	movq	%rdi, %r15 /* r15 = context */
+	movq	%r12, -32(%rbp)
+.LCFI11:
+	subq	$48, %rsp
+.LCFI12:
+	testl	%edx, %edx
+	movq	%rsi, %r14 /* r14 = read cursor into the input */
+	je	.L243 /* zero-length input: nothing to do */
+	movq	64(%rdi), %rdx /* 64-bit running byte count kept at context+64 */
+	mov	%r13d, %ecx
+	leaq	(%rdx,%rcx), %rax
+	movq	%rax, 64(%rdi) /* count is advanced before any data is processed */
+	movl	%edx, %eax
+	andl	$63, %eax /* bytes already buffered = old count mod 64 */
+	movl	%eax, -44(%rbp)
+	jne	.L256 /* flags are still those of the andl: top up a partly filled buffer first */
+.L245:
+	cmpl	$63, %r13d
+	jbe	.L253
+	leaq	160(%r15), %rbx /* context+160 is the first argument handed to shaCompress */
+	.align 16
+.L250: /* compress whole 64-byte blocks straight from the input */
+	movq	%r14, %rsi
+	subl	$64, %r13d
+	movq	%rbx, %rdi
+	call	shaCompress
+	addq	$64, %r14
+	cmpl	$63, %r13d
+	ja	.L250
+.L253:
+	testl	%r13d, %r13d
+	je	.L243
+	mov	%r13d, %edx /* stash the leftover (< 64 bytes) at the start of the context buffer */
+	movq	%r14, %rsi
+	movq	%r15, %rdi
+	movq	-40(%rbp), %rbx
+	movq	-32(%rbp), %r12
+	movq	-24(%rbp), %r13
+	movq	-16(%rbp), %r14
+	movq	-8(%rbp), %r15
+	leave
+	jmp	memcpy@PLT /* tail call: memcpy returns directly to our caller */
+	.align 16
+.L243: /* common exit: restore callee-saved registers */
+	movq	-40(%rbp), %rbx
+	movq	-32(%rbp), %r12
+	movq	-24(%rbp), %r13
+	movq	-16(%rbp), %r14
+	movq	-8(%rbp), %r15
+	leave
+	ret
+.L256: /* buffer already holds eax bytes: append min(64 - eax, length) bytes of input */
+	movl	$64, %ebx
+	mov	%eax, %edi
+	subl	%eax, %ebx
+	cmpl	%ebx, %r13d
+	cmovb	%r13d, %ebx /* ebx = number of bytes to append */
+	addq	%r15, %rdi /* destination = context + buffered count */
+	mov	%ebx, %r12d
+	subl	%ebx, %r13d
+	movq	%r12, %rdx
+	addq	%r12, %r14
+	call	memcpy@PLT /* rsi still holds the caller's original input pointer */
+	addl	-44(%rbp), %ebx
+	andl	$63, %ebx /* zero exactly when the buffer has just become full */
+	jne	.L245
+	leaq	160(%r15), %rdi
+	movq	%r15, %rsi /* compress the buffered block that starts at the context base */
+	call	shaCompress
+	jmp	.L245
+.LFE5:
+	.size	SHA1_Update, .-SHA1_Update
+	.section	.rodata
+	.align 32
+	.type	bulk_pad.0, @object
+	.size	bulk_pad.0, 64
+bulk_pad.0:
+	.byte	-128
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.text
+	.align 16
+.globl SHA1_End
+	.type	SHA1_End, @function
+SHA1_End: /* rdi = context, rsi = 20-byte output, rdx = optional length out-pointer; the 4th argument (ecx) is never read */
+.LFB6:
+	pushq	%rbp
+.LCFI13:
+	movq	%rsp, %rbp
+.LCFI14:
+	movq	%r12, -24(%rbp)
+.LCFI15:
+	movq	%r13, -16(%rbp)
+.LCFI16:
+	movq	%rsi, %r13 /* r13 = output buffer */
+	movq	%r14, -8(%rbp)
+.LCFI17:
+	movq	%rbx, -32(%rbp)
+.LCFI18:
+	subq	$32, %rsp
+.LCFI19:
+	movq	64(%rdi), %rbx /* rbx = total bytes hashed so far */
+	movq	%rdx, %r14 /* r14 = length out-pointer (may be null) */
+	movl	$119, %edx
+	leaq	bulk_pad.0(%rip), %rsi
+	movq	%rdi, %r12 /* r12 = context */
+	movl	%ebx, %r8d
+	salq	$3, %rbx /* byte count -> bit count */
+	andl	$63, %r8d /* bytes currently buffered */
+	subl	%r8d, %edx
+	andl	$63, %edx
+	incl	%edx /* pad length = ((119 - buffered) & 63) + 1, i.e. 1..64, leaving the buffer at 56 bytes */
+	call	SHA1_Update@PLT
+	movq	%rbx, %rdi
+	movq	%r12, %rsi
+	shrq	$32, %rdi
+/APP
+	bswap %edi
+/NO_APP
+	movl	%edi, 56(%r12) /* high half of the bit count, byte-swapped, at buffer offset 56 */
+	leaq	160(%r12), %rdi
+/APP
+	bswap %ebx
+/NO_APP
+	movl	%ebx, 60(%r12) /* low half of the bit count, byte-swapped, at buffer offset 60 */
+	call	shaCompress /* final block: shaCompress(context+160, context) */
+	movl	72(%r12), %esi /* low 32 bits of the five state words stored 8 bytes apart at context+72 */
+	movl	80(%r12), %ebx
+	movl	88(%r12), %ecx
+	movl	96(%r12), %edx
+	movl	104(%r12), %eax
+	movq	8(%rsp), %r12 /* restore of saved r12 is interleaved with the output stores */
+/APP
+	bswap %ebx
+	bswap %esi
+/NO_APP
+	movl	%ebx, 4(%r13)
+	movl	%esi, (%r13)
+/APP
+	bswap %ecx
+	bswap %edx
+/NO_APP
+	movl	%ecx, 8(%r13)
+	movl	%edx, 12(%r13)
+/APP
+	bswap %eax
+/NO_APP
+	movq	(%rsp), %rbx
+	movl	%eax, 16(%r13)
+        cmpq    $0, %r14
+        je      .L133
+	movl	$20, (%r14) /* report the digest length only when an out-pointer was supplied */
+.L133:
+	movq	16(%rsp), %r13
+	movq	24(%rsp), %r14
+	leave
+	ret
+.LFE6:
+	.size	SHA1_End, .-SHA1_End
+	.align 16
+.globl SHA1_NewContext
+	.type	SHA1_NewContext, @function
+SHA1_NewContext: /* requests 248 bytes from PORT_Alloc_Util and returns whatever it returns */
+.LFB8:
+	movl	$248, %edi /* same 248-byte size used by the destroy/flatten/clone routines in this file */
+	jmp	PORT_Alloc_Util@PLT /* tail call; the memory is not initialized here */
+.LFE8:
+	.size	SHA1_NewContext, .-SHA1_NewContext
+	.align 16
+.globl SHA1_DestroyContext
+	.type	SHA1_DestroyContext, @function
+SHA1_DestroyContext: /* rdi = context, esi = free flag; zeroes 248 bytes, then frees the context when the flag is non-zero */
+.LFB9:
+	pushq	%rbp
+.LCFI20:
+	movl	$248, %edx
+	movq	%rsp, %rbp
+.LCFI21:
+	movq	%rbx, -16(%rbp)
+.LCFI22:
+	movq	%r12, -8(%rbp)
+.LCFI23:
+	movl	%esi, %ebx /* keep the free flag across the memset call */
+	subq	$16, %rsp
+.LCFI24:
+	xorl	%esi, %esi
+	movq	%rdi, %r12
+	call	memset@PLT /* memset(context, 0, 248); the pointer is not checked for null first */
+	testl	%ebx, %ebx
+	jne	.L268
+	movq	(%rsp), %rbx
+	movq	8(%rsp), %r12
+	leave
+	ret
+	.align 16
+.L268: /* free flag set */
+	movq	%r12, %rdi
+	movq	(%rsp), %rbx
+	movq	8(%rsp), %r12
+	leave
+	jmp	PORT_Free_Util@PLT /* tail call */
+.LFE9:
+	.size	SHA1_DestroyContext, .-SHA1_DestroyContext
+	.align 16
+.globl SHA1_HashBuf
+	.type	SHA1_HashBuf, @function
+SHA1_HashBuf: /* rdi = digest output, rsi = input, edx = input length; one-shot Begin/Update/End on a stack context, returns 0 */
+.LFB10:
+	pushq	%rbp
+.LCFI25:
+	movq	%rsp, %rbp
+.LCFI26:
+	movq	%rbx, -32(%rbp)
+.LCFI27:
+	leaq	-288(%rbp), %rbx /* rbx = address of the temporary context on the stack */
+	movq	%r12, -24(%rbp)
+.LCFI28:
+	movq	%r13, -16(%rbp)
+.LCFI29:
+	movq	%r14, -8(%rbp)
+.LCFI30:
+	movq	%rsi, %r13 /* r13 = input */
+	subq	$304, %rsp
+.LCFI31:
+	movq	%rdi, %r14 /* r14 = digest output */
+	movl	%edx, %r12d /* r12d = input length */
+	movq	%rbx, %rdi
+	call	SHA1_Begin@PLT
+	movl	%r12d, %edx
+	movq	%r13, %rsi
+	movq	%rbx, %rdi
+	call	SHA1_Update@PLT
+	leaq	-292(%rbp), %rdx /* scratch slot for the reported digest length; never read back */
+	movq	%r14, %rsi
+	movq	%rbx, %rdi
+	movl	$20, %ecx
+	call	SHA1_End@PLT
+	movq	-32(%rbp), %rbx
+	movq	-24(%rbp), %r12
+	xorl	%eax, %eax /* return value is always 0 */
+	movq	-16(%rbp), %r13
+	movq	-8(%rbp), %r14
+	leave /* NOTE(review): the stack context is not cleared before returning */
+	ret
+.LFE10:
+	.size	SHA1_HashBuf, .-SHA1_HashBuf
+	.align 16
+.globl SHA1_Hash
+	.type	SHA1_Hash, @function
+SHA1_Hash: /* rdi = digest output, rsi = NUL-terminated input; hashes strlen(input) bytes via SHA1_HashBuf */
+.LFB11:
+	pushq	%rbp
+.LCFI32:
+	movq	%rsp, %rbp
+.LCFI33:
+	movq	%rbx, -16(%rbp)
+.LCFI34:
+	movq	%r12, -8(%rbp)
+.LCFI35:
+	movq	%rsi, %rbx /* rbx = input string */
+	subq	$16, %rsp
+.LCFI36:
+	movq	%rdi, %r12 /* r12 = digest output */
+	movq	%rsi, %rdi
+	call	strlen@PLT
+	movq	%rbx, %rsi
+	movq	%r12, %rdi
+	movq	(%rsp), %rbx
+	movq	8(%rsp), %r12
+	leave
+	movl	%eax, %edx /* only the low 32 bits of the strlen result are passed on as the length */
+	jmp	SHA1_HashBuf@PLT /* tail call; its return value becomes ours */
+.LFE11:
+	.size	SHA1_Hash, .-SHA1_Hash
+	.align 16
+.globl SHA1_FlattenSize
+	.type	SHA1_FlattenSize, @function
+SHA1_FlattenSize: /* returns the constant 248; no argument register is read */
+.LFB12:
+	movl	$248, %eax /* matches the byte count copied by SHA1_Flatten */
+	ret
+.LFE12:
+	.size	SHA1_FlattenSize, .-SHA1_FlattenSize
+	.align 16
+.globl SHA1_Flatten
+	.type	SHA1_Flatten, @function
+SHA1_Flatten: /* rdi = context, rsi = destination; copies 248 bytes of context into the destination and returns 0 */
+.LFB13:
+	pushq	%rbp
+.LCFI37:
+	movq	%rsi, %rax
+	movl	$248, %edx
+	movq	%rdi, %rsi /* swap the two arguments into memcpy(dest, src, n) order */
+	movq	%rax, %rdi
+	movq	%rsp, %rbp
+.LCFI38:
+	call	memcpy@PLT
+	leave
+	xorl	%eax, %eax /* return value is always 0 */
+	ret
+.LFE13:
+	.size	SHA1_Flatten, .-SHA1_Flatten
+	.align 16
+.globl SHA1_Resurrect
+	.type	SHA1_Resurrect, @function
+SHA1_Resurrect: /* rdi = 248-byte saved state; returns a newly allocated copy, or 0 if allocation fails; rsi is not read */
+.LFB14:
+	pushq	%rbp
+.LCFI39:
+	movq	%rsp, %rbp
+.LCFI40:
+	movq	%rbx, -16(%rbp)
+.LCFI41:
+	movq	%r12, -8(%rbp)
+.LCFI42:
+	subq	$16, %rsp
+.LCFI43:
+	movq	%rdi, %r12 /* r12 = saved state to copy from */
+	call	SHA1_NewContext@PLT
+	movq	%rax, %rbx
+	xorl	%eax, %eax /* default return value: null */
+	testq	%rbx, %rbx
+	je	.L273 /* allocation failed */
+	movl	$248, %edx
+	movq	%r12, %rsi
+	movq	%rbx, %rdi
+	call	memcpy@PLT
+	movq	%rbx, %rax /* return the new context */
+.L273:
+	movq	(%rsp), %rbx
+	movq	8(%rsp), %r12
+	leave
+	ret
+.LFE14:
+	.size	SHA1_Resurrect, .-SHA1_Resurrect
+	.align 16
+.globl SHA1_Clone
+	.type	SHA1_Clone, @function
+SHA1_Clone: /* rdi = destination, rsi = source; copies 248 bytes with memcpy */
+.LFB15:
+	movl	$248, %edx
+	jmp	memcpy@PLT /* tail call; rdi and rsi are passed through unchanged */
+.LFE15:
+	.size	SHA1_Clone, .-SHA1_Clone
+	.align 16
+.globl SHA1_TraceState
+	.type	SHA1_TraceState, @function
+SHA1_TraceState: /* does no tracing: only records error code -5992 through PORT_SetError_Util */
+.LFB16:
+	movl	$-5992, %edi /* NOTE(review): presumably PR_NOT_IMPLEMENTED_ERROR; confirm against the NSPR error table */
+	jmp	PORT_SetError_Util@PLT /* tail call */
+.LFE16:
+	.size	SHA1_TraceState, .-SHA1_TraceState
+	.align 16
+.globl SHA1_EndRaw
+        .type   SHA1_EndRaw, @function
+SHA1_EndRaw: /* rdi = context, rsi = 20-byte output, rdx = optional length out-pointer; writes the current state words byte-swapped, with no padding or compression */
+.LFB50:
+        movq    72(%rdi), %rax /* first state word (8-byte slot at context+72) */
+/APP
+        bswap %eax
+/NO_APP
+        movl    %eax, (%rsi) /* only the low 32 bits, byte-swapped, are written out */
+        movq    80(%rdi), %rax
+/APP
+        bswap %eax
+/NO_APP
+        movl    %eax, 4(%rsi)
+        movq    88(%rdi), %rax
+/APP
+        bswap %eax
+/NO_APP
+        movl    %eax, 8(%rsi)
+        movq    96(%rdi), %rax
+/APP
+        bswap %eax
+/NO_APP
+        movl    %eax, 12(%rsi)
+        movq    104(%rdi), %rax
+/APP
+        bswap %eax
+/NO_APP
+        testq   %rdx, %rdx
+        movl    %eax, 16(%rsi)
+        je      .L14 /* flags come from the testq above; movl does not change them */
+        movl    $20, (%rdx) /* report 20 bytes written when an out-pointer was supplied */
+.L14:
+        rep
+        ret
+.LFE50:
+        .size   SHA1_EndRaw, .-SHA1_EndRaw
diff --git a/security/nss/lib/freebl/sha256.h b/security/nss/lib/freebl/sha256.h
new file mode 100644
index 000000000..c65ca152d
--- /dev/null
+++ b/security/nss/lib/freebl/sha256.h
@@ -0,0 +1,19 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _SHA_256_H_
+#define _SHA_256_H_
+
+#include "prtypes.h"
+
+struct SHA256ContextStr { /* working state used by the SHA-256 routines in sha512.c */
+    union {
+        PRUint32 w[64]; /* message schedule, input buffer, plus 48 words */
+        PRUint8 b[256]; /* byte view of the same storage; input is copied into the first 64 bytes */
+    } u;
+    PRUint32 h[8];           /* 8 state variables */
+    PRUint32 sizeHi, sizeLo; /* 64-bit count of hashed bytes, split into high and low 32-bit halves. */
+};
+
+#endif /* _SHA_256_H_ */
diff --git a/security/nss/lib/freebl/sha512.c b/security/nss/lib/freebl/sha512.c
new file mode 100644
index 000000000..528f884b2
--- /dev/null
+++ b/security/nss/lib/freebl/sha512.c
@@ -0,0 +1,1655 @@
+/*
+ * sha512.c - implementation of SHA224, SHA256, SHA384 and SHA512
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prcpucfg.h"
+#if defined(NSS_X86) || defined(SHA_NO_LONG_LONG)
+#define NOUNROLL512 1
+#undef HAVE_LONG_LONG
+#endif
+#include "prtypes.h" /* for PRUintXX */
+#include "prlong.h"
+#include "secport.h" /* for PORT_XXX */
+#include "blapi.h"
+#include "sha256.h" /* for struct SHA256ContextStr */
+
+/* ============= Common constants and defines ======================= */
+
+#define W ctx->u.w /* W, B and H all expand through a variable named ctx that must be in scope at the point of use */
+#define B ctx->u.b /* byte view of the same buffer as W */
+#define H ctx->h   /* hash state words */
+
+#define SHR(x, n) (x >> n) /* NOTE(review): macro arguments are not parenthesized; the uses visible here pass identifiers or array elements */
+#define SHL(x, n) (x << n)
+#define Ch(x, y, z) ((x & y) ^ (~x & z)) /* per bit: take y where x is 1, z where x is 0 */
+#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z)) /* per bit: majority vote of x, y, z */
+#define SHA_MIN(a, b) (a < b ? a : b) /* evaluates one of its arguments twice */
+
+/* Padding used with all flavors of SHA: a single 0x80 byte followed by zeros. */
+static const PRUint8 pad[240] = {
+    0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+    /* only 64 of the 240 elements are listed; the compiler fills the rest in with zeros */
+};
+
+/* ============= SHA256 implementation ================================== */
+
+/* SHA-256 constants, K256: one 32-bit value per round, read as K256[n] by the ROUND macro. */
+static const PRUint32 K256[64] = {
+    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+/* SHA-256 initial hash values; SHA256_Begin copies these into ctx->h. */
+static const PRUint32 H256[8] = {
+    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
+};
+
+#if defined(IS_LITTLE_ENDIAN) /* byte-swap helpers exist only on little-endian builds; one variant is selected below */
+#if (_MSC_VER >= 1300) /* MSVC with the _byteswap_ulong intrinsic */
+#include <stdlib.h>
+#pragma intrinsic(_byteswap_ulong)
+#define SHA_HTONL(x) _byteswap_ulong(x)
+#elif defined(_MSC_VER) && defined(NSS_X86_OR_X64) /* older MSVC: inline-assembly bswap */
+#ifndef FORCEINLINE
+#if (_MSC_VER >= 1200)
+#define FORCEINLINE __forceinline
+#else
+#define FORCEINLINE __inline
+#endif
+#endif
+#define FASTCALL __fastcall
+
+static FORCEINLINE PRUint32 FASTCALL
+swap4b(PRUint32 dwd)
+{ /* no return statement: the result is whatever the asm block leaves in eax */
+    __asm {
+        mov   eax,dwd
+    bswap eax
+    }
+}
+
+#define SHA_HTONL(x) swap4b(x)
+
+#elif defined(__GNUC__) && defined(NSS_X86_OR_X64) /* GCC-style compilers on x86/x64: bswap instruction */
+static __inline__ PRUint32
+swap4b(PRUint32 value)
+{
+    __asm__("bswap %0"
+            : "+r"(value)); /* "+r": value is both input and output, swapped in place in a register */
+    return (value);
+}
+#define SHA_HTONL(x) swap4b(x)
+
+#elif defined(__GNUC__) && (defined(__thumb2__) ||         \
+                            (!defined(__thumb__) &&        \
+                             (defined(__ARM_ARCH_6__) ||   \
+                              defined(__ARM_ARCH_6J__) ||  \
+                              defined(__ARM_ARCH_6K__) ||  \
+                              defined(__ARM_ARCH_6Z__) ||  \
+                              defined(__ARM_ARCH_6ZK__) || \
+                              defined(__ARM_ARCH_6T2__) || \
+                              defined(__ARM_ARCH_7__) ||   \
+                              defined(__ARM_ARCH_7A__) ||  \
+                              defined(__ARM_ARCH_7R__))))
+static __inline__ PRUint32
+swap4b(PRUint32 value)
+{
+    PRUint32 ret;
+    __asm__("rev %0, %1" /* ARM rev instruction, for the Thumb-2 / ARMv6 / ARMv7 builds selected above */
+            : "=r"(ret)
+            : "r"(value));
+    return ret;
+}
+#define SHA_HTONL(x) swap4b(x)
+
+#else /* portable fallback in plain C */
+#define SWAP4MASK 0x00FF00FF
+static PRUint32
+swap4b(PRUint32 value)
+{
+    PRUint32 t1 = (value << 16) | (value >> 16); /* swap the two 16-bit halves */
+    return ((t1 & SWAP4MASK) << 8) | ((t1 >> 8) & SWAP4MASK); /* then swap the bytes within each half */
+}
+#define SHA_HTONL(x) swap4b(x)
+#endif
+#define BYTESWAP4(x) x = SHA_HTONL(x) /* swaps x in place; x must be an lvalue and is named twice in the expansion */
+#endif /* defined(IS_LITTLE_ENDIAN) */
+
+#if defined(_MSC_VER) /* MSVC: use the rotate intrinsics */
+#pragma intrinsic(_lrotr, _lrotl)
+#define ROTR32(x, n) _lrotr(x, n)
+#define ROTL32(x, n) _lrotl(x, n)
+#else /* elsewhere: rotate built from two shifts; the width comes from sizeof x, so x must be a 32-bit operand */
+#define ROTR32(x, n) ((x >> n) | (x << ((8 * sizeof x) - n))) /* n must be 1..31: n == 0 would shift by the full width */
+#define ROTL32(x, n) ((x << n) | (x >> ((8 * sizeof x) - n)))
+#endif
+
+/* Capital Sigma (S0, S1) and lower case sigma (s0, s1) functions for SHA-256 */
+#define S0(x) (ROTR32(x, 2) ^ ROTR32(x, 13) ^ ROTR32(x, 22)) /* used on a in ROUND */
+#define S1(x) (ROTR32(x, 6) ^ ROTR32(x, 11) ^ ROTR32(x, 25)) /* used on e in ROUND */
+#define s0(x) (ROTR32(x, 7) ^ ROTR32(x, 18) ^ SHR(x, 3))     /* used on W[t - 15] in INITW */
+#define s1(x) (ROTR32(x, 17) ^ ROTR32(x, 19) ^ SHR(x, 10))   /* used on W[t - 2] in INITW */
+/* Allocates a SHA256Context with PORT_New and returns it unchanged; no field is set here. */
+SHA256Context *
+SHA256_NewContext(void)
+{
+    SHA256Context *ctx = PORT_New(SHA256Context); /* NOTE(review): presumably NULL on allocation failure; confirm in secport.h */
+    return ctx;
+}
+/* Zeroes the whole context and, when freeit is true, releases it with PORT_Free. */
+void
+SHA256_DestroyContext(SHA256Context *ctx, PRBool freeit)
+{
+    memset(ctx, 0, sizeof *ctx); /* wipes buffered input and hash state; ctx is dereferenced without a NULL check */
+    if (freeit) {
+        PORT_Free(ctx);
+    }
+}
+/* Resets ctx for a new hash: clears everything, then loads the SHA-256 initial hash values. */
+void
+SHA256_Begin(SHA256Context *ctx)
+{
+    memset(ctx, 0, sizeof *ctx); /* clears the block buffer, the state and both halves of the byte count */
+    memcpy(H, H256, sizeof H256); /* H expands to ctx->h */
+}
+/* Runs one SHA-256 compression over the 64-byte block held in ctx->u and adds the result into ctx->h. */
+static void
+SHA256_Compress(SHA256Context *ctx)
+{
+    { /* stage 1: build the 64-word schedule in W; this overwrites the raw input held in the buffer */
+#if defined(IS_LITTLE_ENDIAN) /* the 16 input words are byte-swapped in place on little-endian builds */
+        BYTESWAP4(W[0]);
+        BYTESWAP4(W[1]);
+        BYTESWAP4(W[2]);
+        BYTESWAP4(W[3]);
+        BYTESWAP4(W[4]);
+        BYTESWAP4(W[5]);
+        BYTESWAP4(W[6]);
+        BYTESWAP4(W[7]);
+        BYTESWAP4(W[8]);
+        BYTESWAP4(W[9]);
+        BYTESWAP4(W[10]);
+        BYTESWAP4(W[11]);
+        BYTESWAP4(W[12]);
+        BYTESWAP4(W[13]);
+        BYTESWAP4(W[14]);
+        BYTESWAP4(W[15]);
+#endif
+
+#define INITW(t) W[t] = (s1(W[t - 2]) + W[t - 7] + s0(W[t - 15]) + W[t - 16])
+
+/* prepare the "message schedule": words 16..63 are derived from earlier words, so order matters */
+#ifdef NOUNROLL256
+        {
+            int t;
+            for (t = 16; t < 64; ++t) {
+                INITW(t);
+            }
+        }
+#else
+        INITW(16);
+        INITW(17);
+        INITW(18);
+        INITW(19);
+
+        INITW(20);
+        INITW(21);
+        INITW(22);
+        INITW(23);
+        INITW(24);
+        INITW(25);
+        INITW(26);
+        INITW(27);
+        INITW(28);
+        INITW(29);
+
+        INITW(30);
+        INITW(31);
+        INITW(32);
+        INITW(33);
+        INITW(34);
+        INITW(35);
+        INITW(36);
+        INITW(37);
+        INITW(38);
+        INITW(39);
+
+        INITW(40);
+        INITW(41);
+        INITW(42);
+        INITW(43);
+        INITW(44);
+        INITW(45);
+        INITW(46);
+        INITW(47);
+        INITW(48);
+        INITW(49);
+
+        INITW(50);
+        INITW(51);
+        INITW(52);
+        INITW(53);
+        INITW(54);
+        INITW(55);
+        INITW(56);
+        INITW(57);
+        INITW(58);
+        INITW(59);
+
+        INITW(60);
+        INITW(61);
+        INITW(62);
+        INITW(63);
+
+#endif
+#undef INITW
+    }
+    { /* stage 2: the 64 rounds */
+        PRUint32 a, b, c, d, e, f, g, h; /* working copies of the eight state words */
+
+        a = H[0];
+        b = H[1];
+        c = H[2];
+        d = H[3];
+        e = H[4];
+        f = H[5];
+        g = H[6];
+        h = H[7];
+        /* One round. Callers rotate the argument list by one each round instead of moving values between variables. */
+#define ROUND(n, a, b, c, d, e, f, g, h)       \
+    h += S1(e) + Ch(e, f, g) + K256[n] + W[n]; \
+    d += h;                                    \
+    h += S0(a) + Maj(a, b, c);
+
+#ifdef NOUNROLL256
+        {
+            int t;
+            for (t = 0; t < 64; t += 8) { /* eight rounds per pass bring the argument rotation back to its start */
+                ROUND(t + 0, a, b, c, d, e, f, g, h)
+                ROUND(t + 1, h, a, b, c, d, e, f, g)
+                ROUND(t + 2, g, h, a, b, c, d, e, f)
+                ROUND(t + 3, f, g, h, a, b, c, d, e)
+                ROUND(t + 4, e, f, g, h, a, b, c, d)
+                ROUND(t + 5, d, e, f, g, h, a, b, c)
+                ROUND(t + 6, c, d, e, f, g, h, a, b)
+                ROUND(t + 7, b, c, d, e, f, g, h, a)
+            }
+        }
+#else
+        ROUND(0, a, b, c, d, e, f, g, h)
+        ROUND(1, h, a, b, c, d, e, f, g)
+        ROUND(2, g, h, a, b, c, d, e, f)
+        ROUND(3, f, g, h, a, b, c, d, e)
+        ROUND(4, e, f, g, h, a, b, c, d)
+        ROUND(5, d, e, f, g, h, a, b, c)
+        ROUND(6, c, d, e, f, g, h, a, b)
+        ROUND(7, b, c, d, e, f, g, h, a)
+
+        ROUND(8, a, b, c, d, e, f, g, h)
+        ROUND(9, h, a, b, c, d, e, f, g)
+        ROUND(10, g, h, a, b, c, d, e, f)
+        ROUND(11, f, g, h, a, b, c, d, e)
+        ROUND(12, e, f, g, h, a, b, c, d)
+        ROUND(13, d, e, f, g, h, a, b, c)
+        ROUND(14, c, d, e, f, g, h, a, b)
+        ROUND(15, b, c, d, e, f, g, h, a)
+
+        ROUND(16, a, b, c, d, e, f, g, h)
+        ROUND(17, h, a, b, c, d, e, f, g)
+        ROUND(18, g, h, a, b, c, d, e, f)
+        ROUND(19, f, g, h, a, b, c, d, e)
+        ROUND(20, e, f, g, h, a, b, c, d)
+        ROUND(21, d, e, f, g, h, a, b, c)
+        ROUND(22, c, d, e, f, g, h, a, b)
+        ROUND(23, b, c, d, e, f, g, h, a)
+
+        ROUND(24, a, b, c, d, e, f, g, h)
+        ROUND(25, h, a, b, c, d, e, f, g)
+        ROUND(26, g, h, a, b, c, d, e, f)
+        ROUND(27, f, g, h, a, b, c, d, e)
+        ROUND(28, e, f, g, h, a, b, c, d)
+        ROUND(29, d, e, f, g, h, a, b, c)
+        ROUND(30, c, d, e, f, g, h, a, b)
+        ROUND(31, b, c, d, e, f, g, h, a)
+
+        ROUND(32, a, b, c, d, e, f, g, h)
+        ROUND(33, h, a, b, c, d, e, f, g)
+        ROUND(34, g, h, a, b, c, d, e, f)
+        ROUND(35, f, g, h, a, b, c, d, e)
+        ROUND(36, e, f, g, h, a, b, c, d)
+        ROUND(37, d, e, f, g, h, a, b, c)
+        ROUND(38, c, d, e, f, g, h, a, b)
+        ROUND(39, b, c, d, e, f, g, h, a)
+
+        ROUND(40, a, b, c, d, e, f, g, h)
+        ROUND(41, h, a, b, c, d, e, f, g)
+        ROUND(42, g, h, a, b, c, d, e, f)
+        ROUND(43, f, g, h, a, b, c, d, e)
+        ROUND(44, e, f, g, h, a, b, c, d)
+        ROUND(45, d, e, f, g, h, a, b, c)
+        ROUND(46, c, d, e, f, g, h, a, b)
+        ROUND(47, b, c, d, e, f, g, h, a)
+
+        ROUND(48, a, b, c, d, e, f, g, h)
+        ROUND(49, h, a, b, c, d, e, f, g)
+        ROUND(50, g, h, a, b, c, d, e, f)
+        ROUND(51, f, g, h, a, b, c, d, e)
+        ROUND(52, e, f, g, h, a, b, c, d)
+        ROUND(53, d, e, f, g, h, a, b, c)
+        ROUND(54, c, d, e, f, g, h, a, b)
+        ROUND(55, b, c, d, e, f, g, h, a)
+
+        ROUND(56, a, b, c, d, e, f, g, h)
+        ROUND(57, h, a, b, c, d, e, f, g)
+        ROUND(58, g, h, a, b, c, d, e, f)
+        ROUND(59, f, g, h, a, b, c, d, e)
+        ROUND(60, e, f, g, h, a, b, c, d)
+        ROUND(61, d, e, f, g, h, a, b, c)
+        ROUND(62, c, d, e, f, g, h, a, b)
+        ROUND(63, b, c, d, e, f, g, h, a)
+#endif
+
+        H[0] += a; /* add the working variables back into the running state */
+        H[1] += b;
+        H[2] += c;
+        H[3] += d;
+        H[4] += e;
+        H[5] += f;
+        H[6] += g;
+        H[7] += h;
+    }
+#undef ROUND
+}
+
+#undef s0 /* drop the four sigma macros defined above for SHA-256 now that SHA256_Compress is done with them */
+#undef s1
+#undef S0
+#undef S1
+/* Feeds inputLen bytes into the hash: completes a partial block, compresses whole blocks, buffers the rest. */
+void
+SHA256_Update(SHA256Context *ctx, const unsigned char *input,
+              unsigned int inputLen)
+{
+    unsigned int inBuf = ctx->sizeLo & 0x3f; /* bytes already waiting in the block buffer (count mod 64) */
+    if (!inputLen)
+        return;
+
+    /* Add inputLen into the count of bytes processed, before processing */
+    if ((ctx->sizeLo += inputLen) < inputLen)
+        ctx->sizeHi++; /* the low word wrapped around: carry into the high word */
+
+    /* if data already in buffer, attempt to fill rest of buffer */
+    if (inBuf) {
+        unsigned int todo = SHA256_BLOCK_LENGTH - inBuf; /* room left in the buffer */
+        if (inputLen < todo)
+            todo = inputLen;
+        memcpy(B + inBuf, input, todo);
+        input += todo;
+        inputLen -= todo;
+        if (inBuf + todo == SHA256_BLOCK_LENGTH) /* compress only once the buffer is completely full */
+            SHA256_Compress(ctx);
+    }
+
+    /* if enough data to fill one or more whole buffers, process them. */
+    while (inputLen >= SHA256_BLOCK_LENGTH) {
+        memcpy(B, input, SHA256_BLOCK_LENGTH); /* copied first because SHA256_Compress works on, and overwrites, the buffer */
+        input += SHA256_BLOCK_LENGTH;
+        inputLen -= SHA256_BLOCK_LENGTH;
+        SHA256_Compress(ctx);
+    }
+    /* if data left over, fill it into buffer */
+    if (inputLen)
+        memcpy(B, input, inputLen);
+}
+
+void
+SHA256_End(SHA256Context *ctx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{ /* Pad the message, append its bit length, compress once more and copy out the digest. */
+    unsigned int inBuf = ctx->sizeLo & 0x3f; /* bytes currently buffered */
+    unsigned int padLen = (inBuf < 56) ? (56 - inBuf) : (56 + 64 - inBuf); /* pad to 56 mod 64 */
+    PRUint32 hi, lo; /* message length in bits, as two 32-bit halves */
+
+    hi = (ctx->sizeHi << 3) | (ctx->sizeLo >> 29); /* byte count * 8, upper half */
+    lo = (ctx->sizeLo << 3);
+
+    SHA256_Update(ctx, pad, padLen); /* also bumps sizeLo, which is why hi/lo were taken first */
+
+#if defined(IS_LITTLE_ENDIAN)
+    W[14] = SHA_HTONL(hi); /* NOTE(review): presumably SHA256_Compress swaps W back - not visible here */
+    W[15] = SHA_HTONL(lo);
+#else
+    W[14] = hi;
+    W[15] = lo;
+#endif
+    SHA256_Compress(ctx);
+
+/* now output the answer */
+#if defined(IS_LITTLE_ENDIAN)
+    BYTESWAP4(H[0]);
+    BYTESWAP4(H[1]);
+    BYTESWAP4(H[2]);
+    BYTESWAP4(H[3]);
+    BYTESWAP4(H[4]);
+    BYTESWAP4(H[5]);
+    BYTESWAP4(H[6]);
+    BYTESWAP4(H[7]);
+#endif
+    padLen = PR_MIN(SHA256_LENGTH, maxDigestLen); /* padLen is reused as the output length */
+    memcpy(digest, H, padLen);
+    if (digestLen) /* digestLen is optional */
+        *digestLen = padLen;
+}
+
+void
+SHA256_EndRaw(SHA256Context *ctx, unsigned char *digest,
+              unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    /* Emit the current state words without padding or a final compression;
+     * the words are copied first, so ctx itself is only read, never changed. */
+    PRUint32 state[8];
+    unsigned int outLen = PR_MIN(SHA256_LENGTH, maxDigestLen);
+
+    memcpy(state, ctx->h, sizeof(state));
+
+#if defined(IS_LITTLE_ENDIAN)
+    {
+        /* byte-swap each copied word before it is written out */
+        unsigned int i;
+        for (i = 0; i < 8; ++i) {
+            BYTESWAP4(state[i]);
+        }
+    }
+#endif
+
+    memcpy(digest, state, outLen);
+    if (digestLen)
+        *digestLen = outLen;
+}
+
+SECStatus
+SHA256_HashBuf(unsigned char *dest, const unsigned char *src,
+               PRUint32 src_length)
+{ /* One-shot SHA-256 of src_length bytes at src into dest; always returns SECSuccess. */
+    SHA256Context ctx; /* temporary context on the stack */
+    unsigned int outLen; /* receives the digest length; not used afterwards */
+
+    SHA256_Begin(&ctx);
+    SHA256_Update(&ctx, src, src_length);
+    SHA256_End(&ctx, dest, &outLen, SHA256_LENGTH); /* writes up to SHA256_LENGTH bytes to dest */
+    memset(&ctx, 0, sizeof ctx); /* wipe state; NOTE(review): a plain memset may be elided by optimizers */
+
+    return SECSuccess;
+}
+
+SECStatus
+SHA256_Hash(unsigned char *dest, const char *src)
+{ /* Hash the string src into dest; its length is taken with PORT_Strlen. */
+    return SHA256_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src));
+}
+
+void
+SHA256_TraceState(SHA256Context *ctx)
+{ /* no-op: the body is empty, so nothing is traced and ctx is not touched */
+}
+
+unsigned int
+SHA256_FlattenSize(SHA256Context *ctx)
+{
+    return sizeof *ctx; /* size of the whole context struct, the amount SHA256_Flatten copies */
+}
+
+SECStatus
+SHA256_Flatten(SHA256Context *ctx, unsigned char *space)
+{ /* Serialize ctx by raw copy; space must have room for sizeof *ctx bytes. */
+    PORT_Memcpy(space, ctx, sizeof *ctx);
+    return SECSuccess; /* there is no failure path */
+}
+
+SHA256Context *
+SHA256_Resurrect(unsigned char *space, void *arg)
+{ /* Build a new context from a flattened image at space; arg is unused. */
+    SHA256Context *ctx = SHA256_NewContext();
+    if (ctx) /* a NULL from SHA256_NewContext is passed straight back */
+        PORT_Memcpy(ctx, space, sizeof *ctx);
+    return ctx;
+}
+
+void
+SHA256_Clone(SHA256Context *dest, SHA256Context *src)
+{
+    memcpy(dest, src, sizeof *dest); /* byte-for-byte copy of the whole context */
+}
+
+/* ============= SHA224 implementation ================================== */
+
+/* SHA-224 initial hash values */
+static const PRUint32 H224[8] = {
+    0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
+    0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4
+};
+
+SHA224Context *
+SHA224_NewContext(void)
+{
+    return SHA256_NewContext(); /* SHA-224 reuses the SHA-256 context allocator */
+}
+
+void
+SHA224_DestroyContext(SHA224Context *ctx, PRBool freeit)
+{
+    SHA256_DestroyContext(ctx, freeit); /* pure delegation; freeit is forwarded unchanged */
+}
+
+void
+SHA224_Begin(SHA224Context *ctx)
+{
+    memset(ctx, 0, sizeof *ctx); /* clear the entire context */
+    memcpy(H, H224, sizeof H224); /* then load the SHA-224 initial hash values */
+}
+
+void
+SHA224_Update(SHA224Context *ctx, const unsigned char *input,
+              unsigned int inputLen)
+{
+    SHA256_Update(ctx, input, inputLen); /* input absorption is identical to SHA-256 */
+}
+
+void
+SHA224_End(SHA256Context *ctx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{ /* Finish via SHA256_End, but never emit more than SHA224_LENGTH bytes. */
+    unsigned int maxLen = SHA_MIN(maxDigestLen, SHA224_LENGTH);
+    SHA256_End(ctx, digest, digestLen, maxLen);
+}
+
+void
+SHA224_EndRaw(SHA256Context *ctx, unsigned char *digest,
+              unsigned int *digestLen, unsigned int maxDigestLen)
+{ /* Raw state output via SHA256_EndRaw, capped at SHA224_LENGTH bytes. */
+    unsigned int maxLen = SHA_MIN(maxDigestLen, SHA224_LENGTH);
+    SHA256_EndRaw(ctx, digest, digestLen, maxLen);
+}
+
+SECStatus
+SHA224_HashBuf(unsigned char *dest, const unsigned char *src,
+               PRUint32 src_length)
+{ /* One-shot SHA-224 of src_length bytes at src into dest; always returns SECSuccess. */
+    SHA256Context ctx; /* temporary context on the stack */
+    unsigned int outLen; /* receives the digest length; not used afterwards */
+
+    SHA224_Begin(&ctx); /* only the initial values differ from SHA-256 here */
+    SHA256_Update(&ctx, src, src_length);
+    SHA256_End(&ctx, dest, &outLen, SHA224_LENGTH); /* output capped at SHA224_LENGTH bytes */
+    memset(&ctx, 0, sizeof ctx); /* wipe state; NOTE(review): a plain memset may be elided by optimizers */
+
+    return SECSuccess;
+}
+
+SECStatus
+SHA224_Hash(unsigned char *dest, const char *src)
+{ /* Hash the string src into dest; its length is taken with PORT_Strlen. */
+    return SHA224_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src));
+}
+
+void
+SHA224_TraceState(SHA224Context *ctx)
+{ /* no-op: the body is empty, so nothing is traced and ctx is not touched */
+}
+
+unsigned int
+SHA224_FlattenSize(SHA224Context *ctx)
+{
+    return SHA256_FlattenSize(ctx); /* same size as a flattened SHA-256 context */
+}
+
+SECStatus
+SHA224_Flatten(SHA224Context *ctx, unsigned char *space)
+{
+    return SHA256_Flatten(ctx, space); /* pure delegation to the SHA-256 routine */
+}
+
+SHA224Context *
+SHA224_Resurrect(unsigned char *space, void *arg)
+{
+    return SHA256_Resurrect(space, arg); /* pure delegation to the SHA-256 routine */
+}
+
+void
+SHA224_Clone(SHA224Context *dest, SHA224Context *src)
+{
+    SHA256_Clone(dest, src); /* pure delegation to the SHA-256 routine */
+}
+
+/* ======= SHA512 and SHA384 common constants and defines ================= */
+
+/* common #defines for SHA512 and SHA384 */
+#if defined(HAVE_LONG_LONG)
+#if defined(_MSC_VER)
+#pragma intrinsic(_rotr64, _rotl64)
+#define ROTR64(x, n) _rotr64(x, n)
+#define ROTL64(x, n) _rotl64(x, n)
+#else
+#define ROTR64(x, n) ((x >> n) | (x << (64 - n)))
+#define ROTL64(x, n) ((x << n) | (x >> (64 - n)))
+#endif
+
+#define S0(x) (ROTR64(x, 28) ^ ROTR64(x, 34) ^ ROTR64(x, 39))
+#define S1(x) (ROTR64(x, 14) ^ ROTR64(x, 18) ^ ROTR64(x, 41))
+#define s0(x) (ROTR64(x, 1) ^ ROTR64(x, 8) ^ SHR(x, 7))
+#define s1(x) (ROTR64(x, 19) ^ ROTR64(x, 61) ^ SHR(x, 6))
+
+#if PR_BYTES_PER_LONG == 8
+#define ULLC(hi, lo) 0x##hi##lo##UL
+#elif defined(_MSC_VER)
+#define ULLC(hi, lo) 0x##hi##lo##ui64
+#else
+#define ULLC(hi, lo) 0x##hi##lo##ULL
+#endif
+
+#if defined(IS_LITTLE_ENDIAN)
+#if defined(_MSC_VER)
+#pragma intrinsic(_byteswap_uint64)
+#define SHA_HTONLL(x) _byteswap_uint64(x)
+
+#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__x86_64))
+static __inline__ PRUint64
+swap8b(PRUint64 value)
+{
+    __asm__("bswapq %0"
+            : "+r"(value));
+    return (value);
+}
+#define SHA_HTONLL(x) swap8b(x)
+
+#else
+#define SHA_MASK16 ULLC(0000FFFF, 0000FFFF)
+#define SHA_MASK8 ULLC(00FF00FF, 00FF00FF)
+static PRUint64
+swap8b(PRUint64 x)
+{
+    PRUint64 t1 = x;
+    t1 = ((t1 & SHA_MASK8) << 8) | ((t1 >> 8) & SHA_MASK8);
+    t1 = ((t1 & SHA_MASK16) << 16) | ((t1 >> 16) & SHA_MASK16);
+    return (t1 >> 32) | (t1 << 32);
+}
+#define SHA_HTONLL(x) swap8b(x)
+#endif
+#define BYTESWAP8(x) x = SHA_HTONLL(x)
+#endif /* defined(IS_LITTLE_ENDIAN) */
+
+#else /* no long long */
+
+#if defined(IS_LITTLE_ENDIAN)
+#define ULLC(hi, lo)         \
+    {                        \
+        0x##lo##U, 0x##hi##U \
+    }
+#define SHA_HTONLL(x) (BYTESWAP4(x.lo), BYTESWAP4(x.hi), /* then swap halves, one sequenced XOR per comma */ \
+                       x.hi ^= x.lo, x.lo ^= x.hi, x.hi ^= x.lo, x)
+#define BYTESWAP8(x)     \
+    do {                 \
+        PRUint32 tmp;    \
+        BYTESWAP4(x.lo); \
+        BYTESWAP4(x.hi); \
+        tmp = x.lo;      \
+        x.lo = x.hi;     \
+        x.hi = tmp;      \
+    } while (0)
+#else
+#define ULLC(hi, lo)         \
+    {                        \
+        0x##hi##U, 0x##lo##U \
+    }
+#endif
+
+#endif
+
+/* SHA-384 and SHA-512 constants, K512. */
+static const PRUint64 K512[80] = {
+#if PR_BYTES_PER_LONG == 8
+    0x428a2f98d728ae22UL, 0x7137449123ef65cdUL,
+    0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL,
+    0x3956c25bf348b538UL, 0x59f111f1b605d019UL,
+    0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL,
+    0xd807aa98a3030242UL, 0x12835b0145706fbeUL,
+    0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL,
+    0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL,
+    0x9bdc06a725c71235UL, 0xc19bf174cf692694UL,
+    0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL,
+    0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL,
+    0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL,
+    0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL,
+    0x983e5152ee66dfabUL, 0xa831c66d2db43210UL,
+    0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL,
+    0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL,
+    0x06ca6351e003826fUL, 0x142929670a0e6e70UL,
+    0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL,
+    0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL,
+    0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL,
+    0x81c2c92e47edaee6UL, 0x92722c851482353bUL,
+    0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL,
+    0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL,
+    0xd192e819d6ef5218UL, 0xd69906245565a910UL,
+    0xf40e35855771202aUL, 0x106aa07032bbd1b8UL,
+    0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL,
+    0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL,
+    0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL,
+    0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL,
+    0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL,
+    0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL,
+    0x90befffa23631e28UL, 0xa4506cebde82bde9UL,
+    0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL,
+    0xca273eceea26619cUL, 0xd186b8c721c0c207UL,
+    0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL,
+    0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL,
+    0x113f9804bef90daeUL, 0x1b710b35131c471bUL,
+    0x28db77f523047d84UL, 0x32caab7b40c72493UL,
+    0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL,
+    0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL,
+    0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL
+#else
+    ULLC(428a2f98, d728ae22), ULLC(71374491, 23ef65cd),
+    ULLC(b5c0fbcf, ec4d3b2f), ULLC(e9b5dba5, 8189dbbc),
+    ULLC(3956c25b, f348b538), ULLC(59f111f1, b605d019),
+    ULLC(923f82a4, af194f9b), ULLC(ab1c5ed5, da6d8118),
+    ULLC(d807aa98, a3030242), ULLC(12835b01, 45706fbe),
+    ULLC(243185be, 4ee4b28c), ULLC(550c7dc3, d5ffb4e2),
+    ULLC(72be5d74, f27b896f), ULLC(80deb1fe, 3b1696b1),
+    ULLC(9bdc06a7, 25c71235), ULLC(c19bf174, cf692694),
+    ULLC(e49b69c1, 9ef14ad2), ULLC(efbe4786, 384f25e3),
+    ULLC(0fc19dc6, 8b8cd5b5), ULLC(240ca1cc, 77ac9c65),
+    ULLC(2de92c6f, 592b0275), ULLC(4a7484aa, 6ea6e483),
+    ULLC(5cb0a9dc, bd41fbd4), ULLC(76f988da, 831153b5),
+    ULLC(983e5152, ee66dfab), ULLC(a831c66d, 2db43210),
+    ULLC(b00327c8, 98fb213f), ULLC(bf597fc7, beef0ee4),
+    ULLC(c6e00bf3, 3da88fc2), ULLC(d5a79147, 930aa725),
+    ULLC(06ca6351, e003826f), ULLC(14292967, 0a0e6e70),
+    ULLC(27b70a85, 46d22ffc), ULLC(2e1b2138, 5c26c926),
+    ULLC(4d2c6dfc, 5ac42aed), ULLC(53380d13, 9d95b3df),
+    ULLC(650a7354, 8baf63de), ULLC(766a0abb, 3c77b2a8),
+    ULLC(81c2c92e, 47edaee6), ULLC(92722c85, 1482353b),
+    ULLC(a2bfe8a1, 4cf10364), ULLC(a81a664b, bc423001),
+    ULLC(c24b8b70, d0f89791), ULLC(c76c51a3, 0654be30),
+    ULLC(d192e819, d6ef5218), ULLC(d6990624, 5565a910),
+    ULLC(f40e3585, 5771202a), ULLC(106aa070, 32bbd1b8),
+    ULLC(19a4c116, b8d2d0c8), ULLC(1e376c08, 5141ab53),
+    ULLC(2748774c, df8eeb99), ULLC(34b0bcb5, e19b48a8),
+    ULLC(391c0cb3, c5c95a63), ULLC(4ed8aa4a, e3418acb),
+    ULLC(5b9cca4f, 7763e373), ULLC(682e6ff3, d6b2b8a3),
+    ULLC(748f82ee, 5defb2fc), ULLC(78a5636f, 43172f60),
+    ULLC(84c87814, a1f0ab72), ULLC(8cc70208, 1a6439ec),
+    ULLC(90befffa, 23631e28), ULLC(a4506ceb, de82bde9),
+    ULLC(bef9a3f7, b2c67915), ULLC(c67178f2, e372532b),
+    ULLC(ca273ece, ea26619c), ULLC(d186b8c7, 21c0c207),
+    ULLC(eada7dd6, cde0eb1e), ULLC(f57d4f7f, ee6ed178),
+    ULLC(06f067aa, 72176fba), ULLC(0a637dc5, a2c898a6),
+    ULLC(113f9804, bef90dae), ULLC(1b710b35, 131c471b),
+    ULLC(28db77f5, 23047d84), ULLC(32caab7b, 40c72493),
+    ULLC(3c9ebe0a, 15c9bebc), ULLC(431d67c4, 9c100d4c),
+    ULLC(4cc5d4be, cb3e42b6), ULLC(597f299c, fc657e2a),
+    ULLC(5fcb6fab, 3ad6faec), ULLC(6c44198c, 4a475817)
+#endif
+};
+
+struct SHA512ContextStr {
+    union { /* three views of one 640-byte work area */
+        PRUint64 w[80]; /* message schedule, input buffer, plus 64 words */
+        PRUint32 l[160]; /* the same storage as 32-bit words */
+        PRUint8 b[640]; /* the same storage as bytes */
+    } u;
+    PRUint64 h[8];   /* 8 state variables */
+    PRUint64 sizeLo; /* 64-bit count of hashed bytes. */
+};
+
+/* =========== SHA512 implementation ===================================== */
+
+/* SHA-512 initial hash values */
+static const PRUint64 H512[8] = {
+#if PR_BYTES_PER_LONG == 8
+    0x6a09e667f3bcc908UL, 0xbb67ae8584caa73bUL,
+    0x3c6ef372fe94f82bUL, 0xa54ff53a5f1d36f1UL,
+    0x510e527fade682d1UL, 0x9b05688c2b3e6c1fUL,
+    0x1f83d9abfb41bd6bUL, 0x5be0cd19137e2179UL
+#else
+    ULLC(6a09e667, f3bcc908), ULLC(bb67ae85, 84caa73b),
+    ULLC(3c6ef372, fe94f82b), ULLC(a54ff53a, 5f1d36f1),
+    ULLC(510e527f, ade682d1), ULLC(9b05688c, 2b3e6c1f),
+    ULLC(1f83d9ab, fb41bd6b), ULLC(5be0cd19, 137e2179)
+#endif
+};
+
+SHA512Context *
+SHA512_NewContext(void)
+{
+    /* Hand back the PORT_New result unchecked and uninitialized. */
+    return PORT_New(SHA512Context);
+}
+
+void
+SHA512_DestroyContext(SHA512Context *ctx, PRBool freeit)
+{ /* Zero the context, and free it as well when freeit is set. */
+    memset(ctx, 0, sizeof *ctx); /* ctx is dereferenced unconditionally: it must not be NULL */
+    if (freeit) {
+        PORT_Free(ctx);
+    }
+}
+
+void
+SHA512_Begin(SHA512Context *ctx)
+{
+    memset(ctx, 0, sizeof *ctx); /* clear the entire context */
+    memcpy(H, H512, sizeof H512); /* then load the SHA-512 initial hash values */
+}
+
+#if defined(SHA512_TRACE) /* DUMP prints d labelled as e and h labelled as a, for round n */
+#if defined(HAVE_LONG_LONG) /* cast for %llx: PRUint64 is not unsigned long on every ABI */
+#define DUMP(n, a, d, e, h) printf(" t = %2d, %s = %016llx, %s = %016llx\n", \
+                                   n, #e, (unsigned long long)(d), #a, (unsigned long long)(h));
+#else
+#define DUMP(n, a, d, e, h) printf(" t = %2d, %s = %08x%08x, %s = %08x%08x\n", \
+                                   n, #e, d.hi, d.lo, #a, h.hi, h.lo);
+#endif
+#else /* tracing disabled: DUMP expands to nothing */
+#define DUMP(n, a, d, e, h)
+#endif
+
+#if defined(HAVE_LONG_LONG)
+
+#define ADDTO(x, y) y += x
+
+#define INITW(t) W[t] = (s1(W[t - 2]) + W[t - 7] + s0(W[t - 15]) + W[t - 16])
+
+#define ROUND(n, a, b, c, d, e, f, g, h)       \
+    h += S1(e) + Ch(e, f, g) + K512[n] + W[n]; \
+    d += h;                                    \
+    h += S0(a) + Maj(a, b, c);                 \
+    DUMP(n, a, d, e, h)
+
+#else /* use only 32-bit variables, and don't unroll loops */
+
+#undef NOUNROLL512
+#define NOUNROLL512 1
+
+#define ADDTO(x, y) \
+    y.lo += x.lo;   \
+    y.hi += x.hi + (x.lo > y.lo)
+
+#define ROTR64a(x, n, lo, hi) (x.lo >> n | x.hi << (32 - n))
+#define ROTR64A(x, n, lo, hi) (x.lo << (64 - n) | x.hi >> (n - 32))
+#define SHR64a(x, n, lo, hi) (x.lo >> n | x.hi << (32 - n))
+
+/* Capital Sigma and lower-case sigma functions */
+#define s0lo(x) (ROTR64a(x, 1, lo, hi) ^ ROTR64a(x, 8, lo, hi) ^ SHR64a(x, 7, lo, hi))
+#define s0hi(x) (ROTR64a(x, 1, hi, lo) ^ ROTR64a(x, 8, hi, lo) ^ (x.hi >> 7))
+
+#define s1lo(x) (ROTR64a(x, 19, lo, hi) ^ ROTR64A(x, 61, lo, hi) ^ SHR64a(x, 6, lo, hi))
+#define s1hi(x) (ROTR64a(x, 19, hi, lo) ^ ROTR64A(x, 61, hi, lo) ^ (x.hi >> 6))
+
+#define S0lo(x) (ROTR64a(x, 28, lo, hi) ^ ROTR64A(x, 34, lo, hi) ^ ROTR64A(x, 39, lo, hi))
+#define S0hi(x) (ROTR64a(x, 28, hi, lo) ^ ROTR64A(x, 34, hi, lo) ^ ROTR64A(x, 39, hi, lo))
+
+#define S1lo(x) (ROTR64a(x, 14, lo, hi) ^ ROTR64a(x, 18, lo, hi) ^ ROTR64A(x, 41, lo, hi))
+#define S1hi(x) (ROTR64a(x, 14, hi, lo) ^ ROTR64a(x, 18, hi, lo) ^ ROTR64A(x, 41, hi, lo))
+
+/* 32-bit versions of Ch and Maj */
+#define Chxx(x, y, z, lo) ((x.lo & y.lo) ^ (~x.lo & z.lo))
+#define Majx(x, y, z, lo) ((x.lo & y.lo) ^ (x.lo & z.lo) ^ (y.lo & z.lo))
+
+#define INITW(t)                                                                      \
+    do {                                                                              \
+        PRUint32 lo, tm;                                                              \
+        PRUint32 cy = 0;                                                              \
+        lo = s1lo(W[t - 2]);                                                          \
+        lo += (tm = W[t - 7].lo);                                                     \
+        if (lo < tm)                                                                  \
+            cy++;                                                                     \
+        lo += (tm = s0lo(W[t - 15]));                                                 \
+        if (lo < tm)                                                                  \
+            cy++;                                                                     \
+        lo += (tm = W[t - 16].lo);                                                    \
+        if (lo < tm)                                                                  \
+            cy++;                                                                     \
+        W[t].lo = lo;                                                                 \
+        W[t].hi = cy + s1hi(W[t - 2]) + W[t - 7].hi + s0hi(W[t - 15]) + W[t - 16].hi; \
+    } while (0)
+
+#define ROUND(n, a, b, c, d, e, f, g, h)                                 \
+    {                                                                    \
+        PRUint32 lo, tm, cy;                                             \
+        lo = S1lo(e);                                                    \
+        lo += (tm = Chxx(e, f, g, lo));                                  \
+        cy = (lo < tm);                                                  \
+        lo += (tm = K512[n].lo);                                         \
+        if (lo < tm)                                                     \
+            cy++;                                                        \
+        lo += (tm = W[n].lo);                                            \
+        if (lo < tm)                                                     \
+            cy++;                                                        \
+        h.lo += lo;                                                      \
+        if (h.lo < lo)                                                   \
+            cy++;                                                        \
+        h.hi += cy + S1hi(e) + Chxx(e, f, g, hi) + K512[n].hi + W[n].hi; \
+        d.lo += h.lo;                                                    \
+        d.hi += h.hi + (d.lo < h.lo);                                    \
+        lo = S0lo(a);                                                    \
+        lo += (tm = Majx(a, b, c, lo));                                  \
+        cy = (lo < tm);                                                  \
+        h.lo += lo;                                                      \
+        if (h.lo < lo)                                                   \
+            cy++;                                                        \
+        h.hi += cy + S0hi(a) + Majx(a, b, c, hi);                        \
+        DUMP(n, a, d, e, h)                                              \
+    }
+#endif
+
+static void
+SHA512_Compress(SHA512Context *ctx)
+{ /* Process the 16-word block in W[0..15]: expand to 80 words, run 80 rounds, add into H[0..7]. */
+#if defined(IS_LITTLE_ENDIAN)
+    { /* byte-swap the 16 input words in place before they are used as 64-bit values */
+        BYTESWAP8(W[0]);
+        BYTESWAP8(W[1]);
+        BYTESWAP8(W[2]);
+        BYTESWAP8(W[3]);
+        BYTESWAP8(W[4]);
+        BYTESWAP8(W[5]);
+        BYTESWAP8(W[6]);
+        BYTESWAP8(W[7]);
+        BYTESWAP8(W[8]);
+        BYTESWAP8(W[9]);
+        BYTESWAP8(W[10]);
+        BYTESWAP8(W[11]);
+        BYTESWAP8(W[12]);
+        BYTESWAP8(W[13]);
+        BYTESWAP8(W[14]);
+        BYTESWAP8(W[15]);
+    }
+#endif
+
+    {
+#ifdef NOUNROLL512
+        {
+            /* prepare the "message schedule": derive W[16..79] from earlier words */
+            int t;
+            for (t = 16; t < 80; ++t) {
+                INITW(t);
+            }
+        }
+#else
+        INITW(16);
+        INITW(17);
+        INITW(18);
+        INITW(19);
+
+        INITW(20);
+        INITW(21);
+        INITW(22);
+        INITW(23);
+        INITW(24);
+        INITW(25);
+        INITW(26);
+        INITW(27);
+        INITW(28);
+        INITW(29);
+
+        INITW(30);
+        INITW(31);
+        INITW(32);
+        INITW(33);
+        INITW(34);
+        INITW(35);
+        INITW(36);
+        INITW(37);
+        INITW(38);
+        INITW(39);
+
+        INITW(40);
+        INITW(41);
+        INITW(42);
+        INITW(43);
+        INITW(44);
+        INITW(45);
+        INITW(46);
+        INITW(47);
+        INITW(48);
+        INITW(49);
+
+        INITW(50);
+        INITW(51);
+        INITW(52);
+        INITW(53);
+        INITW(54);
+        INITW(55);
+        INITW(56);
+        INITW(57);
+        INITW(58);
+        INITW(59);
+
+        INITW(60);
+        INITW(61);
+        INITW(62);
+        INITW(63);
+        INITW(64);
+        INITW(65);
+        INITW(66);
+        INITW(67);
+        INITW(68);
+        INITW(69);
+
+        INITW(70);
+        INITW(71);
+        INITW(72);
+        INITW(73);
+        INITW(74);
+        INITW(75);
+        INITW(76);
+        INITW(77);
+        INITW(78);
+        INITW(79);
+#endif
+    }
+#ifdef SHA512_TRACE
+    { /* debug only: print the full 80-word schedule */
+        int i;
+        for (i = 0; i < 80; ++i) {
+#ifdef HAVE_LONG_LONG
+            printf("W[%2d] = %016llx\n", i, (unsigned long long)W[i]); /* PRUint64 need not be unsigned long */
+#else
+            printf("W[%2d] = %08x%08x\n", i, W[i].hi, W[i].lo);
+#endif
+        }
+    }
+#endif
+    {
+        PRUint64 a, b, c, d, e, f, g, h; /* working copies of the eight state words */
+
+        a = H[0];
+        b = H[1];
+        c = H[2];
+        d = H[3];
+        e = H[4];
+        f = H[5];
+        g = H[6];
+        h = H[7];
+
+#ifdef NOUNROLL512
+        { /* eight rounds per iteration; the rotated argument order stands in for moving the variables */
+            int t;
+            for (t = 0; t < 80; t += 8) {
+                ROUND(t + 0, a, b, c, d, e, f, g, h)
+                ROUND(t + 1, h, a, b, c, d, e, f, g)
+                ROUND(t + 2, g, h, a, b, c, d, e, f)
+                ROUND(t + 3, f, g, h, a, b, c, d, e)
+                ROUND(t + 4, e, f, g, h, a, b, c, d)
+                ROUND(t + 5, d, e, f, g, h, a, b, c)
+                ROUND(t + 6, c, d, e, f, g, h, a, b)
+                ROUND(t + 7, b, c, d, e, f, g, h, a)
+            }
+        }
+#else
+        ROUND(0, a, b, c, d, e, f, g, h)
+        ROUND(1, h, a, b, c, d, e, f, g)
+        ROUND(2, g, h, a, b, c, d, e, f)
+        ROUND(3, f, g, h, a, b, c, d, e)
+        ROUND(4, e, f, g, h, a, b, c, d)
+        ROUND(5, d, e, f, g, h, a, b, c)
+        ROUND(6, c, d, e, f, g, h, a, b)
+        ROUND(7, b, c, d, e, f, g, h, a)
+
+        ROUND(8, a, b, c, d, e, f, g, h)
+        ROUND(9, h, a, b, c, d, e, f, g)
+        ROUND(10, g, h, a, b, c, d, e, f)
+        ROUND(11, f, g, h, a, b, c, d, e)
+        ROUND(12, e, f, g, h, a, b, c, d)
+        ROUND(13, d, e, f, g, h, a, b, c)
+        ROUND(14, c, d, e, f, g, h, a, b)
+        ROUND(15, b, c, d, e, f, g, h, a)
+
+        ROUND(16, a, b, c, d, e, f, g, h)
+        ROUND(17, h, a, b, c, d, e, f, g)
+        ROUND(18, g, h, a, b, c, d, e, f)
+        ROUND(19, f, g, h, a, b, c, d, e)
+        ROUND(20, e, f, g, h, a, b, c, d)
+        ROUND(21, d, e, f, g, h, a, b, c)
+        ROUND(22, c, d, e, f, g, h, a, b)
+        ROUND(23, b, c, d, e, f, g, h, a)
+
+        ROUND(24, a, b, c, d, e, f, g, h)
+        ROUND(25, h, a, b, c, d, e, f, g)
+        ROUND(26, g, h, a, b, c, d, e, f)
+        ROUND(27, f, g, h, a, b, c, d, e)
+        ROUND(28, e, f, g, h, a, b, c, d)
+        ROUND(29, d, e, f, g, h, a, b, c)
+        ROUND(30, c, d, e, f, g, h, a, b)
+        ROUND(31, b, c, d, e, f, g, h, a)
+
+        ROUND(32, a, b, c, d, e, f, g, h)
+        ROUND(33, h, a, b, c, d, e, f, g)
+        ROUND(34, g, h, a, b, c, d, e, f)
+        ROUND(35, f, g, h, a, b, c, d, e)
+        ROUND(36, e, f, g, h, a, b, c, d)
+        ROUND(37, d, e, f, g, h, a, b, c)
+        ROUND(38, c, d, e, f, g, h, a, b)
+        ROUND(39, b, c, d, e, f, g, h, a)
+
+        ROUND(40, a, b, c, d, e, f, g, h)
+        ROUND(41, h, a, b, c, d, e, f, g)
+        ROUND(42, g, h, a, b, c, d, e, f)
+        ROUND(43, f, g, h, a, b, c, d, e)
+        ROUND(44, e, f, g, h, a, b, c, d)
+        ROUND(45, d, e, f, g, h, a, b, c)
+        ROUND(46, c, d, e, f, g, h, a, b)
+        ROUND(47, b, c, d, e, f, g, h, a)
+
+        ROUND(48, a, b, c, d, e, f, g, h)
+        ROUND(49, h, a, b, c, d, e, f, g)
+        ROUND(50, g, h, a, b, c, d, e, f)
+        ROUND(51, f, g, h, a, b, c, d, e)
+        ROUND(52, e, f, g, h, a, b, c, d)
+        ROUND(53, d, e, f, g, h, a, b, c)
+        ROUND(54, c, d, e, f, g, h, a, b)
+        ROUND(55, b, c, d, e, f, g, h, a)
+
+        ROUND(56, a, b, c, d, e, f, g, h)
+        ROUND(57, h, a, b, c, d, e, f, g)
+        ROUND(58, g, h, a, b, c, d, e, f)
+        ROUND(59, f, g, h, a, b, c, d, e)
+        ROUND(60, e, f, g, h, a, b, c, d)
+        ROUND(61, d, e, f, g, h, a, b, c)
+        ROUND(62, c, d, e, f, g, h, a, b)
+        ROUND(63, b, c, d, e, f, g, h, a)
+
+        ROUND(64, a, b, c, d, e, f, g, h)
+        ROUND(65, h, a, b, c, d, e, f, g)
+        ROUND(66, g, h, a, b, c, d, e, f)
+        ROUND(67, f, g, h, a, b, c, d, e)
+        ROUND(68, e, f, g, h, a, b, c, d)
+        ROUND(69, d, e, f, g, h, a, b, c)
+        ROUND(70, c, d, e, f, g, h, a, b)
+        ROUND(71, b, c, d, e, f, g, h, a)
+
+        ROUND(72, a, b, c, d, e, f, g, h)
+        ROUND(73, h, a, b, c, d, e, f, g)
+        ROUND(74, g, h, a, b, c, d, e, f)
+        ROUND(75, f, g, h, a, b, c, d, e)
+        ROUND(76, e, f, g, h, a, b, c, d)
+        ROUND(77, d, e, f, g, h, a, b, c)
+        ROUND(78, c, d, e, f, g, h, a, b)
+        ROUND(79, b, c, d, e, f, g, h, a)
+#endif
+
+        ADDTO(a, H[0]); /* fold the round results back into the chaining state */
+        ADDTO(b, H[1]);
+        ADDTO(c, H[2]);
+        ADDTO(d, H[3]);
+        ADDTO(e, H[4]);
+        ADDTO(f, H[5]);
+        ADDTO(g, H[6]);
+        ADDTO(h, H[7]);
+    }
+}
+
+void
+SHA512_Update(SHA512Context *ctx, const unsigned char *input,
+              unsigned int inputLen)
+{ /* Feed inputLen bytes of input into the running SHA-512 hash held in ctx. */
+    unsigned int inBuf; /* bytes already buffered (byte count mod 128) */
+    if (!inputLen)
+        return; /* nothing to add; the context is left untouched */
+
+#if defined(HAVE_LONG_LONG)
+    inBuf = (unsigned int)ctx->sizeLo & 0x7f;
+    /* Add inputLen into the count of bytes processed, before processing */
+    ctx->sizeLo += inputLen;
+#else
+    inBuf = (unsigned int)ctx->sizeLo.lo & 0x7f;
+    ctx->sizeLo.lo += inputLen;
+    if (ctx->sizeLo.lo < inputLen) /* low word wrapped: carry into the high word */
+        ctx->sizeLo.hi++;
+#endif
+
+    /* if data already in buffer, attempt to fill rest of buffer */
+    if (inBuf) {
+        unsigned int todo = SHA512_BLOCK_LENGTH - inBuf; /* free space left in the buffer */
+        if (inputLen < todo)
+            todo = inputLen;
+        memcpy(B + inBuf, input, todo);
+        input += todo;
+        inputLen -= todo;
+        if (inBuf + todo == SHA512_BLOCK_LENGTH) /* buffer is now full */
+            SHA512_Compress(ctx);
+    }
+
+    /* if enough data to fill one or more whole buffers, process them. */
+    while (inputLen >= SHA512_BLOCK_LENGTH) {
+        memcpy(B, input, SHA512_BLOCK_LENGTH);
+        input += SHA512_BLOCK_LENGTH;
+        inputLen -= SHA512_BLOCK_LENGTH;
+        SHA512_Compress(ctx);
+    }
+    /* if data left over, fill it into buffer */
+    if (inputLen)
+        memcpy(B, input, inputLen); /* fewer than SHA512_BLOCK_LENGTH bytes remain here */
+}
+
+void
+SHA512_End(SHA512Context *ctx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{ /* Pad the message, append its bit length, compress once more and copy out the digest. */
+#if defined(HAVE_LONG_LONG)
+    unsigned int inBuf = (unsigned int)ctx->sizeLo & 0x7f;
+#else
+    unsigned int inBuf = (unsigned int)ctx->sizeLo.lo & 0x7f;
+#endif
+    unsigned int padLen = (inBuf < 112) ? (112 - inBuf) : (112 + 128 - inBuf); /* pad to 112 mod 128 */
+    PRUint64 lo;
+    LL_SHL(lo, ctx->sizeLo, 3); /* presumably lo = sizeLo << 3 (length in bits) - confirm against NSPR LL_SHL */
+
+    SHA512_Update(ctx, pad, padLen); /* also bumps sizeLo, which is why lo was taken first */
+
+#if defined(HAVE_LONG_LONG)
+    W[14] = 0; /* the upper 64 bits of the length field are always written as zero */
+#else
+    W[14].lo = 0;
+    W[14].hi = 0;
+#endif
+
+    W[15] = lo;
+#if defined(IS_LITTLE_ENDIAN)
+    BYTESWAP8(W[15]); /* SHA512_Compress swaps W[0..15] again on little-endian, restoring lo */
+#endif
+    SHA512_Compress(ctx);
+
+/* now output the answer */
+#if defined(IS_LITTLE_ENDIAN)
+    BYTESWAP8(H[0]);
+    BYTESWAP8(H[1]);
+    BYTESWAP8(H[2]);
+    BYTESWAP8(H[3]);
+    BYTESWAP8(H[4]);
+    BYTESWAP8(H[5]);
+    BYTESWAP8(H[6]);
+    BYTESWAP8(H[7]);
+#endif
+    padLen = PR_MIN(SHA512_LENGTH, maxDigestLen); /* padLen is reused as the output length */
+    memcpy(digest, H, padLen);
+    if (digestLen) /* digestLen is optional */
+        *digestLen = padLen;
+}
+
+void
+SHA512_EndRaw(SHA512Context *ctx, unsigned char *digest,
+              unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    /* Emit the current state words without padding; ctx is only read. */
+    PRUint64 state[8];
+    unsigned int outLen = PR_MIN(SHA512_LENGTH, maxDigestLen);
+
+    memcpy(state, ctx->h, sizeof(state));
+
+#if defined(IS_LITTLE_ENDIAN)
+    {
+        /* byte-swap each copied word before it is written out */
+        unsigned int i;
+        for (i = 0; i < 8; ++i) {
+            BYTESWAP8(state[i]);
+        }
+    }
+#endif
+
+    memcpy(digest, state, outLen);
+    if (digestLen)
+        *digestLen = outLen;
+}
+
+SECStatus
+SHA512_HashBuf(unsigned char *dest, const unsigned char *src,
+               PRUint32 src_length)
+{ /* One-shot SHA-512 of src_length bytes at src into dest; always returns SECSuccess. */
+    SHA512Context ctx; /* temporary context on the stack */
+    unsigned int outLen; /* receives the digest length; not used afterwards */
+
+    SHA512_Begin(&ctx);
+    SHA512_Update(&ctx, src, src_length);
+    SHA512_End(&ctx, dest, &outLen, SHA512_LENGTH); /* writes up to SHA512_LENGTH bytes to dest */
+    memset(&ctx, 0, sizeof ctx); /* wipe state; NOTE(review): a plain memset may be elided by optimizers */
+
+    return SECSuccess;
+}
+
+SECStatus
+SHA512_Hash(unsigned char *dest, const char *src)
+{ /* Hash the string src into dest; its length is taken with PORT_Strlen. */
+    return SHA512_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src));
+}
+
+void
+SHA512_TraceState(SHA512Context *ctx)
+{ /* no-op: the body is empty, so nothing is traced and ctx is not touched */
+}
+
+unsigned int
+SHA512_FlattenSize(SHA512Context *ctx)
+{
+    return sizeof *ctx; /* size of the whole context struct, the amount SHA512_Flatten copies */
+}
+
+SECStatus
+SHA512_Flatten(SHA512Context *ctx, unsigned char *space)
+{ /* Serialize ctx by raw copy; space must have room for sizeof *ctx bytes. */
+    PORT_Memcpy(space, ctx, sizeof *ctx);
+    return SECSuccess; /* there is no failure path */
+}
+
+SHA512Context *
+SHA512_Resurrect(unsigned char *space, void *arg)
+{ /* Build a new context from a flattened image at space; arg is unused. */
+    SHA512Context *ctx = SHA512_NewContext();
+    if (ctx) /* a NULL from SHA512_NewContext is passed straight back */
+        PORT_Memcpy(ctx, space, sizeof *ctx);
+    return ctx;
+}
+
+void
+SHA512_Clone(SHA512Context *dest, SHA512Context *src)
+{
+    memcpy(dest, src, sizeof *dest); /* byte-for-byte copy of the whole context */
+}
+
+/* ======================================================================= */
+/* SHA384 uses a SHA512Context as the real context.
+** The only differences between SHA384 and SHA512 are:
+** a) the initialization values for the context, and
+** b) the number of bytes of data produced as output.
+*/
+
+/* SHA-384 initial hash values */
+static const PRUint64 H384[8] = {
+#if PR_BYTES_PER_LONG == 8
+    0xcbbb9d5dc1059ed8UL, 0x629a292a367cd507UL,
+    0x9159015a3070dd17UL, 0x152fecd8f70e5939UL,
+    0x67332667ffc00b31UL, 0x8eb44a8768581511UL,
+    0xdb0c2e0d64f98fa7UL, 0x47b5481dbefa4fa4UL
+#else
+    ULLC(cbbb9d5d, c1059ed8), ULLC(629a292a, 367cd507),
+    ULLC(9159015a, 3070dd17), ULLC(152fecd8, f70e5939),
+    ULLC(67332667, ffc00b31), ULLC(8eb44a87, 68581511),
+    ULLC(db0c2e0d, 64f98fa7), ULLC(47b5481d, befa4fa4)
+#endif
+};
+
+SHA384Context *
+SHA384_NewContext(void)
+{
+    return SHA512_NewContext(); /* SHA-384 reuses the SHA-512 context allocator */
+}
+
+void
+SHA384_DestroyContext(SHA384Context *ctx, PRBool freeit)
+{
+    SHA512_DestroyContext(ctx, freeit); /* pure delegation; freeit is forwarded unchanged */
+}
+
+void
+SHA384_Begin(SHA384Context *ctx)
+{
+    memset(ctx, 0, sizeof *ctx); /* clear the entire context */
+    memcpy(H, H384, sizeof H384); /* then load the SHA-384 initial hash values */
+}
+
+void
+SHA384_Update(SHA384Context *ctx, const unsigned char *input,
+              unsigned int inputLen)
+{
+    SHA512_Update(ctx, input, inputLen); /* input absorption is identical to SHA-512 */
+}
+
+void
+SHA384_End(SHA384Context *ctx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{ /* Finish via SHA512_End, but never emit more than SHA384_LENGTH bytes. */
+    unsigned int maxLen = SHA_MIN(maxDigestLen, SHA384_LENGTH);
+    SHA512_End(ctx, digest, digestLen, maxLen);
+}
+
+void
+SHA384_EndRaw(SHA384Context *ctx, unsigned char *digest,
+              unsigned int *digestLen, unsigned int maxDigestLen)
+{ /* Raw state output via SHA512_EndRaw, capped at SHA384_LENGTH bytes. */
+    unsigned int maxLen = SHA_MIN(maxDigestLen, SHA384_LENGTH);
+    SHA512_EndRaw(ctx, digest, digestLen, maxLen);
+}
+
+SECStatus
+SHA384_HashBuf(unsigned char *dest, const unsigned char *src,
+               PRUint32 src_length)
+{ /* One-shot SHA-384 of src_length bytes at src into dest; always returns SECSuccess. */
+    SHA512Context ctx; /* temporary context on the stack */
+    unsigned int outLen; /* receives the digest length; not used afterwards */
+
+    SHA384_Begin(&ctx); /* only the initial values differ from SHA-512 here */
+    SHA512_Update(&ctx, src, src_length);
+    SHA512_End(&ctx, dest, &outLen, SHA384_LENGTH); /* output capped at SHA384_LENGTH bytes */
+    memset(&ctx, 0, sizeof ctx); /* wipe state; NOTE(review): a plain memset may be elided by optimizers */
+
+    return SECSuccess;
+}
+
+SECStatus
+SHA384_Hash(unsigned char *dest, const char *src)
+{
+    return SHA384_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src)); /* src: NUL-terminated string */
+}
+
+void
+SHA384_TraceState(SHA384Context *ctx)
+{
+} /* no-op: the body is empty and ctx is not used */
+
+unsigned int
+SHA384_FlattenSize(SHA384Context *ctx)
+{
+    return sizeof(SHA384Context); /* size of the context struct in bytes; ctx itself is not read */
+}
+
+SECStatus
+SHA384_Flatten(SHA384Context *ctx, unsigned char *space)
+{
+    return SHA512_Flatten(ctx, space); /* serialization is delegated to the SHA-512 routine */
+}
+
+SHA384Context *
+SHA384_Resurrect(unsigned char *space, void *arg)
+{
+    return SHA512_Resurrect(space, arg); /* deserialization is delegated to the SHA-512 routine */
+}
+
+void
+SHA384_Clone(SHA384Context *dest, SHA384Context *src)
+{
+    memcpy(dest, src, sizeof *dest); /* byte-for-byte copy of the whole context; must not overlap */
+}
+
+/* ======================================================================= */
+#ifdef SELFTEST
+#include <stdio.h>
+
+static const char abc[] = { "abc" }; /* short test message used by every test*() routine */
+static const char abcdbc[] = { /* longer test message for the SHA-224/256 tests */
+    "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
+};
+static const char abcdef[] = { /* longer test message for the SHA-384/512 tests */
+    "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"
+    "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"
+};
+
+void
+dumpHash32(const unsigned char *buf, unsigned int bufLen)
+{
+    unsigned int i;
+    for (i = 0; i < bufLen; i += 4) { /* reads buf[i..i+3], so bufLen should be a multiple of 4 */
+        printf(" %02x%02x%02x%02x", buf[i], buf[i + 1], buf[i + 2], buf[i + 3]);
+    }
+    printf("\n"); /* the whole digest is printed on one line */
+}
+
+void
+test256(void)
+{
+    unsigned char outBuf[SHA256_LENGTH]; /* reused for both digests */
+
+    printf("SHA256, input = %s\n", abc);
+    SHA256_Hash(outBuf, abc);
+    dumpHash32(outBuf, sizeof outBuf); /* digest is only printed, not checked against an expected value */
+
+    printf("SHA256, input = %s\n", abcdbc);
+    SHA256_Hash(outBuf, abcdbc);
+    dumpHash32(outBuf, sizeof outBuf);
+}
+
+void
+test224(void)
+{
+    SHA224Context ctx;
+    unsigned char a1000times[1000]; /* filled with 'a' for test vector 3 */
+    unsigned int outLen;            /* set by SHA224_End; not examined */
+    unsigned char outBuf[SHA224_LENGTH];
+    int i;
+
+    /* Test Vector 1 */
+    printf("SHA224, input = %s\n", abc);
+    SHA224_Hash(outBuf, abc);
+    dumpHash32(outBuf, sizeof outBuf); /* digests are only printed, never compared */
+
+    /* Test Vector 2 */
+    printf("SHA224, input = %s\n", abcdbc);
+    SHA224_Hash(outBuf, abcdbc);
+    dumpHash32(outBuf, sizeof outBuf);
+
+    /* Test Vector 3 */
+
+    /* to hash one million 'a's perform 1000
+     * sha224 updates on a buffer with 1000 'a's
+     */
+    memset(a1000times, 'a', 1000);
+    printf("SHA224, input = %s\n", "a one million times");
+    SHA224_Begin(&ctx);
+    for (i = 0; i < 1000; i++) /* 1000 updates x 1000 bytes = 1,000,000 bytes */
+        SHA224_Update(&ctx, a1000times, 1000);
+    SHA224_End(&ctx, outBuf, &outLen, SHA224_LENGTH);
+    dumpHash32(outBuf, sizeof outBuf);
+}
+
+void
+dumpHash64(const unsigned char *buf, unsigned int bufLen)
+{
+    unsigned int i;
+    for (i = 0; i < bufLen; i += 8) { /* reads buf[i..i+7], so bufLen should be a multiple of 8 */
+        if (i % 32 == 0) /* new output line every 32 bytes (four 8-byte groups), including before the first */
+            printf("\n");
+        printf(" %02x%02x%02x%02x%02x%02x%02x%02x",
+               buf[i], buf[i + 1], buf[i + 2], buf[i + 3],
+               buf[i + 4], buf[i + 5], buf[i + 6], buf[i + 7]);
+    }
+    printf("\n");
+}
+
+void
+test512(void)
+{
+    unsigned char outBuf[SHA512_LENGTH]; /* reused for both digests */
+
+    printf("SHA512, input = %s\n", abc);
+    SHA512_Hash(outBuf, abc);
+    dumpHash64(outBuf, sizeof outBuf); /* digest is only printed, not checked against an expected value */
+
+    printf("SHA512, input = %s\n", abcdef);
+    SHA512_Hash(outBuf, abcdef);
+    dumpHash64(outBuf, sizeof outBuf);
+}
+
+void
+time512(void)
+{
+    unsigned char outBuf[SHA512_LENGTH]; /* digests are computed and discarded */
+
+    SHA512_Hash(outBuf, abc);    /* same two hashes as test512(), without any printing */
+    SHA512_Hash(outBuf, abcdef);
+}
+
+void
+test384(void)
+{
+    unsigned char outBuf[SHA384_LENGTH]; /* reused for both digests */
+
+    printf("SHA384, input = %s\n", abc);
+    SHA384_Hash(outBuf, abc);
+    dumpHash64(outBuf, sizeof outBuf); /* digest is only printed, not checked against an expected value */
+
+    printf("SHA384, input = %s\n", abcdef);
+    SHA384_Hash(outBuf, abcdef);
+    dumpHash64(outBuf, sizeof outBuf);
+}
+
+int
+main(int argc, char *argv[], char *envp[])
+{
+    int i = 1; /* default: print the digests once */
+    if (argc > 1) {
+        i = atoi(argv[1]); /* optional iteration count; atoi gives 0 for non-numeric text */
+    }
+    if (i < 2) { /* print mode: run every digest test and show the results */
+        test224();
+        test256();
+        test384();
+        test512();
+    } else {
+        while (i-- > 0) { /* timing mode: i silent rounds of time512() */
+            time512();
+        }
+        printf("done\n");
+    }
+    return 0; /* exit status is 0 in both modes; envp is unused */
+}
+
+void *
+PORT_Alloc(size_t len)
+{
+    return malloc(len); /* SELFTEST stand-in: maps the PORT allocator straight onto malloc */
+}
+void
+PORT_Free(void *ptr)
+{
+    free(ptr); /* SELFTEST stand-in: maps PORT_Free straight onto free */
+}
+void
+PORT_ZFree(void *ptr, size_t len)
+{
+    memset(ptr, 0, len); /* zero len bytes before releasing; ptr must be non-NULL here */
+    free(ptr);
+}
+#endif
diff --git a/security/nss/lib/freebl/sha_fast.c b/security/nss/lib/freebl/sha_fast.c
new file mode 100644
index 000000000..52071f0c9
--- /dev/null
+++ b/security/nss/lib/freebl/sha_fast.c
@@ -0,0 +1,545 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include <memory.h>
+#include "blapi.h"
+#include "sha_fast.h"
+#include "prerror.h"
+
+#ifdef TRACING_SSL
+#include "ssl.h"
+#include "ssltrace.h"
+#endif
+
+static void shaCompress(volatile SHA_HW_t *X, const PRUint32 *datain);
+
+#define W u.w /* ctx->W: the 64-byte input buffer viewed as 16 32-bit words */
+#define B u.b /* ctx->B: the same buffer viewed as bytes */
+
+#define SHA_F1(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z)) /* bitwise select: Y where X is set, else Z */
+#define SHA_F2(X, Y, Z) ((X) ^ (Y) ^ (Z)) /* parity */
+#define SHA_F3(X, Y, Z) (((X) & (Y)) | ((Z) & ((X) | (Y)))) /* bitwise majority */
+#define SHA_F4(X, Y, Z) ((X) ^ (Y) ^ (Z)) /* parity again (same as SHA_F2) */
+
+#define SHA_MIX(n, a, b, c) XW(n) = SHA_ROTL(XW(a) ^ XW(b) ^ XW(c) ^ XW(n), 1) /* next schedule word, in place */
+
+/*
+ *  SHA: initialize context
+ */
+void
+SHA1_Begin(SHA1Context *ctx)
+{
+    ctx->size = 0; /* no bytes hashed yet */
+    /*
+     *  Initialize H with constants from FIPS180-1.
+     */
+    ctx->H[0] = 0x67452301L;
+    ctx->H[1] = 0xefcdab89L;
+    ctx->H[2] = 0x98badcfeL;
+    ctx->H[3] = 0x10325476L;
+    ctx->H[4] = 0xc3d2e1f0L; /* only H[0..4] are set here; the rest of H and the input buffer are untouched */
+}
+
+/* Explanation of H array and index values:
+ * The context's H array is actually the concatenation of two arrays
+ * defined by SHA1, the H array of state variables (5 elements),
+ * and the W array of intermediate values, of which there are 16 elements.
+ * The W array starts at H[5], that is W[0] is H[5].
+ * Although these values are defined as 32-bit values, we use 64-bit
+ * variables to hold them because the AMD64 stores 64 bit values in
+ * memory MUCH faster than it stores any smaller values.
+ *
+ * Rather than passing the context structure to shaCompress, we pass
+ * this combined array of H and W values.  We do not pass the address
+ * of the first element of this array, but rather pass the address of an
+ * element in the middle of the array, element X.  Presently X[0] is H[11].
+ * So we pass the address of H[11] as the address of array X to shaCompress.
+ * Then shaCompress accesses the members of the array using positive AND
+ * negative indexes.
+ *
+ * Pictorially: (each element is 8 bytes)
+ * H | H0 H1 H2 H3 H4 W0 W1 W2 W3 W4 W5 W6 W7 W8 W9 Wa Wb Wc Wd We Wf |
+ * X |-11-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 |
+ *
+ * The byte offset from X[0] to any member of H and W is always
+ * representable in a signed 8-bit value, which will be encoded
+ * as a single byte offset in the X86-64 instruction set.
+ * If we didn't pass the address of H[11], and instead passed the
+ * address of H[0], the offsets to elements H[16] and above would be
+ * greater than 127, not representable in a signed 8-bit value, and the
+ * x86-64 instruction set would encode every such offset as a 32-bit
+ * signed number in each instruction that accessed element H[16] or
+ * higher.  This results in much bigger and slower code.
+ */
+#if !defined(SHA_PUT_W_IN_STACK)
+#define H2X 11 /* X[0] is H[11], and H[0] is X[-11] */
+#define W2X 6  /* X[0] is W[6],  and W[0] is X[-6]  */
+#else
+#define H2X 0 /* W lives in shaCompress locals, so X is simply &H[0] and W2X is not needed */
+#endif
+
+/*
+ *  SHA: Add len bytes at dataIn to the hash; a trailing partial block waits in ctx->B.
+ */
+void
+SHA1_Update(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len)
+{
+    register unsigned int lenB;
+    register unsigned int togo;
+
+    if (!len)
+        return;
+
+    /* lenB = bytes already buffered in ctx->B by earlier calls (size mod 64). */
+    lenB = (unsigned int)(ctx->size) & 63U;
+
+    ctx->size += len;
+
+    /*
+     *  Top up a partially filled buffer first; compress it once it reaches 64 bytes.
+     */
+    if (lenB > 0) {
+        togo = 64U - lenB;
+        if (len < togo)
+            togo = len;
+        memcpy(ctx->B + lenB, dataIn, togo);
+        len -= togo;
+        dataIn += togo;
+        lenB = (lenB + togo) & 63U;
+        if (!lenB) { /* wrapped to 0: the buffer is exactly full */
+            shaCompress(&ctx->H[H2X], ctx->W);
+        }
+    }
+#if !defined(HAVE_UNALIGNED_ACCESS)
+    if ((ptrdiff_t)dataIn % sizeof(PRUint32)) { /* misaligned input: stage each block in ctx->B */
+        while (len >= 64U) {
+            memcpy(ctx->B, dataIn, 64);
+            len -= 64U;
+            shaCompress(&ctx->H[H2X], ctx->W);
+            dataIn += 64U;
+        }
+    } else
+#endif
+    {
+        while (len >= 64U) { /* aligned (or unaligned-capable CPU): compress straight from the input */
+            len -= 64U;
+            shaCompress(&ctx->H[H2X], (const PRUint32 *)dataIn); /* keep the caller's const qualifier */
+            dataIn += 64U;
+        }
+    }
+    if (len) { /* fewer than 64 bytes left: buffer them for the next call or SHA1_End */
+        memcpy(ctx->B, dataIn, len);
+    }
+}
+
+/*
+ *  SHA: Pad the message, run the final compression, and write the digest to hashout.
+ */
+void NO_SANITIZE_ALIGNMENT
+SHA1_End(SHA1Context *ctx, unsigned char *hashout,
+         unsigned int *pDigestLen, unsigned int maxDigestLen)
+{
+    register PRUint64 size;
+    register PRUint32 lenB;
+
+    static const unsigned char bulk_pad[64] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+#define tmp lenB /* the fallback SHA_HTONL/SHA_ROTL macros need a scratch variable named tmp */
+
+    PORT_Assert(maxDigestLen >= SHA1_LENGTH); /* the only use of maxDigestLen in this function */
+
+    /*
+     *  Pad with a binary 1 (e.g. 0x80), then zeroes, then length in bits
+     */
+    size = ctx->size; /* saved before padding, because SHA1_Update advances ctx->size */
+
+    lenB = (PRUint32)size & 63;
+    SHA1_Update(ctx, bulk_pad, (((55 + 64) - lenB) & 63) + 1); /* 1..64 pad bytes, ending at offset 56 of a block */
+    PORT_Assert(((PRUint32)ctx->size & 63) == 56);
+    /* Convert size from bytes to bits. */
+    size <<= 3;
+    ctx->W[14] = SHA_HTONL((PRUint32)(size >> 32)); /* big-endian 64-bit bit count fills the last 8 bytes */
+    ctx->W[15] = SHA_HTONL((PRUint32)size);
+    shaCompress(&ctx->H[H2X], ctx->W);
+
+    /*
+     *  Output hash
+     */
+    SHA_STORE_RESULT; /* writes H[0..4] to hashout in big-endian order (macro from sha_fast.h) */
+    if (pDigestLen) {
+        *pDigestLen = SHA1_LENGTH;
+    }
+#undef tmp
+}
+
+void
+SHA1_EndRaw(SHA1Context *ctx, unsigned char *hashout,
+            unsigned int *pDigestLen, unsigned int maxDigestLen)
+{
+#if defined(SHA_NEED_TMP_VARIABLE)
+    register PRUint32 tmp; /* scratch variable required by the fallback SHA_HTONL macro */
+#endif
+    PORT_Assert(maxDigestLen >= SHA1_LENGTH);
+
+    SHA_STORE_RESULT; /* emits the current H[0..4] as-is: no padding and no final compression */
+    if (pDigestLen)
+        *pDigestLen = SHA1_LENGTH;
+}
+
+#undef B /* shaCompress below declares a local variable named B */
+/*
+ *  SHA: Compression function, unrolled.
+ *
+ * Some operations in shaCompress are done as 5 groups of 16 operations.
+ * Others are done as 4 groups of 20 operations.
+ * The code below shows that structure.
+ *
+ * The functions that compute the new values of the 5 state variables
+ * A-E are done in 4 groups of 20 operations (or you may also think
+ * of them as being done in 16 groups of 5 operations).  They are
+ * done by the SHA_RNDx macros below, in the right column.
+ *
+ * The functions that set the 16 values of the W array are done in
+ * 5 groups of 16 operations.  The first group is done by the
+ * LOAD macros below, the latter 4 groups are done by SHA_MIX below,
+ * in the left column.
+ *
+ * gcc's optimizer observes that each member of the W array is assigned
+ * a value 5 times in this code.  It reduces the number of store
+ * operations done to the W array in the context (that is, in the X array)
+ * by creating a W array on the stack, and storing the W values there for
+ * the first 4 groups of operations on W, and storing the values in the
+ * context's W array only in the fifth group.  This is undesirable.
+ * It is MUCH bigger code than simply using the context's W array, because
+ * all the offsets to the W array in the stack are 32-bit signed offsets,
+ * and it is no faster than storing the values in the context's W array.
+ *
+ * The original code for sha_fast.c prevented this creation of a separate
+ * W array in the stack by creating a W array of 80 members, each of
+ * whose elements is assigned only once. It also separated the computations
+ * of the W array values and the computations of the values for the 5
+ * state variables into two separate passes, W's, then A-E's so that the
+ * second pass could be done all in registers (except for accessing the W
+ * array) on machines with fewer registers.  The method is suboptimal
+ * for machines with enough registers to do it all in one pass, and it
+ * necessitates using many instructions with 32-bit offsets.
+ *
+ * This code eliminates the separate W array on the stack by a completely
+ * different means: by declaring the X array volatile.  This prevents
+ * the optimizer from trying to reduce the use of the X array by the
+ * creation of a MORE expensive W array on the stack. The result is
+ * that all instructions use signed 8-bit offsets and not 32-bit offsets.
+ *
+ * The combination of this code and the -O3 optimizer flag on GCC 3.4.3
+ * results in code that is 3 times faster than the previous NSS sha_fast
+ * code on AMD64.
+ */
+static void NO_SANITIZE_ALIGNMENT
+shaCompress(volatile SHA_HW_t *X, const PRUint32 *inbuf)
+{
+    register SHA_HW_t A, B, C, D, E; /* the five working variables */
+
+#if defined(SHA_NEED_TMP_VARIABLE)
+    register PRUint32 tmp;
+#endif
+
+#if !defined(SHA_PUT_W_IN_STACK)
+#define XH(n) X[n - H2X] /* state word n, addressed relative to X */
+#define XW(n) X[n - W2X] /* schedule word n, addressed relative to X */
+#else
+    SHA_HW_t w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7,
+        w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
+#define XW(n) w_##n /* schedule words are plain locals in this configuration */
+#define XH(n) X[n]
+#endif
+
+#define K0 0x5a827999L /* round constant for rounds 0-19 */
+#define K1 0x6ed9eba1L /* rounds 20-39 */
+#define K2 0x8f1bbcdcL /* rounds 40-59 */
+#define K3 0xca62c1d6L /* rounds 60-79 */
+
+#define SHA_RND1(a, b, c, d, e, n)                         \
+    a = SHA_ROTL(b, 5) + SHA_F1(c, d, e) + a + XW(n) + K0; \
+    c = SHA_ROTL(c, 30)
+#define SHA_RND2(a, b, c, d, e, n)                         \
+    a = SHA_ROTL(b, 5) + SHA_F2(c, d, e) + a + XW(n) + K1; \
+    c = SHA_ROTL(c, 30)
+#define SHA_RND3(a, b, c, d, e, n)                         \
+    a = SHA_ROTL(b, 5) + SHA_F3(c, d, e) + a + XW(n) + K2; \
+    c = SHA_ROTL(c, 30)
+#define SHA_RND4(a, b, c, d, e, n)                         \
+    a = SHA_ROTL(b, 5) + SHA_F4(c, d, e) + a + XW(n) + K3; \
+    c = SHA_ROTL(c, 30)
+
+#define LOAD(n) XW(n) = SHA_HTONL(inbuf[n]) /* fetch input word n, converting from big-endian */
+
+    A = XH(0); /* start from the current chaining state */
+    B = XH(1);
+    C = XH(2);
+    D = XH(3);
+    E = XH(4);
+
+    LOAD(0); /* rounds 0-15: load W directly from the input block; roles rotate via argument order */
+    SHA_RND1(E, A, B, C, D, 0);
+    LOAD(1);
+    SHA_RND1(D, E, A, B, C, 1);
+    LOAD(2);
+    SHA_RND1(C, D, E, A, B, 2);
+    LOAD(3);
+    SHA_RND1(B, C, D, E, A, 3);
+    LOAD(4);
+    SHA_RND1(A, B, C, D, E, 4);
+    LOAD(5);
+    SHA_RND1(E, A, B, C, D, 5);
+    LOAD(6);
+    SHA_RND1(D, E, A, B, C, 6);
+    LOAD(7);
+    SHA_RND1(C, D, E, A, B, 7);
+    LOAD(8);
+    SHA_RND1(B, C, D, E, A, 8);
+    LOAD(9);
+    SHA_RND1(A, B, C, D, E, 9);
+    LOAD(10);
+    SHA_RND1(E, A, B, C, D, 10);
+    LOAD(11);
+    SHA_RND1(D, E, A, B, C, 11);
+    LOAD(12);
+    SHA_RND1(C, D, E, A, B, 12);
+    LOAD(13);
+    SHA_RND1(B, C, D, E, A, 13);
+    LOAD(14);
+    SHA_RND1(A, B, C, D, E, 14);
+    LOAD(15);
+    SHA_RND1(E, A, B, C, D, 15);
+
+    SHA_MIX(0, 13, 8, 2); /* rounds 16-19: W now comes from SHA_MIX, still F1/K0 */
+    SHA_RND1(D, E, A, B, C, 0);
+    SHA_MIX(1, 14, 9, 3);
+    SHA_RND1(C, D, E, A, B, 1);
+    SHA_MIX(2, 15, 10, 4);
+    SHA_RND1(B, C, D, E, A, 2);
+    SHA_MIX(3, 0, 11, 5);
+    SHA_RND1(A, B, C, D, E, 3);
+
+    SHA_MIX(4, 1, 12, 6); /* rounds 20-31: F2/K1 */
+    SHA_RND2(E, A, B, C, D, 4);
+    SHA_MIX(5, 2, 13, 7);
+    SHA_RND2(D, E, A, B, C, 5);
+    SHA_MIX(6, 3, 14, 8);
+    SHA_RND2(C, D, E, A, B, 6);
+    SHA_MIX(7, 4, 15, 9);
+    SHA_RND2(B, C, D, E, A, 7);
+    SHA_MIX(8, 5, 0, 10);
+    SHA_RND2(A, B, C, D, E, 8);
+    SHA_MIX(9, 6, 1, 11);
+    SHA_RND2(E, A, B, C, D, 9);
+    SHA_MIX(10, 7, 2, 12);
+    SHA_RND2(D, E, A, B, C, 10);
+    SHA_MIX(11, 8, 3, 13);
+    SHA_RND2(C, D, E, A, B, 11);
+    SHA_MIX(12, 9, 4, 14);
+    SHA_RND2(B, C, D, E, A, 12);
+    SHA_MIX(13, 10, 5, 15);
+    SHA_RND2(A, B, C, D, E, 13);
+    SHA_MIX(14, 11, 6, 0);
+    SHA_RND2(E, A, B, C, D, 14);
+    SHA_MIX(15, 12, 7, 1);
+    SHA_RND2(D, E, A, B, C, 15);
+
+    SHA_MIX(0, 13, 8, 2); /* rounds 32-39: F2/K1 continues */
+    SHA_RND2(C, D, E, A, B, 0);
+    SHA_MIX(1, 14, 9, 3);
+    SHA_RND2(B, C, D, E, A, 1);
+    SHA_MIX(2, 15, 10, 4);
+    SHA_RND2(A, B, C, D, E, 2);
+    SHA_MIX(3, 0, 11, 5);
+    SHA_RND2(E, A, B, C, D, 3);
+    SHA_MIX(4, 1, 12, 6);
+    SHA_RND2(D, E, A, B, C, 4);
+    SHA_MIX(5, 2, 13, 7);
+    SHA_RND2(C, D, E, A, B, 5);
+    SHA_MIX(6, 3, 14, 8);
+    SHA_RND2(B, C, D, E, A, 6);
+    SHA_MIX(7, 4, 15, 9);
+    SHA_RND2(A, B, C, D, E, 7);
+
+    SHA_MIX(8, 5, 0, 10); /* rounds 40-47: F3/K2 */
+    SHA_RND3(E, A, B, C, D, 8);
+    SHA_MIX(9, 6, 1, 11);
+    SHA_RND3(D, E, A, B, C, 9);
+    SHA_MIX(10, 7, 2, 12);
+    SHA_RND3(C, D, E, A, B, 10);
+    SHA_MIX(11, 8, 3, 13);
+    SHA_RND3(B, C, D, E, A, 11);
+    SHA_MIX(12, 9, 4, 14);
+    SHA_RND3(A, B, C, D, E, 12);
+    SHA_MIX(13, 10, 5, 15);
+    SHA_RND3(E, A, B, C, D, 13);
+    SHA_MIX(14, 11, 6, 0);
+    SHA_RND3(D, E, A, B, C, 14);
+    SHA_MIX(15, 12, 7, 1);
+    SHA_RND3(C, D, E, A, B, 15);
+
+    SHA_MIX(0, 13, 8, 2); /* rounds 48-59: F3/K2 continues */
+    SHA_RND3(B, C, D, E, A, 0);
+    SHA_MIX(1, 14, 9, 3);
+    SHA_RND3(A, B, C, D, E, 1);
+    SHA_MIX(2, 15, 10, 4);
+    SHA_RND3(E, A, B, C, D, 2);
+    SHA_MIX(3, 0, 11, 5);
+    SHA_RND3(D, E, A, B, C, 3);
+    SHA_MIX(4, 1, 12, 6);
+    SHA_RND3(C, D, E, A, B, 4);
+    SHA_MIX(5, 2, 13, 7);
+    SHA_RND3(B, C, D, E, A, 5);
+    SHA_MIX(6, 3, 14, 8);
+    SHA_RND3(A, B, C, D, E, 6);
+    SHA_MIX(7, 4, 15, 9);
+    SHA_RND3(E, A, B, C, D, 7);
+    SHA_MIX(8, 5, 0, 10);
+    SHA_RND3(D, E, A, B, C, 8);
+    SHA_MIX(9, 6, 1, 11);
+    SHA_RND3(C, D, E, A, B, 9);
+    SHA_MIX(10, 7, 2, 12);
+    SHA_RND3(B, C, D, E, A, 10);
+    SHA_MIX(11, 8, 3, 13);
+    SHA_RND3(A, B, C, D, E, 11);
+
+    SHA_MIX(12, 9, 4, 14); /* rounds 60-63: F4/K3 */
+    SHA_RND4(E, A, B, C, D, 12);
+    SHA_MIX(13, 10, 5, 15);
+    SHA_RND4(D, E, A, B, C, 13);
+    SHA_MIX(14, 11, 6, 0);
+    SHA_RND4(C, D, E, A, B, 14);
+    SHA_MIX(15, 12, 7, 1);
+    SHA_RND4(B, C, D, E, A, 15);
+
+    SHA_MIX(0, 13, 8, 2); /* rounds 64-79: F4/K3 continues */
+    SHA_RND4(A, B, C, D, E, 0);
+    SHA_MIX(1, 14, 9, 3);
+    SHA_RND4(E, A, B, C, D, 1);
+    SHA_MIX(2, 15, 10, 4);
+    SHA_RND4(D, E, A, B, C, 2);
+    SHA_MIX(3, 0, 11, 5);
+    SHA_RND4(C, D, E, A, B, 3);
+    SHA_MIX(4, 1, 12, 6);
+    SHA_RND4(B, C, D, E, A, 4);
+    SHA_MIX(5, 2, 13, 7);
+    SHA_RND4(A, B, C, D, E, 5);
+    SHA_MIX(6, 3, 14, 8);
+    SHA_RND4(E, A, B, C, D, 6);
+    SHA_MIX(7, 4, 15, 9);
+    SHA_RND4(D, E, A, B, C, 7);
+    SHA_MIX(8, 5, 0, 10);
+    SHA_RND4(C, D, E, A, B, 8);
+    SHA_MIX(9, 6, 1, 11);
+    SHA_RND4(B, C, D, E, A, 9);
+    SHA_MIX(10, 7, 2, 12);
+    SHA_RND4(A, B, C, D, E, 10);
+    SHA_MIX(11, 8, 3, 13);
+    SHA_RND4(E, A, B, C, D, 11);
+    SHA_MIX(12, 9, 4, 14);
+    SHA_RND4(D, E, A, B, C, 12);
+    SHA_MIX(13, 10, 5, 15);
+    SHA_RND4(C, D, E, A, B, 13);
+    SHA_MIX(14, 11, 6, 0);
+    SHA_RND4(B, C, D, E, A, 14);
+    SHA_MIX(15, 12, 7, 1);
+    SHA_RND4(A, B, C, D, E, 15);
+
+    XH(0) += A; /* fold the working variables back into the chaining state */
+    XH(1) += B;
+    XH(2) += C;
+    XH(3) += D;
+    XH(4) += E;
+}
+
+/*************************************************************************
+** Code below this line added to make SHA code support BLAPI interface
+*/
+
+SHA1Context *
+SHA1_NewContext(void)
+{
+    SHA1Context *cx;
+
+    /* A non-zeroing allocation is enough: SHA1_Begin sets size and H[0..4] before use. */
+    cx = PORT_New(SHA1Context);
+    return cx; /* returned unchecked; SHA1_Resurrect tests this result against NULL */
+}
+
+/* Zero the context, then free it only when freeit is set; cx is dereferenced unconditionally. */
+void
+SHA1_DestroyContext(SHA1Context *cx, PRBool freeit)
+{
+    memset(cx, 0, sizeof *cx); /* wipe hash state and any buffered input */
+    if (freeit) {
+        PORT_Free(cx);
+    }
+}
+
+SECStatus
+SHA1_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    SHA1Context ctx;     /* one-shot context on the stack */
+    unsigned int outLen; /* receives SHA1_LENGTH; not examined afterwards */
+
+    SHA1_Begin(&ctx);
+    SHA1_Update(&ctx, src, src_length);
+    SHA1_End(&ctx, dest, &outLen, SHA1_LENGTH); /* dest is expected to hold SHA1_LENGTH bytes */
+    memset(&ctx, 0, sizeof ctx);                /* clear the hash state left on the stack */
+    return SECSuccess; /* no failure path: always reports success */
+}
+
+/* Hash a NUL-terminated character string; the terminator itself is not hashed. */
+SECStatus
+SHA1_Hash(unsigned char *dest, const char *src)
+{
+    return SHA1_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src));
+}
+
+/*
+ * PKCS #11 needs to save and restore hash state. The Flatten routines store
+ * a context as a plain stream of bytes; this one reports how many are needed.
+ */
+unsigned int
+SHA1_FlattenSize(SHA1Context *cx)
+{
+    return sizeof(SHA1Context); /* fixed size; cx itself is not read */
+}
+
+SECStatus
+SHA1_Flatten(SHA1Context *cx, unsigned char *space)
+{
+    PORT_Memcpy(space, cx, sizeof(SHA1Context)); /* raw struct image; space must hold SHA1_FlattenSize() bytes */
+    return SECSuccess;
+}
+
+SHA1Context *
+SHA1_Resurrect(unsigned char *space, void *arg)
+{
+    SHA1Context *cx = SHA1_NewContext(); /* caller owns the returned context; arg is unused */
+    if (cx == NULL)
+        return NULL;
+
+    PORT_Memcpy(cx, space, sizeof(SHA1Context)); /* restore the raw image written by SHA1_Flatten */
+    return cx;
+}
+
+void
+SHA1_Clone(SHA1Context *dest, SHA1Context *src)
+{
+    memcpy(dest, src, sizeof *dest); /* byte-for-byte copy of the whole context; must not overlap */
+}
+
+void
+SHA1_TraceState(SHA1Context *ctx)
+{
+    PORT_SetError(PR_NOT_IMPLEMENTED_ERROR); /* tracing is not implemented; ctx is not used */
+}
diff --git a/security/nss/lib/freebl/sha_fast.h b/security/nss/lib/freebl/sha_fast.h
new file mode 100644
index 000000000..4f37d13d0
--- /dev/null
+++ b/security/nss/lib/freebl/sha_fast.h
@@ -0,0 +1,176 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _SHA_FAST_H_
+#define _SHA_FAST_H_
+
+#include "prlong.h"
+#include "blapii.h"
+
+#define SHA1_INPUT_LEN 64
+
+#if defined(IS_64) && !defined(__sparc)
+typedef PRUint64 SHA_HW_t; /* 64-bit builds other than sparc keep each state word in a 64-bit slot */
+#define SHA1_USING_64_BIT 1
+#else
+typedef PRUint32 SHA_HW_t; /* everywhere else a state word is a plain 32-bit value */
+#endif
+
+struct SHA1ContextStr {
+    union { /* one 64-byte input block, viewed two ways */
+        PRUint32 w[16]; /* input buffer */
+        PRUint8 b[64];  /* the same block addressed as bytes */
+    } u;
+    PRUint64 size;  /* count of hashed bytes. */
+    SHA_HW_t H[22]; /* 5 state variables, 16 tmp values, 1 extra */
+};
+
+#if defined(_MSC_VER)
+#include <stdlib.h>
+#if defined(IS_LITTLE_ENDIAN)
+#if (_MSC_VER >= 1300)
+#pragma intrinsic(_byteswap_ulong)
+#define SHA_HTONL(x) _byteswap_ulong(x)
+#elif defined(NSS_X86_OR_X64)
+#ifndef FORCEINLINE
+#if (_MSC_VER >= 1200)
+#define FORCEINLINE __forceinline
+#else
+#define FORCEINLINE __inline
+#endif /* _MSC_VER */
+#endif /* !defined FORCEINLINE */
+#define FASTCALL __fastcall
+
+static FORCEINLINE PRUint32 FASTCALL
+swap4b(PRUint32 dwd)
+{
+    __asm {
+    	mov   eax,dwd
+	bswap eax
+    }
+}
+
+#define SHA_HTONL(x) swap4b(x)
+#endif /* NSS_X86_OR_X64 */
+#endif /* IS_LITTLE_ENDIAN */
+
+#pragma intrinsic(_lrotr, _lrotl)
+#define SHA_ROTL(x, n) _lrotl(x, n)
+#define SHA_ROTL_IS_DEFINED 1
+#endif /* _MSC_VER */
+
+#if defined(__GNUC__)
+/* __x86_64__  and __x86_64 are defined by GCC on x86_64 CPUs */
+#if defined(SHA1_USING_64_BIT)
+static __inline__ PRUint64
+SHA_ROTL(PRUint64 x, PRUint32 n)
+{
+    PRUint32 t = (PRUint32)x; /* only the low 32 bits take part; upper bits of the 64-bit slot are dropped */
+    return ((t << n) | (t >> (32 - n))); /* 32-bit rotate left; n must be 1..31 (callers use 1, 5, 30) */
+}
+#else
+static __inline__ PRUint32
+SHA_ROTL(PRUint32 t, PRUint32 n)
+{
+    return ((t << n) | (t >> (32 - n))); /* 32-bit rotate left; n must be 1..31 */
+}
+#endif
+#define SHA_ROTL_IS_DEFINED 1
+
+#if defined(NSS_X86_OR_X64)
+static __inline__ PRUint32
+swap4b(PRUint32 value)
+{
+    __asm__("bswap %0" /* x86 byte-swap instruction, applied in place to the register operand */
+            : "+r"(value));
+    return (value);
+}
+#define SHA_HTONL(x) swap4b(x)
+
+#elif defined(__thumb2__) ||       \
+    (!defined(__thumb__) &&        \
+     (defined(__ARM_ARCH_6__) ||   \
+      defined(__ARM_ARCH_6J__) ||  \
+      defined(__ARM_ARCH_6K__) ||  \
+      defined(__ARM_ARCH_6Z__) ||  \
+      defined(__ARM_ARCH_6ZK__) || \
+      defined(__ARM_ARCH_6T2__) || \
+      defined(__ARM_ARCH_7__) ||   \
+      defined(__ARM_ARCH_7A__) ||  \
+      defined(__ARM_ARCH_7R__)))
+static __inline__ PRUint32
+swap4b(PRUint32 value)
+{
+    PRUint32 ret;
+    __asm__("rev %0, %1" /* ARM byte-reverse instruction: ret = value with its four bytes reversed */
+            : "=r"(ret)
+            : "r"(value));
+    return ret;
+}
+#define SHA_HTONL(x) swap4b(x)
+
+#endif /* x86 family */
+
+#endif /* __GNUC__ */
+
+#if !defined(SHA_ROTL_IS_DEFINED) /* no compiler-specific rotate above: use a portable macro */
+#define SHA_NEED_TMP_VARIABLE 1 /* tells users of these macros to declare a PRUint32 named tmp */
+#define SHA_ROTL(X, n) (tmp = (X), ((tmp) << (n)) | ((tmp) >> (32 - (n))))
+#endif
+
+#if !defined(SHA_HTONL) /* no compiler-specific byte swap above: use a portable fallback */
+#define SHA_MASK 0x00FF00FF /* selects bytes 0 and 2 of a 32-bit word */
+#if defined(IS_LITTLE_ENDIAN)
+#undef SHA_NEED_TMP_VARIABLE
+#define SHA_NEED_TMP_VARIABLE 1
+#define SHA_HTONL(x) (tmp = (x), tmp = (tmp << 16) | (tmp >> 16), \
+                      ((tmp & SHA_MASK) << 8) | ((tmp >> 8) & SHA_MASK))
+#else
+#define SHA_HTONL(x) (x) /* not little-endian: host order is used unchanged */
+#endif
+#endif
+
+#define SHA_BYTESWAP(x) x = SHA_HTONL(x) /* in-place form; x is evaluated twice */
+
+#define SHA_STORE(n) ((PRUint32*)hashout)[n] = SHA_HTONL(ctx->H[n]) /* uses hashout and ctx from the expanding scope */
+#if defined(HAVE_UNALIGNED_ACCESS) /* CPU tolerates misaligned stores: always write words directly */
+#define SHA_STORE_RESULT \
+    SHA_STORE(0);        \
+    SHA_STORE(1);        \
+    SHA_STORE(2);        \
+    SHA_STORE(3);        \
+    SHA_STORE(4);
+
+#elif defined(IS_LITTLE_ENDIAN) || defined(SHA1_USING_64_BIT) /* H cannot be copied raw: convert via tmpbuf */
+#define SHA_STORE_RESULT                            \
+    if (!((ptrdiff_t)hashout % sizeof(PRUint32))) { \
+        SHA_STORE(0);                               \
+        SHA_STORE(1);                               \
+        SHA_STORE(2);                               \
+        SHA_STORE(3);                               \
+        SHA_STORE(4);                               \
+    } else {                                        \
+        PRUint32 tmpbuf[5];                         \
+        tmpbuf[0] = SHA_HTONL(ctx->H[0]);           \
+        tmpbuf[1] = SHA_HTONL(ctx->H[1]);           \
+        tmpbuf[2] = SHA_HTONL(ctx->H[2]);           \
+        tmpbuf[3] = SHA_HTONL(ctx->H[3]);           \
+        tmpbuf[4] = SHA_HTONL(ctx->H[4]);           \
+        memcpy(hashout, tmpbuf, SHA1_LENGTH);       \
+    }
+
+#else /* big-endian with 32-bit H words: a misaligned hashout gets a raw memcpy of H */
+#define SHA_STORE_RESULT                            \
+    if (!((ptrdiff_t)hashout % sizeof(PRUint32))) { \
+        SHA_STORE(0);                               \
+        SHA_STORE(1);                               \
+        SHA_STORE(2);                               \
+        SHA_STORE(3);                               \
+        SHA_STORE(4);                               \
+    } else {                                        \
+        memcpy(hashout, ctx->H, SHA1_LENGTH);       \
+    }
+#endif
+
+#endif /* _SHA_FAST_H_ */
diff --git a/security/nss/lib/freebl/shsign.h b/security/nss/lib/freebl/shsign.h
new file mode 100644
index 000000000..590c0e6b3
--- /dev/null
+++ b/security/nss/lib/freebl/shsign.h
@@ -0,0 +1,14 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _SHSIGN_H_
+#define _SHSIGN_H_
+
+#define SGN_SUFFIX ".chk" /* presumably the file-name suffix of a library's signature file -- confirm */
+#define NSS_SIGN_CHK_MAGIC1 0xf1 /* presumably the first identifying byte of a .chk file -- confirm */
+#define NSS_SIGN_CHK_MAGIC2 0xc5 /* presumably the second identifying byte -- confirm */
+#define NSS_SIGN_CHK_MAJOR_VERSION 0x01 /* format version, major part */
+#define NSS_SIGN_CHK_MINOR_VERSION 0x02 /* format version, minor part */
+
+#endif /* _SHSIGN_H_ */
diff --git a/security/nss/lib/freebl/shvfy.c b/security/nss/lib/freebl/shvfy.c
new file mode 100644
index 000000000..af4a34fb0
--- /dev/null
+++ b/security/nss/lib/freebl/shvfy.c
@@ -0,0 +1,534 @@
+
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "shsign.h"
+#include "prlink.h"
+#include "prio.h"
+#include "blapi.h"
+#include "seccomon.h"
+#include "stdio.h"
+#include "prmem.h"
+#include "hasht.h"
+#include "pqg.h"
+#include "blapii.h"
+
+/*
+ * Most modern versions of Linux support a speed optimization scheme where an
+ * application called prelink modifies programs and shared libraries to quickly
+ * load if they fit into an already designed address space. In short, prelink
+ * scans the list of programs and libraries on your system, assigns them a
+ * predefined space in the address space, then provides the fixups to the
+ * library.
+ *
+ * The modification of the shared library is correctly detected by the freebl
+ * FIPS checksum scheme where we check a signed hash of the library against the
+ * library itself.
+ *
+ * The prelink command itself can reverse the process of modification and
+ * output the pristine shared library as it was before prelink made its
+ * changes. If FREEBL_USE_PRELINK is set Freebl uses prelink to output the
+ * original copy of the shared library before prelink modified it.
+ */
+#ifdef FREEBL_USE_PRELINK
+#ifndef FREEBL_PRELINK_COMMAND /* keep a command supplied by the build */
+#define FREEBL_PRELINK_COMMAND "/usr/sbin/prelink -u -o -"
+#endif
+#include "private/pprio.h"
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+
+/*
+ * This function returns an NSPR PRFileDesc * which the caller can read to
+ * obtain the pristine value of the shared library, before any OS related
+ * changes to it (usually address fixups).
+ *
+ * If prelink is installed, this file descriptor is a pipe connecting the
+ * output of
+ *            /usr/sbin/prelink -u -o - {Library}
+ * and *pid returns the process id of the prelink child.
+ *
+ * If prelink is not installed, it returns a normal readonly handle to the
+ * library itself and *pid is set to '0'. NULL is returned on failure.
+ */
+PRFileDesc *
+bl_OpenUnPrelink(const char *shName, int *pid)
+{
+    char *command = strdup(FREEBL_PRELINK_COMMAND);
+    char *argString = NULL;
+    char **argv = NULL;
+    char *shNameArg = NULL;
+    char *cp;
+    pid_t child;
+    int argc = 0, argNext = 0;
+    struct stat statBuf;
+    int pipefd[2] = { -1, -1 };
+    int ret;
+
+    *pid = 0;
+    /* strdup() can fail; the copy is tokenized in place below, so give up
+     * here rather than dereference NULL */
+    if (command == NULL) {
+        return NULL;
+    }
+    /* make sure the prelink command exists first. If not, fall back to
+     * just reading the file */
+    for (cp = command; *cp; cp++) {
+        if (*cp == ' ') {
+            *cp++ = 0;
+            argString = cp;
+            break;
+        }
+    }
+    memset(&statBuf, 0, sizeof(statBuf));
+    /* stat the file, follow the link; it has to exist and be a regular
+     * file that somebody may execute */
+    ret = stat(command, &statBuf);
+    if (ret < 0 || !S_ISREG(statBuf.st_mode) ||
+        ((statBuf.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)) {
+        free(command);
+        return PR_Open(shName, PR_RDONLY, 0);
+    }
+
+    /* OK, the prelink command exists and looks correct, use it */
+    /* build the arglist while we can still malloc */
+    /* count the args if any */
+    if (argString && *argString) {
+        /* argString may have leading spaces, strip them off */
+        while (*argString == ' ')
+            argString++;
+        if (*argString) {
+            /* there is at least one arg.. */
+            argc = 1;
+        }
+
+        /* count the rest: Note there is no provision for escaped
+         * spaces here */
+        for (cp = argString; *cp; cp++) {
+            if (*cp == ' ') {
+                while (*cp == ' ')
+                    cp++;
+                if (*cp == 0)
+                    break; /* trailing spaces: don't step past the NUL */
+                argc++;
+            }
+        }
+    }
+
+    /* add the additional args: argv[0] (command), shName, NULL*/
+    argc += 3;
+    argv = PORT_NewArray(char *, argc);
+    if (argv == NULL) {
+        goto loser;
+    }
+
+    /* fill in the arglist */
+    argv[argNext++] = command;
+    if (argString && *argString) {
+        argv[argNext++] = argString;
+        for (cp = argString; *cp; cp++) {
+            if (*cp == ' ') {
+                *cp++ = 0;
+                while (*cp == ' ')
+                    cp++;
+                if (*cp == 0)
+                    break; /* nothing but spaces was left */
+                argv[argNext++] = cp;
+            }
+        }
+    }
+    /* exec doesn't take const char **argv, so do the paranoid copy */
+    shNameArg = strdup(shName);
+    if (shNameArg == NULL) {
+        goto loser;
+    }
+    argv[argNext++] = shNameArg;
+    argv[argNext++] = 0;
+
+    ret = pipe(pipefd);
+    if (ret < 0) {
+        goto loser;
+    }
+
+    /* use vfork() so we don't trigger the pthread_at_fork() handlers */
+    child = vfork();
+    if (child < 0)
+        goto loser;
+    if (child == 0) {
+        /* set up the file descriptors */
+        /* if we need to support BSD, this will need to be an open of
+         * /dev/null and dup2(nullFD, 0)*/
+        close(0);
+        /* associate pipefd[1] with stdout */
+        if (pipefd[1] != 1)
+            dup2(pipefd[1], 1);
+        close(2);
+        close(pipefd[0]);
+        /* should probably close the other file descriptors? */
+
+        execv(command, argv);
+        /* avoid at_exit() handlers */
+        _exit(1); /* shouldn't reach here except on an error */
+    }
+    close(pipefd[1]);
+    pipefd[1] = -1;
+
+    /* this is safe because either vfork() has full fork() semantics, and thus
+     * already has its own address space, or because vfork() has paused
+     * the parent until the exec or exit */
+    free(command);
+    free(shNameArg);
+    PORT_Free(argv);
+
+    *pid = child;
+
+    return PR_ImportPipe(pipefd[0]);
+
+loser:
+    if (pipefd[0] != -1) {
+        close(pipefd[0]);
+    }
+    if (pipefd[1] != -1) {
+        close(pipefd[1]);
+    }
+    free(command);
+    free(shNameArg);
+    PORT_Free(argv);
+
+    return NULL;
+}
+
+/*
+ * bl_CloseUnPrelink -
+ *
+ * Counterpart of bl_OpenUnPrelink: closes the file descriptor it returned
+ * and, when a prelink child was started (pid is non-zero), waits for that
+ * child so that it does not hang around as a zombie. That wait is the only
+ * difference between this and a plain close.
+ */
+void
+bl_CloseUnPrelink(PRFileDesc *file, int pid)
+{
+    /* release our end of the file or pipe first */
+    PR_Close(file);
+    /* pid == 0 means no child was recorded, so there is nothing to reap */
+    if (pid != 0) {
+        waitpid(pid, NULL, 0);
+    }
+}
+#endif
+
+/* #define DEBUG_SHVERIFY 1 */
+
+static char *
+mkCheckFileName(const char *libName)
+{
+    int ln_len = PORT_Strlen(libName);
+    int index = ln_len + 1 - sizeof("." SHLIB_SUFFIX); /* suffix start */
+    char *output = PORT_Alloc(ln_len + sizeof(SGN_SUFFIX));
+    if ((index > 0) && (PORT_Strncmp(&libName[index], "." SHLIB_SUFFIX,
+                                     sizeof("." SHLIB_SUFFIX)) == 0)) {
+        ln_len = index; /* SGN_SUFFIX replaces a trailing "." SHLIB_SUFFIX */
+    }
+    if (output != NULL) { /* on allocation failure just hand back NULL */
+        PORT_Memcpy(output, libName, ln_len);
+        PORT_Memcpy(&output[ln_len], SGN_SUFFIX, sizeof(SGN_SUFFIX));
+    }
+    return output;
+}
+
+static int /* big-endian decode; top byte is shifted as unsigned (no UB) */
+decodeInt(unsigned char *buf)
+{
+    return buf[3] | (buf[2] << 8) | (buf[1] << 16) | ((unsigned)buf[0] << 24);
+}
+
+static SECStatus
+readItem(PRFileDesc *fd, SECItem *item)
+{
+    unsigned char buf[4];
+    int len;
+
+    /* each item is a 4-byte big-endian length followed by that many bytes */
+    if (PR_Read(fd, buf, 4) != 4) {
+        return SECFailure;
+    }
+    len = decodeInt(buf);
+    /* a length with the top bit set is bogus: never allocate or read it */
+    item->data = (len < 0) ? NULL : PORT_Alloc(len);
+    if (item->data == NULL) {
+        item->len = 0;
+        return SECFailure;
+    }
+    item->len = len;
+    if (PR_Read(fd, item->data, len) != len) {
+        PORT_Free(item->data);
+        item->data = NULL;
+        item->len = 0;
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+static PRBool blapi_SHVerifyFile(const char *shName, PRBool self);
+
+static PRBool
+blapi_SHVerify(const char *name, PRFuncPtr addr, PRBool self)
+{
+    /*
+     * Ask NSPR for the pathname of the library identified by (name, addr)
+     * and verify that file. Not getting a pathname at all counts as a
+     * failed verification.
+     */
+    PRBool verified = PR_FALSE;
+    char *libPath = PR_GetLibraryFilePathname(name, addr);
+
+    if (libPath == NULL) {
+        return verified;
+    }
+    verified = blapi_SHVerifyFile(libPath, self);
+    /* the pathname is ours to release once the check is done */
+    PR_Free(libPath);
+    return verified;
+}
+
+PRBool
+BLAPI_SHVerify(const char *name, PRFuncPtr addr)
+{
+    return blapi_SHVerify(name, addr, PR_FALSE); /* self = PR_FALSE */
+}
+
+PRBool
+BLAPI_SHVerifyFile(const char *shName)
+{
+    return blapi_SHVerifyFile(shName, PR_FALSE); /* self = PR_FALSE */
+}
+
+static PRBool
+blapi_SHVerifyFile(const char *shName, PRBool self)
+{
+    /* Check shName against the DSA-signed digest kept in its check file;
+     * "self" is PR_TRUE only for freebl's own check (no BL_FIPSEntryOK gate) */
+    char *checkName = NULL;
+    PRFileDesc *checkFD = NULL;
+    PRFileDesc *shFD = NULL;
+    void *hashcx = NULL;
+    const SECHashObject *hashObj = NULL;
+    SECItem signature = { 0, NULL, 0 };
+    SECItem hash;
+    int bytesRead, offset;
+    SECStatus rv;
+    DSAPublicKey key;
+    int count;
+#ifdef FREEBL_USE_PRELINK
+    int pid = 0;
+#endif
+
+    PRBool result = PR_FALSE; /* anything going wrong means "not verified" */
+    unsigned char buf[4096];
+    unsigned char hashBuf[HASH_LENGTH_MAX];
+
+    PORT_Memset(&key, 0, sizeof(key));
+    hash.data = hashBuf;
+    hash.len = sizeof(hashBuf);
+
+    /* If our integrity check was never run or failed, fail any other
+     * integrity checks to prevent any token going into FIPS mode. */
+    if (!self && (BL_FIPSEntryOK(PR_FALSE) != SECSuccess)) {
+        return PR_FALSE;
+    }
+
+    if (!shName) {
+        goto loser;
+    }
+
+    /* figure out the name of our check file */
+    checkName = mkCheckFileName(shName);
+    if (!checkName) {
+        goto loser;
+    }
+
+    /* open the check File */
+    checkFD = PR_Open(checkName, PR_RDONLY, 0);
+    if (checkFD == NULL) {
+#ifdef DEBUG_SHVERIFY
+        fprintf(stderr, "Failed to open the check file %s: (%d, %d)\n",
+                checkName, (int)PR_GetError(), (int)PR_GetOSError());
+#endif /* DEBUG_SHVERIFY */
+        goto loser;
+    }
+
+    /* read and verify the 12-byte header */
+    bytesRead = PR_Read(checkFD, buf, 12);
+    if (bytesRead != 12) {
+        goto loser;
+    }
+    if ((buf[0] != NSS_SIGN_CHK_MAGIC1) || (buf[1] != NSS_SIGN_CHK_MAGIC2)) {
+        goto loser;
+    }
+    if ((buf[2] != NSS_SIGN_CHK_MAJOR_VERSION) ||
+        (buf[3] < NSS_SIGN_CHK_MINOR_VERSION)) {
+        goto loser;
+    }
+#ifdef notdef
+    if (decodeInt(&buf[8]) != CKK_DSA) {
+        goto loser;
+    }
+#endif
+
+    /* seek past any future header extensions */
+    offset = decodeInt(&buf[4]);
+    if (PR_Seek(checkFD, offset, PR_SEEK_SET) < 0) {
+        goto loser;
+    }
+
+    /* read the key */
+    rv = readItem(checkFD, &key.params.prime);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+    rv = readItem(checkFD, &key.params.subPrime);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+    rv = readItem(checkFD, &key.params.base);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+    rv = readItem(checkFD, &key.publicValue);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+    /* read the signature */
+    rv = readItem(checkFD, &signature);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+
+    /* done with the check file */
+    PR_Close(checkFD);
+    checkFD = NULL;
+
+    hashObj = HASH_GetRawHashObject(PQG_GetHashType(&key.params));
+    if (hashObj == NULL) {
+        goto loser;
+    }
+
+/* open our library file */
+#ifdef FREEBL_USE_PRELINK
+    shFD = bl_OpenUnPrelink(shName, &pid);
+#else
+    shFD = PR_Open(shName, PR_RDONLY, 0);
+#endif
+    if (shFD == NULL) {
+#ifdef DEBUG_SHVERIFY
+        fprintf(stderr, "Failed to open the library file %s: (%d, %d)\n",
+                shName, (int)PR_GetError(), (int)PR_GetOSError());
+#endif /* DEBUG_SHVERIFY */
+        goto loser;
+    }
+
+    /* hash our library file with the hash the key parameters call for */
+    hashcx = hashObj->create();
+    if (hashcx == NULL) {
+        goto loser;
+    }
+    hashObj->begin(hashcx);
+
+    count = 0;
+    while ((bytesRead = PR_Read(shFD, buf, sizeof(buf))) > 0) {
+        hashObj->update(hashcx, buf, bytesRead);
+        count += bytesRead;
+    }
+    if (bytesRead < 0) {
+        goto loser; /* read error: never sign off on a partial digest */
+    }
+
+    hashObj->end(hashcx, hash.data, &hash.len, hash.len);
+
+    /* verify the hash against the check file */
+    if (DSA_VerifyDigest(&key, &signature, &hash) == SECSuccess) {
+        result = PR_TRUE;
+    }
+#ifdef DEBUG_SHVERIFY
+    {
+        int i, j;
+        fprintf(stderr, "File %s: %d bytes\n", shName, count);
+        fprintf(stderr, "  hash: %d bytes\n", hash.len);
+#define STEP 10
+        for (i = 0; i < hash.len; i += STEP) {
+            fprintf(stderr, "   ");
+            for (j = 0; j < STEP && (i + j) < hash.len; j++) {
+                fprintf(stderr, " %02x", hash.data[i + j]);
+            }
+            fprintf(stderr, "\n");
+        }
+        fprintf(stderr, "  signature: %d bytes\n", signature.len);
+        for (i = 0; i < signature.len; i += STEP) {
+            fprintf(stderr, "   ");
+            for (j = 0; j < STEP && (i + j) < signature.len; j++) {
+                fprintf(stderr, " %02x", signature.data[i + j]);
+            }
+            fprintf(stderr, "\n");
+        }
+        fprintf(stderr, "Verified : %s\n", result ? "TRUE" : "FALSE");
+    }
+#endif /* DEBUG_SHVERIFY */
+
+loser:
+    if (checkName != NULL) {
+        PORT_Free(checkName);
+    }
+    if (checkFD != NULL) {
+        PR_Close(checkFD);
+    }
+    if (shFD != NULL) { /* the one place the library handle is closed */
+#ifdef FREEBL_USE_PRELINK
+        bl_CloseUnPrelink(shFD, pid); /* also reaps the prelink child */
+#else
+        PR_Close(shFD);
+#endif
+    }
+    if (hashcx != NULL && hashObj != NULL) {
+        hashObj->destroy(hashcx, PR_TRUE);
+    }
+    if (signature.data != NULL) {
+        PORT_Free(signature.data);
+    }
+    if (key.params.prime.data != NULL) {
+        PORT_Free(key.params.prime.data);
+    }
+    if (key.params.subPrime.data != NULL) {
+        PORT_Free(key.params.subPrime.data);
+    }
+    if (key.params.base.data != NULL) {
+        PORT_Free(key.params.base.data);
+    }
+    if (key.publicValue.data != NULL) {
+        PORT_Free(key.publicValue.data);
+    }
+
+    return result;
+}
+
+PRBool
+BLAPI_VerifySelf(const char *name)
+{
+    /*
+     * With a non-NULL name, look our own library up through the address of
+     * one of our functions (decodeInt) and check that file as a self check.
+     * If name is NULL, freebl is statically linked into softoken, which
+     * will call BLAPI_SHVerify next to verify itself, so report success.
+     */
+    return (name != NULL) ? blapi_SHVerify(name, (PRFuncPtr)decodeInt, PR_TRUE)
+                          : PR_TRUE;
+}
diff --git a/security/nss/lib/freebl/stubs.c b/security/nss/lib/freebl/stubs.c
new file mode 100644
index 000000000..8e0784935
--- /dev/null
+++ b/security/nss/lib/freebl/stubs.c
@@ -0,0 +1,711 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Allow freebl and softoken to be loaded without util or NSPR.
+ *
+ * These symbols are overridden once real NSPR, and libutil are attached.
+ */
+#define _GNU_SOURCE 1
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <fcntl.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <dlfcn.h>
+#include <prio.h>
+#include <prlink.h>
+#include <prlog.h>
+#include <prthread.h>
+#include <plstr.h>
+#include <prinit.h>
+#include <prlock.h>
+#include <prmem.h>
+#include <prerror.h>
+#include <prmon.h>
+#include <pratom.h>
+#include <prsystem.h>
+#include <prinrval.h>
+#include <prtime.h>
+#include <prcvar.h>
+#include <secasn1.h>
+#include <secdig.h>
+#include <secport.h>
+#include <secitem.h>
+#include <blapi.h>
+#include <private/pprio.h>
+
+#define FREEBL_NO_WEAK 1
+
+#define WEAK __attribute__((weak))
+
+#ifdef FREEBL_NO_WEAK
+
+/*
+ * This uses function pointers.
+ *
+ * CONS:  A separate function is needed to
+ * fill in the function pointers.
+ *
+ * PROS: it works on all platforms.
+ *  it allows for dynamically finding nspr and libutil, even once
+ *  softoken is loaded and running. (NOTE: this may be a problem if
+ *  we switch between the stubs and real NSPR on the fly. NSPR will
+ *  do bad things if passed an _FakeArena to free or allocate from).
+ */
+#define STUB_DECLARE(ret, fn, args) \
+    typedef ret(*type_##fn) args;   \
+    static type_##fn ptr_##fn = NULL
+
+#define STUB_SAFE_CALL0(fn) \
+    if (ptr_##fn) {         \
+        return ptr_##fn();  \
+    }
+#define STUB_SAFE_CALL1(fn, a1) \
+    if (ptr_##fn) {             \
+        return ptr_##fn(a1);    \
+    }
+#define STUB_SAFE_CALL2(fn, a1, a2) \
+    if (ptr_##fn) {                 \
+        return ptr_##fn(a1, a2);    \
+    }
+#define STUB_SAFE_CALL3(fn, a1, a2, a3) \
+    if (ptr_##fn) {                     \
+        return ptr_##fn(a1, a2, a3);    \
+    }
+#define STUB_SAFE_CALL4(fn, a1, a2, a3, a4) \
+    if (ptr_##fn) {                         \
+        return ptr_##fn(a1, a2, a3, a4);    \
+    }
+#define STUB_SAFE_CALL6(fn, a1, a2, a3, a4, a5, a6) \
+    if (ptr_##fn) {                                 \
+        return ptr_##fn(a1, a2, a3, a4, a5, a6);    \
+    }
+
+#define STUB_FETCH_FUNCTION(fn)            \
+    ptr_##fn = (type_##fn)dlsym(lib, #fn); \
+    if (ptr_##fn == NULL) {                \
+        return SECFailure;                 \
+    }
+
+#else
+/*
+ * this uses the loader weak attribute. it works automatically, but once
+ * freebl is loaded, the symbols are 'fixed' (later loading of NSPR or
+ * libutil will not resolve these symbols).
+ */
+
+#define STUB_DECLARE(ret, fn, args) \
+    WEAK extern ret fn args
+
+#define STUB_SAFE_CALL0(fn) \
+    if (fn) {               \
+        return fn();        \
+    }
+#define STUB_SAFE_CALL1(fn, a1) \
+    if (fn) {                   \
+        return fn(a1);          \
+    }
+#define STUB_SAFE_CALL2(fn, a1, a2) \
+    if (fn) {                       \
+        return fn(a1, a2);          \
+    }
+#define STUB_SAFE_CALL3(fn, a1, a2, a3) \
+    if (fn) {                           \
+        return fn(a1, a2, a3);          \
+    }
+#define STUB_SAFE_CALL4(fn, a1, a2, a3, a4) \
+    if (fn) {                               \
+        return fn(a1, a2, a3, a4);          \
+    }
+#define STUB_SAFE_CALL6(fn, a1, a2, a3, a4, a5, a6) \
+    if (fn) {                                       \
+        return fn(a1, a2, a3, a4, a5, a6);          \
+    }
+#endif
+
+STUB_DECLARE(void *, PORT_Alloc_Util, (size_t len));
+STUB_DECLARE(void *, PORT_ArenaAlloc_Util, (PLArenaPool * arena, size_t size));
+STUB_DECLARE(void *, PORT_ArenaZAlloc_Util, (PLArenaPool * arena, size_t size));
+STUB_DECLARE(void, PORT_Free_Util, (void *ptr));
+STUB_DECLARE(void, PORT_FreeArena_Util, (PLArenaPool * arena, PRBool zero));
+STUB_DECLARE(int, PORT_GetError_Util, (void));
+STUB_DECLARE(PLArenaPool *, PORT_NewArena_Util, (unsigned long chunksize));
+STUB_DECLARE(void, PORT_SetError_Util, (int value));
+STUB_DECLARE(void *, PORT_ZAlloc_Util, (size_t len));
+STUB_DECLARE(void, PORT_ZFree_Util, (void *ptr, size_t len));
+
+STUB_DECLARE(void, PR_Assert, (const char *s, const char *file, PRIntn ln));
+STUB_DECLARE(PRStatus, PR_Access, (const char *name, PRAccessHow how));
+STUB_DECLARE(PRStatus, PR_CallOnce, (PRCallOnceType * once, PRCallOnceFN func));
+STUB_DECLARE(PRStatus, PR_Close, (PRFileDesc * fd));
+STUB_DECLARE(void, PR_DestroyLock, (PRLock * lock));
+STUB_DECLARE(void, PR_DestroyCondVar, (PRCondVar * cvar));
+STUB_DECLARE(void, PR_Free, (void *ptr));
+STUB_DECLARE(char *, PR_GetLibraryFilePathname, (const char *name,
+                                                 PRFuncPtr addr));
+STUB_DECLARE(PRFileDesc *, PR_ImportPipe, (PROsfd osfd));
+STUB_DECLARE(void, PR_Lock, (PRLock * lock));
+STUB_DECLARE(PRCondVar *, PR_NewCondVar, (PRLock * lock));
+STUB_DECLARE(PRLock *, PR_NewLock, (void));
+STUB_DECLARE(PRStatus, PR_NotifyCondVar, (PRCondVar * cvar));
+STUB_DECLARE(PRStatus, PR_NotifyAllCondVar, (PRCondVar * cvar));
+STUB_DECLARE(PRFileDesc *, PR_Open, (const char *name, PRIntn flags,
+                                     PRIntn mode));
+STUB_DECLARE(PRInt32, PR_Read, (PRFileDesc * fd, void *buf, PRInt32 amount));
+STUB_DECLARE(PROffset32, PR_Seek, (PRFileDesc * fd, PROffset32 offset,
+                                   PRSeekWhence whence));
+STUB_DECLARE(PRStatus, PR_Sleep, (PRIntervalTime ticks));
+STUB_DECLARE(PRStatus, PR_Unlock, (PRLock * lock));
+STUB_DECLARE(PRStatus, PR_WaitCondVar, (PRCondVar * cvar,
+                                        PRIntervalTime timeout));
+STUB_DECLARE(char *, PR_GetEnvSecure, (const char *));
+
+STUB_DECLARE(SECItem *, SECITEM_AllocItem_Util, (PLArenaPool * arena,
+                                                 SECItem *item, unsigned int len));
+STUB_DECLARE(SECComparison, SECITEM_CompareItem_Util, (const SECItem *a,
+                                                       const SECItem *b));
+STUB_DECLARE(SECStatus, SECITEM_CopyItem_Util, (PLArenaPool * arena,
+                                                SECItem *to, const SECItem *from));
+STUB_DECLARE(void, SECITEM_FreeItem_Util, (SECItem * zap, PRBool freeit));
+STUB_DECLARE(void, SECITEM_ZfreeItem_Util, (SECItem * zap, PRBool freeit));
+STUB_DECLARE(SECOidTag, SECOID_FindOIDTag_Util, (const SECItem *oid));
+STUB_DECLARE(int, NSS_SecureMemcmp, (const void *a, const void *b, size_t n));
+
+#define PORT_ZNew_stub(type) (type *)PORT_ZAlloc_stub(sizeof(type))
+#define PORT_New_stub(type) (type *)PORT_Alloc_stub(sizeof(type))
+#define PORT_ZNewArray_stub(type, num) \
+    (type *)PORT_ZAlloc_stub(sizeof(type) * (num))
+
+/*
+ * NOTE: in order to support hashing only the memory allocation stubs,
+ * the get library name stubs, and the file io stubs are needed (the latter
+ * two are for the library verification). The remaining stubs are simply to
+ * compile. Attempts to use the library for other operations without NSPR
+ * will most likely fail.
+ */
+
+/* memory */
+extern void *
+PORT_Alloc_stub(size_t len)
+{
+    STUB_SAFE_CALL1(PORT_Alloc_Util, len); /* returns via the real one if set */
+    return malloc(len);                    /* otherwise the plain C heap */
+}
+
+extern void
+PORT_Free_stub(void *ptr)
+{
+    STUB_SAFE_CALL1(PORT_Free_Util, ptr); /* real routine, once resolved */
+    free(ptr);                            /* otherwise release to the C heap */
+}
+
+extern void *
+PORT_ZAlloc_stub(size_t len)
+{
+    /* prefer the real zeroing allocator once it has been resolved */
+    STUB_SAFE_CALL1(PORT_ZAlloc_Util, len);
+
+    /* fallback: len zero-filled bytes from the C heap, or NULL when the
+     * allocation fails */
+    return calloc(1, len);
+}
+
+extern void
+PORT_ZFree_stub(void *ptr, size_t len)
+{
+    STUB_SAFE_CALL2(PORT_ZFree_Util, ptr, len); /* real routine, if resolved */
+    if (ptr != NULL) memset(ptr, 0, len); /* wiping NULL would be undefined */
+    free(ptr);
+}
+
+extern void
+PR_Free_stub(void *ptr)
+{
+    STUB_SAFE_CALL1(PR_Free, ptr); /* NSPR's own PR_Free, once resolved */
+    free(ptr);                     /* otherwise release to the C heap */
+}
+
+/*
+ * arenas
+ *
+ */
+extern PLArenaPool *
+PORT_NewArena_stub(unsigned long chunksize)
+{
+    STUB_SAFE_CALL1(PORT_NewArena_Util, chunksize);
+    abort(); /* reached only while PORT_NewArena_Util is unresolved */
+    return NULL;
+}
+
+extern void *
+PORT_ArenaAlloc_stub(PLArenaPool *arena, size_t size)
+{
+    /* forward to the non-zeroing allocator; arenas have no fallback here */
+    STUB_SAFE_CALL2(PORT_ArenaAlloc_Util, arena, size);
+    abort();
+    return NULL;
+}
+
+extern void *
+PORT_ArenaZAlloc_stub(PLArenaPool *arena, size_t size)
+{
+    /* forwards to the real routine when resolved; otherwise aborts */
+    STUB_SAFE_CALL2(PORT_ArenaZAlloc_Util, arena, size);
+    abort();
+    return NULL;
+}
+
+extern void
+PORT_FreeArena_stub(PLArenaPool *arena, PRBool zero)
+{
+    /* forwards to the real routine when resolved; otherwise aborts */
+    STUB_SAFE_CALL2(PORT_FreeArena_Util, arena, zero);
+    abort();
+}
+
+/* io */
+extern PRFileDesc *
+PR_Open_stub(const char *name, PRIntn flags, PRIntn mode)
+{
+    int osFlags;
+    int osFd;
+    int *holder;
+
+    STUB_SAFE_CALL3(PR_Open, name, flags, mode);
+
+    /* access mode: read/write wins over write-only, default is read-only */
+    osFlags = (flags & PR_RDWR) ? O_RDWR
+              : (flags & PR_WRONLY) ? O_WRONLY
+                                    : O_RDONLY;
+    /* only these three modifiers are translated */
+    osFlags |= (flags & PR_EXCL) ? O_EXCL : 0;
+    osFlags |= (flags & PR_APPEND) ? O_APPEND : 0;
+    osFlags |= (flags & PR_TRUNCATE) ? O_TRUNC : 0;
+
+    osFd = open(name, osFlags, mode);
+    if (osFd < 0) {
+        return NULL;
+    }
+    /*
+     * The stand-in for a PRFileDesc is a heap-allocated int holding the OS
+     * descriptor; the other I/O stubs in this file cast it back.
+     */
+    holder = PORT_New_stub(int);
+    if (holder == NULL) {
+        /* could not wrap it: do not leak the descriptor */
+        close(osFd);
+        return NULL;
+    }
+    *holder = osFd;
+    return (PRFileDesc *)holder;
+}
+
+extern PRFileDesc *
+PR_ImportPipe_stub(PROsfd fd)
+{
+    int *holder;
+
+    STUB_SAFE_CALL1(PR_ImportPipe, fd);
+    /* wrap the descriptor in a heap int; NULL (fd untouched) if that fails */
+    holder = PORT_New_stub(int);
+    if (holder != NULL) {
+        *holder = fd;
+    }
+    return (PRFileDesc *)holder;
+}
+
+extern PRStatus
+PR_Close_stub(PRFileDesc *fd)
+{
+    int *holder = (int *)fd;
+
+    STUB_SAFE_CALL1(PR_Close, fd);
+    /* stub descriptors are a heap int wrapping the OS descriptor: close the
+     * descriptor, then release the wrapper; close() errors are not reported */
+    close(*holder);
+    PORT_Free_stub(holder);
+    return PR_SUCCESS;
+}
+
+extern PRInt32
+PR_Read_stub(PRFileDesc *fd, void *buf, PRInt32 amount)
+{
+    STUB_SAFE_CALL3(PR_Read, fd, buf, amount);
+
+    /* fallback: fd is really the heap int created by the open/import stubs,
+     * so read straight from the OS descriptor stored in it */
+    return read(*(int *)fd, buf, amount);
+}
+
+extern PROffset32
+PR_Seek_stub(PRFileDesc *fd, PROffset32 offset, PRSeekWhence whence)
+{
+    int osWhence;
+
+    STUB_SAFE_CALL3(PR_Seek, fd, offset, whence);
+
+    /* translate the NSPR origin; anything that is not CUR or END seeks
+     * from the start of the file */
+    if (whence == PR_SEEK_CUR) {
+        osWhence = SEEK_CUR;
+    } else if (whence == PR_SEEK_END) {
+        osWhence = SEEK_END;
+    } else {
+        osWhence = SEEK_SET;
+    }
+
+    /* fd is the heap int wrapper used by the I/O stubs in this file */
+    return lseek(*(int *)fd, offset, osWhence);
+}
+
+PRStatus
+PR_Access_stub(const char *name, PRAccessHow how)
+{
+    int osMode;
+
+    STUB_SAFE_CALL2(PR_Access, name, how);
+
+    /*
+     * Only the read and write probes are distinguished; every other
+     * request is answered with a plain existence test.
+     */
+    if (how == PR_ACCESS_WRITE_OK) {
+        osMode = W_OK;
+    } else if (how == PR_ACCESS_READ_OK) {
+        osMode = R_OK;
+    } else {
+        osMode = F_OK;
+    }
+
+    /* access() answers 0 for "allowed"; any other result is reported as
+     * a failure */
+    return (access(name, osMode) == 0) ? PR_SUCCESS : PR_FAILURE;
+}
+
+/*
+ * library
+ */
+extern char *
+PR_GetLibraryFilePathname_stub(const char *name, PRFuncPtr addr)
+{
+    Dl_info where;
+    char *copy = NULL;
+
+    STUB_SAFE_CALL2(PR_GetLibraryFilePathname, name, addr);
+
+    /* fallback ignores name: ask the dynamic loader which object holds addr */
+    if (dladdr((void *)addr, &where) != 0) {
+        copy = PORT_Alloc_stub(strlen(where.dli_fname) + 1);
+        if (copy != NULL) {
+            strcpy(copy, where.dli_fname);
+        }
+    }
+    return copy; /* NULL on lookup or allocation failure */
+}
+
+#include <errno.h>
+
+/* errors */
+extern int
+PORT_GetError_stub(void)
+{
+    STUB_SAFE_CALL0(PORT_GetError_Util);
+    return errno; /* fallback: errno doubles as the error slot */
+}
+
+extern void
+PORT_SetError_stub(int value)
+{
+    STUB_SAFE_CALL1(PORT_SetError_Util, value);
+    errno = value; /* fallback: errno doubles as the error slot */
+}
+
+/* misc */
+extern void
+PR_Assert_stub(const char *s, const char *file, PRIntn ln)
+{
+    STUB_SAFE_CALL3(PR_Assert, s, file, ln);
+    fprintf(stderr, "%s line %d: %s\n", file, ln, s); /* fallback report */
+    abort();
+}
+
+/* time */
+extern PRStatus
+PR_Sleep_stub(PRIntervalTime ticks)
+{
+    STUB_SAFE_CALL1(PR_Sleep, ticks);
+    usleep(ticks * 1000); /* NOTE(review): assumes 1 tick == 1ms; confirm */
+    return PR_SUCCESS;    /* usleep()'s own result is not checked */
+}
+
+/* locking */
+extern PRLock *
+PR_NewLock_stub(void)
+{
+    STUB_SAFE_CALL0(PR_NewLock);
+    abort(); /* no lock implementation without the real PR_NewLock */
+    return NULL;
+}
+
+extern PRStatus
+PR_Unlock_stub(PRLock *lock)
+{
+    STUB_SAFE_CALL1(PR_Unlock, lock);
+    abort(); /* reached only while PR_Unlock is unresolved */
+    return PR_FAILURE;
+}
+
+extern void
+PR_Lock_stub(PRLock *lock)
+{
+    STUB_SAFE_CALL1(PR_Lock, lock);
+    abort(); /* reached only while PR_Lock is unresolved */
+    return;
+}
+
+extern void
+PR_DestroyLock_stub(PRLock *lock)
+{
+    STUB_SAFE_CALL1(PR_DestroyLock, lock);
+    abort(); /* reached only while PR_DestroyLock is unresolved */
+    return;
+}
+
+extern PRCondVar *
+PR_NewCondVar_stub(PRLock *lock)
+{
+    STUB_SAFE_CALL1(PR_NewCondVar, lock);
+    abort(); /* no condition variables without the real PR_NewCondVar */
+    return NULL;
+}
+
+extern PRStatus
+PR_NotifyCondVar_stub(PRCondVar *cvar)
+{
+    STUB_SAFE_CALL1(PR_NotifyCondVar, cvar);
+    abort(); /* reached only while PR_NotifyCondVar is unresolved */
+    return PR_FAILURE;
+}
+
+extern PRStatus
+PR_NotifyAllCondVar_stub(PRCondVar *cvar)
+{
+    STUB_SAFE_CALL1(PR_NotifyAllCondVar, cvar);
+    abort(); /* reached only while PR_NotifyAllCondVar is unresolved */
+    return PR_FAILURE;
+}
+
+extern PRStatus
+PR_WaitCondVar_stub(PRCondVar *cvar, PRIntervalTime timeout)
+{
+    STUB_SAFE_CALL2(PR_WaitCondVar, cvar, timeout);
+    abort(); /* reached only while PR_WaitCondVar is unresolved */
+    return PR_FAILURE;
+}
+
+extern char *
+PR_GetEnvSecure_stub(const char *var)
+{
+    STUB_SAFE_CALL1(PR_GetEnvSecure, var);
+    abort(); /* no environment lookup is attempted without the real one */
+    return NULL;
+}
+
+extern void
+PR_DestroyCondVar_stub(PRCondVar *cvar)
+{
+    STUB_SAFE_CALL1(PR_DestroyCondVar, cvar);
+    abort(); /* reached only while PR_DestroyCondVar is unresolved */
+    return;
+}
+
+/*
+ * NOTE: this presupposes GCC 4.1
+ */
+extern PRStatus
+PR_CallOnce_stub(PRCallOnceType *once, PRCallOnceFN func)
+{
+    STUB_SAFE_CALL2(PR_CallOnce, once, func);
+    abort(); /* func is never run by the stub itself */
+    return PR_FAILURE;
+}
+
+/*
+ * SECITEMS implement Item Utilities
+ */
+extern void
+SECITEM_FreeItem_stub(SECItem *zap, PRBool freeit)
+{
+    STUB_SAFE_CALL2(SECITEM_FreeItem_Util, zap, freeit);
+    abort(); /* reached only while SECITEM_FreeItem_Util is unresolved */
+}
+
+extern SECItem *
+SECITEM_AllocItem_stub(PLArenaPool *arena, SECItem *item, unsigned int len)
+{
+    STUB_SAFE_CALL3(SECITEM_AllocItem_Util, arena, item, len);
+    abort(); /* reached only while SECITEM_AllocItem_Util is unresolved */
+    return NULL;
+}
+
+extern SECComparison
+SECITEM_CompareItem_stub(const SECItem *a, const SECItem *b)
+{
+    STUB_SAFE_CALL2(SECITEM_CompareItem_Util, a, b);
+    abort(); /* reached only while SECITEM_CompareItem_Util is unresolved */
+    return SECEqual;
+}
+
+extern SECStatus
+SECITEM_CopyItem_stub(PLArenaPool *arena, SECItem *to, const SECItem *from)
+{
+    STUB_SAFE_CALL3(SECITEM_CopyItem_Util, arena, to, from);
+    abort(); /* reached only while SECITEM_CopyItem_Util is unresolved */
+    return SECFailure;
+}
+
+extern SECOidTag
+SECOID_FindOIDTag_stub(const SECItem *oid)
+{
+    STUB_SAFE_CALL1(SECOID_FindOIDTag_Util, oid);
+    abort(); /* reached only while SECOID_FindOIDTag_Util is unresolved */
+    return SEC_OID_UNKNOWN;
+}
+
+extern void
+SECITEM_ZfreeItem_stub(SECItem *zap, PRBool freeit)
+{
+    STUB_SAFE_CALL2(SECITEM_ZfreeItem_Util, zap, freeit);
+    abort(); /* reached only while SECITEM_ZfreeItem_Util is unresolved */
+}
+
+extern int
+NSS_SecureMemcmp_stub(const void *a, const void *b, size_t n)
+{
+    STUB_SAFE_CALL3(NSS_SecureMemcmp, a, b, n);
+    abort(); /* does not return, which is why no value is returned below */
+}
+
+#ifdef FREEBL_NO_WEAK
+
+static const char *nsprLibName = SHLIB_PREFIX "nspr4." SHLIB_SUFFIX;
+static const char *nssutilLibName = SHLIB_PREFIX "nssutil3." SHLIB_SUFFIX;
+
+static SECStatus
+freebl_InitNSPR(void *lib) /* lib: handle passed to dlsym() for each name */
+{
+    STUB_FETCH_FUNCTION(PR_Free); /* each fetch returns SECFailure if missing */
+    STUB_FETCH_FUNCTION(PR_Open);
+    STUB_FETCH_FUNCTION(PR_ImportPipe);
+    STUB_FETCH_FUNCTION(PR_Close);
+    STUB_FETCH_FUNCTION(PR_Read);
+    STUB_FETCH_FUNCTION(PR_Seek);
+    STUB_FETCH_FUNCTION(PR_GetLibraryFilePathname);
+    STUB_FETCH_FUNCTION(PR_Assert);
+    STUB_FETCH_FUNCTION(PR_Access);
+    STUB_FETCH_FUNCTION(PR_Sleep);
+    STUB_FETCH_FUNCTION(PR_CallOnce);
+    STUB_FETCH_FUNCTION(PR_NewCondVar);
+    STUB_FETCH_FUNCTION(PR_NotifyCondVar);
+    STUB_FETCH_FUNCTION(PR_NotifyAllCondVar);
+    STUB_FETCH_FUNCTION(PR_WaitCondVar);
+    STUB_FETCH_FUNCTION(PR_DestroyCondVar);
+    STUB_FETCH_FUNCTION(PR_NewLock);
+    STUB_FETCH_FUNCTION(PR_Unlock);
+    STUB_FETCH_FUNCTION(PR_Lock);
+    STUB_FETCH_FUNCTION(PR_DestroyLock);
+    STUB_FETCH_FUNCTION(PR_GetEnvSecure);
+    return SECSuccess; /* NOTE(review): an early failure leaves earlier ptr_ set */
+}
+
+static SECStatus
+freebl_InitNSSUtil(void *lib) /* lib: handle passed to dlsym() for each name */
+{
+    STUB_FETCH_FUNCTION(PORT_Alloc_Util); /* SECFailure as soon as one is missing */
+    STUB_FETCH_FUNCTION(PORT_Free_Util);
+    STUB_FETCH_FUNCTION(PORT_ZAlloc_Util);
+    STUB_FETCH_FUNCTION(PORT_ZFree_Util);
+    STUB_FETCH_FUNCTION(PORT_NewArena_Util);
+    STUB_FETCH_FUNCTION(PORT_ArenaAlloc_Util);
+    STUB_FETCH_FUNCTION(PORT_ArenaZAlloc_Util);
+    STUB_FETCH_FUNCTION(PORT_FreeArena_Util);
+    STUB_FETCH_FUNCTION(PORT_GetError_Util);
+    STUB_FETCH_FUNCTION(PORT_SetError_Util);
+    STUB_FETCH_FUNCTION(SECITEM_FreeItem_Util);
+    STUB_FETCH_FUNCTION(SECITEM_AllocItem_Util);
+    STUB_FETCH_FUNCTION(SECITEM_CompareItem_Util);
+    STUB_FETCH_FUNCTION(SECITEM_CopyItem_Util);
+    STUB_FETCH_FUNCTION(SECITEM_ZfreeItem_Util);
+    STUB_FETCH_FUNCTION(SECOID_FindOIDTag_Util);
+    STUB_FETCH_FUNCTION(NSS_SecureMemcmp);
+    return SECSuccess; /* every symbol listed above was found in lib */
+}
+
+/*
+ * fetch the library if it's loaded. For NSS it should already be loaded
+ */
+#define freebl_getLibrary(libName) \
+    dlopen(libName, RTLD_LAZY | RTLD_NOLOAD)
+
+#define freebl_releaseLibrary(lib) \
+    if (lib)                       \
+    dlclose(lib)
+
+static void *FREEBLnsprGlobalLib = NULL;
+static void *FREEBLnssutilGlobalLib = NULL;
+
+void __attribute((destructor)) FREEBL_unload()
+{
+    freebl_releaseLibrary(FREEBLnsprGlobalLib); /* adopted in FREEBL_InitStubs */
+    freebl_releaseLibrary(FREEBLnssutilGlobalLib); /* NULL handles are skipped */
+}
+#endif
+
+/*
+ * Point the stub function pointers at the real NSPR and nssutil routines.
+ *
+ * Neither library is loaded from here: each must already be present in the
+ * process, otherwise (or if it lacks a needed symbol) SECFailure is returned.
+ */
+extern SECStatus
+FREEBL_InitStubs()
+{
+#ifdef FREEBL_NO_WEAK
+    void *handle;
+    SECStatus status;
+
+    /* NSPR goes first; a handle adopted by an earlier call is kept as is */
+    if (FREEBLnsprGlobalLib == NULL) {
+        handle = freebl_getLibrary(nsprLibName);
+        if (handle == NULL) {
+            return SECFailure;
+        }
+        status = freebl_InitNSPR(handle);
+        if (status != SECSuccess) {
+            freebl_releaseLibrary(handle);
+            return status;
+        }
+        FREEBLnsprGlobalLib = handle; /* dropped again in FREEBL_unload */
+    }
+
+    /* then nssutil, following exactly the same steps */
+    if (FREEBLnssutilGlobalLib == NULL) {
+        handle = freebl_getLibrary(nssutilLibName);
+        if (handle == NULL) {
+            return SECFailure;
+        }
+        status = freebl_InitNSSUtil(handle);
+        if (status != SECSuccess) {
+            freebl_releaseLibrary(handle);
+            return status;
+        }
+        FREEBLnssutilGlobalLib = handle; /* dropped again in FREEBL_unload */
+    }
+#endif
+
+    return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/stubs.h b/security/nss/lib/freebl/stubs.h
new file mode 100644
index 000000000..25ec394ec
--- /dev/null
+++ b/security/nss/lib/freebl/stubs.h
@@ -0,0 +1,66 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Allow freebl and softoken to be loaded without util or NSPR.
+ *
+ * These symbols are overridden once real NSPR, and libutil are attached.
+ */
+
+#ifndef _STUBS_H_
+#define _STUBS_H_ 1
+
+#ifdef _LIBUTIL_H_
+/* must be included before util */
+/*#error stubs.h included too late */
+#define MP_DIGITES(x) "stubs included too late"
+#endif
+
+/* hide libutil rename */
+#define _LIBUTIL_H_ 1
+
+#define PORT_Alloc PORT_Alloc_stub
+#define PORT_ArenaAlloc PORT_ArenaAlloc_stub
+#define PORT_ArenaZAlloc PORT_ArenaZAlloc_stub
+#define PORT_Free PORT_Free_stub
+#define PORT_FreeArena PORT_FreeArena_stub
+#define PORT_GetError PORT_GetError_stub
+#define PORT_NewArena PORT_NewArena_stub
+#define PORT_SetError PORT_SetError_stub
+#define PORT_ZAlloc PORT_ZAlloc_stub
+#define PORT_ZFree PORT_ZFree_stub
+
+#define SECITEM_AllocItem SECITEM_AllocItem_stub
+#define SECITEM_CompareItem SECITEM_CompareItem_stub
+#define SECITEM_CopyItem SECITEM_CopyItem_stub
+#define SECITEM_FreeItem SECITEM_FreeItem_stub
+#define SECITEM_ZfreeItem SECITEM_ZfreeItem_stub
+#define SECOID_FindOIDTag SECOID_FindOIDTag_stub
+#define NSS_SecureMemcmp NSS_SecureMemcmp_stub
+
+#define PR_Assert PR_Assert_stub
+#define PR_Access PR_Access_stub
+#define PR_CallOnce PR_CallOnce_stub
+#define PR_Close PR_Close_stub
+#define PR_DestroyCondVar PR_DestroyCondVar_stub
+#define PR_DestroyLock PR_DestroyLock_stub
+#define PR_Free PR_Free_stub
+#define PR_GetLibraryFilePathname PR_GetLibraryFilePathname_stub
+#define PR_ImportPipe PR_ImportPipe_stub
+#define PR_Lock PR_Lock_stub
+#define PR_NewCondVar PR_NewCondVar_stub
+#define PR_NewLock PR_NewLock_stub
+#define PR_NotifyCondVar PR_NotifyCondVar_stub
+#define PR_NotifyAllCondVar PR_NotifyAllCondVar_stub
+#define PR_Open PR_Open_stub
+#define PR_Read PR_Read_stub
+#define PR_Seek PR_Seek_stub
+#define PR_Sleep PR_Sleep_stub
+#define PR_Unlock PR_Unlock_stub
+#define PR_WaitCondVar PR_WaitCondVar_stub
+#define PR_GetEnvSecure PR_GetEnvSecure_stub
+
+extern int FREEBL_InitStubs(void);
+
+#endif
diff --git a/security/nss/lib/freebl/sysrand.c b/security/nss/lib/freebl/sysrand.c
new file mode 100644
index 000000000..0128fa0ee
--- /dev/null
+++ b/security/nss/lib/freebl/sysrand.c
@@ -0,0 +1,49 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "seccomon.h"
+
+#ifndef XP_WIN
+static size_t rng_systemFromNoise(unsigned char *dest, size_t maxLen);
+#endif
+
+#if defined(XP_UNIX) || defined(XP_BEOS)
+#include "unix_rand.c"
+#endif
+#ifdef XP_WIN
+#include "win_rand.c"
+#endif
+#ifdef XP_OS2
+#include "os2_rand.c"
+#endif
+
+#ifndef XP_WIN
+/*
+ * Normal RNG_SystemRNG() isn't available, use the system noise to collect
+ * the required amount of entropy.
+ */
+static size_t
+rng_systemFromNoise(unsigned char *dest, size_t maxLen)
+{
+    size_t retBytes = maxLen;
+
+    while (maxLen) {
+        size_t nbytes = RNG_GetNoise(dest, maxLen);
+
+        PORT_Assert(nbytes != 0);
+
+        dest += nbytes;
+        maxLen -= nbytes;
+
+        /* some hw op to try to introduce more entropy into the next
+         * RNG_GetNoise call */
+        rng_systemJitter();
+    }
+    return retBytes;
+}
+#endif
diff --git a/security/nss/lib/freebl/tlsprfalg.c b/security/nss/lib/freebl/tlsprfalg.c
new file mode 100644
index 000000000..1e5e67886
--- /dev/null
+++ b/security/nss/lib/freebl/tlsprfalg.c
@@ -0,0 +1,134 @@
+/* tlsprfalg.c - TLS Pseudo Random Function (PRF) implementation
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "hasht.h"
+#include "alghmac.h"
+
+#define PHASH_STATE_MAX_LEN HASH_LENGTH_MAX
+
+/* TLS P_hash function */
+SECStatus
+TLS_P_hash(HASH_HashType hashType, const SECItem *secret, const char *label,
+           SECItem *seed, SECItem *result, PRBool isFIPS)
+{
+    unsigned char state[PHASH_STATE_MAX_LEN];
+    unsigned char outbuf[PHASH_STATE_MAX_LEN];
+    unsigned int state_len = 0, label_len = 0, outbuf_len = 0, chunk_size;
+    unsigned int remaining;
+    unsigned char *res;
+    SECStatus status;
+    HMACContext *cx;
+    SECStatus rv = SECFailure;
+    const SECHashObject *hashObj = HASH_GetRawHashObject(hashType);
+
+    PORT_Assert((secret != NULL) && (secret->data != NULL || !secret->len));
+    PORT_Assert((seed != NULL) && (seed->data != NULL));
+    PORT_Assert((result != NULL) && (result->data != NULL));
+
+    remaining = result->len;
+    res = result->data;
+
+    if (label != NULL)
+        label_len = PORT_Strlen(label);
+
+    cx = HMAC_Create(hashObj, secret->data, secret->len, isFIPS);
+    if (cx == NULL)
+        goto loser;
+
+    /* initialize the state = A(1) = HMAC_hash(secret, seed) */
+    HMAC_Begin(cx);
+    HMAC_Update(cx, (unsigned char *)label, label_len);
+    HMAC_Update(cx, seed->data, seed->len);
+    status = HMAC_Finish(cx, state, &state_len, sizeof(state));
+    if (status != SECSuccess)
+        goto loser;
+
+    /* generate a block at a time until we're done */
+    while (remaining > 0) {
+
+        HMAC_Begin(cx);
+        HMAC_Update(cx, state, state_len);
+        if (label_len)
+            HMAC_Update(cx, (unsigned char *)label, label_len);
+        HMAC_Update(cx, seed->data, seed->len);
+        status = HMAC_Finish(cx, outbuf, &outbuf_len, sizeof(outbuf));
+        if (status != SECSuccess)
+            goto loser;
+
+        /* Update the state = A(i) = HMAC_hash(secret, A(i-1)) */
+        HMAC_Begin(cx);
+        HMAC_Update(cx, state, state_len);
+        status = HMAC_Finish(cx, state, &state_len, sizeof(state));
+        if (status != SECSuccess)
+            goto loser;
+
+        chunk_size = PR_MIN(outbuf_len, remaining);
+        PORT_Memcpy(res, &outbuf, chunk_size);
+        res += chunk_size;
+        remaining -= chunk_size;
+    }
+
+    rv = SECSuccess;
+
+loser:
+    /* clear out state so it's not left on the stack */
+    if (cx)
+        HMAC_Destroy(cx, PR_TRUE);
+    PORT_Memset(state, 0, sizeof(state));
+    PORT_Memset(outbuf, 0, sizeof(outbuf));
+    return rv;
+}
+
+SECStatus
+TLS_PRF(const SECItem *secret, const char *label, SECItem *seed,
+        SECItem *result, PRBool isFIPS)
+{
+    SECStatus rv = SECFailure, status;
+    unsigned int i;
+    SECItem tmp = { siBuffer, NULL, 0 };
+    SECItem S1;
+    SECItem S2;
+
+    PORT_Assert((secret != NULL) && (secret->data != NULL || !secret->len));
+    PORT_Assert((seed != NULL) && (seed->data != NULL));
+    PORT_Assert((result != NULL) && (result->data != NULL));
+
+    S1.type = siBuffer;
+    S1.len = (secret->len / 2) + (secret->len & 1);
+    S1.data = secret->data;
+
+    S2.type = siBuffer;
+    S2.len = S1.len;
+    S2.data = secret->data + (secret->len - S2.len);
+
+    tmp.data = (unsigned char *)PORT_Alloc(result->len);
+    if (tmp.data == NULL)
+        goto loser;
+    tmp.len = result->len;
+
+    status = TLS_P_hash(HASH_AlgMD5, &S1, label, seed, result, isFIPS);
+    if (status != SECSuccess)
+        goto loser;
+
+    status = TLS_P_hash(HASH_AlgSHA1, &S2, label, seed, &tmp, isFIPS);
+    if (status != SECSuccess)
+        goto loser;
+
+    for (i = 0; i < result->len; i++)
+        result->data[i] ^= tmp.data[i];
+
+    rv = SECSuccess;
+
+loser:
+    if (tmp.data != NULL)
+        PORT_ZFree(tmp.data, tmp.len);
+    return rv;
+}
diff --git a/security/nss/lib/freebl/unix_rand.c b/security/nss/lib/freebl/unix_rand.c
new file mode 100644
index 000000000..ea3b6af3d
--- /dev/null
+++ b/security/nss/lib/freebl/unix_rand.c
@@ -0,0 +1,1176 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <limits.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include "secrng.h"
+#include "secerr.h"
+#include "prerror.h"
+#include "prthread.h"
+#include "prprf.h"
+#include "prenv.h"
+
+size_t RNG_FileUpdate(const char *fileName, size_t limit);
+
+/*
+ * When copying data to the buffer we want the least significant bytes
+ * from the input since those bits are changing the fastest. The address
+ * of least significant byte depends upon whether we are running on
+ * a big-endian or little-endian machine.
+ *
+ * Does this mean the least significant bytes are the most significant
+ * to us? :-)
+ */
+
+static size_t
+CopyLowBits(void *dst, size_t dstlen, void *src, size_t srclen)
+{
+    union endianness {
+        PRInt32 i;
+        char c[4];
+    } u;
+
+    if (srclen <= dstlen) {
+        memcpy(dst, src, srclen);
+        return srclen;
+    }
+    u.i = 0x01020304;
+    if (u.c[0] == 0x01) {
+        /* big-endian case */
+        memcpy(dst, (char *)src + (srclen - dstlen), dstlen);
+    } else {
+        /* little-endian case */
+        memcpy(dst, src, dstlen);
+    }
+    return dstlen;
+}
+
+#ifdef SOLARIS
+
+#include <kstat.h>
+
+static const PRUint32 entropy_buf_len = 4096; /* buffer up to 4 KB */
+
+/* Buffer entropy data, and feed it to the RNG, entropy_buf_len bytes at a time.
+ * Returns error if RNG_RandomUpdate fails. Also increments *total_fed
+ * by the number of bytes successfully buffered.
+ */
+static SECStatus
+BufferEntropy(char *inbuf, PRUint32 inlen,
+              char *entropy_buf, PRUint32 *entropy_buffered,
+              PRUint32 *total_fed)
+{
+    PRUint32 tocopy = 0;
+    PRUint32 avail = 0;
+    SECStatus rv = SECSuccess;
+
+    while (inlen) {
+        avail = entropy_buf_len - *entropy_buffered;
+        if (!avail) {
+            /* Buffer is full, time to feed it to the RNG. */
+            rv = RNG_RandomUpdate(entropy_buf, entropy_buf_len);
+            if (SECSuccess != rv) {
+                break;
+            }
+            *entropy_buffered = 0;
+            avail = entropy_buf_len;
+        }
+        tocopy = PR_MIN(avail, inlen);
+        memcpy(entropy_buf + *entropy_buffered, inbuf, tocopy);
+        *entropy_buffered += tocopy;
+        inlen -= tocopy;
+        inbuf += tocopy;
+        *total_fed += tocopy;
+    }
+    return rv;
+}
+
+/* Feed kernel statistics structures and ks_data field to the RNG.
+ * Returns status as well as the number of bytes successfully fed to the RNG.
+ */
+static SECStatus
+RNG_kstat(PRUint32 *fed)
+{
+    kstat_ctl_t *kc = NULL;
+    kstat_t *ksp = NULL;
+    PRUint32 entropy_buffered = 0;
+    char *entropy_buf = NULL;
+    SECStatus rv = SECSuccess;
+
+    PORT_Assert(fed);
+    if (!fed) {
+        return SECFailure;
+    }
+    *fed = 0;
+
+    kc = kstat_open();
+    PORT_Assert(kc);
+    if (!kc) {
+        return SECFailure;
+    }
+    entropy_buf = (char *)PORT_Alloc(entropy_buf_len);
+    PORT_Assert(entropy_buf);
+    if (entropy_buf) {
+        for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
+            if (-1 == kstat_read(kc, ksp, NULL)) {
+                /* missing data from a single kstat shouldn't be fatal */
+                continue;
+            }
+            rv = BufferEntropy((char *)ksp, sizeof(kstat_t),
+                               entropy_buf, &entropy_buffered,
+                               fed);
+            if (SECSuccess != rv) {
+                break;
+            }
+
+            if (ksp->ks_data && ksp->ks_data_size > 0 && ksp->ks_ndata > 0) {
+                rv = BufferEntropy((char *)ksp->ks_data, ksp->ks_data_size,
+                                   entropy_buf, &entropy_buffered,
+                                   fed);
+                if (SECSuccess != rv) {
+                    break;
+                }
+            }
+        }
+        if (SECSuccess == rv && entropy_buffered) {
+            /* Buffer is not empty, time to feed it to the RNG */
+            rv = RNG_RandomUpdate(entropy_buf, entropy_buffered);
+        }
+        PORT_Free(entropy_buf);
+    } else {
+        rv = SECFailure;
+    }
+    if (kstat_close(kc)) {
+        PORT_Assert(0);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+#endif
+
+#if defined(SCO) || defined(UNIXWARE) || defined(BSDI) || defined(FREEBSD) || defined(NETBSD) || defined(DARWIN) || defined(OPENBSD) || defined(NTO) || defined(__riscos__)
+#include <sys/times.h>
+
+#define getdtablesize() sysconf(_SC_OPEN_MAX)
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    int ticks;
+    struct tms buffer;
+
+    ticks = times(&buffer);
+    return CopyLowBits(buf, maxbytes, &ticks, sizeof(ticks));
+}
+
+static void
+GiveSystemInfo(void)
+{
+    long si;
+
+    /*
+     * Is this really necessary?  Why not use rand48 or something?
+     */
+    si = sysconf(_SC_CHILD_MAX);
+    RNG_RandomUpdate(&si, sizeof(si));
+
+    si = sysconf(_SC_STREAM_MAX);
+    RNG_RandomUpdate(&si, sizeof(si));
+
+    si = sysconf(_SC_OPEN_MAX);
+    RNG_RandomUpdate(&si, sizeof(si));
+}
+#endif
+
+#if defined(__sun)
+#if defined(__svr4) || defined(SVR4)
+#include <sys/systeminfo.h>
+
+#define getdtablesize() sysconf(_SC_OPEN_MAX)
+
+static void
+GiveSystemInfo(void)
+{
+    int rv;
+    char buf[2000];
+
+    rv = sysinfo(SI_MACHINE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_RELEASE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_HW_SERIAL, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+}
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    hrtime_t t;
+    t = gethrtime();
+    if (t) {
+        return CopyLowBits(buf, maxbytes, &t, sizeof(t));
+    }
+    return 0;
+}
+#else /* SunOS (Sun, but not SVR4) */
+
+extern long sysconf(int name);
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    return 0;
+}
+
+static void
+GiveSystemInfo(void)
+{
+    long si;
+
+    /* This is not very good */
+    si = sysconf(_SC_CHILD_MAX);
+    RNG_RandomUpdate(&si, sizeof(si));
+}
+#endif
+#endif /* Sun */
+
+#if defined(__hpux)
+#include <sys/unistd.h>
+
+#define getdtablesize() sysconf(_SC_OPEN_MAX)
+
+#if defined(__ia64)
+#include <ia64/sys/inline.h>
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    PRUint64 t;
+
+    t = _Asm_mov_from_ar(_AREG44);
+    return CopyLowBits(buf, maxbytes, &t, sizeof(t));
+}
+#else
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    extern int ret_cr16();
+    int cr16val;
+
+    cr16val = ret_cr16();
+    return CopyLowBits(buf, maxbytes, &cr16val, sizeof(cr16val));
+}
+#endif
+
+static void
+GiveSystemInfo(void)
+{
+    long si;
+
+    /* This is not very good */
+    si = sysconf(_AES_OS_VERSION);
+    RNG_RandomUpdate(&si, sizeof(si));
+    si = sysconf(_SC_CPU_VERSION);
+    RNG_RandomUpdate(&si, sizeof(si));
+}
+#endif /* HPUX */
+
+#if defined(OSF1)
+#include <sys/types.h>
+#include <sys/sysinfo.h>
+#include <sys/systeminfo.h>
+#include <c_asm.h>
+
+static void
+GiveSystemInfo(void)
+{
+    char buf[BUFSIZ];
+    int rv;
+    int off = 0;
+
+    rv = sysinfo(SI_MACHINE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_RELEASE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_HW_SERIAL, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+}
+
+/*
+ * Use the "get the cycle counter" instruction on the alpha.
+ * The low 32 bits completely turn over in less than a minute.
+ * The high 32 bits are some non-counter gunk that changes sometimes.
+ */
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    unsigned long t;
+
+    t = asm("rpcc %v0");
+    return CopyLowBits(buf, maxbytes, &t, sizeof(t));
+}
+
+#endif /* Alpha */
+
+#if defined(_IBMR2)
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    return 0;
+}
+
+static void
+GiveSystemInfo(void)
+{
+    /* XXX haven't found any yet! */
+}
+#endif /* IBM R2 */
+
+#if defined(LINUX)
+#include <sys/sysinfo.h>
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    return 0;
+}
+
+static void
+GiveSystemInfo(void)
+{
+#ifndef NO_SYSINFO
+    struct sysinfo si;
+    if (sysinfo(&si) == 0) {
+        RNG_RandomUpdate(&si, sizeof(si));
+    }
+#endif
+}
+#endif /* LINUX */
+
+#if defined(NCR)
+
+#include <sys/utsname.h>
+#include <sys/systeminfo.h>
+
+#define getdtablesize() sysconf(_SC_OPEN_MAX)
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    return 0;
+}
+
+static void
+GiveSystemInfo(void)
+{
+    int rv;
+    char buf[2000];
+
+    rv = sysinfo(SI_MACHINE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_RELEASE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_HW_SERIAL, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+}
+
+#endif /* NCR */
+
+#if defined(sgi)
+#include <fcntl.h>
+#undef PRIVATE
+#include <sys/mman.h>
+#include <sys/syssgi.h>
+#include <sys/immu.h>
+#include <sys/systeminfo.h>
+#include <sys/utsname.h>
+#include <wait.h>
+
+static void
+GiveSystemInfo(void)
+{
+    int rv;
+    char buf[4096];
+
+    rv = syssgi(SGI_SYSID, &buf[0]);
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, MAXSYSIDSIZE);
+    }
+#ifdef SGI_RDUBLK
+    rv = syssgi(SGI_RDUBLK, getpid(), &buf[0], sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, sizeof(buf));
+    }
+#endif /* SGI_RDUBLK */
+    rv = syssgi(SGI_INVENT, SGI_INV_READ, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, sizeof(buf));
+    }
+    rv = sysinfo(SI_MACHINE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_RELEASE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_HW_SERIAL, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+}
+
+/* Copy the low bits of the SGI cycle counter into buf; returns 0 if none. */
+static size_t
+GetHighResClock(void *buf, size_t maxbuf)
+{
+    unsigned phys_addr, raddr, cycleval;
+    static volatile unsigned *iotimer_addr = NULL;
+    static int tries = 0;
+    static int cntr_size;
+    int mfd;
+    long s0[2];
+
+#ifndef SGI_CYCLECNTR_SIZE
+#define SGI_CYCLECNTR_SIZE 165 /* Size user needs to use to read CC */
+#endif
+
+    if (iotimer_addr == NULL) {
+        if (tries++ > 1) {
+            /* Don't keep trying if it didn't work */
+            return 0;
+        }
+
+        /*
+        ** For SGI machines we can use the cycle counter, if it has one,
+        ** to generate some truly random numbers
+        */
+        phys_addr = syssgi(SGI_QUERY_CYCLECNTR, &cycleval);
+        if (phys_addr) {
+            int pgsz = getpagesize();
+            int pgoffmask = pgsz - 1;
+
+            raddr = phys_addr & ~pgoffmask;
+            mfd = open("/dev/mmem", O_RDONLY);
+            if (mfd < 0) {
+                return 0;
+            }
+            iotimer_addr = (unsigned *)
+                mmap(0, pgoffmask, PROT_READ, MAP_PRIVATE, mfd, (int)raddr);
+            if (iotimer_addr == (void *)-1) {
+                close(mfd);
+                iotimer_addr = NULL;
+                return 0;
+            }
+            iotimer_addr = (unsigned *)((__psint_t)iotimer_addr | (phys_addr & pgoffmask));
+            /*
+             * The file 'mfd' is purposefully not closed.
+             */
+            cntr_size = syssgi(SGI_CYCLECNTR_SIZE);
+            if (cntr_size < 0) {
+                struct utsname utsinfo;
+
+                /*
+                 * We must be executing on a 6.0 or earlier system, since the
+                 * SGI_CYCLECNTR_SIZE call is not supported.
+                 *
+                 * The only pre-6.1 platforms with 64-bit counters are
+                 * IP19 and IP21 (Challenge, PowerChallenge, Onyx).
+                 */
+                uname(&utsinfo);
+                if (!strncmp(utsinfo.machine, "IP19", 4) ||
+                    !strncmp(utsinfo.machine, "IP21", 4))
+                    cntr_size = 64;
+                else
+                    cntr_size = 32;
+            }
+            cntr_size /= 8; /* Convert from bits to bytes */
+        }
+    }
+    if (iotimer_addr == NULL)
+        return 0; /* no cycle counter was found, so there is nothing to read */
+    s0[0] = *iotimer_addr;
+    if (cntr_size > 4)
+        s0[1] = *(iotimer_addr + 1);
+    return CopyLowBits(buf, maxbuf, &s0, cntr_size);
+}
+#endif
+
+#if defined(sony)
+#include <sys/systeminfo.h>
+
+#define getdtablesize() sysconf(_SC_OPEN_MAX)
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    return 0;
+}
+
+static void
+GiveSystemInfo(void)
+{
+    int rv;
+    char buf[2000];
+
+    rv = sysinfo(SI_MACHINE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_RELEASE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_HW_SERIAL, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+}
+#endif /* sony */
+
+#if defined(sinix)
+#include <sys/systeminfo.h>
+#include <sys/times.h>
+
+int gettimeofday(struct timeval *, struct timezone *);
+int gethostname(char *, int);
+
+#define getdtablesize() sysconf(_SC_OPEN_MAX)
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    int ticks;
+    struct tms buffer;
+
+    ticks = times(&buffer);
+    return CopyLowBits(buf, maxbytes, &ticks, sizeof(ticks));
+}
+
+static void
+GiveSystemInfo(void)
+{
+    int rv;
+    char buf[2000];
+
+    rv = sysinfo(SI_MACHINE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_RELEASE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_HW_SERIAL, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+}
+#endif /* sinix */
+
+#ifdef BEOS
+#include <be/kernel/OS.h>
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    bigtime_t bigtime; /* Actually a int64 */
+
+    bigtime = real_time_clock_usecs();
+    return CopyLowBits(buf, maxbytes, &bigtime, sizeof(bigtime));
+}
+
+static void
+GiveSystemInfo(void)
+{
+    system_info info; /* real storage for get_system_info to fill in */
+    PRInt32 val;
+    /* feed a few kernel counters to the RNG, if they can be read at all */
+    if (get_system_info(&info) == B_OK) {
+        val = info.boot_time;
+        RNG_RandomUpdate(&val, sizeof(val));
+        val = info.used_pages;
+        RNG_RandomUpdate(&val, sizeof(val));
+        val = info.used_ports;
+        RNG_RandomUpdate(&val, sizeof(val));
+        val = info.used_threads;
+        RNG_RandomUpdate(&val, sizeof(val));
+        val = info.used_teams;
+        RNG_RandomUpdate(&val, sizeof(val));
+    }
+}
+#endif /* BEOS */
+
+#if defined(nec_ews)
+#include <sys/systeminfo.h>
+
+#define getdtablesize() sysconf(_SC_OPEN_MAX)
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    return 0;
+}
+
+static void
+GiveSystemInfo(void)
+{
+    int rv;
+    char buf[2000];
+
+    rv = sysinfo(SI_MACHINE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_RELEASE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_HW_SERIAL, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+}
+#endif /* nec_ews */
+
+size_t
+RNG_GetNoise(void *buf, size_t maxbytes)
+{
+    struct timeval tv;
+    int n = 0;
+    int c;
+
+    n = GetHighResClock(buf, maxbytes);
+    maxbytes -= n;
+
+    (void)gettimeofday(&tv, 0);
+    c = CopyLowBits((char *)buf + n, maxbytes, &tv.tv_usec, sizeof(tv.tv_usec));
+    n += c;
+    maxbytes -= c;
+    c = CopyLowBits((char *)buf + n, maxbytes, &tv.tv_sec, sizeof(tv.tv_sec));
+    n += c;
+    return n;
+}
+
+#define SAFE_POPEN_MAXARGS 10 /* must be at least 2 */
+
+/*
+ * safe_popen runs cmd (split on blanks, no shell) with a sanitized
+ * environment and returns a stream reading its stdout and stderr, or 0 on
+ * failure. Only a single open child process is supported at any time.
+ */
+static pid_t safe_popen_pid;
+static struct sigaction oldact;
+
+static FILE *
+safe_popen(char *cmd)
+{
+    int p[2], fd, argc;
+    pid_t pid;
+    char *argv[SAFE_POPEN_MAXARGS + 1];
+    FILE *fp;
+    static char blank[] = " \t";
+    static struct sigaction newact;
+
+    if (pipe(p) < 0)
+        return 0;
+
+    fp = fdopen(p[0], "r");
+    if (fp == 0) {
+        close(p[0]);
+        close(p[1]);
+        return 0;
+    }
+
+    /* Restore default SIGCHLD handling so the child can be reaped by waitpid */
+    newact.sa_handler = SIG_DFL;
+    newact.sa_flags = 0;
+    sigfillset(&newact.sa_mask);
+    sigaction(SIGCHLD, &newact, &oldact);
+
+    pid = fork();
+    switch (pid) {
+        int ndesc;
+
+        case -1:
+            fclose(fp); /* this closes p[0], the fd associated with fp */
+            close(p[1]);
+            sigaction(SIGCHLD, &oldact, NULL);
+            return 0;
+
+        case 0:
+            /* dup write-side of pipe to stderr and stdout */
+            if (p[1] != 1)
+                dup2(p[1], 1);
+            if (p[1] != 2)
+                dup2(p[1], 2);
+
+            /*
+             * close the other file descriptors, except stdin which we
+             * try reassociating with /dev/null, first (bug 174993)
+             */
+            if (!freopen("/dev/null", "r", stdin))
+                close(0);
+            ndesc = getdtablesize();
+            for (fd = PR_MIN(65536, ndesc); --fd > 2; close(fd))
+                ;
+
+            /* clean up environment in the child process */
+            putenv("PATH=/bin:/usr/bin:/sbin:/usr/sbin:/etc:/usr/etc");
+            putenv("SHELL=/bin/sh");
+            putenv("IFS= \t");
+
+            /* The caller may have passed us a string that is in text
+             * space, which may be illegal to modify; work on a copy. */
+            cmd = strdup(cmd);
+            if (cmd == NULL)
+                _exit(127);
+            /* format argv */
+            argv[0] = strtok(cmd, blank);
+            argc = 1;
+            while ((argv[argc] = strtok(0, blank)) != 0) {
+                if (++argc == SAFE_POPEN_MAXARGS) {
+                    argv[argc] = 0;
+                    break;
+                }
+            }
+
+            /* and away we go */
+            execvp(argv[0], argv);
+            _exit(127); /* don't run the parent's atexit/stdio cleanup */
+            break;
+
+        default:
+            close(p[1]);
+            break;
+    }
+
+    /* non-zero means there's a cmd running */
+    safe_popen_pid = pid;
+    return fp;
+}
+
+static int
+safe_pclose(FILE *fp)
+{
+    pid_t pid;
+    int status = -1, rv;
+
+    if ((pid = safe_popen_pid) == 0)
+        return -1;
+    safe_popen_pid = 0;
+
+    fclose(fp);
+
+    /* yield the processor so the child gets some time to exit normally */
+    PR_Sleep(PR_INTERVAL_NO_WAIT);
+
+    /* if the child hasn't exited, kill it -- we're done with its output */
+    while ((rv = waitpid(pid, &status, WNOHANG)) == -1 && errno == EINTR)
+        ;
+    if (rv == 0) {
+        kill(pid, SIGKILL);
+        while ((rv = waitpid(pid, &status, 0)) == -1 && errno == EINTR)
+            ;
+    }
+
+    /* Restore the SIGCHLD handler saved in oldact before returning */
+    sigaction(SIGCHLD, &oldact, NULL);
+
+    return status;
+}
+
+#ifdef DARWIN
+#include <TargetConditionals.h>
+#if !TARGET_OS_IPHONE
+#include <crt_externs.h>
+#endif
+#endif
+
+/* Fork netstat to collect its output by default. Do not unset this unless
+ * another source of entropy is available
+ */
+#define DO_NETSTAT 1
+
+void
+RNG_SystemInfoForRNG(void)
+{
+    FILE *fp;
+    char buf[BUFSIZ];
+    size_t bytes;
+    const char *const *cp;
+    char *randfile;
+#ifdef DARWIN
+#if TARGET_OS_IPHONE
+    /* iOS does not expose a way to access environ. */
+    char **environ = NULL;
+#else
+    char **environ = *_NSGetEnviron();
+#endif
+#else
+    extern char **environ;
+#endif
+#ifdef BEOS
+    static const char *const files[] = {
+        "/boot/var/swap",
+        "/boot/var/log/syslog",
+        "/boot/var/tmp",
+        "/boot/home/config/settings",
+        "/boot/home",
+        0
+    };
+#else
+    static const char *const files[] = {
+        "/etc/passwd",
+        "/etc/utmp",
+        "/tmp",
+        "/var/tmp",
+        "/usr/tmp",
+        0
+    };
+#endif
+
+#if defined(BSDI)
+    static char netstat_ni_cmd[] = "netstat -nis";
+#else
+    static char netstat_ni_cmd[] = "netstat -ni";
+#endif
+
+    GiveSystemInfo();
+
+    bytes = RNG_GetNoise(buf, sizeof(buf));
+    RNG_RandomUpdate(buf, bytes);
+
+    /*
+     * Pass the C environment and the addresses of the pointers to the
+     * hash function. This makes the random number function depend on the
+     * execution environment of the user and on the platform the program
+     * is running on.
+     */
+    if (environ != NULL) {
+        cp = (const char *const *)environ;
+        while (*cp) {
+            RNG_RandomUpdate(*cp, strlen(*cp));
+            cp++;
+        }
+        RNG_RandomUpdate(environ, (char *)cp - (char *)environ);
+    }
+
+    /* Give in system information */
+    if (gethostname(buf, sizeof(buf)) == 0) {
+        RNG_RandomUpdate(buf, strlen(buf));
+    }
+    GiveSystemInfo();
+
+    /* grab some data from system's PRNG before any other files. */
+    bytes = RNG_FileUpdate("/dev/urandom", SYSTEM_RNG_SEED_COUNT);
+
+    /* If the user points us to a random file, pass it through the rng */
+    randfile = PR_GetEnvSecure("NSRANDFILE");
+    if ((randfile != NULL) && (randfile[0] != '\0')) {
+        char *randCountString = PR_GetEnvSecure("NSRANDCOUNT");
+        int randCount = randCountString ? atoi(randCountString) : 0;
+        if (randCount != 0) {
+            RNG_FileUpdate(randfile, randCount);
+        } else {
+            RNG_FileForRNG(randfile);
+        }
+    }
+
+    /* pass other files through */
+    for (cp = files; *cp; cp++)
+        RNG_FileForRNG(*cp);
+
+/*
+ * Bug 100447: On BSD/OS 4.2 and 4.3, we have problem calling safe_popen
+ * in a pthreads environment.  Therefore, we call safe_popen last and on
+ * BSD/OS we do not call safe_popen when we succeeded in getting data
+ * from /dev/urandom.
+ *
+ * Bug 174993: On platforms providing /dev/urandom, don't fork netstat
+ * either, if data has been gathered successfully.
+ */
+
+#if defined(BSDI) || defined(FREEBSD) || defined(NETBSD) || defined(OPENBSD) || defined(DARWIN) || defined(LINUX) || defined(HPUX)
+    if (bytes)
+        return;
+#endif
+
+#ifdef SOLARIS
+
+/*
+ * On Solaris, NSS may be initialized automatically from libldap in
+ * applications that are unaware of the use of NSS. safe_popen forks, and
+ * sometimes creates issues with some applications' pthread_atfork handlers.
+ * We always have /dev/urandom on Solaris 9 and above as an entropy source,
+ * and for Solaris 8 we have the libkstat interface, so we don't need to
+ * fork netstat.
+ */
+
+#undef DO_NETSTAT
+    if (!bytes) {
+        /* On Solaris 8, /dev/urandom isn't available, so we use libkstat. */
+        PRUint32 kstat_bytes = 0;
+        if (SECSuccess != RNG_kstat(&kstat_bytes)) {
+            PORT_Assert(0);
+        }
+        bytes += kstat_bytes;
+        PORT_Assert(bytes);
+    }
+#endif
+
+#ifdef DO_NETSTAT
+    fp = safe_popen(netstat_ni_cmd);
+    if (fp != NULL) {
+        while ((bytes = fread(buf, 1, sizeof(buf), fp)) > 0)
+            RNG_RandomUpdate(buf, bytes);
+        safe_pclose(fp);
+    }
+#endif
+}
+
+#define TOTAL_FILE_LIMIT 1000000 /* one million */
+
+/* Feed the stat() record of |fileName|, up to |limit| bytes of its contents,
+ * and a closing clock sample into the RNG.  Returns the number of bytes
+ * read from the file itself. */
+size_t
+RNG_FileUpdate(const char *fileName, size_t limit)
+{
+    static size_t totalFileBytes = 0; /* file bytes consumed by all calls */
+    unsigned char buffer[BUFSIZ];
+    struct stat stat_buf;
+    size_t fileBytes = 0;
+    size_t want;
+    FILE *file;
+    int bytes;
+    int fd;
+
+    /* zero first: struct stat has padding holes that upset valgrind */
+    memset(&stat_buf, 0, sizeof(stat_buf));
+    if (stat((char *)fileName, &stat_buf) < 0)
+        return 0;
+    RNG_RandomUpdate(&stat_buf, sizeof(stat_buf));
+
+    file = fopen(fileName, "r");
+    if (file != NULL) {
+        /* Use read() on the raw descriptor rather than fread(): stdio
+         * buffering could pull more bytes out of /dev/urandom than were
+         * asked for, and unbuffered fread may return EOF on Android.
+         * Each read is at most BUFSIZ bytes, so buffering gains nothing. */
+        fd = fileno(file);
+        /* a freshly opened stream should always have a descriptor */
+        PORT_Assert(fd != -1);
+        if (fd != -1) {
+            while (fileBytes < limit) {
+                want = limit - fileBytes;
+                if (want > sizeof buffer)
+                    want = sizeof buffer;
+                bytes = read(fd, buffer, want);
+                if (bytes <= 0)
+                    break; /* EOF or read error */
+                RNG_RandomUpdate(buffer, bytes);
+                fileBytes += bytes;
+                totalFileBytes += bytes;
+                /* once the global budget is spent, every later file
+                 * contributes only its first buffer-full */
+                if (totalFileBytes > TOTAL_FILE_LIMIT)
+                    break;
+            }
+        }
+        fclose(file);
+    }
+    /* finish with a fresh snapshot of the highest-resolution clock */
+    bytes = RNG_GetNoise(buffer, sizeof(buffer));
+    RNG_RandomUpdate(buffer, bytes);
+    return fileBytes;
+}
+
+void
+RNG_FileForRNG(const char *fileName)
+{
+    RNG_FileUpdate(fileName, TOTAL_FILE_LIMIT); /* byte count is ignored */
+}
+
+/* Read |fileName| until fread() returns no more data, discarding what is
+ * read; a file that cannot be opened is skipped. */
+void
+ReadSingleFile(const char *fileName)
+{
+    unsigned char scratch[BUFSIZ];
+    FILE *stream = fopen(fileName, "rb");
+    if (stream == NULL)
+        return;
+    while (fread(scratch, 1, sizeof(scratch), stream) != 0)
+        continue;
+    fclose(stream);
+}
+
+#define _POSIX_PTHREAD_SEMANTICS
+#include <dirent.h>
+
+/* PR_TRUE iff "dir/file" fits in PATH_MAX and stat() reports a regular file */
+PRBool
+ReadFileOK(char *dir, char *file)
+{
+    struct stat stat_buf;
+    char filename[PATH_MAX];
+    int count = snprintf(filename, sizeof filename, "%s/%s", dir, file);
+
+    /* snprintf reports truncation by returning >= the buffer size */
+    if (count <= 0 || (size_t)count >= sizeof filename)
+        return PR_FALSE; /* name too long, can't read it anyway */
+    if (stat(filename, &stat_buf) < 0)
+        return PR_FALSE; /* can't stat, probably can't read it then as well */
+    return S_ISREG(stat_buf.st_mode) ? PR_TRUE : PR_FALSE;
+}
+
+/*
+ * Read (and discard) the fileToRead'th regular file, counting from zero in
+ * readdir() order, of /etc -- or of $HOME when /etc cannot be opened.  When
+ * the directory has fewer regular files, the first one is read instead.
+ * return 1 if it's time to reset the fileToRead (no more files to read).
+ */
+static int
+ReadOneFile(int fileToRead)
+{
+    char *dir = "/etc";
+    DIR *fd = opendir(dir);
+    int resetCount = 0;
+    struct dirent *entry;
+#if defined(__sun)
+    char firstName[256];
+#else
+    char firstName[NAME_MAX + 1];
+#endif
+    const char *name = NULL;
+    int i;
+
+    if (fd == NULL) {
+        dir = PR_GetEnvSecure("HOME");
+        if (dir) {
+            fd = opendir(dir);
+        }
+    }
+    if (fd == NULL) {
+        return 1;
+    }
+
+    firstName[0] = '\0';
+    for (i = 0; i <= fileToRead; i++) {
+        do {
+            /* readdir() isn't guaranteed to be thread safe on every platform;
+             * this code assumes the same directory isn't read concurrently.
+             * This usage is confirmed safe on Linux, see bug 1254334. */
+            entry = readdir(fd);
+        } while (entry != NULL && !ReadFileOK(dir, &entry->d_name[0]));
+        if (entry == NULL) {
+            resetCount = 1; /* read to the end, start again at the beginning */
+            if (firstName[0]) {
+                /* ran out of entries in the directory, use the first one */
+                name = firstName;
+            }
+            break;
+        }
+        name = entry->d_name;
+        if (i == 0) {
+            /* copy the name of the first in case we run out of entries */
+            PORT_Assert(PORT_Strlen(name) < sizeof(firstName));
+            PORT_Strncpy(firstName, name, sizeof(firstName) - 1);
+            firstName[sizeof(firstName) - 1] = '\0';
+        }
+    }
+
+    if (name) {
+        char filename[PATH_MAX];
+        int count = snprintf(filename, sizeof(filename), "%s/%s", dir, name);
+        if (count >= 1 && (size_t)count < sizeof(filename)) { /* not truncated */
+            ReadSingleFile(filename);
+        }
+    }
+
+    closedir(fd);
+    return resetCount;
+}
+
+/*
+ * Try to introduce more noise into the 'GetNoise' call by reading one file
+ * per invocation, stepping through the directory from call to call.
+ */
+static void
+rng_systemJitter(void)
+{
+    static int nextFile = 1; /* index handed to ReadOneFile */
+    int exhausted = ReadOneFile(nextFile);
+
+    /* nonzero means ReadOneFile ran off the end of the directory:
+     * restart at index 1, otherwise advance to the next file */
+    nextFile = exhausted ? 1 : nextFile + 1;
+}
+
+/* Fill dest with maxLen bytes from /dev/urandom; returns maxLen, or 0 with
+ * SEC_ERROR_NEED_RANDOM set on a short read.  If the device can't be opened,
+ * the result of rng_systemFromNoise() is returned instead. */
+size_t
+RNG_SystemRNG(void *dest, size_t maxLen)
+{
+    FILE *file;
+    int fd;
+    ssize_t bytes; /* read() result; an int would truncate large requests */
+    size_t fileBytes = 0;
+    unsigned char *buffer = dest;
+
+    file = fopen("/dev/urandom", "r");
+    if (file == NULL) {
+        return rng_systemFromNoise(dest, maxLen);
+    }
+    /* Read from the underlying file descriptor directly: stdio buffering
+     * would pull more bytes than we need from /dev/urandom, and fread may
+     * return EOF in unbuffered I/O mode on Android. */
+    fd = fileno(file);
+    /* 'file' was just opened, so this should not fail. */
+    PORT_Assert(fd != -1);
+    while (maxLen > fileBytes && fd != -1) {
+        bytes = read(fd, buffer, maxLen - fileBytes);
+        if (bytes <= 0)
+            break;
+        fileBytes += bytes;
+        buffer += bytes;
+    }
+    fclose(file);
+    if (fileBytes != maxLen) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM); /* system RNG failed */
+        fileBytes = 0;
+    }
+    return fileBytes;
+}
diff --git a/security/nss/lib/freebl/win_rand.c b/security/nss/lib/freebl/win_rand.c
new file mode 100644
index 000000000..b863776d2
--- /dev/null
+++ b/security/nss/lib/freebl/win_rand.c
@@ -0,0 +1,161 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "secrng.h"
+
+#ifdef XP_WIN
+#include <windows.h>
+#include <time.h>
+
+// Split the performance counter into two DWORDs; FALSE if the query fails.
+static BOOL
+CurrentClockTickTime(LPDWORD lpdwHigh, LPDWORD lpdwLow)
+{
+    LARGE_INTEGER ticks;
+    if (QueryPerformanceCounter(&ticks)) {
+        *lpdwHigh = ticks.u.HighPart;
+        *lpdwLow = ticks.u.LowPart;
+        return TRUE;
+    }
+    return FALSE; // outputs are left untouched
+}
+
+// Pack up to maxbuf bytes of clock readings into buf -- performance counter
+// (low word first), GetTickCount(), then time() -- and return the byte count.
+size_t
+RNG_GetNoise(void *buf, size_t maxbuf)
+{
+    DWORD dwHigh = 0, dwLow = 0, dwVal; // zeros survive a failed counter query
+    int n = 0;
+    int nBytes;
+    time_t sTime;
+
+    if (maxbuf <= 0)
+        return 0;
+    CurrentClockTickTime(&dwHigh, &dwLow);
+    // get the maximally changing bits first
+    nBytes = sizeof(dwLow) > maxbuf ? maxbuf : sizeof(dwLow);
+    memcpy((char *)buf, &dwLow, nBytes);
+    n += nBytes;
+    maxbuf -= nBytes;
+
+    if (maxbuf <= 0)
+        return n;
+
+    nBytes = sizeof(dwHigh) > maxbuf ? maxbuf : sizeof(dwHigh);
+    memcpy(((char *)buf) + n, &dwHigh, nBytes);
+    n += nBytes;
+    maxbuf -= nBytes;
+
+    if (maxbuf <= 0)
+        return n;
+
+    // get the number of milliseconds that have elapsed since Windows started
+    dwVal = GetTickCount();
+
+    nBytes = sizeof(dwVal) > maxbuf ? maxbuf : sizeof(dwVal);
+    memcpy(((char *)buf) + n, &dwVal, nBytes);
+    n += nBytes;
+    maxbuf -= nBytes;
+
+    if (maxbuf <= 0)
+        return n;
+
+    // get the time in seconds since midnight Jan 1, 1970
+    time(&sTime);
+    nBytes = sizeof(sTime) > maxbuf ? maxbuf : sizeof(sTime);
+    memcpy(((char *)buf) + n, &sTime, nBytes);
+    n += nBytes;
+
+    return n;
+}
+
+// Stir assorted machine- and process-specific values (memory statistics,
+// drive bitmap, computer name, process/thread IDs, volume and disk-space
+// information), bracketed by two clock samples, into the RNG.
+void
+RNG_SystemInfoForRNG(void)
+{
+    DWORD dwVal;
+    char buffer[256];
+    int nBytes;
+    MEMORYSTATUS sMem;
+    HANDLE hVal;
+    // zeroed: GetVolumeInformation's result is not checked below
+    DWORD dwSerialNum = 0;
+    DWORD dwComponentLen = 0;
+    DWORD dwSysFlags = 0;
+    char volName[128];
+    DWORD dwSectors, dwBytes, dwFreeClusters, dwNumClusters;
+
+    nBytes = RNG_GetNoise(buffer, 20); // get up to 20 bytes
+    RNG_RandomUpdate(buffer, nBytes);
+
+    sMem.dwLength = sizeof(sMem);
+    GlobalMemoryStatus(&sMem); // assorted memory stats
+    RNG_RandomUpdate(&sMem, sizeof(sMem));
+
+    dwVal = GetLogicalDrives();
+    RNG_RandomUpdate(&dwVal, sizeof(dwVal)); // bitfields in bits 0-25
+
+    dwVal = sizeof(buffer);
+    if (GetComputerName(buffer, &dwVal))
+        RNG_RandomUpdate(buffer, dwVal);
+
+    hVal = GetCurrentProcess(); // 4 or 8 byte pseudo handle (a
+                                // constant!) of current process
+    RNG_RandomUpdate(&hVal, sizeof(hVal));
+
+    dwVal = GetCurrentProcessId(); // process ID (4 bytes)
+    RNG_RandomUpdate(&dwVal, sizeof(dwVal));
+
+    dwVal = GetCurrentThreadId(); // thread ID (4 bytes)
+    RNG_RandomUpdate(&dwVal, sizeof(dwVal));
+
+    volName[0] = '\0';
+    buffer[0] = '\0';
+    // NULL root path: query the volume of the current directory
+    GetVolumeInformation(NULL, volName, sizeof(volName),
+                         &dwSerialNum, &dwComponentLen, &dwSysFlags,
+                         buffer, sizeof(buffer));
+
+    RNG_RandomUpdate(volName, strlen(volName));
+    RNG_RandomUpdate(&dwSerialNum, sizeof(dwSerialNum));
+    RNG_RandomUpdate(&dwComponentLen, sizeof(dwComponentLen));
+    RNG_RandomUpdate(&dwSysFlags, sizeof(dwSysFlags));
+    RNG_RandomUpdate(buffer, strlen(buffer));
+
+    if (GetDiskFreeSpace(NULL, &dwSectors, &dwBytes, &dwFreeClusters,
+                         &dwNumClusters)) {
+        RNG_RandomUpdate(&dwSectors, sizeof(dwSectors));
+        RNG_RandomUpdate(&dwBytes, sizeof(dwBytes));
+        RNG_RandomUpdate(&dwFreeClusters, sizeof(dwFreeClusters));
+        RNG_RandomUpdate(&dwNumClusters, sizeof(dwNumClusters));
+    }
+
+    nBytes = RNG_GetNoise(buffer, 20); // get up to 20 bytes
+    RNG_RandomUpdate(buffer, nBytes);
+}
+
+/*
+ * The RtlGenRandom function is declared in <ntsecapi.h>, but the
+ * declaration is missing a calling convention specifier. So we
+ * declare it manually here.
+ */
+#define RtlGenRandom SystemFunction036
+DECLSPEC_IMPORT BOOLEAN WINAPI RtlGenRandom(
+    PVOID RandomBuffer,
+    ULONG RandomBufferLength);
+
+// Fill dest with maxLen bytes from RtlGenRandom; returns maxLen, or 0 on
+// failure or when maxLen does not fit the API's ULONG length parameter.
+size_t
+RNG_SystemRNG(void *dest, size_t maxLen)
+{
+    ULONG len = (ULONG)maxLen; // narrower than size_t on 64-bit Windows
+    if (len != maxLen || !RtlGenRandom(dest, len))
+        return 0; // a truncated length would leave dest partly unfilled
+    return maxLen;
+}
+#endif /* is XP_WIN */
-- 
cgit v1.2.3