1 files changed, 183 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/mpi/mpvalpha.c b/security/nss/lib/freebl/mpi/mpvalpha.c
new file mode 100644
index 000000000..94e86eedb
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpvalpha.c
@@ -0,0 +1,183 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+#include <c_asm.h>
+
+#define MP_MUL_DxD(a, b, Phi, Plo)              \
+    {                                           \
+        Plo = asm("mulq %a0, %a1, %v0", a, b);  \
+        Phi = asm("umulh %a0, %a1, %v0", a, b); \
+    }
+
+/* This is empty for the loop in s_mpv_mul_d    */
+#define CARRY_ADD
+
+#define ONE_MUL                     \
+    a_i = *a++;                     \
+    MP_MUL_DxD(a_i, b, a1b1, a0b0); \
+    a0b0 += carry;                  \
+    if (a0b0 < carry)               \
+        ++a1b1;                     \
+    CARRY_ADD                       \
+    *c++ = a0b0;                    \
+    carry = a1b1;
+
+#define FOUR_MUL \
+    ONE_MUL      \
+    ONE_MUL      \
+    ONE_MUL      \
+    ONE_MUL
+
+#define SIXTEEN_MUL \
+    FOUR_MUL        \
+    FOUR_MUL        \
+    FOUR_MUL        \
+    FOUR_MUL
+
+#define THIRTYTWO_MUL \
+    SIXTEEN_MUL       \
+    SIXTEEN_MUL
+
+#define ONETWENTYEIGHT_MUL \
+    THIRTYTWO_MUL          \
+    THIRTYTWO_MUL          \
+    THIRTYTWO_MUL          \
+    THIRTYTWO_MUL
+
+#define EXPAND_256(CALL)                     \
+    mp_digit carry = 0;                      \
+    mp_digit a_i;                            \
+    mp_digit a0b0, a1b1;                     \
+    if (a_len & 255) {                       \
+        if (a_len & 1) {                     \
+            ONE_MUL                          \
+        }                                    \
+        if (a_len & 2) {                     \
+            ONE_MUL                          \
+            ONE_MUL                          \
+        }                                    \
+        if (a_len & 4) {                     \
+            FOUR_MUL                         \
+        }                                    \
+        if (a_len & 8) {                     \
+            FOUR_MUL                         \
+            FOUR_MUL                         \
+        }                                    \
+        if (a_len & 16) {                    \
+            SIXTEEN_MUL                      \
+        }                                    \
+        if (a_len & 32) {                    \
+            THIRTYTWO_MUL                    \
+        }                                    \
+        if (a_len & 64) {                    \
+            THIRTYTWO_MUL                    \
+            THIRTYTWO_MUL                    \
+        }                                    \
+        if (a_len & 128) {                   \
+            ONETWENTYEIGHT_MUL               \
+        }                                    \
+        a_len = a_len & (-256);              \
+    }                                        \
+    if (a_len >= 256) {                      \
+        carry = CALL(a, a_len, b, c, carry); \
+        c += a_len;                          \
+    }
+
+#define FUNC_NAME(NAME)                    \
+    mp_digit NAME(const mp_digit *a,       \
+                  mp_size a_len,           \
+                  mp_digit b, mp_digit *c, \
+                  mp_digit carry)
+
+#define DECLARE_MUL_256(FNAME) \
+    FUNC_NAME(FNAME)           \
+    {                          \
+        mp_digit a_i;          \
+        mp_digit a0b0, a1b1;   \
+        while (a_len) {        \
+            ONETWENTYEIGHT_MUL \
+            ONETWENTYEIGHT_MUL \
+            a_len -= 256;      \
+        }                      \
+        return carry;          \
+    }
+
+/* Expanding the loop in s_mpv_mul_d appeared to slow down the
+   (admittedly) small number of tests (i.e., timetest) used to
+   measure performance, so this define disables that optimization. */
+#define DO_NOT_EXPAND 1
+
+/* Need forward declaration so it can be instantiated after
+   the routine that uses it; this helps locality somewhat  */
+#if !defined(DO_NOT_EXPAND)
+FUNC_NAME(s_mpv_mul_d_MUL256);
+#endif
+
+/* c = a * b */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len,
+            mp_digit b, mp_digit *c)
+{
+#if defined(DO_NOT_EXPAND)
+    mp_digit carry = 0;
+    while (a_len--) {
+        mp_digit a_i = *a++;
+        mp_digit a0b0, a1b1;
+
+        MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+        a0b0 += carry;
+        if (a0b0 < carry)
+            ++a1b1;
+        *c++ = a0b0;
+        carry = a1b1;
+    }
+#else
+    EXPAND_256(s_mpv_mul_d_MUL256)
+#endif
+    *c = carry;
+}
+
+#if !defined(DO_NOT_EXPAND)
+DECLARE_MUL_256(s_mpv_mul_d_MUL256)
+#endif
+
+#undef CARRY_ADD
+/* This is redefined for the loop in s_mpv_mul_d_add */
+#define CARRY_ADD     \
+    a0b0 += a_i = *c; \
+    if (a0b0 < a_i)   \
+        ++a1b1;
+
+/* Need forward declaration so it can be instantiated between the
+   two routines that use it; this helps locality somewhat  */
+FUNC_NAME(s_mpv_mul_d_add_MUL256);
+
+/* c += a * b */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len,
+                mp_digit b, mp_digit *c)
+{
+    EXPAND_256(s_mpv_mul_d_add_MUL256)
+    *c = carry;
+}
+
+/* Instantiate multiply 256 routine here */
+DECLARE_MUL_256(s_mpv_mul_d_add_MUL256)
+
+/* Presently, this is only used by the Montgomery arithmetic code. */
+/* c += a * b */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len,
+                     mp_digit b, mp_digit *c)
+{
+    EXPAND_256(s_mpv_mul_d_add_MUL256)
+    while (carry) {
+        mp_digit c_i = *c;
+        carry += c_i;
+        *c++ = carry;
+        carry = carry < c_i;
+    }
+}