From df9477dfa60ebb5d31bc142e58ce46535c17abce Mon Sep 17 00:00:00 2001
From: trav90 <travawine@palemoon.org>
Date: Wed, 17 Oct 2018 05:59:08 -0500
Subject: Update aom to slightly newer commit ID

---
 third_party/aom/test/hiprec_convolve_test_util.cc | 188 ++++++++++++++++++++++
 1 file changed, 188 insertions(+)
 create mode 100644 third_party/aom/test/hiprec_convolve_test_util.cc

(limited to 'third_party/aom/test/hiprec_convolve_test_util.cc')

diff --git a/third_party/aom/test/hiprec_convolve_test_util.cc b/third_party/aom/test/hiprec_convolve_test_util.cc
new file mode 100644
index 000000000..d53384c5b
--- /dev/null
+++ b/third_party/aom/test/hiprec_convolve_test_util.cc
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "test/hiprec_convolve_test_util.h"
+
+#include "av1/common/restoration.h"
+
+using std::tr1::tuple;
+using std::tr1::make_tuple;
+
+namespace libaom_test {
+
+// Generate a random pair of filter kernels, using the ranges
+// of possible values from the loop-restoration experiment
+static void generate_kernels(ACMRandom *rnd, InterpKernel hkernel,
+                             InterpKernel vkernel) {
+  hkernel[0] = hkernel[6] =
+      WIENER_FILT_TAP0_MINV +
+      rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 1 - WIENER_FILT_TAP0_MINV);
+  hkernel[1] = hkernel[5] =
+      WIENER_FILT_TAP1_MINV +
+      rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
+  hkernel[2] = hkernel[4] =
+      WIENER_FILT_TAP2_MINV +
+      rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
+  hkernel[3] = -(hkernel[0] + hkernel[1] + hkernel[2]);
+  hkernel[7] = 0;
+
+  vkernel[0] = vkernel[6] =
+      WIENER_FILT_TAP0_MINV +
+      rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 1 - WIENER_FILT_TAP0_MINV);
+  vkernel[1] = vkernel[5] =
+      WIENER_FILT_TAP1_MINV +
+      rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
+  vkernel[2] = vkernel[4] =
+      WIENER_FILT_TAP2_MINV +
+      rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
+  vkernel[3] = -(vkernel[0] + vkernel[1] + vkernel[2]);
+  vkernel[7] = 0;
+}
+
+namespace AV1HiprecConvolve {
+
+::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams(
+    hiprec_convolve_func filter) {
+  const HiprecConvolveParam params[] = {
+    make_tuple(8, 8, 50000, filter), make_tuple(64, 64, 1000, filter),
+    make_tuple(32, 8, 10000, filter),
+  };
+  return ::testing::ValuesIn(params);
+}
+
+AV1HiprecConvolveTest::~AV1HiprecConvolveTest() {}
+void AV1HiprecConvolveTest::SetUp() {
+  rnd_.Reset(ACMRandom::DeterministicSeed());
+}
+
+void AV1HiprecConvolveTest::TearDown() { libaom_test::ClearSystemState(); }
+
+void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
+  const int w = 128, h = 128;
+  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
+  const int num_iters = GET_PARAM(2);
+  int i, j;
+
+  uint8_t *input_ = new uint8_t[h * w];
+  uint8_t *input = input_;
+
+  // The convolve functions always write rows with widths that are multiples of
+  // 8.
+  // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
+  int output_n = ((out_w + 7) & ~7) * out_h;
+  uint8_t *output = new uint8_t[output_n];
+  uint8_t *output2 = new uint8_t[output_n];
+
+  // Generate random filter kernels
+  DECLARE_ALIGNED(16, InterpKernel, hkernel);
+  DECLARE_ALIGNED(16, InterpKernel, vkernel);
+
+  generate_kernels(&rnd_, hkernel, vkernel);
+
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
+
+  for (i = 0; i < num_iters; ++i) {
+    // Choose random locations within the source block
+    int offset_r = 3 + rnd_.PseudoUniform(w - out_w - 7);
+    int offset_c = 3 + rnd_.PseudoUniform(h - out_h - 7);
+    aom_convolve8_add_src_hip_c(input + offset_r * w + offset_c, w, output,
+                                out_w, hkernel, 16, vkernel, 16, out_w, out_h);
+    test_impl(input + offset_r * w + offset_c, w, output2, out_w, hkernel, 16,
+              vkernel, 16, out_w, out_h);
+
+    for (j = 0; j < out_w * out_h; ++j)
+      ASSERT_EQ(output[j], output2[j]) << "Pixel mismatch at index " << j
+                                       << " = (" << (j % out_w) << ", "
+                                       << (j / out_w) << ") on iteration " << i;
+  }
+  delete[] input_;
+  delete[] output;
+  delete[] output2;
+}
+}  // namespace AV1HiprecConvolve
+
+#if CONFIG_HIGHBITDEPTH
+namespace AV1HighbdHiprecConvolve {
+
+::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams(
+    highbd_hiprec_convolve_func filter) {
+  const HighbdHiprecConvolveParam params[] = {
+    make_tuple(8, 8, 50000, 8, filter),   make_tuple(64, 64, 1000, 8, filter),
+    make_tuple(32, 8, 10000, 8, filter),  make_tuple(8, 8, 50000, 10, filter),
+    make_tuple(64, 64, 1000, 10, filter), make_tuple(32, 8, 10000, 10, filter),
+    make_tuple(8, 8, 50000, 12, filter),  make_tuple(64, 64, 1000, 12, filter),
+    make_tuple(32, 8, 10000, 12, filter),
+  };
+  return ::testing::ValuesIn(params);
+}
+
+AV1HighbdHiprecConvolveTest::~AV1HighbdHiprecConvolveTest() {}
+void AV1HighbdHiprecConvolveTest::SetUp() {
+  rnd_.Reset(ACMRandom::DeterministicSeed());
+}
+
+void AV1HighbdHiprecConvolveTest::TearDown() {
+  libaom_test::ClearSystemState();
+}
+
+void AV1HighbdHiprecConvolveTest::RunCheckOutput(
+    highbd_hiprec_convolve_func test_impl) {
+  const int w = 128, h = 128;
+  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
+  const int num_iters = GET_PARAM(2);
+  const int bd = GET_PARAM(3);
+  int i, j;
+
+  uint16_t *input = new uint16_t[h * w];
+
+  // The convolve functions always write rows with widths that are multiples of
+  // 8.
+  // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
+  int output_n = ((out_w + 7) & ~7) * out_h;
+  uint16_t *output = new uint16_t[output_n];
+  uint16_t *output2 = new uint16_t[output_n];
+
+  // Generate random filter kernels
+  DECLARE_ALIGNED(16, InterpKernel, hkernel);
+  DECLARE_ALIGNED(16, InterpKernel, vkernel);
+
+  generate_kernels(&rnd_, hkernel, vkernel);
+
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+
+  uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input);
+  uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output);
+  uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2);
+
+  for (i = 0; i < num_iters; ++i) {
+    // Choose random locations within the source block
+    int offset_r = 3 + rnd_.PseudoUniform(w - out_w - 7);
+    int offset_c = 3 + rnd_.PseudoUniform(h - out_h - 7);
+    aom_highbd_convolve8_add_src_hip_c(input_ptr + offset_r * w + offset_c, w,
+                                       output_ptr, out_w, hkernel, 16, vkernel,
+                                       16, out_w, out_h, bd);
+    test_impl(input_ptr + offset_r * w + offset_c, w, output2_ptr, out_w,
+              hkernel, 16, vkernel, 16, out_w, out_h, bd);
+
+    for (j = 0; j < out_w * out_h; ++j)
+      ASSERT_EQ(output[j], output2[j]) << "Pixel mismatch at index " << j
+                                       << " = (" << (j % out_w) << ", "
+                                       << (j / out_w) << ") on iteration " << i;
+  }
+  delete[] input;
+  delete[] output;
+  delete[] output2;
+}
+}  // namespace AV1HighbdHiprecConvolve
+#endif  // CONFIG_HIGHBITDEPTH
+}  // namespace libaom_test
-- 
cgit v1.2.3


From 7369c7d7a5eed32963d8af37658286617919f91c Mon Sep 17 00:00:00 2001
From: trav90 <travawine@palemoon.org>
Date: Thu, 18 Oct 2018 06:04:57 -0500
Subject: Update aom to commit id f5bdeac22930ff4c6b219be49c843db35970b918

---
 third_party/aom/test/hiprec_convolve_test_util.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'third_party/aom/test/hiprec_convolve_test_util.cc')

diff --git a/third_party/aom/test/hiprec_convolve_test_util.cc b/third_party/aom/test/hiprec_convolve_test_util.cc
index d53384c5b..f5661ec07 100644
--- a/third_party/aom/test/hiprec_convolve_test_util.cc
+++ b/third_party/aom/test/hiprec_convolve_test_util.cc
@@ -92,8 +92,8 @@ void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
 
   for (i = 0; i < num_iters; ++i) {
     // Choose random locations within the source block
-    int offset_r = 3 + rnd_.PseudoUniform(w - out_w - 7);
-    int offset_c = 3 + rnd_.PseudoUniform(h - out_h - 7);
+    int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
+    int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
     aom_convolve8_add_src_hip_c(input + offset_r * w + offset_c, w, output,
                                 out_w, hkernel, 16, vkernel, 16, out_w, out_h);
     test_impl(input + offset_r * w + offset_c, w, output2, out_w, hkernel, 16,
@@ -166,8 +166,8 @@ void AV1HighbdHiprecConvolveTest::RunCheckOutput(
 
   for (i = 0; i < num_iters; ++i) {
     // Choose random locations within the source block
-    int offset_r = 3 + rnd_.PseudoUniform(w - out_w - 7);
-    int offset_c = 3 + rnd_.PseudoUniform(h - out_h - 7);
+    int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
+    int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
     aom_highbd_convolve8_add_src_hip_c(input_ptr + offset_r * w + offset_c, w,
                                        output_ptr, out_w, hkernel, 16, vkernel,
                                        16, out_w, out_h, bd);
-- 
cgit v1.2.3


From ec910d81405c736a4490383a250299a7837c2e64 Mon Sep 17 00:00:00 2001
From: trav90 <travawine@palemoon.org>
Date: Thu, 18 Oct 2018 21:53:44 -0500
Subject: Update aom to commit id e87fb2378f01103d5d6e477a4ef6892dc714e614

---
 third_party/aom/test/hiprec_convolve_test_util.cc | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'third_party/aom/test/hiprec_convolve_test_util.cc')

diff --git a/third_party/aom/test/hiprec_convolve_test_util.cc b/third_party/aom/test/hiprec_convolve_test_util.cc
index f5661ec07..4dee6ab4d 100644
--- a/third_party/aom/test/hiprec_convolve_test_util.cc
+++ b/third_party/aom/test/hiprec_convolve_test_util.cc
@@ -100,9 +100,9 @@ void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
               vkernel, 16, out_w, out_h);
 
     for (j = 0; j < out_w * out_h; ++j)
-      ASSERT_EQ(output[j], output2[j]) << "Pixel mismatch at index " << j
-                                       << " = (" << (j % out_w) << ", "
-                                       << (j / out_w) << ") on iteration " << i;
+      ASSERT_EQ(output[j], output2[j])
+          << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
+          << (j / out_w) << ") on iteration " << i;
   }
   delete[] input_;
   delete[] output;
@@ -175,9 +175,9 @@ void AV1HighbdHiprecConvolveTest::RunCheckOutput(
               hkernel, 16, vkernel, 16, out_w, out_h, bd);
 
     for (j = 0; j < out_w * out_h; ++j)
-      ASSERT_EQ(output[j], output2[j]) << "Pixel mismatch at index " << j
-                                       << " = (" << (j % out_w) << ", "
-                                       << (j / out_w) << ") on iteration " << i;
+      ASSERT_EQ(output[j], output2[j])
+          << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
+          << (j / out_w) << ") on iteration " << i;
   }
   delete[] input;
   delete[] output;
-- 
cgit v1.2.3


From bbcc64772580c8a979288791afa02d30bc476d2e Mon Sep 17 00:00:00 2001
From: trav90 <travawine@palemoon.org>
Date: Fri, 19 Oct 2018 21:52:15 -0500
Subject: Update aom to v1.0.0

Update aom to commit id d14c5bb4f336ef1842046089849dee4a301fbbf0.
---
 third_party/aom/test/hiprec_convolve_test_util.cc | 185 +++++++++++++++++++---
 1 file changed, 164 insertions(+), 21 deletions(-)

(limited to 'third_party/aom/test/hiprec_convolve_test_util.cc')

diff --git a/third_party/aom/test/hiprec_convolve_test_util.cc b/third_party/aom/test/hiprec_convolve_test_util.cc
index 4dee6ab4d..2672bcec3 100644
--- a/third_party/aom/test/hiprec_convolve_test_util.cc
+++ b/third_party/aom/test/hiprec_convolve_test_util.cc
@@ -13,8 +13,8 @@
 
 #include "av1/common/restoration.h"
 
-using std::tr1::tuple;
-using std::tr1::make_tuple;
+using ::testing::make_tuple;
+using ::testing::tuple;
 
 namespace libaom_test {
 
@@ -52,8 +52,13 @@ namespace AV1HiprecConvolve {
 ::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams(
     hiprec_convolve_func filter) {
   const HiprecConvolveParam params[] = {
-    make_tuple(8, 8, 50000, filter), make_tuple(64, 64, 1000, filter),
-    make_tuple(32, 8, 10000, filter),
+    make_tuple(8, 8, 50000, filter),   make_tuple(8, 4, 50000, filter),
+    make_tuple(64, 24, 1000, filter),  make_tuple(64, 64, 1000, filter),
+    make_tuple(64, 56, 1000, filter),  make_tuple(32, 8, 10000, filter),
+    make_tuple(32, 28, 10000, filter), make_tuple(32, 32, 10000, filter),
+    make_tuple(16, 34, 10000, filter), make_tuple(32, 34, 10000, filter),
+    make_tuple(64, 34, 1000, filter),  make_tuple(8, 17, 10000, filter),
+    make_tuple(16, 17, 10000, filter), make_tuple(32, 17, 10000, filter)
   };
   return ::testing::ValuesIn(params);
 }
@@ -70,14 +75,15 @@ void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
   const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
   const int num_iters = GET_PARAM(2);
   int i, j;
+  const ConvolveParams conv_params = get_conv_params_wiener(8);
 
   uint8_t *input_ = new uint8_t[h * w];
   uint8_t *input = input_;
 
-  // The convolve functions always write rows with widths that are multiples of
-  // 8.
-  // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
-  int output_n = ((out_w + 7) & ~7) * out_h;
+  // The AVX2 convolve functions always write rows with widths that are
+  // multiples of 16. So to avoid a buffer overflow, we may need to pad
+  // rows to a multiple of 16.
+  int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
   uint8_t *output = new uint8_t[output_n];
   uint8_t *output2 = new uint8_t[output_n];
 
@@ -94,10 +100,11 @@ void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
     // Choose random locations within the source block
     int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
     int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
-    aom_convolve8_add_src_hip_c(input + offset_r * w + offset_c, w, output,
-                                out_w, hkernel, 16, vkernel, 16, out_w, out_h);
+    av1_wiener_convolve_add_src_c(input + offset_r * w + offset_c, w, output,
+                                  out_w, hkernel, 16, vkernel, 16, out_w, out_h,
+                                  &conv_params);
     test_impl(input + offset_r * w + offset_c, w, output2, out_w, hkernel, 16,
-              vkernel, 16, out_w, out_h);
+              vkernel, 16, out_w, out_h, &conv_params);
 
     for (j = 0; j < out_w * out_h; ++j)
       ASSERT_EQ(output[j], output2[j])
@@ -108,9 +115,74 @@ void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
   delete[] output;
   delete[] output2;
 }
+
+void AV1HiprecConvolveTest::RunSpeedTest(hiprec_convolve_func test_impl) {
+  const int w = 128, h = 128;
+  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
+  const int num_iters = GET_PARAM(2) / 500;
+  int i, j, k;
+  const ConvolveParams conv_params = get_conv_params_wiener(8);
+
+  uint8_t *input_ = new uint8_t[h * w];
+  uint8_t *input = input_;
+
+  // The AVX2 convolve functions always write rows with widths that are
+  // multiples of 16. So to avoid a buffer overflow, we may need to pad
+  // rows to a multiple of 16.
+  int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
+  uint8_t *output = new uint8_t[output_n];
+  uint8_t *output2 = new uint8_t[output_n];
+
+  // Generate random filter kernels
+  DECLARE_ALIGNED(16, InterpKernel, hkernel);
+  DECLARE_ALIGNED(16, InterpKernel, vkernel);
+
+  generate_kernels(&rnd_, hkernel, vkernel);
+
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
+
+  aom_usec_timer ref_timer;
+  aom_usec_timer_start(&ref_timer);
+  for (i = 0; i < num_iters; ++i) {
+    for (j = 3; j < h - out_h - 4; j++) {
+      for (k = 3; k < w - out_w - 4; k++) {
+        av1_wiener_convolve_add_src_c(input + j * w + k, w, output, out_w,
+                                      hkernel, 16, vkernel, 16, out_w, out_h,
+                                      &conv_params);
+      }
+    }
+  }
+  aom_usec_timer_mark(&ref_timer);
+  const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
+
+  aom_usec_timer tst_timer;
+  aom_usec_timer_start(&tst_timer);
+  for (i = 0; i < num_iters; ++i) {
+    for (j = 3; j < h - out_h - 4; j++) {
+      for (k = 3; k < w - out_w - 4; k++) {
+        test_impl(input + j * w + k, w, output2, out_w, hkernel, 16, vkernel,
+                  16, out_w, out_h, &conv_params);
+      }
+    }
+  }
+  aom_usec_timer_mark(&tst_timer);
+  const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
+
+  std::cout << "[          ] C time = " << ref_time / 1000
+            << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
+
+  EXPECT_GT(ref_time, tst_time)
+      << "Error: AV1HiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
+      << "C time: " << ref_time << " us\n"
+      << "SIMD time: " << tst_time << " us\n";
+
+  delete[] input_;
+  delete[] output;
+  delete[] output2;
+}
 }  // namespace AV1HiprecConvolve
 
-#if CONFIG_HIGHBITDEPTH
 namespace AV1HighbdHiprecConvolve {
 
 ::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams(
@@ -141,13 +213,14 @@ void AV1HighbdHiprecConvolveTest::RunCheckOutput(
   const int num_iters = GET_PARAM(2);
   const int bd = GET_PARAM(3);
   int i, j;
+  const ConvolveParams conv_params = get_conv_params_wiener(bd);
 
   uint16_t *input = new uint16_t[h * w];
 
-  // The convolve functions always write rows with widths that are multiples of
-  // 8.
-  // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
-  int output_n = ((out_w + 7) & ~7) * out_h;
+  // The AVX2 convolve functions always write rows with widths that are
+  // multiples of 16. So to avoid a buffer overflow, we may need to pad
+  // rows to a multiple of 16.
+  int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
   uint16_t *output = new uint16_t[output_n];
   uint16_t *output2 = new uint16_t[output_n];
 
@@ -168,11 +241,11 @@ void AV1HighbdHiprecConvolveTest::RunCheckOutput(
     // Choose random locations within the source block
     int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
     int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
-    aom_highbd_convolve8_add_src_hip_c(input_ptr + offset_r * w + offset_c, w,
-                                       output_ptr, out_w, hkernel, 16, vkernel,
-                                       16, out_w, out_h, bd);
+    av1_highbd_wiener_convolve_add_src_c(
+        input_ptr + offset_r * w + offset_c, w, output_ptr, out_w, hkernel, 16,
+        vkernel, 16, out_w, out_h, &conv_params, bd);
     test_impl(input_ptr + offset_r * w + offset_c, w, output2_ptr, out_w,
-              hkernel, 16, vkernel, 16, out_w, out_h, bd);
+              hkernel, 16, vkernel, 16, out_w, out_h, &conv_params, bd);
 
     for (j = 0; j < out_w * out_h; ++j)
       ASSERT_EQ(output[j], output2[j])
@@ -183,6 +256,76 @@ void AV1HighbdHiprecConvolveTest::RunCheckOutput(
   delete[] output;
   delete[] output2;
 }
+
+void AV1HighbdHiprecConvolveTest::RunSpeedTest(
+    highbd_hiprec_convolve_func test_impl) {
+  const int w = 128, h = 128;
+  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
+  const int num_iters = GET_PARAM(2) / 500;
+  const int bd = GET_PARAM(3);
+  int i, j, k;
+  const ConvolveParams conv_params = get_conv_params_wiener(bd);
+
+  uint16_t *input = new uint16_t[h * w];
+
+  // The AVX2 convolve functions always write rows with widths that are
+  // multiples of 16. So to avoid a buffer overflow, we may need to pad
+  // rows to a multiple of 16.
+  int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
+  uint16_t *output = new uint16_t[output_n];
+  uint16_t *output2 = new uint16_t[output_n];
+
+  // Generate random filter kernels
+  DECLARE_ALIGNED(16, InterpKernel, hkernel);
+  DECLARE_ALIGNED(16, InterpKernel, vkernel);
+
+  generate_kernels(&rnd_, hkernel, vkernel);
+
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+
+  uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input);
+  uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output);
+  uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2);
+
+  aom_usec_timer ref_timer;
+  aom_usec_timer_start(&ref_timer);
+  for (i = 0; i < num_iters; ++i) {
+    for (j = 3; j < h - out_h - 4; j++) {
+      for (k = 3; k < w - out_w - 4; k++) {
+        av1_highbd_wiener_convolve_add_src_c(
+            input_ptr + j * w + k, w, output_ptr, out_w, hkernel, 16, vkernel,
+            16, out_w, out_h, &conv_params, bd);
+      }
+    }
+  }
+  aom_usec_timer_mark(&ref_timer);
+  const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
+
+  aom_usec_timer tst_timer;
+  aom_usec_timer_start(&tst_timer);
+  for (i = 0; i < num_iters; ++i) {
+    for (j = 3; j < h - out_h - 4; j++) {
+      for (k = 3; k < w - out_w - 4; k++) {
+        test_impl(input_ptr + j * w + k, w, output2_ptr, out_w, hkernel, 16,
+                  vkernel, 16, out_w, out_h, &conv_params, bd);
+      }
+    }
+  }
+  aom_usec_timer_mark(&tst_timer);
+  const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
+
+  std::cout << "[          ] C time = " << ref_time / 1000
+            << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
+
+  EXPECT_GT(ref_time, tst_time)
+      << "Error: AV1HighbdHiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
+      << "C time: " << ref_time << " us\n"
+      << "SIMD time: " << tst_time << " us\n";
+
+  delete[] input;
+  delete[] output;
+  delete[] output2;
+}
 }  // namespace AV1HighbdHiprecConvolve
-#endif  // CONFIG_HIGHBITDEPTH
 }  // namespace libaom_test
-- 
cgit v1.2.3