From df9477dfa60ebb5d31bc142e58ce46535c17abce Mon Sep 17 00:00:00 2001 From: trav90 Date: Wed, 17 Oct 2018 05:59:08 -0500 Subject: Update aom to slightly newer commit ID --- third_party/aom/test/hiprec_convolve_test_util.cc | 188 ++++++++++++++++++++++ 1 file changed, 188 insertions(+) create mode 100644 third_party/aom/test/hiprec_convolve_test_util.cc (limited to 'third_party/aom/test/hiprec_convolve_test_util.cc') diff --git a/third_party/aom/test/hiprec_convolve_test_util.cc b/third_party/aom/test/hiprec_convolve_test_util.cc new file mode 100644 index 000000000..d53384c5b --- /dev/null +++ b/third_party/aom/test/hiprec_convolve_test_util.cc @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "test/hiprec_convolve_test_util.h" + +#include "av1/common/restoration.h" + +using std::tr1::tuple; +using std::tr1::make_tuple; + +namespace libaom_test { + +// Generate a random pair of filter kernels, using the ranges +// of possible values from the loop-restoration experiment +static void generate_kernels(ACMRandom *rnd, InterpKernel hkernel, + InterpKernel vkernel) { + hkernel[0] = hkernel[6] = + WIENER_FILT_TAP0_MINV + + rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 1 - WIENER_FILT_TAP0_MINV); + hkernel[1] = hkernel[5] = + WIENER_FILT_TAP1_MINV + + rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV); + hkernel[2] = hkernel[4] = + WIENER_FILT_TAP2_MINV + + rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV); + hkernel[3] = -(hkernel[0] + hkernel[1] + hkernel[2]); + hkernel[7] = 0; + + vkernel[0] = vkernel[6] = + WIENER_FILT_TAP0_MINV + + rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 1 - WIENER_FILT_TAP0_MINV); + vkernel[1] = vkernel[5] = + WIENER_FILT_TAP1_MINV + + rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV); + vkernel[2] = vkernel[4] = + WIENER_FILT_TAP2_MINV + + rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV); + vkernel[3] = -(vkernel[0] + vkernel[1] + vkernel[2]); + vkernel[7] = 0; +} + +namespace AV1HiprecConvolve { + +::testing::internal::ParamGenerator BuildParams( + hiprec_convolve_func filter) { + const HiprecConvolveParam params[] = { + make_tuple(8, 8, 50000, filter), make_tuple(64, 64, 1000, filter), + make_tuple(32, 8, 10000, filter), + }; + return ::testing::ValuesIn(params); +} + +AV1HiprecConvolveTest::~AV1HiprecConvolveTest() {} +void AV1HiprecConvolveTest::SetUp() { + rnd_.Reset(ACMRandom::DeterministicSeed()); +} + +void AV1HiprecConvolveTest::TearDown() { libaom_test::ClearSystemState(); } + +void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) { + const int w = 128, h = 128; + const int out_w = GET_PARAM(0), out_h = GET_PARAM(1); + const int num_iters = GET_PARAM(2); + int i, j; + + uint8_t *input_ = new uint8_t[h * w]; + uint8_t *input = input_; + + // The convolve functions always write rows with widths that are multiples of + // 8. + // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8. + int output_n = ((out_w + 7) & ~7) * out_h; + uint8_t *output = new uint8_t[output_n]; + uint8_t *output2 = new uint8_t[output_n]; + + // Generate random filter kernels + DECLARE_ALIGNED(16, InterpKernel, hkernel); + DECLARE_ALIGNED(16, InterpKernel, vkernel); + + generate_kernels(&rnd_, hkernel, vkernel); + + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8(); + + for (i = 0; i < num_iters; ++i) { + // Choose random locations within the source block + int offset_r = 3 + rnd_.PseudoUniform(w - out_w - 7); + int offset_c = 3 + rnd_.PseudoUniform(h - out_h - 7); + aom_convolve8_add_src_hip_c(input + offset_r * w + offset_c, w, output, + out_w, hkernel, 16, vkernel, 16, out_w, out_h); + test_impl(input + offset_r * w + offset_c, w, output2, out_w, hkernel, 16, + vkernel, 16, out_w, out_h); + + for (j = 0; j < out_w * out_h; ++j) + ASSERT_EQ(output[j], output2[j]) << "Pixel mismatch at index " << j + << " = (" << (j % out_w) << ", " + << (j / out_w) << ") on iteration " << i; + } + delete[] input_; + delete[] output; + delete[] output2; +} +} // namespace AV1HiprecConvolve + +#if CONFIG_HIGHBITDEPTH +namespace AV1HighbdHiprecConvolve { + +::testing::internal::ParamGenerator BuildParams( + highbd_hiprec_convolve_func filter) { + const HighbdHiprecConvolveParam params[] = { + make_tuple(8, 8, 50000, 8, filter), make_tuple(64, 64, 1000, 8, filter), + make_tuple(32, 8, 10000, 8, filter), make_tuple(8, 8, 50000, 10, filter), + make_tuple(64, 64, 1000, 10, filter), make_tuple(32, 8, 10000, 10, filter), + make_tuple(8, 8, 50000, 12, filter), make_tuple(64, 64, 1000, 12, filter), + make_tuple(32, 8, 10000, 12, filter), + }; + return ::testing::ValuesIn(params); +} + +AV1HighbdHiprecConvolveTest::~AV1HighbdHiprecConvolveTest() {} +void AV1HighbdHiprecConvolveTest::SetUp() { + rnd_.Reset(ACMRandom::DeterministicSeed()); +} + +void AV1HighbdHiprecConvolveTest::TearDown() { + libaom_test::ClearSystemState(); +} + +void AV1HighbdHiprecConvolveTest::RunCheckOutput( + highbd_hiprec_convolve_func test_impl) { + const int w = 128, h = 128; + const int out_w = GET_PARAM(0), out_h = GET_PARAM(1); + const int num_iters = GET_PARAM(2); + const int bd = GET_PARAM(3); + int i, j; + + uint16_t *input = new uint16_t[h * w]; + + // The convolve functions always write rows with widths that are multiples of + // 8. + // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8. + int output_n = ((out_w + 7) & ~7) * out_h; + uint16_t *output = new uint16_t[output_n]; + uint16_t *output2 = new uint16_t[output_n]; + + // Generate random filter kernels + DECLARE_ALIGNED(16, InterpKernel, hkernel); + DECLARE_ALIGNED(16, InterpKernel, vkernel); + + generate_kernels(&rnd_, hkernel, vkernel); + + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1); + + uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input); + uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output); + uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2); + + for (i = 0; i < num_iters; ++i) { + // Choose random locations within the source block + int offset_r = 3 + rnd_.PseudoUniform(w - out_w - 7); + int offset_c = 3 + rnd_.PseudoUniform(h - out_h - 7); + aom_highbd_convolve8_add_src_hip_c(input_ptr + offset_r * w + offset_c, w, + output_ptr, out_w, hkernel, 16, vkernel, + 16, out_w, out_h, bd); + test_impl(input_ptr + offset_r * w + offset_c, w, output2_ptr, out_w, + hkernel, 16, vkernel, 16, out_w, out_h, bd); + + for (j = 0; j < out_w * out_h; ++j) + ASSERT_EQ(output[j], output2[j]) << "Pixel mismatch at index " << j + << " = (" << (j % out_w) << ", " + << (j / out_w) << ") on iteration " << i; + } + delete[] input; + delete[] output; + delete[] output2; +} +} // namespace AV1HighbdHiprecConvolve +#endif // CONFIG_HIGHBITDEPTH +} // namespace libaom_test -- cgit v1.2.3 From 7369c7d7a5eed32963d8af37658286617919f91c Mon Sep 17 00:00:00 2001 From: trav90 Date: Thu, 18 Oct 2018 06:04:57 -0500 Subject: Update aom to commit id f5bdeac22930ff4c6b219be49c843db35970b918 --- third_party/aom/test/hiprec_convolve_test_util.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'third_party/aom/test/hiprec_convolve_test_util.cc') diff --git a/third_party/aom/test/hiprec_convolve_test_util.cc b/third_party/aom/test/hiprec_convolve_test_util.cc index d53384c5b..f5661ec07 100644 --- a/third_party/aom/test/hiprec_convolve_test_util.cc +++ b/third_party/aom/test/hiprec_convolve_test_util.cc @@ -92,8 +92,8 @@ void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) { for (i = 0; i < num_iters; ++i) { // Choose random locations within the source block - int offset_r = 3 + rnd_.PseudoUniform(w - out_w - 7); - int offset_c = 3 + rnd_.PseudoUniform(h - out_h - 7); + int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7); + int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7); aom_convolve8_add_src_hip_c(input + offset_r * w + offset_c, w, output, out_w, hkernel, 16, vkernel, 16, out_w, out_h); test_impl(input + offset_r * w + offset_c, w, output2, out_w, hkernel, 16, @@ -166,8 +166,8 @@ void AV1HighbdHiprecConvolveTest::RunCheckOutput( for (i = 0; i < num_iters; ++i) { // Choose random locations within the source block - int offset_r = 3 + rnd_.PseudoUniform(w - out_w - 7); - int offset_c = 3 + rnd_.PseudoUniform(h - out_h - 7); + int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7); + int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7); aom_highbd_convolve8_add_src_hip_c(input_ptr + offset_r * w + offset_c, w, output_ptr, out_w, hkernel, 16, vkernel, 16, out_w, out_h, bd); -- cgit v1.2.3 From ec910d81405c736a4490383a250299a7837c2e64 Mon Sep 17 00:00:00 2001 From: trav90 Date: Thu, 18 Oct 2018 21:53:44 -0500 Subject: Update aom to commit id e87fb2378f01103d5d6e477a4ef6892dc714e614 --- third_party/aom/test/hiprec_convolve_test_util.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'third_party/aom/test/hiprec_convolve_test_util.cc') diff --git a/third_party/aom/test/hiprec_convolve_test_util.cc b/third_party/aom/test/hiprec_convolve_test_util.cc index f5661ec07..4dee6ab4d 100644 --- a/third_party/aom/test/hiprec_convolve_test_util.cc +++ b/third_party/aom/test/hiprec_convolve_test_util.cc @@ -100,9 +100,9 @@ void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) { vkernel, 16, out_w, out_h); for (j = 0; j < out_w * out_h; ++j) - ASSERT_EQ(output[j], output2[j]) << "Pixel mismatch at index " << j - << " = (" << (j % out_w) << ", " - << (j / out_w) << ") on iteration " << i; + ASSERT_EQ(output[j], output2[j]) + << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", " + << (j / out_w) << ") on iteration " << i; } delete[] input_; delete[] output; @@ -175,9 +175,9 @@ void AV1HighbdHiprecConvolveTest::RunCheckOutput( hkernel, 16, vkernel, 16, out_w, out_h, bd); for (j = 0; j < out_w * out_h; ++j) - ASSERT_EQ(output[j], output2[j]) << "Pixel mismatch at index " << j - << " = (" << (j % out_w) << ", " - << (j / out_w) << ") on iteration " << i; + ASSERT_EQ(output[j], output2[j]) + << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", " + << (j / out_w) << ") on iteration " << i; } delete[] input; delete[] output; -- cgit v1.2.3 From bbcc64772580c8a979288791afa02d30bc476d2e Mon Sep 17 00:00:00 2001 From: trav90 Date: Fri, 19 Oct 2018 21:52:15 -0500 Subject: Update aom to v1.0.0 Update aom to commit id d14c5bb4f336ef1842046089849dee4a301fbbf0. --- third_party/aom/test/hiprec_convolve_test_util.cc | 185 +++++++++++++++++++--- 1 file changed, 164 insertions(+), 21 deletions(-) (limited to 'third_party/aom/test/hiprec_convolve_test_util.cc') diff --git a/third_party/aom/test/hiprec_convolve_test_util.cc b/third_party/aom/test/hiprec_convolve_test_util.cc index 4dee6ab4d..2672bcec3 100644 --- a/third_party/aom/test/hiprec_convolve_test_util.cc +++ b/third_party/aom/test/hiprec_convolve_test_util.cc @@ -13,8 +13,8 @@ #include "av1/common/restoration.h" -using std::tr1::tuple; -using std::tr1::make_tuple; +using ::testing::make_tuple; +using ::testing::tuple; namespace libaom_test { @@ -52,8 +52,13 @@ namespace AV1HiprecConvolve { ::testing::internal::ParamGenerator BuildParams( hiprec_convolve_func filter) { const HiprecConvolveParam params[] = { - make_tuple(8, 8, 50000, filter), make_tuple(64, 64, 1000, filter), - make_tuple(32, 8, 10000, filter), + make_tuple(8, 8, 50000, filter), make_tuple(8, 4, 50000, filter), + make_tuple(64, 24, 1000, filter), make_tuple(64, 64, 1000, filter), + make_tuple(64, 56, 1000, filter), make_tuple(32, 8, 10000, filter), + make_tuple(32, 28, 10000, filter), make_tuple(32, 32, 10000, filter), + make_tuple(16, 34, 10000, filter), make_tuple(32, 34, 10000, filter), + make_tuple(64, 34, 1000, filter), make_tuple(8, 17, 10000, filter), + make_tuple(16, 17, 10000, filter), make_tuple(32, 17, 10000, filter) }; return ::testing::ValuesIn(params); } @@ -70,14 +75,15 @@ void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) { const int out_w = GET_PARAM(0), out_h = GET_PARAM(1); const int num_iters = GET_PARAM(2); int i, j; + const ConvolveParams conv_params = get_conv_params_wiener(8); uint8_t *input_ = new uint8_t[h * w]; uint8_t *input = input_; - // The convolve functions always write rows with widths that are multiples of - // 8. - // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8. - int output_n = ((out_w + 7) & ~7) * out_h; + // The AVX2 convolve functions always write rows with widths that are + // multiples of 16. So to avoid a buffer overflow, we may need to pad + // rows to a multiple of 16. + int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h; uint8_t *output = new uint8_t[output_n]; uint8_t *output2 = new uint8_t[output_n]; @@ -94,10 +100,11 @@ void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) { // Choose random locations within the source block int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7); int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7); - aom_convolve8_add_src_hip_c(input + offset_r * w + offset_c, w, output, - out_w, hkernel, 16, vkernel, 16, out_w, out_h); + av1_wiener_convolve_add_src_c(input + offset_r * w + offset_c, w, output, + out_w, hkernel, 16, vkernel, 16, out_w, out_h, + &conv_params); test_impl(input + offset_r * w + offset_c, w, output2, out_w, hkernel, 16, - vkernel, 16, out_w, out_h); + vkernel, 16, out_w, out_h, &conv_params); for (j = 0; j < out_w * out_h; ++j) ASSERT_EQ(output[j], output2[j]) @@ -108,9 +115,74 @@ void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) { delete[] output; delete[] output2; } + +void AV1HiprecConvolveTest::RunSpeedTest(hiprec_convolve_func test_impl) { + const int w = 128, h = 128; + const int out_w = GET_PARAM(0), out_h = GET_PARAM(1); + const int num_iters = GET_PARAM(2) / 500; + int i, j, k; + const ConvolveParams conv_params = get_conv_params_wiener(8); + + uint8_t *input_ = new uint8_t[h * w]; + uint8_t *input = input_; + + // The AVX2 convolve functions always write rows with widths that are + // multiples of 16. So to avoid a buffer overflow, we may need to pad + // rows to a multiple of 16. + int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h; + uint8_t *output = new uint8_t[output_n]; + uint8_t *output2 = new uint8_t[output_n]; + + // Generate random filter kernels + DECLARE_ALIGNED(16, InterpKernel, hkernel); + DECLARE_ALIGNED(16, InterpKernel, vkernel); + + generate_kernels(&rnd_, hkernel, vkernel); + + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8(); + + aom_usec_timer ref_timer; + aom_usec_timer_start(&ref_timer); + for (i = 0; i < num_iters; ++i) { + for (j = 3; j < h - out_h - 4; j++) { + for (k = 3; k < w - out_w - 4; k++) { + av1_wiener_convolve_add_src_c(input + j * w + k, w, output, out_w, + hkernel, 16, vkernel, 16, out_w, out_h, + &conv_params); + } + } + } + aom_usec_timer_mark(&ref_timer); + const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); + + aom_usec_timer tst_timer; + aom_usec_timer_start(&tst_timer); + for (i = 0; i < num_iters; ++i) { + for (j = 3; j < h - out_h - 4; j++) { + for (k = 3; k < w - out_w - 4; k++) { + test_impl(input + j * w + k, w, output2, out_w, hkernel, 16, vkernel, + 16, out_w, out_h, &conv_params); + } + } + } + aom_usec_timer_mark(&tst_timer); + const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); + + std::cout << "[ ] C time = " << ref_time / 1000 + << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; + + EXPECT_GT(ref_time, tst_time) + << "Error: AV1HiprecConvolveTest.SpeedTest, SIMD slower than C.\n" + << "C time: " << ref_time << " us\n" + << "SIMD time: " << tst_time << " us\n"; + + delete[] input_; + delete[] output; + delete[] output2; +} } // namespace AV1HiprecConvolve -#if CONFIG_HIGHBITDEPTH namespace AV1HighbdHiprecConvolve { ::testing::internal::ParamGenerator BuildParams( @@ -141,13 +213,14 @@ void AV1HighbdHiprecConvolveTest::RunCheckOutput( const int num_iters = GET_PARAM(2); const int bd = GET_PARAM(3); int i, j; + const ConvolveParams conv_params = get_conv_params_wiener(bd); uint16_t *input = new uint16_t[h * w]; - // The convolve functions always write rows with widths that are multiples of - // 8. - // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8. - int output_n = ((out_w + 7) & ~7) * out_h; + // The AVX2 convolve functions always write rows with widths that are + // multiples of 16. So to avoid a buffer overflow, we may need to pad + // rows to a multiple of 16. + int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h; uint16_t *output = new uint16_t[output_n]; uint16_t *output2 = new uint16_t[output_n]; @@ -168,11 +241,11 @@ void AV1HighbdHiprecConvolveTest::RunCheckOutput( // Choose random locations within the source block int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7); int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7); - aom_highbd_convolve8_add_src_hip_c(input_ptr + offset_r * w + offset_c, w, - output_ptr, out_w, hkernel, 16, vkernel, - 16, out_w, out_h, bd); + av1_highbd_wiener_convolve_add_src_c( + input_ptr + offset_r * w + offset_c, w, output_ptr, out_w, hkernel, 16, + vkernel, 16, out_w, out_h, &conv_params, bd); test_impl(input_ptr + offset_r * w + offset_c, w, output2_ptr, out_w, - hkernel, 16, vkernel, 16, out_w, out_h, bd); + hkernel, 16, vkernel, 16, out_w, out_h, &conv_params, bd); for (j = 0; j < out_w * out_h; ++j) ASSERT_EQ(output[j], output2[j]) @@ -183,6 +256,76 @@ void AV1HighbdHiprecConvolveTest::RunCheckOutput( delete[] output; delete[] output2; } + +void AV1HighbdHiprecConvolveTest::RunSpeedTest( + highbd_hiprec_convolve_func test_impl) { + const int w = 128, h = 128; + const int out_w = GET_PARAM(0), out_h = GET_PARAM(1); + const int num_iters = GET_PARAM(2) / 500; + const int bd = GET_PARAM(3); + int i, j, k; + const ConvolveParams conv_params = get_conv_params_wiener(bd); + + uint16_t *input = new uint16_t[h * w]; + + // The AVX2 convolve functions always write rows with widths that are + // multiples of 16. So to avoid a buffer overflow, we may need to pad + // rows to a multiple of 16. + int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h; + uint16_t *output = new uint16_t[output_n]; + uint16_t *output2 = new uint16_t[output_n]; + + // Generate random filter kernels + DECLARE_ALIGNED(16, InterpKernel, hkernel); + DECLARE_ALIGNED(16, InterpKernel, vkernel); + + generate_kernels(&rnd_, hkernel, vkernel); + + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1); + + uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input); + uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output); + uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2); + + aom_usec_timer ref_timer; + aom_usec_timer_start(&ref_timer); + for (i = 0; i < num_iters; ++i) { + for (j = 3; j < h - out_h - 4; j++) { + for (k = 3; k < w - out_w - 4; k++) { + av1_highbd_wiener_convolve_add_src_c( + input_ptr + j * w + k, w, output_ptr, out_w, hkernel, 16, vkernel, + 16, out_w, out_h, &conv_params, bd); + } + } + } + aom_usec_timer_mark(&ref_timer); + const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); + + aom_usec_timer tst_timer; + aom_usec_timer_start(&tst_timer); + for (i = 0; i < num_iters; ++i) { + for (j = 3; j < h - out_h - 4; j++) { + for (k = 3; k < w - out_w - 4; k++) { + test_impl(input_ptr + j * w + k, w, output2_ptr, out_w, hkernel, 16, + vkernel, 16, out_w, out_h, &conv_params, bd); + } + } + } + aom_usec_timer_mark(&tst_timer); + const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); + + std::cout << "[ ] C time = " << ref_time / 1000 + << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; + + EXPECT_GT(ref_time, tst_time) + << "Error: AV1HighbdHiprecConvolveTest.SpeedTest, SIMD slower than C.\n" + << "C time: " << ref_time << " us\n" + << "SIMD time: " << tst_time << " us\n"; + + delete[] input; + delete[] output; + delete[] output2; +} } // namespace AV1HighbdHiprecConvolve -#endif // CONFIG_HIGHBITDEPTH } // namespace libaom_test -- cgit v1.2.3