diff options
Diffstat (limited to 'third_party/aom/test/hiprec_convolve_test_util.cc')
-rw-r--r-- | third_party/aom/test/hiprec_convolve_test_util.cc | 185 |
1 files changed, 164 insertions, 21 deletions
diff --git a/third_party/aom/test/hiprec_convolve_test_util.cc b/third_party/aom/test/hiprec_convolve_test_util.cc index 4dee6ab4d..2672bcec3 100644 --- a/third_party/aom/test/hiprec_convolve_test_util.cc +++ b/third_party/aom/test/hiprec_convolve_test_util.cc @@ -13,8 +13,8 @@ #include "av1/common/restoration.h" -using std::tr1::tuple; -using std::tr1::make_tuple; +using ::testing::make_tuple; +using ::testing::tuple; namespace libaom_test { @@ -52,8 +52,13 @@ namespace AV1HiprecConvolve { ::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams( hiprec_convolve_func filter) { const HiprecConvolveParam params[] = { - make_tuple(8, 8, 50000, filter), make_tuple(64, 64, 1000, filter), - make_tuple(32, 8, 10000, filter), + make_tuple(8, 8, 50000, filter), make_tuple(8, 4, 50000, filter), + make_tuple(64, 24, 1000, filter), make_tuple(64, 64, 1000, filter), + make_tuple(64, 56, 1000, filter), make_tuple(32, 8, 10000, filter), + make_tuple(32, 28, 10000, filter), make_tuple(32, 32, 10000, filter), + make_tuple(16, 34, 10000, filter), make_tuple(32, 34, 10000, filter), + make_tuple(64, 34, 1000, filter), make_tuple(8, 17, 10000, filter), + make_tuple(16, 17, 10000, filter), make_tuple(32, 17, 10000, filter) }; return ::testing::ValuesIn(params); } @@ -70,14 +75,15 @@ void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) { const int out_w = GET_PARAM(0), out_h = GET_PARAM(1); const int num_iters = GET_PARAM(2); int i, j; + const ConvolveParams conv_params = get_conv_params_wiener(8); uint8_t *input_ = new uint8_t[h * w]; uint8_t *input = input_; - // The convolve functions always write rows with widths that are multiples of - // 8. - // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8. - int output_n = ((out_w + 7) & ~7) * out_h; + // The AVX2 convolve functions always write rows with widths that are + // multiples of 16. So to avoid a buffer overflow, we may need to pad + // rows to a multiple of 16. + int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h; uint8_t *output = new uint8_t[output_n]; uint8_t *output2 = new uint8_t[output_n]; @@ -94,10 +100,11 @@ void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) { // Choose random locations within the source block int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7); int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7); - aom_convolve8_add_src_hip_c(input + offset_r * w + offset_c, w, output, - out_w, hkernel, 16, vkernel, 16, out_w, out_h); + av1_wiener_convolve_add_src_c(input + offset_r * w + offset_c, w, output, + out_w, hkernel, 16, vkernel, 16, out_w, out_h, + &conv_params); test_impl(input + offset_r * w + offset_c, w, output2, out_w, hkernel, 16, - vkernel, 16, out_w, out_h); + vkernel, 16, out_w, out_h, &conv_params); for (j = 0; j < out_w * out_h; ++j) ASSERT_EQ(output[j], output2[j]) @@ -108,9 +115,74 @@ void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) { delete[] output; delete[] output2; } + +void AV1HiprecConvolveTest::RunSpeedTest(hiprec_convolve_func test_impl) { + const int w = 128, h = 128; + const int out_w = GET_PARAM(0), out_h = GET_PARAM(1); + const int num_iters = GET_PARAM(2) / 500; + int i, j, k; + const ConvolveParams conv_params = get_conv_params_wiener(8); + + uint8_t *input_ = new uint8_t[h * w]; + uint8_t *input = input_; + + // The AVX2 convolve functions always write rows with widths that are + // multiples of 16. So to avoid a buffer overflow, we may need to pad + // rows to a multiple of 16. + int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h; + uint8_t *output = new uint8_t[output_n]; + uint8_t *output2 = new uint8_t[output_n]; + + // Generate random filter kernels + DECLARE_ALIGNED(16, InterpKernel, hkernel); + DECLARE_ALIGNED(16, InterpKernel, vkernel); + + generate_kernels(&rnd_, hkernel, vkernel); + + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8(); + + aom_usec_timer ref_timer; + aom_usec_timer_start(&ref_timer); + for (i = 0; i < num_iters; ++i) { + for (j = 3; j < h - out_h - 4; j++) { + for (k = 3; k < w - out_w - 4; k++) { + av1_wiener_convolve_add_src_c(input + j * w + k, w, output, out_w, + hkernel, 16, vkernel, 16, out_w, out_h, + &conv_params); + } + } + } + aom_usec_timer_mark(&ref_timer); + const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); + + aom_usec_timer tst_timer; + aom_usec_timer_start(&tst_timer); + for (i = 0; i < num_iters; ++i) { + for (j = 3; j < h - out_h - 4; j++) { + for (k = 3; k < w - out_w - 4; k++) { + test_impl(input + j * w + k, w, output2, out_w, hkernel, 16, vkernel, + 16, out_w, out_h, &conv_params); + } + } + } + aom_usec_timer_mark(&tst_timer); + const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); + + std::cout << "[ ] C time = " << ref_time / 1000 + << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; + + EXPECT_GT(ref_time, tst_time) + << "Error: AV1HiprecConvolveTest.SpeedTest, SIMD slower than C.\n" + << "C time: " << ref_time << " us\n" + << "SIMD time: " << tst_time << " us\n"; + + delete[] input_; + delete[] output; + delete[] output2; +} } // namespace AV1HiprecConvolve -#if CONFIG_HIGHBITDEPTH namespace AV1HighbdHiprecConvolve { ::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams( @@ -141,13 +213,14 @@ void AV1HighbdHiprecConvolveTest::RunCheckOutput( const int num_iters = GET_PARAM(2); const int bd = GET_PARAM(3); int i, j; + const ConvolveParams conv_params = get_conv_params_wiener(bd); uint16_t *input = new uint16_t[h * w]; - // The convolve functions always write rows with widths that are multiples of - // 8. - // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8. - int output_n = ((out_w + 7) & ~7) * out_h; + // The AVX2 convolve functions always write rows with widths that are + // multiples of 16. So to avoid a buffer overflow, we may need to pad + // rows to a multiple of 16. + int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h; uint16_t *output = new uint16_t[output_n]; uint16_t *output2 = new uint16_t[output_n]; @@ -168,11 +241,11 @@ void AV1HighbdHiprecConvolveTest::RunCheckOutput( // Choose random locations within the source block int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7); int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7); - aom_highbd_convolve8_add_src_hip_c(input_ptr + offset_r * w + offset_c, w, - output_ptr, out_w, hkernel, 16, vkernel, - 16, out_w, out_h, bd); + av1_highbd_wiener_convolve_add_src_c( + input_ptr + offset_r * w + offset_c, w, output_ptr, out_w, hkernel, 16, + vkernel, 16, out_w, out_h, &conv_params, bd); test_impl(input_ptr + offset_r * w + offset_c, w, output2_ptr, out_w, - hkernel, 16, vkernel, 16, out_w, out_h, bd); + hkernel, 16, vkernel, 16, out_w, out_h, &conv_params, bd); for (j = 0; j < out_w * out_h; ++j) ASSERT_EQ(output[j], output2[j]) @@ -183,6 +256,76 @@ void AV1HighbdHiprecConvolveTest::RunCheckOutput( delete[] output; delete[] output2; } + +void AV1HighbdHiprecConvolveTest::RunSpeedTest( + highbd_hiprec_convolve_func test_impl) { + const int w = 128, h = 128; + const int out_w = GET_PARAM(0), out_h = GET_PARAM(1); + const int num_iters = GET_PARAM(2) / 500; + const int bd = GET_PARAM(3); + int i, j, k; + const ConvolveParams conv_params = get_conv_params_wiener(bd); + + uint16_t *input = new uint16_t[h * w]; + + // The AVX2 convolve functions always write rows with widths that are + // multiples of 16. So to avoid a buffer overflow, we may need to pad + // rows to a multiple of 16. + int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h; + uint16_t *output = new uint16_t[output_n]; + uint16_t *output2 = new uint16_t[output_n]; + + // Generate random filter kernels + DECLARE_ALIGNED(16, InterpKernel, hkernel); + DECLARE_ALIGNED(16, InterpKernel, vkernel); + + generate_kernels(&rnd_, hkernel, vkernel); + + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1); + + uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input); + uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output); + uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2); + + aom_usec_timer ref_timer; + aom_usec_timer_start(&ref_timer); + for (i = 0; i < num_iters; ++i) { + for (j = 3; j < h - out_h - 4; j++) { + for (k = 3; k < w - out_w - 4; k++) { + av1_highbd_wiener_convolve_add_src_c( + input_ptr + j * w + k, w, output_ptr, out_w, hkernel, 16, vkernel, + 16, out_w, out_h, &conv_params, bd); + } + } + } + aom_usec_timer_mark(&ref_timer); + const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); + + aom_usec_timer tst_timer; + aom_usec_timer_start(&tst_timer); + for (i = 0; i < num_iters; ++i) { + for (j = 3; j < h - out_h - 4; j++) { + for (k = 3; k < w - out_w - 4; k++) { + test_impl(input_ptr + j * w + k, w, output2_ptr, out_w, hkernel, 16, + vkernel, 16, out_w, out_h, &conv_params, bd); + } + } + } + aom_usec_timer_mark(&tst_timer); + const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); + + std::cout << "[ ] C time = " << ref_time / 1000 + << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; + + EXPECT_GT(ref_time, tst_time) + << "Error: AV1HighbdHiprecConvolveTest.SpeedTest, SIMD slower than C.\n" + << "C time: " << ref_time << " us\n" + << "SIMD time: " << tst_time << " us\n"; + + delete[] input; + delete[] output; + delete[] output2; +} } // namespace AV1HighbdHiprecConvolve -#endif // CONFIG_HIGHBITDEPTH } // namespace libaom_test |