summary | refs | log | tree | commit | diff | stats
path: root/third_party/aom/test/hiprec_convolve_test_util.cc
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/aom/test/hiprec_convolve_test_util.cc')
-rw-r--r-- third_party/aom/test/hiprec_convolve_test_util.cc | 185
1 files changed, 164 insertions, 21 deletions
diff --git a/third_party/aom/test/hiprec_convolve_test_util.cc b/third_party/aom/test/hiprec_convolve_test_util.cc
index 4dee6ab4d..2672bcec3 100644
--- a/third_party/aom/test/hiprec_convolve_test_util.cc
+++ b/third_party/aom/test/hiprec_convolve_test_util.cc
@@ -13,8 +13,8 @@
#include "av1/common/restoration.h"
-using std::tr1::tuple;
-using std::tr1::make_tuple;
+using ::testing::make_tuple;
+using ::testing::tuple;
namespace libaom_test {
@@ -52,8 +52,13 @@ namespace AV1HiprecConvolve {
::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams(
hiprec_convolve_func filter) {
const HiprecConvolveParam params[] = {
- make_tuple(8, 8, 50000, filter), make_tuple(64, 64, 1000, filter),
- make_tuple(32, 8, 10000, filter),
+ make_tuple(8, 8, 50000, filter), make_tuple(8, 4, 50000, filter),  // Each tuple: (out_w, out_h, num_iters, filter), matching GET_PARAM(0..2) in the tests; filter is presumably the function under test.
+ make_tuple(64, 24, 1000, filter), make_tuple(64, 64, 1000, filter),
+ make_tuple(64, 56, 1000, filter), make_tuple(32, 8, 10000, filter),
+ make_tuple(32, 28, 10000, filter), make_tuple(32, 32, 10000, filter),
+ make_tuple(16, 34, 10000, filter), make_tuple(32, 34, 10000, filter),
+ make_tuple(64, 34, 1000, filter), make_tuple(8, 17, 10000, filter),
+ make_tuple(16, 17, 10000, filter), make_tuple(32, 17, 10000, filter)
};
return ::testing::ValuesIn(params);
}
@@ -70,14 +75,15 @@ void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
const int num_iters = GET_PARAM(2);
int i, j;
+ const ConvolveParams conv_params = get_conv_params_wiener(8);
uint8_t *input_ = new uint8_t[h * w];
uint8_t *input = input_;
- // The convolve functions always write rows with widths that are multiples of
- // 8.
- // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
- int output_n = ((out_w + 7) & ~7) * out_h;
+ // The AVX2 convolve functions always write rows with widths that are
+ // multiples of 16. So to avoid a buffer overflow, we may need to pad
+ // rows to a multiple of 16.
+ int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
uint8_t *output = new uint8_t[output_n];
uint8_t *output2 = new uint8_t[output_n];
@@ -94,10 +100,11 @@ void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
// Choose random locations within the source block
int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
- aom_convolve8_add_src_hip_c(input + offset_r * w + offset_c, w, output,
- out_w, hkernel, 16, vkernel, 16, out_w, out_h);
+ av1_wiener_convolve_add_src_c(input + offset_r * w + offset_c, w, output,
+ out_w, hkernel, 16, vkernel, 16, out_w, out_h,
+ &conv_params);
test_impl(input + offset_r * w + offset_c, w, output2, out_w, hkernel, 16,
- vkernel, 16, out_w, out_h);
+ vkernel, 16, out_w, out_h, &conv_params);
for (j = 0; j < out_w * out_h; ++j)
ASSERT_EQ(output[j], output2[j])
@@ -108,9 +115,74 @@ void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
delete[] output;
delete[] output2;
}
+
+void AV1HiprecConvolveTest::RunSpeedTest(hiprec_convolve_func test_impl) {  // Times test_impl against av1_wiener_convolve_add_src_c on the same inputs.
+ const int w = 128, h = 128;  // Source buffer is h rows of w samples; w is also the source stride.
+ const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);  // Output block size from the test parameters.
+ const int num_iters = GET_PARAM(2) / 500;  // 1/500 of the parameterized iteration count (integer division).
+ int i, j, k;
+ const ConvolveParams conv_params = get_conv_params_wiener(8);  // 8 is presumably the bit depth; the high-bitdepth variant passes bd here.
+
+ uint8_t *input_ = new uint8_t[h * w];
+ uint8_t *input = input_;
+
+ // The AVX2 convolve functions always write rows with widths that are
+ // multiples of 16. So to avoid a buffer overflow, we may need to pad
+ // rows to a multiple of 16.
+ int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
+ uint8_t *output = new uint8_t[output_n];  // Destination for the C reference.
+ uint8_t *output2 = new uint8_t[output_n];  // Destination for test_impl.
+
+ // Generate random filter kernels
+ DECLARE_ALIGNED(16, InterpKernel, hkernel);
+ DECLARE_ALIGNED(16, InterpKernel, vkernel);
+
+ generate_kernels(&rnd_, hkernel, vkernel);
+
+ for (i = 0; i < h; ++i)  // Fill the whole source buffer with rnd_.Rand8() values.
+ for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
+
+ aom_usec_timer ref_timer;
+ aom_usec_timer_start(&ref_timer);  // First timed region: the C reference at every start position.
+ for (i = 0; i < num_iters; ++i) {
+ for (j = 3; j < h - out_h - 4; j++) {  // j = starting row; the margins presumably leave room for the filter taps.
+ for (k = 3; k < w - out_w - 4; k++) {  // k = starting column, with the same margins as j.
+ av1_wiener_convolve_add_src_c(input + j * w + k, w, output, out_w,
+ hkernel, 16, vkernel, 16, out_w, out_h,
+ &conv_params);
+ }
+ }
+ }
+ aom_usec_timer_mark(&ref_timer);
+ const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);  // Reported as microseconds ("us") in the failure message below.
+
+ aom_usec_timer tst_timer;
+ aom_usec_timer_start(&tst_timer);  // Second timed region: identical loops, now calling test_impl.
+ for (i = 0; i < num_iters; ++i) {
+ for (j = 3; j < h - out_h - 4; j++) {
+ for (k = 3; k < w - out_w - 4; k++) {
+ test_impl(input + j * w + k, w, output2, out_w, hkernel, 16, vkernel,
+ 16, out_w, out_h, &conv_params);
+ }
+ }
+ }
+ aom_usec_timer_mark(&tst_timer);
+ const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
+
+ std::cout << "[ ] C time = " << ref_time / 1000  // Divided by 1000 so the printed values are in milliseconds.
+ << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
+
+ EXPECT_GT(ref_time, tst_time)  // NOTE(review): timing-based expectation; results may vary with machine load.
+ << "Error: AV1HiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
+ << "C time: " << ref_time << " us\n"
+ << "SIMD time: " << tst_time << " us\n";
+
+ delete[] input_;
+ delete[] output;
+ delete[] output2;
+}
} // namespace AV1HiprecConvolve
-#if CONFIG_HIGHBITDEPTH
namespace AV1HighbdHiprecConvolve {
::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams(
@@ -141,13 +213,14 @@ void AV1HighbdHiprecConvolveTest::RunCheckOutput(
const int num_iters = GET_PARAM(2);
const int bd = GET_PARAM(3);
int i, j;
+ const ConvolveParams conv_params = get_conv_params_wiener(bd);
uint16_t *input = new uint16_t[h * w];
- // The convolve functions always write rows with widths that are multiples of
- // 8.
- // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
- int output_n = ((out_w + 7) & ~7) * out_h;
+ // The AVX2 convolve functions always write rows with widths that are
+ // multiples of 16. So to avoid a buffer overflow, we may need to pad
+ // rows to a multiple of 16.
+ int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
uint16_t *output = new uint16_t[output_n];
uint16_t *output2 = new uint16_t[output_n];
@@ -168,11 +241,11 @@ void AV1HighbdHiprecConvolveTest::RunCheckOutput(
// Choose random locations within the source block
int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
- aom_highbd_convolve8_add_src_hip_c(input_ptr + offset_r * w + offset_c, w,
- output_ptr, out_w, hkernel, 16, vkernel,
- 16, out_w, out_h, bd);
+ av1_highbd_wiener_convolve_add_src_c(
+ input_ptr + offset_r * w + offset_c, w, output_ptr, out_w, hkernel, 16,
+ vkernel, 16, out_w, out_h, &conv_params, bd);
test_impl(input_ptr + offset_r * w + offset_c, w, output2_ptr, out_w,
- hkernel, 16, vkernel, 16, out_w, out_h, bd);
+ hkernel, 16, vkernel, 16, out_w, out_h, &conv_params, bd);
for (j = 0; j < out_w * out_h; ++j)
ASSERT_EQ(output[j], output2[j])
@@ -183,6 +256,76 @@ void AV1HighbdHiprecConvolveTest::RunCheckOutput(
delete[] output;
delete[] output2;
}
+
+void AV1HighbdHiprecConvolveTest::RunSpeedTest(
+ highbd_hiprec_convolve_func test_impl) {  // Times test_impl against av1_highbd_wiener_convolve_add_src_c on the same inputs.
+ const int w = 128, h = 128;  // Source buffer is h rows of w samples; w is also the source stride.
+ const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);  // Output block size from the test parameters.
+ const int num_iters = GET_PARAM(2) / 500;  // 1/500 of the parameterized iteration count (integer division).
+ const int bd = GET_PARAM(3);  // Bit depth: used to mask the samples and passed to both convolve calls.
+ int i, j, k;
+ const ConvolveParams conv_params = get_conv_params_wiener(bd);
+
+ uint16_t *input = new uint16_t[h * w];
+
+ // The AVX2 convolve functions always write rows with widths that are
+ // multiples of 16. So to avoid a buffer overflow, we may need to pad
+ // rows to a multiple of 16.
+ int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
+ uint16_t *output = new uint16_t[output_n];  // Destination for the C reference.
+ uint16_t *output2 = new uint16_t[output_n];  // Destination for test_impl.
+
+ // Generate random filter kernels
+ DECLARE_ALIGNED(16, InterpKernel, hkernel);
+ DECLARE_ALIGNED(16, InterpKernel, vkernel);
+
+ generate_kernels(&rnd_, hkernel, vkernel);
+
+ for (i = 0; i < h; ++i)  // Fill the source; the mask keeps only the low bd bits of each random value.
+ for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+
+ uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input);  // The calls below take these uint8_t* handles, not the uint16_t buffers directly.
+ uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output);
+ uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2);
+
+ aom_usec_timer ref_timer;
+ aom_usec_timer_start(&ref_timer);  // First timed region: the C reference at every start position.
+ for (i = 0; i < num_iters; ++i) {
+ for (j = 3; j < h - out_h - 4; j++) {  // j = starting row; the margins presumably leave room for the filter taps.
+ for (k = 3; k < w - out_w - 4; k++) {  // k = starting column, with the same margins as j.
+ av1_highbd_wiener_convolve_add_src_c(
+ input_ptr + j * w + k, w, output_ptr, out_w, hkernel, 16, vkernel,
+ 16, out_w, out_h, &conv_params, bd);
+ }
+ }
+ }
+ aom_usec_timer_mark(&ref_timer);
+ const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);  // Reported as microseconds ("us") in the failure message below.
+
+ aom_usec_timer tst_timer;
+ aom_usec_timer_start(&tst_timer);  // Second timed region: identical loops, now calling test_impl.
+ for (i = 0; i < num_iters; ++i) {
+ for (j = 3; j < h - out_h - 4; j++) {
+ for (k = 3; k < w - out_w - 4; k++) {
+ test_impl(input_ptr + j * w + k, w, output2_ptr, out_w, hkernel, 16,
+ vkernel, 16, out_w, out_h, &conv_params, bd);
+ }
+ }
+ }
+ aom_usec_timer_mark(&tst_timer);
+ const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
+
+ std::cout << "[ ] C time = " << ref_time / 1000  // Divided by 1000 so the printed values are in milliseconds.
+ << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
+
+ EXPECT_GT(ref_time, tst_time)  // NOTE(review): timing-based expectation; results may vary with machine load.
+ << "Error: AV1HighbdHiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
+ << "C time: " << ref_time << " us\n"
+ << "SIMD time: " << tst_time << " us\n";
+
+ delete[] input;
+ delete[] output;
+ delete[] output2;
+}
} // namespace AV1HighbdHiprecConvolve
-#endif // CONFIG_HIGHBITDEPTH
} // namespace libaom_test