From bbcc64772580c8a979288791afa02d30bc476d2e Mon Sep 17 00:00:00 2001 From: trav90 Date: Fri, 19 Oct 2018 21:52:15 -0500 Subject: Update aom to v1.0.0 Update aom to commit id d14c5bb4f336ef1842046089849dee4a301fbbf0. --- third_party/aom/test/selfguided_filter_test.cc | 239 +++++++++++++++---------- 1 file changed, 147 insertions(+), 92 deletions(-) (limited to 'third_party/aom/test/selfguided_filter_test.cc') diff --git a/third_party/aom/test/selfguided_filter_test.cc b/third_party/aom/test/selfguided_filter_test.cc index 55ce1d5de..4506a90db 100644 --- a/third_party/aom/test/selfguided_filter_test.cc +++ b/third_party/aom/test/selfguided_filter_test.cc @@ -13,22 +13,30 @@ #include "third_party/googletest/src/googletest/include/gtest/gtest.h" -#include "./av1_rtcd.h" +#include "config/av1_rtcd.h" + #include "test/acm_random.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/util.h" +#include "aom_ports/aom_timer.h" #include "av1/common/mv.h" #include "av1/common/restoration.h" namespace { -using std::tr1::tuple; -using std::tr1::make_tuple; +using ::testing::make_tuple; +using ::testing::tuple; using libaom_test::ACMRandom; -typedef tuple<> FilterTestParam; +typedef void (*SgrFunc)(const uint8_t *dat8, int width, int height, int stride, + int eps, const int *xqd, uint8_t *dst8, int dst_stride, + int32_t *tmpbuf, int bit_depth, int highbd); + +// Test parameter list: +// <tst_fun_> +typedef tuple<SgrFunc> FilterTestParam; class AV1SelfguidedFilterTest : public ::testing::TestWithParam<FilterTestParam> { @@ -40,6 +48,7 @@ class AV1SelfguidedFilterTest protected: void RunSpeedTest() { + tst_fun_ = GET_PARAM(0); const int pu_width = RESTORATION_PROC_UNIT_SIZE; const int pu_height = RESTORATION_PROC_UNIT_SIZE; const int width = 256, height = 256, stride = 288, out_stride = 288; @@ -47,10 +56,10 @@ class AV1SelfguidedFilterTest int i, j, k; uint8_t *input_ = - (uint8_t *)aom_memalign(16, stride * (height + 32) * sizeof(uint8_t)); + (uint8_t *)aom_memalign(32, 
stride * (height + 32) * sizeof(uint8_t)); uint8_t *output_ = (uint8_t *)aom_memalign( - 16, out_stride * (height + 32) * sizeof(uint8_t)); - int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE); + 32, out_stride * (height + 32) * sizeof(uint8_t)); + int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE); uint8_t *input = input_ + stride * 16 + 16; uint8_t *output = output_ + out_stride * 16 + 16; @@ -60,19 +69,18 @@ class AV1SelfguidedFilterTest for (j = -16; j < width + 16; ++j) input[i * stride + j] = rnd.Rand16() & 0xFF; - int xqd[2] = { - SGRPROJ_PRJ_MIN0 + - rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0), - SGRPROJ_PRJ_MIN1 + - rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1) - }; + int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - + SGRPROJ_PRJ_MIN0), + SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - + SGRPROJ_PRJ_MIN1) }; // Fix a parameter set, since the speed depends slightly on r. // Change this to test different combinations of values of r. 
int eps = 15; av1_loop_restoration_precal(); - std::clock_t start = std::clock(); + aom_usec_timer ref_timer; + aom_usec_timer_start(&ref_timer); for (i = 0; i < NUM_ITERS; ++i) { for (k = 0; k < height; k += pu_height) for (j = 0; j < width; j += pu_width) { @@ -80,15 +88,36 @@ class AV1SelfguidedFilterTest int h = AOMMIN(pu_height, height - k); uint8_t *input_p = input + k * stride + j; uint8_t *output_p = output + k * out_stride + j; - apply_selfguided_restoration(input_p, w, h, stride, eps, xqd, - output_p, out_stride, tmpbuf); + apply_selfguided_restoration_c(input_p, w, h, stride, eps, xqd, + output_p, out_stride, tmpbuf, 8, 0); } } - std::clock_t end = std::clock(); - double elapsed = ((end - start) / (double)CLOCKS_PER_SEC); + aom_usec_timer_mark(&ref_timer); + const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); - printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, width, - height, elapsed, elapsed * 1000000. / NUM_ITERS); + aom_usec_timer tst_timer; + aom_usec_timer_start(&tst_timer); + for (i = 0; i < NUM_ITERS; ++i) { + for (k = 0; k < height; k += pu_height) + for (j = 0; j < width; j += pu_width) { + int w = AOMMIN(pu_width, width - j); + int h = AOMMIN(pu_height, height - k); + uint8_t *input_p = input + k * stride + j; + uint8_t *output_p = output + k * out_stride + j; + tst_fun_(input_p, w, h, stride, eps, xqd, output_p, out_stride, + tmpbuf, 8, 0); + } + } + aom_usec_timer_mark(&tst_timer); + const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); + + std::cout << "[ ] C time = " << ref_time / 1000 + << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; + + EXPECT_GT(ref_time, tst_time) + << "Error: AV1SelfguidedFilterTest.SpeedTest, SIMD slower than C.\n" + << "C time: " << ref_time << " us\n" + << "SIMD time: " << tst_time << " us\n"; aom_free(input_); aom_free(output_); @@ -96,6 +125,7 @@ class AV1SelfguidedFilterTest } void RunCorrectnessTest() { + tst_fun_ = GET_PARAM(0); const int pu_width = 
RESTORATION_PROC_UNIT_SIZE; const int pu_height = RESTORATION_PROC_UNIT_SIZE; // Set the maximum width/height to test here. We actually test a small @@ -106,12 +136,12 @@ class AV1SelfguidedFilterTest int i, j, k; uint8_t *input_ = - (uint8_t *)aom_memalign(16, stride * (max_h + 32) * sizeof(uint8_t)); + (uint8_t *)aom_memalign(32, stride * (max_h + 32) * sizeof(uint8_t)); uint8_t *output_ = (uint8_t *)aom_memalign( - 16, out_stride * (max_h + 32) * sizeof(uint8_t)); + 32, out_stride * (max_h + 32) * sizeof(uint8_t)); uint8_t *output2_ = (uint8_t *)aom_memalign( - 16, out_stride * (max_h + 32) * sizeof(uint8_t)); - int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE); + 32, out_stride * (max_h + 32) * sizeof(uint8_t)); + int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE); uint8_t *input = input_ + stride * 16 + 16; uint8_t *output = output_ + out_stride * 16 + 16; @@ -126,12 +156,10 @@ class AV1SelfguidedFilterTest for (k = -16; k < max_w + 16; ++k) input[j * stride + k] = rnd.Rand16() & 0xFF; - int xqd[2] = { - SGRPROJ_PRJ_MIN0 + - rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0), - SGRPROJ_PRJ_MIN1 + - rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1) - }; + int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - + SGRPROJ_PRJ_MIN0), + SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - + SGRPROJ_PRJ_MIN1) }; int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS); // Test various tile sizes around 256x256 @@ -145,17 +173,12 @@ class AV1SelfguidedFilterTest uint8_t *input_p = input + k * stride + j; uint8_t *output_p = output + k * out_stride + j; uint8_t *output2_p = output2 + k * out_stride + j; - apply_selfguided_restoration(input_p, w, h, stride, eps, xqd, - output_p, out_stride, tmpbuf); + tst_fun_(input_p, w, h, stride, eps, xqd, output_p, out_stride, + tmpbuf, 8, 0); apply_selfguided_restoration_c(input_p, w, h, stride, eps, xqd, - output2_p, out_stride, tmpbuf); + output2_p, 
out_stride, tmpbuf, 8, 0); } - /* - apply_selfguided_restoration(input, test_w, test_h, stride, eps, xqd, - output, out_stride, tmpbuf); - apply_selfguided_restoration_c(input, test_w, test_h, stride, eps, xqd, - output2, out_stride, tmpbuf); - */ + for (j = 0; j < test_h; ++j) for (k = 0; k < test_w; ++k) { ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]); @@ -167,20 +190,27 @@ class AV1SelfguidedFilterTest aom_free(output2_); aom_free(tmpbuf); } + + private: + SgrFunc tst_fun_; }; -TEST_P(AV1SelfguidedFilterTest, SpeedTest) { RunSpeedTest(); } +TEST_P(AV1SelfguidedFilterTest, DISABLED_SpeedTest) { RunSpeedTest(); } TEST_P(AV1SelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); } #if HAVE_SSE4_1 -const FilterTestParam params[] = { make_tuple() }; INSTANTIATE_TEST_CASE_P(SSE4_1, AV1SelfguidedFilterTest, - ::testing::ValuesIn(params)); + ::testing::Values(apply_selfguided_restoration_sse4_1)); #endif -#if CONFIG_HIGHBITDEPTH +#if HAVE_AVX2 +INSTANTIATE_TEST_CASE_P(AVX2, AV1SelfguidedFilterTest, + ::testing::Values(apply_selfguided_restoration_avx2)); +#endif -typedef tuple<int> HighbdFilterTestParam; +// Test parameter list: +// <tst_fun_, bit_depth> +typedef tuple<SgrFunc, int> HighbdFilterTestParam; class AV1HighbdSelfguidedFilterTest : public ::testing::TestWithParam<HighbdFilterTestParam> { @@ -192,19 +222,20 @@ class AV1HighbdSelfguidedFilterTest protected: void RunSpeedTest() { + tst_fun_ = GET_PARAM(0); const int pu_width = RESTORATION_PROC_UNIT_SIZE; const int pu_height = RESTORATION_PROC_UNIT_SIZE; const int width = 256, height = 256, stride = 288, out_stride = 288; const int NUM_ITERS = 2000; int i, j, k; - int bit_depth = GET_PARAM(0); + int bit_depth = GET_PARAM(1); int mask = (1 << bit_depth) - 1; uint16_t *input_ = - (uint16_t *)aom_memalign(16, stride * (height + 32) * sizeof(uint16_t)); + (uint16_t *)aom_memalign(32, stride * (height + 32) * sizeof(uint16_t)); uint16_t *output_ = (uint16_t *)aom_memalign( - 16, out_stride * (height + 32) * sizeof(uint16_t)); - int32_t *tmpbuf = 
(int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE); + 32, out_stride * (height + 32) * sizeof(uint16_t)); + int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE); uint16_t *input = input_ + stride * 16 + 16; uint16_t *output = output_ + out_stride * 16 + 16; @@ -214,19 +245,18 @@ class AV1HighbdSelfguidedFilterTest for (j = -16; j < width + 16; ++j) input[i * stride + j] = rnd.Rand16() & mask; - int xqd[2] = { - SGRPROJ_PRJ_MIN0 + - rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0), - SGRPROJ_PRJ_MIN1 + - rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1) - }; + int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - + SGRPROJ_PRJ_MIN0), + SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - + SGRPROJ_PRJ_MIN1) }; // Fix a parameter set, since the speed depends slightly on r. // Change this to test different combinations of values of r. int eps = 15; av1_loop_restoration_precal(); - std::clock_t start = std::clock(); + aom_usec_timer ref_timer; + aom_usec_timer_start(&ref_timer); for (i = 0; i < NUM_ITERS; ++i) { for (k = 0; k < height; k += pu_height) for (j = 0; j < width; j += pu_width) { @@ -234,16 +264,39 @@ class AV1HighbdSelfguidedFilterTest int h = AOMMIN(pu_height, height - k); uint16_t *input_p = input + k * stride + j; uint16_t *output_p = output + k * out_stride + j; - apply_selfguided_restoration_highbd(input_p, w, h, stride, bit_depth, - eps, xqd, output_p, out_stride, - tmpbuf); + apply_selfguided_restoration_c( + CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd, + CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth, 1); } } - std::clock_t end = std::clock(); - double elapsed = ((end - start) / (double)CLOCKS_PER_SEC); + aom_usec_timer_mark(&ref_timer); + const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); - printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, width, - height, elapsed, elapsed * 1000000. 
/ NUM_ITERS); + aom_usec_timer tst_timer; + aom_usec_timer_start(&tst_timer); + for (i = 0; i < NUM_ITERS; ++i) { + for (k = 0; k < height; k += pu_height) + for (j = 0; j < width; j += pu_width) { + int w = AOMMIN(pu_width, width - j); + int h = AOMMIN(pu_height, height - k); + uint16_t *input_p = input + k * stride + j; + uint16_t *output_p = output + k * out_stride + j; + tst_fun_(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd, + CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth, + 1); + } + } + aom_usec_timer_mark(&tst_timer); + const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); + + std::cout << "[ ] C time = " << ref_time / 1000 + << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; + + EXPECT_GT(ref_time, tst_time) + << "Error: AV1HighbdSelfguidedFilterTest.SpeedTest, SIMD slower than " + "C.\n" + << "C time: " << ref_time << " us\n" + << "SIMD time: " << tst_time << " us\n"; aom_free(input_); aom_free(output_); @@ -251,6 +304,7 @@ class AV1HighbdSelfguidedFilterTest } void RunCorrectnessTest() { + tst_fun_ = GET_PARAM(0); const int pu_width = RESTORATION_PROC_UNIT_SIZE; const int pu_height = RESTORATION_PROC_UNIT_SIZE; // Set the maximum width/height to test here. 
We actually test a small @@ -259,16 +313,16 @@ class AV1HighbdSelfguidedFilterTest const int max_w = 260, max_h = 260, stride = 672, out_stride = 672; const int NUM_ITERS = 81; int i, j, k; - int bit_depth = GET_PARAM(0); + int bit_depth = GET_PARAM(1); int mask = (1 << bit_depth) - 1; uint16_t *input_ = - (uint16_t *)aom_memalign(16, stride * (max_h + 32) * sizeof(uint16_t)); + (uint16_t *)aom_memalign(32, stride * (max_h + 32) * sizeof(uint16_t)); uint16_t *output_ = (uint16_t *)aom_memalign( - 16, out_stride * (max_h + 32) * sizeof(uint16_t)); + 32, out_stride * (max_h + 32) * sizeof(uint16_t)); uint16_t *output2_ = (uint16_t *)aom_memalign( - 16, out_stride * (max_h + 32) * sizeof(uint16_t)); - int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE); + 32, out_stride * (max_h + 32) * sizeof(uint16_t)); + int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE); uint16_t *input = input_ + stride * 16 + 16; uint16_t *output = output_ + out_stride * 16 + 16; @@ -283,12 +337,10 @@ class AV1HighbdSelfguidedFilterTest for (k = -16; k < max_w + 16; ++k) input[j * stride + k] = rnd.Rand16() & mask; - int xqd[2] = { - SGRPROJ_PRJ_MIN0 + - rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0), - SGRPROJ_PRJ_MIN1 + - rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1) - }; + int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - + SGRPROJ_PRJ_MIN0), + SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - + SGRPROJ_PRJ_MIN1) }; int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS); // Test various tile sizes around 256x256 @@ -302,22 +354,14 @@ class AV1HighbdSelfguidedFilterTest uint16_t *input_p = input + k * stride + j; uint16_t *output_p = output + k * out_stride + j; uint16_t *output2_p = output2 + k * out_stride + j; - apply_selfguided_restoration_highbd(input_p, w, h, stride, bit_depth, - eps, xqd, output_p, out_stride, - tmpbuf); - apply_selfguided_restoration_highbd_c(input_p, w, h, stride, - 
bit_depth, eps, xqd, output2_p, - out_stride, tmpbuf); + tst_fun_(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd, + CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth, + 1); + apply_selfguided_restoration_c( + CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd, + CONVERT_TO_BYTEPTR(output2_p), out_stride, tmpbuf, bit_depth, 1); } - /* - apply_selfguided_restoration_highbd(input, test_w, test_h, stride, - bit_depth, eps, xqd, output, - out_stride, tmpbuf); - apply_selfguided_restoration_highbd_c(input, test_w, test_h, stride, - bit_depth, eps, xqd, output2, - out_stride, tmpbuf); - */ for (j = 0; j < test_h; ++j) for (k = 0; k < test_w; ++k) ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]); @@ -328,17 +372,28 @@ class AV1HighbdSelfguidedFilterTest aom_free(output2_); aom_free(tmpbuf); } + + private: + SgrFunc tst_fun_; }; -TEST_P(AV1HighbdSelfguidedFilterTest, SpeedTest) { RunSpeedTest(); } +TEST_P(AV1HighbdSelfguidedFilterTest, DISABLED_SpeedTest) { RunSpeedTest(); } TEST_P(AV1HighbdSelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); } #if HAVE_SSE4_1 -const HighbdFilterTestParam highbd_params[] = { make_tuple(8), make_tuple(10), - make_tuple(12) }; -INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdSelfguidedFilterTest, - ::testing::ValuesIn(highbd_params)); +const int highbd_params_sse4_1[] = { 8, 10, 12 }; +INSTANTIATE_TEST_CASE_P( + SSE4_1, AV1HighbdSelfguidedFilterTest, + ::testing::Combine(::testing::Values(apply_selfguided_restoration_sse4_1), + ::testing::ValuesIn(highbd_params_sse4_1))); #endif + +#if HAVE_AVX2 +const int highbd_params_avx2[] = { 8, 10, 12 }; +INSTANTIATE_TEST_CASE_P( + AVX2, AV1HighbdSelfguidedFilterTest, + ::testing::Combine(::testing::Values(apply_selfguided_restoration_avx2), + ::testing::ValuesIn(highbd_params_avx2))); #endif } // namespace -- cgit v1.2.3