diff options
Diffstat (limited to 'third_party/aom/test/selfguided_filter_test.cc')
-rw-r--r-- | third_party/aom/test/selfguided_filter_test.cc | 174 |
1 files changed, 124 insertions, 50 deletions
diff --git a/third_party/aom/test/selfguided_filter_test.cc b/third_party/aom/test/selfguided_filter_test.cc index 736e3f4c0..55ce1d5de 100644 --- a/third_party/aom/test/selfguided_filter_test.cc +++ b/third_party/aom/test/selfguided_filter_test.cc @@ -40,18 +40,25 @@ class AV1SelfguidedFilterTest protected: void RunSpeedTest() { - const int w = 256, h = 256; + const int pu_width = RESTORATION_PROC_UNIT_SIZE; + const int pu_height = RESTORATION_PROC_UNIT_SIZE; + const int width = 256, height = 256, stride = 288, out_stride = 288; const int NUM_ITERS = 2000; - int i, j; + int i, j, k; - uint8_t *input = (uint8_t *)aom_memalign(16, w * h * sizeof(uint8_t)); - uint8_t *output = (uint8_t *)aom_memalign(16, w * h * sizeof(uint8_t)); + uint8_t *input_ = + (uint8_t *)aom_memalign(16, stride * (height + 32) * sizeof(uint8_t)); + uint8_t *output_ = (uint8_t *)aom_memalign( + 16, out_stride * (height + 32) * sizeof(uint8_t)); int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE); + uint8_t *input = input_ + stride * 16 + 16; + uint8_t *output = output_ + out_stride * 16 + 16; ACMRandom rnd(ACMRandom::DeterministicSeed()); - for (i = 0; i < h; ++i) - for (j = 0; j < w; ++j) input[i * w + j] = rnd.Rand16() & 0xFF; + for (i = -16; i < height + 16; ++i) + for (j = -16; j < width + 16; ++j) + input[i * stride + j] = rnd.Rand16() & 0xFF; int xqd[2] = { SGRPROJ_PRJ_MIN0 + @@ -67,20 +74,30 @@ class AV1SelfguidedFilterTest std::clock_t start = std::clock(); for (i = 0; i < NUM_ITERS; ++i) { - apply_selfguided_restoration(input, w, h, w, eps, xqd, output, w, tmpbuf); + for (k = 0; k < height; k += pu_height) + for (j = 0; j < width; j += pu_width) { + int w = AOMMIN(pu_width, width - j); + int h = AOMMIN(pu_height, height - k); + uint8_t *input_p = input + k * stride + j; + uint8_t *output_p = output + k * out_stride + j; + apply_selfguided_restoration(input_p, w, h, stride, eps, xqd, + output_p, out_stride, tmpbuf); + } } std::clock_t end = std::clock(); double elapsed = ((end - start) / (double)CLOCKS_PER_SEC); - printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, w, h, - elapsed, elapsed * 1000000. / NUM_ITERS); + printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, width, + height, elapsed, elapsed * 1000000. / NUM_ITERS); - aom_free(input); - aom_free(output); + aom_free(input_); + aom_free(output_); aom_free(tmpbuf); } void RunCorrectnessTest() { + const int pu_width = RESTORATION_PROC_UNIT_SIZE; + const int pu_height = RESTORATION_PROC_UNIT_SIZE; // Set the maximum width/height to test here. We actually test a small // range of sizes *up to* this size, so that we can check, eg., // the behaviour on tiles which are not a multiple of 4 wide. @@ -88,21 +105,26 @@ class AV1SelfguidedFilterTest const int NUM_ITERS = 81; int i, j, k; - uint8_t *input = - (uint8_t *)aom_memalign(16, stride * max_h * sizeof(uint8_t)); - uint8_t *output = - (uint8_t *)aom_memalign(16, out_stride * max_h * sizeof(uint8_t)); - uint8_t *output2 = - (uint8_t *)aom_memalign(16, out_stride * max_h * sizeof(uint8_t)); + uint8_t *input_ = + (uint8_t *)aom_memalign(16, stride * (max_h + 32) * sizeof(uint8_t)); + uint8_t *output_ = (uint8_t *)aom_memalign( + 16, out_stride * (max_h + 32) * sizeof(uint8_t)); + uint8_t *output2_ = (uint8_t *)aom_memalign( + 16, out_stride * (max_h + 32) * sizeof(uint8_t)); int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE); + uint8_t *input = input_ + stride * 16 + 16; + uint8_t *output = output_ + out_stride * 16 + 16; + uint8_t *output2 = output2_ + out_stride * 16 + 16; + ACMRandom rnd(ACMRandom::DeterministicSeed()); av1_loop_restoration_precal(); for (i = 0; i < NUM_ITERS; ++i) { - for (j = 0; j < max_h; ++j) - for (k = 0; k < max_w; ++k) input[j * stride + k] = rnd.Rand16() & 0xFF; + for (j = -16; j < max_h + 16; ++j) + for (k = -16; k < max_w + 16; ++k) + input[j * stride + k] = rnd.Rand16() & 0xFF; int xqd[2] = { SGRPROJ_PRJ_MIN0 + @@ -116,18 +138,33 @@ class AV1SelfguidedFilterTest int test_w = max_w - (i / 9); int test_h = max_h - (i % 9); + for (k = 0; k < test_h; k += pu_height) + for (j = 0; j < test_w; j += pu_width) { + int w = AOMMIN(pu_width, test_w - j); + int h = AOMMIN(pu_height, test_h - k); + uint8_t *input_p = input + k * stride + j; + uint8_t *output_p = output + k * out_stride + j; + uint8_t *output2_p = output2 + k * out_stride + j; + apply_selfguided_restoration(input_p, w, h, stride, eps, xqd, + output_p, out_stride, tmpbuf); + apply_selfguided_restoration_c(input_p, w, h, stride, eps, xqd, + output2_p, out_stride, tmpbuf); + } + /* apply_selfguided_restoration(input, test_w, test_h, stride, eps, xqd, output, out_stride, tmpbuf); apply_selfguided_restoration_c(input, test_w, test_h, stride, eps, xqd, output2, out_stride, tmpbuf); + */ for (j = 0; j < test_h; ++j) - for (k = 0; k < test_w; ++k) + for (k = 0; k < test_w; ++k) { ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]); + } } - aom_free(input); - aom_free(output); - aom_free(output2); + aom_free(input_); + aom_free(output_); + aom_free(output2_); aom_free(tmpbuf); } }; @@ -135,9 +172,8 @@ class AV1SelfguidedFilterTest TEST_P(AV1SelfguidedFilterTest, SpeedTest) { RunSpeedTest(); } TEST_P(AV1SelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); } -const FilterTestParam params[] = { make_tuple() }; - #if HAVE_SSE4_1 +const FilterTestParam params[] = { make_tuple() }; INSTANTIATE_TEST_CASE_P(SSE4_1, AV1SelfguidedFilterTest, ::testing::ValuesIn(params)); #endif @@ -156,20 +192,27 @@ class AV1HighbdSelfguidedFilterTest protected: void RunSpeedTest() { - const int w = 256, h = 256; + const int pu_width = RESTORATION_PROC_UNIT_SIZE; + const int pu_height = RESTORATION_PROC_UNIT_SIZE; + const int width = 256, height = 256, stride = 288, out_stride = 288; const int NUM_ITERS = 2000; - int i, j; + int i, j, k; int bit_depth = GET_PARAM(0); int mask = (1 << bit_depth) - 1; - uint16_t *input = (uint16_t *)aom_memalign(16, w * h * sizeof(uint16_t)); - uint16_t *output = (uint16_t *)aom_memalign(16, w * h * sizeof(uint16_t)); + uint16_t *input_ = + (uint16_t *)aom_memalign(16, stride * (height + 32) * sizeof(uint16_t)); + uint16_t *output_ = (uint16_t *)aom_memalign( + 16, out_stride * (height + 32) * sizeof(uint16_t)); int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE); + uint16_t *input = input_ + stride * 16 + 16; + uint16_t *output = output_ + out_stride * 16 + 16; ACMRandom rnd(ACMRandom::DeterministicSeed()); - for (i = 0; i < h; ++i) - for (j = 0; j < w; ++j) input[i * w + j] = rnd.Rand16() & mask; + for (i = -16; i < height + 16; ++i) + for (j = -16; j < width + 16; ++j) + input[i * stride + j] = rnd.Rand16() & mask; int xqd[2] = { SGRPROJ_PRJ_MIN0 + @@ -185,21 +228,31 @@ class AV1HighbdSelfguidedFilterTest std::clock_t start = std::clock(); for (i = 0; i < NUM_ITERS; ++i) { - apply_selfguided_restoration_highbd(input, w, h, w, bit_depth, eps, xqd, - output, w, tmpbuf); + for (k = 0; k < height; k += pu_height) + for (j = 0; j < width; j += pu_width) { + int w = AOMMIN(pu_width, width - j); + int h = AOMMIN(pu_height, height - k); + uint16_t *input_p = input + k * stride + j; + uint16_t *output_p = output + k * out_stride + j; + apply_selfguided_restoration_highbd(input_p, w, h, stride, bit_depth, + eps, xqd, output_p, out_stride, + tmpbuf); + } } std::clock_t end = std::clock(); double elapsed = ((end - start) / (double)CLOCKS_PER_SEC); - printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, w, h, - elapsed, elapsed * 1000000. / NUM_ITERS); + printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, width, + height, elapsed, elapsed * 1000000. / NUM_ITERS); - aom_free(input); - aom_free(output); + aom_free(input_); + aom_free(output_); aom_free(tmpbuf); } void RunCorrectnessTest() { + const int pu_width = RESTORATION_PROC_UNIT_SIZE; + const int pu_height = RESTORATION_PROC_UNIT_SIZE; // Set the maximum width/height to test here. We actually test a small // range of sizes *up to* this size, so that we can check, eg., // the behaviour on tiles which are not a multiple of 4 wide. @@ -209,21 +262,26 @@ class AV1HighbdSelfguidedFilterTest int bit_depth = GET_PARAM(0); int mask = (1 << bit_depth) - 1; - uint16_t *input = - (uint16_t *)aom_memalign(16, stride * max_h * sizeof(uint16_t)); - uint16_t *output = - (uint16_t *)aom_memalign(16, out_stride * max_h * sizeof(uint16_t)); - uint16_t *output2 = - (uint16_t *)aom_memalign(16, out_stride * max_h * sizeof(uint16_t)); + uint16_t *input_ = + (uint16_t *)aom_memalign(16, stride * (max_h + 32) * sizeof(uint16_t)); + uint16_t *output_ = (uint16_t *)aom_memalign( + 16, out_stride * (max_h + 32) * sizeof(uint16_t)); + uint16_t *output2_ = (uint16_t *)aom_memalign( + 16, out_stride * (max_h + 32) * sizeof(uint16_t)); int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE); + uint16_t *input = input_ + stride * 16 + 16; + uint16_t *output = output_ + out_stride * 16 + 16; + uint16_t *output2 = output2_ + out_stride * 16 + 16; + ACMRandom rnd(ACMRandom::DeterministicSeed()); av1_loop_restoration_precal(); for (i = 0; i < NUM_ITERS; ++i) { - for (j = 0; j < max_h; ++j) - for (k = 0; k < max_w; ++k) input[j * stride + k] = rnd.Rand16() & mask; + for (j = -16; j < max_h + 16; ++j) + for (k = -16; k < max_w + 16; ++k) + input[j * stride + k] = rnd.Rand16() & mask; int xqd[2] = { SGRPROJ_PRJ_MIN0 + @@ -237,20 +295,37 @@ class AV1HighbdSelfguidedFilterTest int test_w = max_w - (i / 9); int test_h = max_h - (i % 9); + for (k = 0; k < test_h; k += pu_height) + for (j = 0; j < test_w; j += pu_width) { + int w = AOMMIN(pu_width, test_w - j); + int h = AOMMIN(pu_height, test_h - k); + uint16_t *input_p = input + k * stride + j; + uint16_t *output_p = output + k * out_stride + j; + uint16_t *output2_p = output2 + k * out_stride + j; + apply_selfguided_restoration_highbd(input_p, w, h, stride, bit_depth, + eps, xqd, output_p, out_stride, + tmpbuf); + apply_selfguided_restoration_highbd_c(input_p, w, h, stride, + bit_depth, eps, xqd, output2_p, + out_stride, tmpbuf); + } + + /* apply_selfguided_restoration_highbd(input, test_w, test_h, stride, bit_depth, eps, xqd, output, out_stride, tmpbuf); apply_selfguided_restoration_highbd_c(input, test_w, test_h, stride, bit_depth, eps, xqd, output2, out_stride, tmpbuf); + */ for (j = 0; j < test_h; ++j) for (k = 0; k < test_w; ++k) ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]); } - aom_free(input); - aom_free(output); - aom_free(output2); + aom_free(input_); + aom_free(output_); + aom_free(output2_); aom_free(tmpbuf); } }; @@ -258,10 +333,9 @@ class AV1HighbdSelfguidedFilterTest TEST_P(AV1HighbdSelfguidedFilterTest, SpeedTest) { RunSpeedTest(); } TEST_P(AV1HighbdSelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); } +#if HAVE_SSE4_1 const HighbdFilterTestParam highbd_params[] = { make_tuple(8), make_tuple(10), make_tuple(12) }; - -#if HAVE_SSE4_1 INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdSelfguidedFilterTest, ::testing::ValuesIn(highbd_params)); #endif |