summary refs log tree commit diff stats
path: root/third_party/aom/test/selfguided_filter_test.cc
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/aom/test/selfguided_filter_test.cc')
-rw-r--r-- third_party/aom/test/selfguided_filter_test.cc 239
1 files changed, 147 insertions, 92 deletions
diff --git a/third_party/aom/test/selfguided_filter_test.cc b/third_party/aom/test/selfguided_filter_test.cc
index 55ce1d5de..4506a90db 100644
--- a/third_party/aom/test/selfguided_filter_test.cc
+++ b/third_party/aom/test/selfguided_filter_test.cc
@@ -13,22 +13,30 @@
#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "./av1_rtcd.h"
+#include "config/av1_rtcd.h"
+
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
+#include "aom_ports/aom_timer.h"
#include "av1/common/mv.h"
#include "av1/common/restoration.h"
namespace {
-using std::tr1::tuple;
-using std::tr1::make_tuple;
+using ::testing::make_tuple;
+using ::testing::tuple;
using libaom_test::ACMRandom;
-typedef tuple<> FilterTestParam;
+typedef void (*SgrFunc)(const uint8_t *dat8, int width, int height, int stride,
+ int eps, const int *xqd, uint8_t *dst8, int dst_stride,
+ int32_t *tmpbuf, int bit_depth, int highbd);
+
+// Test parameter list:
+// <tst_fun_>
+typedef tuple<SgrFunc> FilterTestParam;
class AV1SelfguidedFilterTest
: public ::testing::TestWithParam<FilterTestParam> {
@@ -40,6 +48,7 @@ class AV1SelfguidedFilterTest
protected:
void RunSpeedTest() {
+ tst_fun_ = GET_PARAM(0);
const int pu_width = RESTORATION_PROC_UNIT_SIZE;
const int pu_height = RESTORATION_PROC_UNIT_SIZE;
const int width = 256, height = 256, stride = 288, out_stride = 288;
@@ -47,10 +56,10 @@ class AV1SelfguidedFilterTest
int i, j, k;
uint8_t *input_ =
- (uint8_t *)aom_memalign(16, stride * (height + 32) * sizeof(uint8_t));
+ (uint8_t *)aom_memalign(32, stride * (height + 32) * sizeof(uint8_t));
uint8_t *output_ = (uint8_t *)aom_memalign(
- 16, out_stride * (height + 32) * sizeof(uint8_t));
- int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE);
+ 32, out_stride * (height + 32) * sizeof(uint8_t));
+ int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
uint8_t *input = input_ + stride * 16 + 16;
uint8_t *output = output_ + out_stride * 16 + 16;
@@ -60,19 +69,18 @@ class AV1SelfguidedFilterTest
for (j = -16; j < width + 16; ++j)
input[i * stride + j] = rnd.Rand16() & 0xFF;
- int xqd[2] = {
- SGRPROJ_PRJ_MIN0 +
- rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0),
- SGRPROJ_PRJ_MIN1 +
- rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1)
- };
+ int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
+ SGRPROJ_PRJ_MIN0),
+ SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
+ SGRPROJ_PRJ_MIN1) };
// Fix a parameter set, since the speed depends slightly on r.
// Change this to test different combinations of values of r.
int eps = 15;
av1_loop_restoration_precal();
- std::clock_t start = std::clock();
+ aom_usec_timer ref_timer;
+ aom_usec_timer_start(&ref_timer);
for (i = 0; i < NUM_ITERS; ++i) {
for (k = 0; k < height; k += pu_height)
for (j = 0; j < width; j += pu_width) {
@@ -80,15 +88,36 @@ class AV1SelfguidedFilterTest
int h = AOMMIN(pu_height, height - k);
uint8_t *input_p = input + k * stride + j;
uint8_t *output_p = output + k * out_stride + j;
- apply_selfguided_restoration(input_p, w, h, stride, eps, xqd,
- output_p, out_stride, tmpbuf);
+ apply_selfguided_restoration_c(input_p, w, h, stride, eps, xqd,
+ output_p, out_stride, tmpbuf, 8, 0);
}
}
- std::clock_t end = std::clock();
- double elapsed = ((end - start) / (double)CLOCKS_PER_SEC);
+ aom_usec_timer_mark(&ref_timer);
+ const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
- printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, width,
- height, elapsed, elapsed * 1000000. / NUM_ITERS);
+ aom_usec_timer tst_timer;
+ aom_usec_timer_start(&tst_timer);
+ for (i = 0; i < NUM_ITERS; ++i) {
+ for (k = 0; k < height; k += pu_height)
+ for (j = 0; j < width; j += pu_width) {
+ int w = AOMMIN(pu_width, width - j);
+ int h = AOMMIN(pu_height, height - k);
+ uint8_t *input_p = input + k * stride + j;
+ uint8_t *output_p = output + k * out_stride + j;
+ tst_fun_(input_p, w, h, stride, eps, xqd, output_p, out_stride,
+ tmpbuf, 8, 0);
+ }
+ }
+ aom_usec_timer_mark(&tst_timer);
+ const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
+
+ std::cout << "[ ] C time = " << ref_time / 1000
+ << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
+
+ EXPECT_GT(ref_time, tst_time)
+ << "Error: AV1SelfguidedFilterTest.SpeedTest, SIMD slower than C.\n"
+ << "C time: " << ref_time << " us\n"
+ << "SIMD time: " << tst_time << " us\n";
aom_free(input_);
aom_free(output_);
@@ -96,6 +125,7 @@ class AV1SelfguidedFilterTest
}
void RunCorrectnessTest() {
+ tst_fun_ = GET_PARAM(0);
const int pu_width = RESTORATION_PROC_UNIT_SIZE;
const int pu_height = RESTORATION_PROC_UNIT_SIZE;
// Set the maximum width/height to test here. We actually test a small
@@ -106,12 +136,12 @@ class AV1SelfguidedFilterTest
int i, j, k;
uint8_t *input_ =
- (uint8_t *)aom_memalign(16, stride * (max_h + 32) * sizeof(uint8_t));
+ (uint8_t *)aom_memalign(32, stride * (max_h + 32) * sizeof(uint8_t));
uint8_t *output_ = (uint8_t *)aom_memalign(
- 16, out_stride * (max_h + 32) * sizeof(uint8_t));
+ 32, out_stride * (max_h + 32) * sizeof(uint8_t));
uint8_t *output2_ = (uint8_t *)aom_memalign(
- 16, out_stride * (max_h + 32) * sizeof(uint8_t));
- int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE);
+ 32, out_stride * (max_h + 32) * sizeof(uint8_t));
+ int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
uint8_t *input = input_ + stride * 16 + 16;
uint8_t *output = output_ + out_stride * 16 + 16;
@@ -126,12 +156,10 @@ class AV1SelfguidedFilterTest
for (k = -16; k < max_w + 16; ++k)
input[j * stride + k] = rnd.Rand16() & 0xFF;
- int xqd[2] = {
- SGRPROJ_PRJ_MIN0 +
- rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0),
- SGRPROJ_PRJ_MIN1 +
- rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1)
- };
+ int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
+ SGRPROJ_PRJ_MIN0),
+ SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
+ SGRPROJ_PRJ_MIN1) };
int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS);
// Test various tile sizes around 256x256
@@ -145,17 +173,12 @@ class AV1SelfguidedFilterTest
uint8_t *input_p = input + k * stride + j;
uint8_t *output_p = output + k * out_stride + j;
uint8_t *output2_p = output2 + k * out_stride + j;
- apply_selfguided_restoration(input_p, w, h, stride, eps, xqd,
- output_p, out_stride, tmpbuf);
+ tst_fun_(input_p, w, h, stride, eps, xqd, output_p, out_stride,
+ tmpbuf, 8, 0);
apply_selfguided_restoration_c(input_p, w, h, stride, eps, xqd,
- output2_p, out_stride, tmpbuf);
+ output2_p, out_stride, tmpbuf, 8, 0);
}
- /*
- apply_selfguided_restoration(input, test_w, test_h, stride, eps, xqd,
- output, out_stride, tmpbuf);
- apply_selfguided_restoration_c(input, test_w, test_h, stride, eps, xqd,
- output2, out_stride, tmpbuf);
- */
+
for (j = 0; j < test_h; ++j)
for (k = 0; k < test_w; ++k) {
ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]);
@@ -167,20 +190,27 @@ class AV1SelfguidedFilterTest
aom_free(output2_);
aom_free(tmpbuf);
}
+
+ private:
+ SgrFunc tst_fun_;
};
-TEST_P(AV1SelfguidedFilterTest, SpeedTest) { RunSpeedTest(); }
+TEST_P(AV1SelfguidedFilterTest, DISABLED_SpeedTest) { RunSpeedTest(); }
TEST_P(AV1SelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); }
#if HAVE_SSE4_1
-const FilterTestParam params[] = { make_tuple() };
INSTANTIATE_TEST_CASE_P(SSE4_1, AV1SelfguidedFilterTest,
- ::testing::ValuesIn(params));
+ ::testing::Values(apply_selfguided_restoration_sse4_1));
#endif
-#if CONFIG_HIGHBITDEPTH
+#if HAVE_AVX2
+INSTANTIATE_TEST_CASE_P(AVX2, AV1SelfguidedFilterTest,
+ ::testing::Values(apply_selfguided_restoration_avx2));
+#endif
-typedef tuple<int> HighbdFilterTestParam;
+// Test parameter list:
+// <tst_fun_, bit_depth>
+typedef tuple<SgrFunc, int> HighbdFilterTestParam;
class AV1HighbdSelfguidedFilterTest
: public ::testing::TestWithParam<HighbdFilterTestParam> {
@@ -192,19 +222,20 @@ class AV1HighbdSelfguidedFilterTest
protected:
void RunSpeedTest() {
+ tst_fun_ = GET_PARAM(0);
const int pu_width = RESTORATION_PROC_UNIT_SIZE;
const int pu_height = RESTORATION_PROC_UNIT_SIZE;
const int width = 256, height = 256, stride = 288, out_stride = 288;
const int NUM_ITERS = 2000;
int i, j, k;
- int bit_depth = GET_PARAM(0);
+ int bit_depth = GET_PARAM(1);
int mask = (1 << bit_depth) - 1;
uint16_t *input_ =
- (uint16_t *)aom_memalign(16, stride * (height + 32) * sizeof(uint16_t));
+ (uint16_t *)aom_memalign(32, stride * (height + 32) * sizeof(uint16_t));
uint16_t *output_ = (uint16_t *)aom_memalign(
- 16, out_stride * (height + 32) * sizeof(uint16_t));
- int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE);
+ 32, out_stride * (height + 32) * sizeof(uint16_t));
+ int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
uint16_t *input = input_ + stride * 16 + 16;
uint16_t *output = output_ + out_stride * 16 + 16;
@@ -214,19 +245,18 @@ class AV1HighbdSelfguidedFilterTest
for (j = -16; j < width + 16; ++j)
input[i * stride + j] = rnd.Rand16() & mask;
- int xqd[2] = {
- SGRPROJ_PRJ_MIN0 +
- rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0),
- SGRPROJ_PRJ_MIN1 +
- rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1)
- };
+ int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
+ SGRPROJ_PRJ_MIN0),
+ SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
+ SGRPROJ_PRJ_MIN1) };
// Fix a parameter set, since the speed depends slightly on r.
// Change this to test different combinations of values of r.
int eps = 15;
av1_loop_restoration_precal();
- std::clock_t start = std::clock();
+ aom_usec_timer ref_timer;
+ aom_usec_timer_start(&ref_timer);
for (i = 0; i < NUM_ITERS; ++i) {
for (k = 0; k < height; k += pu_height)
for (j = 0; j < width; j += pu_width) {
@@ -234,16 +264,39 @@ class AV1HighbdSelfguidedFilterTest
int h = AOMMIN(pu_height, height - k);
uint16_t *input_p = input + k * stride + j;
uint16_t *output_p = output + k * out_stride + j;
- apply_selfguided_restoration_highbd(input_p, w, h, stride, bit_depth,
- eps, xqd, output_p, out_stride,
- tmpbuf);
+ apply_selfguided_restoration_c(
+ CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
+ CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth, 1);
}
}
- std::clock_t end = std::clock();
- double elapsed = ((end - start) / (double)CLOCKS_PER_SEC);
+ aom_usec_timer_mark(&ref_timer);
+ const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
- printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, width,
- height, elapsed, elapsed * 1000000. / NUM_ITERS);
+ aom_usec_timer tst_timer;
+ aom_usec_timer_start(&tst_timer);
+ for (i = 0; i < NUM_ITERS; ++i) {
+ for (k = 0; k < height; k += pu_height)
+ for (j = 0; j < width; j += pu_width) {
+ int w = AOMMIN(pu_width, width - j);
+ int h = AOMMIN(pu_height, height - k);
+ uint16_t *input_p = input + k * stride + j;
+ uint16_t *output_p = output + k * out_stride + j;
+ tst_fun_(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
+ CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth,
+ 1);
+ }
+ }
+ aom_usec_timer_mark(&tst_timer);
+ const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
+
+ std::cout << "[ ] C time = " << ref_time / 1000
+ << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
+
+ EXPECT_GT(ref_time, tst_time)
+ << "Error: AV1HighbdSelfguidedFilterTest.SpeedTest, SIMD slower than "
+ "C.\n"
+ << "C time: " << ref_time << " us\n"
+ << "SIMD time: " << tst_time << " us\n";
aom_free(input_);
aom_free(output_);
@@ -251,6 +304,7 @@ class AV1HighbdSelfguidedFilterTest
}
void RunCorrectnessTest() {
+ tst_fun_ = GET_PARAM(0);
const int pu_width = RESTORATION_PROC_UNIT_SIZE;
const int pu_height = RESTORATION_PROC_UNIT_SIZE;
// Set the maximum width/height to test here. We actually test a small
@@ -259,16 +313,16 @@ class AV1HighbdSelfguidedFilterTest
const int max_w = 260, max_h = 260, stride = 672, out_stride = 672;
const int NUM_ITERS = 81;
int i, j, k;
- int bit_depth = GET_PARAM(0);
+ int bit_depth = GET_PARAM(1);
int mask = (1 << bit_depth) - 1;
uint16_t *input_ =
- (uint16_t *)aom_memalign(16, stride * (max_h + 32) * sizeof(uint16_t));
+ (uint16_t *)aom_memalign(32, stride * (max_h + 32) * sizeof(uint16_t));
uint16_t *output_ = (uint16_t *)aom_memalign(
- 16, out_stride * (max_h + 32) * sizeof(uint16_t));
+ 32, out_stride * (max_h + 32) * sizeof(uint16_t));
uint16_t *output2_ = (uint16_t *)aom_memalign(
- 16, out_stride * (max_h + 32) * sizeof(uint16_t));
- int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE);
+ 32, out_stride * (max_h + 32) * sizeof(uint16_t));
+ int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
uint16_t *input = input_ + stride * 16 + 16;
uint16_t *output = output_ + out_stride * 16 + 16;
@@ -283,12 +337,10 @@ class AV1HighbdSelfguidedFilterTest
for (k = -16; k < max_w + 16; ++k)
input[j * stride + k] = rnd.Rand16() & mask;
- int xqd[2] = {
- SGRPROJ_PRJ_MIN0 +
- rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0),
- SGRPROJ_PRJ_MIN1 +
- rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1)
- };
+ int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
+ SGRPROJ_PRJ_MIN0),
+ SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
+ SGRPROJ_PRJ_MIN1) };
int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS);
// Test various tile sizes around 256x256
@@ -302,22 +354,14 @@ class AV1HighbdSelfguidedFilterTest
uint16_t *input_p = input + k * stride + j;
uint16_t *output_p = output + k * out_stride + j;
uint16_t *output2_p = output2 + k * out_stride + j;
- apply_selfguided_restoration_highbd(input_p, w, h, stride, bit_depth,
- eps, xqd, output_p, out_stride,
- tmpbuf);
- apply_selfguided_restoration_highbd_c(input_p, w, h, stride,
- bit_depth, eps, xqd, output2_p,
- out_stride, tmpbuf);
+ tst_fun_(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
+ CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth,
+ 1);
+ apply_selfguided_restoration_c(
+ CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
+ CONVERT_TO_BYTEPTR(output2_p), out_stride, tmpbuf, bit_depth, 1);
}
- /*
- apply_selfguided_restoration_highbd(input, test_w, test_h, stride,
- bit_depth, eps, xqd, output,
- out_stride, tmpbuf);
- apply_selfguided_restoration_highbd_c(input, test_w, test_h, stride,
- bit_depth, eps, xqd, output2,
- out_stride, tmpbuf);
- */
for (j = 0; j < test_h; ++j)
for (k = 0; k < test_w; ++k)
ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]);
@@ -328,17 +372,28 @@ class AV1HighbdSelfguidedFilterTest
aom_free(output2_);
aom_free(tmpbuf);
}
+
+ private:
+ SgrFunc tst_fun_;
};
-TEST_P(AV1HighbdSelfguidedFilterTest, SpeedTest) { RunSpeedTest(); }
+TEST_P(AV1HighbdSelfguidedFilterTest, DISABLED_SpeedTest) { RunSpeedTest(); }
TEST_P(AV1HighbdSelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); }
#if HAVE_SSE4_1
-const HighbdFilterTestParam highbd_params[] = { make_tuple(8), make_tuple(10),
- make_tuple(12) };
-INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdSelfguidedFilterTest,
- ::testing::ValuesIn(highbd_params));
+const int highbd_params_sse4_1[] = { 8, 10, 12 };
+INSTANTIATE_TEST_CASE_P(
+ SSE4_1, AV1HighbdSelfguidedFilterTest,
+ ::testing::Combine(::testing::Values(apply_selfguided_restoration_sse4_1),
+ ::testing::ValuesIn(highbd_params_sse4_1)));
#endif
+
+#if HAVE_AVX2
+const int highbd_params_avx2[] = { 8, 10, 12 };
+INSTANTIATE_TEST_CASE_P(
+ AVX2, AV1HighbdSelfguidedFilterTest,
+ ::testing::Combine(::testing::Values(apply_selfguided_restoration_avx2),
+ ::testing::ValuesIn(highbd_params_avx2)));
#endif
} // namespace