summaryrefslogtreecommitdiffstats
path: root/third_party/aom/test/masked_sad_test.cc
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/aom/test/masked_sad_test.cc')
-rw-r--r--third_party/aom/test/masked_sad_test.cc252
1 files changed, 193 insertions, 59 deletions
diff --git a/third_party/aom/test/masked_sad_test.cc b/third_party/aom/test/masked_sad_test.cc
index 1a393a001..311f1877d 100644
--- a/third_party/aom/test/masked_sad_test.cc
+++ b/third_party/aom/test/masked_sad_test.cc
@@ -44,14 +44,14 @@ class MaskedSADTest : public ::testing::TestWithParam<MaskedSADParam> {
}
virtual void TearDown() { libaom_test::ClearSystemState(); }
+ void runMaskedSADTest(int run_times);
protected:
MaskedSADFunc maskedSAD_op_;
MaskedSADFunc ref_maskedSAD_op_;
};
-
-TEST_P(MaskedSADTest, OperationCheck) {
- unsigned int ref_ret, ret;
+void MaskedSADTest::runMaskedSADTest(int run_times) {
+ unsigned int ref_ret = 0, ret = 1;
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
@@ -62,7 +62,8 @@ TEST_P(MaskedSADTest, OperationCheck) {
int src_stride = MAX_SB_SIZE;
int ref_stride = MAX_SB_SIZE;
int msk_stride = MAX_SB_SIZE;
- for (int i = 0; i < number_of_iterations; ++i) {
+ const int iters = run_times == 1 ? number_of_iterations : 1;
+ for (int i = 0; i < iters; ++i) {
for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) {
src_ptr[j] = rnd.Rand8();
ref_ptr[j] = rnd.Rand8();
@@ -72,24 +73,48 @@ TEST_P(MaskedSADTest, OperationCheck) {
}
for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
- ref_ret =
- ref_maskedSAD_op_(src_ptr, src_stride, ref_ptr, ref_stride,
+ aom_usec_timer timer;
+ aom_usec_timer_start(&timer);
+ for (int repeat = 0; repeat < run_times; ++repeat) {
+ ref_ret = ref_maskedSAD_op_(src_ptr, src_stride, ref_ptr, ref_stride,
+ second_pred_ptr, msk_ptr, msk_stride,
+ invert_mask);
+ }
+ aom_usec_timer_mark(&timer);
+ const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+ aom_usec_timer_start(&timer);
+ if (run_times == 1) {
+ ASM_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ second_pred_ptr, msk_ptr,
+ msk_stride, invert_mask));
+ } else {
+ for (int repeat = 0; repeat < run_times; ++repeat) {
+ ret =
+ maskedSAD_op_(src_ptr, src_stride, ref_ptr, ref_stride,
second_pred_ptr, msk_ptr, msk_stride, invert_mask);
- ASM_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src_ptr, src_stride, ref_ptr,
- ref_stride, second_pred_ptr,
- msk_ptr, msk_stride,
- invert_mask));
+ }
+ }
+ aom_usec_timer_mark(&timer);
+ const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+ if (run_times > 10) {
+ printf("%7.2f/%7.2fns", time1, time2);
+ printf("(%3.2f)\n", time1 / time2);
+ }
if (ret != ref_ret) {
err_count++;
if (first_failure == -1) first_failure = i;
}
}
}
- EXPECT_EQ(0, err_count)
- << "Error: Masked SAD Test, C output doesn't match SSSE3 output. "
- << "First failed at test case " << first_failure;
+ EXPECT_EQ(0, err_count) << "Error: Masked SAD Test, output doesn't match. "
+ << "First failed at test case " << first_failure;
}
+TEST_P(MaskedSADTest, OperationCheck) { runMaskedSADTest(1); }
+
+TEST_P(MaskedSADTest, DISABLED_Speed) { runMaskedSADTest(2000000); }
+
typedef unsigned int (*HighbdMaskedSADFunc)(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
const uint8_t *second_pred,
@@ -108,14 +133,14 @@ class HighbdMaskedSADTest
}
virtual void TearDown() { libaom_test::ClearSystemState(); }
+ void runHighbdMaskedSADTest(int run_times);
protected:
HighbdMaskedSADFunc maskedSAD_op_;
HighbdMaskedSADFunc ref_maskedSAD_op_;
};
-
-TEST_P(HighbdMaskedSADTest, OperationCheck) {
- unsigned int ref_ret, ret;
+void HighbdMaskedSADTest::runHighbdMaskedSADTest(int run_times) {
+ unsigned int ref_ret = 0, ret = 1;
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
@@ -129,7 +154,8 @@ TEST_P(HighbdMaskedSADTest, OperationCheck) {
int src_stride = MAX_SB_SIZE;
int ref_stride = MAX_SB_SIZE;
int msk_stride = MAX_SB_SIZE;
- for (int i = 0; i < number_of_iterations; ++i) {
+ const int iters = run_times == 1 ? number_of_iterations : 1;
+ for (int i = 0; i < iters; ++i) {
for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) {
src_ptr[j] = rnd.Rand16() & 0xfff;
ref_ptr[j] = rnd.Rand16() & 0xfff;
@@ -138,13 +164,34 @@ TEST_P(HighbdMaskedSADTest, OperationCheck) {
}
for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
- ref_ret =
- ref_maskedSAD_op_(src8_ptr, src_stride, ref8_ptr, ref_stride,
+ aom_usec_timer timer;
+ aom_usec_timer_start(&timer);
+ for (int repeat = 0; repeat < run_times; ++repeat) {
+ ref_ret = ref_maskedSAD_op_(src8_ptr, src_stride, ref8_ptr, ref_stride,
+ second_pred8_ptr, msk_ptr, msk_stride,
+ invert_mask);
+ }
+ aom_usec_timer_mark(&timer);
+ const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+ aom_usec_timer_start(&timer);
+ if (run_times == 1) {
+ ASM_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src8_ptr, src_stride,
+ ref8_ptr, ref_stride,
+ second_pred8_ptr, msk_ptr,
+ msk_stride, invert_mask));
+ } else {
+ for (int repeat = 0; repeat < run_times; ++repeat) {
+ ret =
+ maskedSAD_op_(src8_ptr, src_stride, ref8_ptr, ref_stride,
second_pred8_ptr, msk_ptr, msk_stride, invert_mask);
- ASM_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src8_ptr, src_stride,
- ref8_ptr, ref_stride,
- second_pred8_ptr, msk_ptr,
- msk_stride, invert_mask));
+ }
+ }
+ aom_usec_timer_mark(&timer);
+ const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+ if (run_times > 10) {
+ printf("%7.2f/%7.2fns", time1, time2);
+ printf("(%3.2f)\n", time1 / time2);
+ }
if (ret != ref_ret) {
err_count++;
if (first_failure == -1) first_failure = i;
@@ -152,57 +199,144 @@ TEST_P(HighbdMaskedSADTest, OperationCheck) {
}
}
EXPECT_EQ(0, err_count)
- << "Error: High BD Masked SAD Test, C output doesn't match SSSE3 output. "
+ << "Error: High BD Masked SAD Test, output doesn't match. "
<< "First failed at test case " << first_failure;
}
+TEST_P(HighbdMaskedSADTest, OperationCheck) { runHighbdMaskedSADTest(1); }
+
+TEST_P(HighbdMaskedSADTest, DISABLED_Speed) { runHighbdMaskedSADTest(1000000); }
+
using ::testing::make_tuple;
#if HAVE_SSSE3
const MaskedSADParam msad_test[] = {
- make_tuple(&aom_masked_sad128x128_ssse3, &aom_masked_sad128x128_c),
- make_tuple(&aom_masked_sad128x64_ssse3, &aom_masked_sad128x64_c),
- make_tuple(&aom_masked_sad64x128_ssse3, &aom_masked_sad64x128_c),
- make_tuple(&aom_masked_sad64x64_ssse3, &aom_masked_sad64x64_c),
- make_tuple(&aom_masked_sad64x32_ssse3, &aom_masked_sad64x32_c),
- make_tuple(&aom_masked_sad32x64_ssse3, &aom_masked_sad32x64_c),
- make_tuple(&aom_masked_sad32x32_ssse3, &aom_masked_sad32x32_c),
- make_tuple(&aom_masked_sad32x16_ssse3, &aom_masked_sad32x16_c),
- make_tuple(&aom_masked_sad16x32_ssse3, &aom_masked_sad16x32_c),
- make_tuple(&aom_masked_sad16x16_ssse3, &aom_masked_sad16x16_c),
- make_tuple(&aom_masked_sad16x8_ssse3, &aom_masked_sad16x8_c),
- make_tuple(&aom_masked_sad8x16_ssse3, &aom_masked_sad8x16_c),
- make_tuple(&aom_masked_sad8x8_ssse3, &aom_masked_sad8x8_c),
- make_tuple(&aom_masked_sad8x4_ssse3, &aom_masked_sad8x4_c),
+ make_tuple(&aom_masked_sad4x4_ssse3, &aom_masked_sad4x4_c),
make_tuple(&aom_masked_sad4x8_ssse3, &aom_masked_sad4x8_c),
- make_tuple(&aom_masked_sad4x4_ssse3, &aom_masked_sad4x4_c)
+ make_tuple(&aom_masked_sad8x4_ssse3, &aom_masked_sad8x4_c),
+ make_tuple(&aom_masked_sad8x8_ssse3, &aom_masked_sad8x8_c),
+ make_tuple(&aom_masked_sad8x16_ssse3, &aom_masked_sad8x16_c),
+ make_tuple(&aom_masked_sad16x8_ssse3, &aom_masked_sad16x8_c),
+ make_tuple(&aom_masked_sad16x16_ssse3, &aom_masked_sad16x16_c),
+ make_tuple(&aom_masked_sad16x32_ssse3, &aom_masked_sad16x32_c),
+ make_tuple(&aom_masked_sad32x16_ssse3, &aom_masked_sad32x16_c),
+ make_tuple(&aom_masked_sad32x32_ssse3, &aom_masked_sad32x32_c),
+ make_tuple(&aom_masked_sad32x64_ssse3, &aom_masked_sad32x64_c),
+ make_tuple(&aom_masked_sad64x32_ssse3, &aom_masked_sad64x32_c),
+ make_tuple(&aom_masked_sad64x64_ssse3, &aom_masked_sad64x64_c),
+ make_tuple(&aom_masked_sad64x128_ssse3, &aom_masked_sad64x128_c),
+ make_tuple(&aom_masked_sad128x64_ssse3, &aom_masked_sad128x64_c),
+ make_tuple(&aom_masked_sad128x128_ssse3, &aom_masked_sad128x128_c),
+ make_tuple(&aom_masked_sad4x16_ssse3, &aom_masked_sad4x16_c),
+ make_tuple(&aom_masked_sad16x4_ssse3, &aom_masked_sad16x4_c),
+ make_tuple(&aom_masked_sad8x32_ssse3, &aom_masked_sad8x32_c),
+ make_tuple(&aom_masked_sad32x8_ssse3, &aom_masked_sad32x8_c),
+ make_tuple(&aom_masked_sad16x64_ssse3, &aom_masked_sad16x64_c),
+ make_tuple(&aom_masked_sad64x16_ssse3, &aom_masked_sad64x16_c),
};
-INSTANTIATE_TEST_CASE_P(SSSE3_C_COMPARE, MaskedSADTest,
- ::testing::ValuesIn(msad_test));
+INSTANTIATE_TEST_CASE_P(SSSE3, MaskedSADTest, ::testing::ValuesIn(msad_test));
+
const HighbdMaskedSADParam hbd_msad_test[] = {
- make_tuple(&aom_highbd_masked_sad128x128_ssse3,
- &aom_highbd_masked_sad128x128_c),
- make_tuple(&aom_highbd_masked_sad128x64_ssse3,
- &aom_highbd_masked_sad128x64_c),
+ make_tuple(&aom_highbd_masked_sad4x4_ssse3, &aom_highbd_masked_sad4x4_c),
+ make_tuple(&aom_highbd_masked_sad4x8_ssse3, &aom_highbd_masked_sad4x8_c),
+ make_tuple(&aom_highbd_masked_sad8x4_ssse3, &aom_highbd_masked_sad8x4_c),
+ make_tuple(&aom_highbd_masked_sad8x8_ssse3, &aom_highbd_masked_sad8x8_c),
+ make_tuple(&aom_highbd_masked_sad8x16_ssse3, &aom_highbd_masked_sad8x16_c),
+ make_tuple(&aom_highbd_masked_sad16x8_ssse3, &aom_highbd_masked_sad16x8_c),
+ make_tuple(&aom_highbd_masked_sad16x16_ssse3, &aom_highbd_masked_sad16x16_c),
+ make_tuple(&aom_highbd_masked_sad16x32_ssse3, &aom_highbd_masked_sad16x32_c),
+ make_tuple(&aom_highbd_masked_sad32x16_ssse3, &aom_highbd_masked_sad32x16_c),
+ make_tuple(&aom_highbd_masked_sad32x32_ssse3, &aom_highbd_masked_sad32x32_c),
+ make_tuple(&aom_highbd_masked_sad32x64_ssse3, &aom_highbd_masked_sad32x64_c),
+ make_tuple(&aom_highbd_masked_sad64x32_ssse3, &aom_highbd_masked_sad64x32_c),
+ make_tuple(&aom_highbd_masked_sad64x64_ssse3, &aom_highbd_masked_sad64x64_c),
make_tuple(&aom_highbd_masked_sad64x128_ssse3,
&aom_highbd_masked_sad64x128_c),
- make_tuple(&aom_highbd_masked_sad64x64_ssse3, &aom_highbd_masked_sad64x64_c),
- make_tuple(&aom_highbd_masked_sad64x32_ssse3, &aom_highbd_masked_sad64x32_c),
- make_tuple(&aom_highbd_masked_sad32x64_ssse3, &aom_highbd_masked_sad32x64_c),
- make_tuple(&aom_highbd_masked_sad32x32_ssse3, &aom_highbd_masked_sad32x32_c),
- make_tuple(&aom_highbd_masked_sad32x16_ssse3, &aom_highbd_masked_sad32x16_c),
- make_tuple(&aom_highbd_masked_sad16x32_ssse3, &aom_highbd_masked_sad16x32_c),
- make_tuple(&aom_highbd_masked_sad16x16_ssse3, &aom_highbd_masked_sad16x16_c),
- make_tuple(&aom_highbd_masked_sad16x8_ssse3, &aom_highbd_masked_sad16x8_c),
- make_tuple(&aom_highbd_masked_sad8x16_ssse3, &aom_highbd_masked_sad8x16_c),
- make_tuple(&aom_highbd_masked_sad8x8_ssse3, &aom_highbd_masked_sad8x8_c),
- make_tuple(&aom_highbd_masked_sad8x4_ssse3, &aom_highbd_masked_sad8x4_c),
- make_tuple(&aom_highbd_masked_sad4x8_ssse3, &aom_highbd_masked_sad4x8_c),
- make_tuple(&aom_highbd_masked_sad4x4_ssse3, &aom_highbd_masked_sad4x4_c)
+ make_tuple(&aom_highbd_masked_sad128x64_ssse3,
+ &aom_highbd_masked_sad128x64_c),
+ make_tuple(&aom_highbd_masked_sad128x128_ssse3,
+ &aom_highbd_masked_sad128x128_c),
+ make_tuple(&aom_highbd_masked_sad4x16_ssse3, &aom_highbd_masked_sad4x16_c),
+ make_tuple(&aom_highbd_masked_sad16x4_ssse3, &aom_highbd_masked_sad16x4_c),
+ make_tuple(&aom_highbd_masked_sad8x32_ssse3, &aom_highbd_masked_sad8x32_c),
+ make_tuple(&aom_highbd_masked_sad32x8_ssse3, &aom_highbd_masked_sad32x8_c),
+ make_tuple(&aom_highbd_masked_sad16x64_ssse3, &aom_highbd_masked_sad16x64_c),
+ make_tuple(&aom_highbd_masked_sad64x16_ssse3, &aom_highbd_masked_sad64x16_c),
};
-INSTANTIATE_TEST_CASE_P(SSSE3_C_COMPARE, HighbdMaskedSADTest,
+INSTANTIATE_TEST_CASE_P(SSSE3, HighbdMaskedSADTest,
::testing::ValuesIn(hbd_msad_test));
#endif // HAVE_SSSE3
+
+#if HAVE_AVX2
+const MaskedSADParam msad_avx2_test[] = {
+ make_tuple(&aom_masked_sad4x4_avx2, &aom_masked_sad4x4_ssse3),
+ make_tuple(&aom_masked_sad4x8_avx2, &aom_masked_sad4x8_ssse3),
+ make_tuple(&aom_masked_sad8x4_avx2, &aom_masked_sad8x4_ssse3),
+ make_tuple(&aom_masked_sad8x8_avx2, &aom_masked_sad8x8_ssse3),
+ make_tuple(&aom_masked_sad8x16_avx2, &aom_masked_sad8x16_ssse3),
+ make_tuple(&aom_masked_sad16x8_avx2, &aom_masked_sad16x8_ssse3),
+ make_tuple(&aom_masked_sad16x16_avx2, &aom_masked_sad16x16_ssse3),
+ make_tuple(&aom_masked_sad16x32_avx2, &aom_masked_sad16x32_ssse3),
+ make_tuple(&aom_masked_sad32x16_avx2, &aom_masked_sad32x16_ssse3),
+ make_tuple(&aom_masked_sad32x32_avx2, &aom_masked_sad32x32_ssse3),
+ make_tuple(&aom_masked_sad32x64_avx2, &aom_masked_sad32x64_ssse3),
+ make_tuple(&aom_masked_sad64x32_avx2, &aom_masked_sad64x32_ssse3),
+ make_tuple(&aom_masked_sad64x64_avx2, &aom_masked_sad64x64_ssse3),
+ make_tuple(&aom_masked_sad64x128_avx2, &aom_masked_sad64x128_ssse3),
+ make_tuple(&aom_masked_sad128x64_avx2, &aom_masked_sad128x64_ssse3),
+ make_tuple(&aom_masked_sad128x128_avx2, &aom_masked_sad128x128_ssse3),
+ make_tuple(&aom_masked_sad4x16_avx2, &aom_masked_sad4x16_ssse3),
+ make_tuple(&aom_masked_sad16x4_avx2, &aom_masked_sad16x4_ssse3),
+ make_tuple(&aom_masked_sad8x32_avx2, &aom_masked_sad8x32_ssse3),
+ make_tuple(&aom_masked_sad32x8_avx2, &aom_masked_sad32x8_ssse3),
+ make_tuple(&aom_masked_sad16x64_avx2, &aom_masked_sad16x64_ssse3),
+ make_tuple(&aom_masked_sad64x16_avx2, &aom_masked_sad64x16_ssse3)
+};
+
+INSTANTIATE_TEST_CASE_P(AVX2, MaskedSADTest,
+ ::testing::ValuesIn(msad_avx2_test));
+
+const HighbdMaskedSADParam hbd_msad_avx2_test[] = {
+ make_tuple(&aom_highbd_masked_sad4x4_avx2, &aom_highbd_masked_sad4x4_ssse3),
+ make_tuple(&aom_highbd_masked_sad4x8_avx2, &aom_highbd_masked_sad4x8_ssse3),
+ make_tuple(&aom_highbd_masked_sad8x4_avx2, &aom_highbd_masked_sad8x4_ssse3),
+ make_tuple(&aom_highbd_masked_sad8x8_avx2, &aom_highbd_masked_sad8x8_ssse3),
+ make_tuple(&aom_highbd_masked_sad8x16_avx2, &aom_highbd_masked_sad8x16_ssse3),
+ make_tuple(&aom_highbd_masked_sad16x8_avx2, &aom_highbd_masked_sad16x8_ssse3),
+ make_tuple(&aom_highbd_masked_sad16x16_avx2,
+ &aom_highbd_masked_sad16x16_ssse3),
+ make_tuple(&aom_highbd_masked_sad16x32_avx2,
+ &aom_highbd_masked_sad16x32_ssse3),
+ make_tuple(&aom_highbd_masked_sad32x16_avx2,
+ &aom_highbd_masked_sad32x16_ssse3),
+ make_tuple(&aom_highbd_masked_sad32x32_avx2,
+ &aom_highbd_masked_sad32x32_ssse3),
+ make_tuple(&aom_highbd_masked_sad32x64_avx2,
+ &aom_highbd_masked_sad32x64_ssse3),
+ make_tuple(&aom_highbd_masked_sad64x32_avx2,
+ &aom_highbd_masked_sad64x32_ssse3),
+ make_tuple(&aom_highbd_masked_sad64x64_avx2,
+ &aom_highbd_masked_sad64x64_ssse3),
+ make_tuple(&aom_highbd_masked_sad64x128_avx2,
+ &aom_highbd_masked_sad64x128_ssse3),
+ make_tuple(&aom_highbd_masked_sad128x64_avx2,
+ &aom_highbd_masked_sad128x64_ssse3),
+ make_tuple(&aom_highbd_masked_sad128x128_avx2,
+ &aom_highbd_masked_sad128x128_ssse3),
+ make_tuple(&aom_highbd_masked_sad4x16_avx2, &aom_highbd_masked_sad4x16_ssse3),
+ make_tuple(&aom_highbd_masked_sad16x4_avx2, &aom_highbd_masked_sad16x4_ssse3),
+ make_tuple(&aom_highbd_masked_sad8x32_avx2, &aom_highbd_masked_sad8x32_ssse3),
+ make_tuple(&aom_highbd_masked_sad32x8_avx2, &aom_highbd_masked_sad32x8_ssse3),
+ make_tuple(&aom_highbd_masked_sad16x64_avx2,
+ &aom_highbd_masked_sad16x64_ssse3),
+ make_tuple(&aom_highbd_masked_sad64x16_avx2,
+ &aom_highbd_masked_sad64x16_ssse3)
+};
+
+INSTANTIATE_TEST_CASE_P(AVX2, HighbdMaskedSADTest,
+ ::testing::ValuesIn(hbd_msad_avx2_test));
+#endif // HAVE_AVX2
+
} // namespace