diff options
Diffstat (limited to 'third_party/aom/test')
142 files changed, 31198 insertions, 0 deletions
diff --git a/third_party/aom/test/accounting_test.cc b/third_party/aom/test/accounting_test.cc new file mode 100644 index 000000000..e8387d0dc --- /dev/null +++ b/third_party/aom/test/accounting_test.cc @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/acm_random.h" +#include "aom/aom_integer.h" +#include "aom_dsp/bitreader.h" +#include "aom_dsp/bitwriter.h" + +using libaom_test::ACMRandom; + +TEST(AV1, TestAccounting) { + const int kBufferSize = 10000; + const int kSymbols = 1024; + aom_writer bw; + uint8_t bw_buffer[kBufferSize]; + aom_start_encode(&bw, bw_buffer); + for (int i = 0; i < kSymbols; i++) { + aom_write(&bw, 0, 32); + aom_write(&bw, 0, 32); + aom_write(&bw, 0, 32); + } + aom_stop_encode(&bw); + aom_reader br; +#if CONFIG_ANS && ANS_MAX_SYMBOLS + br.window_size = 1 << 16; +#endif + aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL); + + Accounting accounting; + aom_accounting_init(&accounting); + br.accounting = &accounting; + for (int i = 0; i < kSymbols; i++) { + aom_read(&br, 32, "A"); + } + // Consecutive symbols that are the same are coalesced. 
+ GTEST_ASSERT_EQ(accounting.syms.num_syms, 1); + GTEST_ASSERT_EQ(accounting.syms.syms[0].samples, (unsigned int)kSymbols); + + aom_accounting_reset(&accounting); + GTEST_ASSERT_EQ(accounting.syms.num_syms, 0); + + // Should record 2 * kSymbols accounting symbols. + aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL); + br.accounting = &accounting; + for (int i = 0; i < kSymbols; i++) { + aom_read(&br, 32, "A"); + aom_read(&br, 32, "B"); + aom_read(&br, 32, "B"); + } + GTEST_ASSERT_EQ(accounting.syms.num_syms, kSymbols * 2); + uint32_t tell_frac = aom_reader_tell_frac(&br); + for (int i = 0; i < accounting.syms.num_syms; i++) { + tell_frac -= accounting.syms.syms[i].bits; + } + GTEST_ASSERT_EQ(tell_frac, 0U); + + GTEST_ASSERT_EQ(aom_accounting_dictionary_lookup(&accounting, "A"), + aom_accounting_dictionary_lookup(&accounting, "A")); + + // Check for collisions. The current aom_accounting_hash function returns + // the same hash code for AB and BA. + GTEST_ASSERT_NE(aom_accounting_dictionary_lookup(&accounting, "AB"), + aom_accounting_dictionary_lookup(&accounting, "BA")); +} diff --git a/third_party/aom/test/acm_random.h b/third_party/aom/test/acm_random.h new file mode 100644 index 000000000..4842345ff --- /dev/null +++ b/third_party/aom/test/acm_random.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#ifndef TEST_ACM_RANDOM_H_ +#define TEST_ACM_RANDOM_H_ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "aom/aom_integer.h" + +namespace libaom_test { + +class ACMRandom { + public: + ACMRandom() : random_(DeterministicSeed()) {} + + explicit ACMRandom(int seed) : random_(seed) {} + + void Reset(int seed) { random_.Reseed(seed); } + + uint32_t Rand31(void) { + return random_.Generate(testing::internal::Random::kMaxRange); + } + + uint16_t Rand16(void) { + const uint32_t value = + random_.Generate(testing::internal::Random::kMaxRange); + return (value >> 15) & 0xffff; + } + + int16_t Rand9Signed(void) { + // Use 9 bits: values between 255 (0x0FF) and -256 (0x100). + const uint32_t value = random_.Generate(512); + return static_cast<int16_t>(value) - 256; + } + + uint8_t Rand8(void) { + const uint32_t value = + random_.Generate(testing::internal::Random::kMaxRange); + // There's a bit more entropy in the upper bits of this implementation. + return (value >> 23) & 0xff; + } + + uint8_t Rand8Extremes(void) { + // Returns a random value near 0 or near 255, to better exercise + // saturation behavior. + const uint8_t r = Rand8(); + return r < 128 ? r << 4 : r >> 4; + } + + int PseudoUniform(int range) { return random_.Generate(range); } + + int operator()(int n) { return PseudoUniform(n); } + + static int DeterministicSeed(void) { return 0xbaba; } + + private: + testing::internal::Random random_; +}; + +} // namespace libaom_test + +#endif // TEST_ACM_RANDOM_H_ diff --git a/third_party/aom/test/active_map_refresh_test.cc b/third_party/aom/test/active_map_refresh_test.cc new file mode 100644 index 000000000..7ee86e7e6 --- /dev/null +++ b/third_party/aom/test/active_map_refresh_test.cc @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include <algorithm> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/util.h" +#include "test/y4m_video_source.h" + +namespace { + +// Check if any pixel in a 16x16 macroblock varies between frames. +int CheckMb(const aom_image_t &current, const aom_image_t &previous, int mb_r, + int mb_c) { + for (int plane = 0; plane < 3; plane++) { + int r = 16 * mb_r; + int c0 = 16 * mb_c; + int r_top = std::min(r + 16, static_cast<int>(current.d_h)); + int c_top = std::min(c0 + 16, static_cast<int>(current.d_w)); + r = std::max(r, 0); + c0 = std::max(c0, 0); + if (plane > 0 && current.x_chroma_shift) { + c_top = (c_top + 1) >> 1; + c0 >>= 1; + } + if (plane > 0 && current.y_chroma_shift) { + r_top = (r_top + 1) >> 1; + r >>= 1; + } + for (; r < r_top; ++r) { + for (int c = c0; c < c_top; ++c) { + if (current.planes[plane][current.stride[plane] * r + c] != + previous.planes[plane][previous.stride[plane] * r + c]) + return 1; + } + } + } + return 0; +} + +void GenerateMap(int mb_rows, int mb_cols, const aom_image_t &current, + const aom_image_t &previous, uint8_t *map) { + for (int mb_r = 0; mb_r < mb_rows; ++mb_r) { + for (int mb_c = 0; mb_c < mb_cols; ++mb_c) { + map[mb_r * mb_cols + mb_c] = CheckMb(current, previous, mb_r, mb_c); + } + } +} + +const int kAqModeCyclicRefresh = 3; + +class ActiveMapRefreshTest + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int> { + protected: + ActiveMapRefreshTest() : EncoderTest(GET_PARAM(0)) {} + virtual ~ActiveMapRefreshTest() {} + + virtual void SetUp() { 
+ InitializeConfig(); + SetMode(GET_PARAM(1)); + cpu_used_ = GET_PARAM(2); + } + + virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) { + ::libaom_test::Y4mVideoSource *y4m_video = + static_cast<libaom_test::Y4mVideoSource *>(video); + if (video->frame() == 1) { + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + encoder->Control(AV1E_SET_AQ_MODE, kAqModeCyclicRefresh); + } else if (video->frame() >= 2 && video->img()) { + aom_image_t *current = video->img(); + aom_image_t *previous = y4m_holder_->img(); + ASSERT_TRUE(previous != NULL); + aom_active_map_t map = aom_active_map_t(); + const int width = static_cast<int>(current->d_w); + const int height = static_cast<int>(current->d_h); + const int mb_width = (width + 15) / 16; + const int mb_height = (height + 15) / 16; + uint8_t *active_map = new uint8_t[mb_width * mb_height]; + GenerateMap(mb_height, mb_width, *current, *previous, active_map); + map.cols = mb_width; + map.rows = mb_height; + map.active_map = active_map; + encoder->Control(AOME_SET_ACTIVEMAP, &map); + delete[] active_map; + } + if (video->img()) { + y4m_video->SwapBuffers(y4m_holder_); + } + } + + int cpu_used_; + ::libaom_test::Y4mVideoSource *y4m_holder_; +}; + +TEST_P(ActiveMapRefreshTest, Test) { + cfg_.g_lag_in_frames = 0; + cfg_.g_profile = 1; + cfg_.rc_target_bitrate = 600; + cfg_.rc_resize_allowed = 0; + cfg_.rc_min_quantizer = 8; + cfg_.rc_max_quantizer = 30; + cfg_.g_pass = AOM_RC_ONE_PASS; + cfg_.rc_end_usage = AOM_CBR; + cfg_.kf_max_dist = 90000; + + ::libaom_test::Y4mVideoSource video("desktop_credits.y4m", 0, 10); + ::libaom_test::Y4mVideoSource video_holder("desktop_credits.y4m", 0, 10); + video_holder.Begin(); + y4m_holder_ = &video_holder; + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +AV1_INSTANTIATE_TEST_CASE(ActiveMapRefreshTest, + ::testing::Values(::libaom_test::kRealTime), + ::testing::Range(5, 6)); +} // namespace diff --git a/third_party/aom/test/active_map_test.cc 
b/third_party/aom/test/active_map_test.cc new file mode 100644 index 000000000..a926b0faf --- /dev/null +++ b/third_party/aom/test/active_map_test.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include <climits> +#include <vector> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" + +namespace { + +class ActiveMapTest + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int> { + protected: + static const int kWidth = 208; + static const int kHeight = 144; + + ActiveMapTest() : EncoderTest(GET_PARAM(0)) {} + virtual ~ActiveMapTest() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(GET_PARAM(1)); + cpu_used_ = GET_PARAM(2); + } + + virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) { + if (video->frame() == 1) { + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + } else if (video->frame() == 3) { + aom_active_map_t map = aom_active_map_t(); + /* clang-format off */ + uint8_t active_map[9 * 13] = { + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, + 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, + 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, + 0, 0, 0, 0, 0, 
0, 1, 1, 0, 1, 0, 1, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, + }; + /* clang-format on */ + map.cols = (kWidth + 15) / 16; + map.rows = (kHeight + 15) / 16; + ASSERT_EQ(map.cols, 13u); + ASSERT_EQ(map.rows, 9u); + map.active_map = active_map; + encoder->Control(AOME_SET_ACTIVEMAP, &map); + } else if (video->frame() == 15) { + aom_active_map_t map = aom_active_map_t(); + map.cols = (kWidth + 15) / 16; + map.rows = (kHeight + 15) / 16; + map.active_map = NULL; + encoder->Control(AOME_SET_ACTIVEMAP, &map); + } + } + + void DoTest() { + // Validate that this non multiple of 64 wide clip encodes + cfg_.g_lag_in_frames = 0; + cfg_.rc_target_bitrate = 400; + cfg_.rc_resize_allowed = 0; + cfg_.g_pass = AOM_RC_ONE_PASS; + cfg_.rc_end_usage = AOM_CBR; + cfg_.kf_max_dist = 90000; + ::libaom_test::I420VideoSource video("hantro_odd.yuv", kWidth, kHeight, 30, + 1, 0, 20); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } + + int cpu_used_; +}; + +TEST_P(ActiveMapTest, Test) { DoTest(); } + +class ActiveMapTestLarge : public ActiveMapTest {}; + +TEST_P(ActiveMapTestLarge, Test) { DoTest(); } + +AV1_INSTANTIATE_TEST_CASE(ActiveMapTestLarge, + ::testing::Values(::libaom_test::kRealTime), + ::testing::Range(0, 5)); + +AV1_INSTANTIATE_TEST_CASE(ActiveMapTest, + ::testing::Values(::libaom_test::kRealTime), + ::testing::Range(5, 9)); + +} // namespace diff --git a/third_party/aom/test/altref_test.cc b/third_party/aom/test/altref_test.cc new file mode 100644 index 000000000..6dd8b5186 --- /dev/null +++ b/third_party/aom/test/altref_test.cc @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +namespace { + +class AltRefForcedKeyTestLarge + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int> { + protected: + AltRefForcedKeyTestLarge() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + cpu_used_(GET_PARAM(2)), forced_kf_frame_num_(1), frame_num_(0) {} + virtual ~AltRefForcedKeyTestLarge() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(encoding_mode_); + cfg_.rc_end_usage = AOM_VBR; + cfg_.g_threads = 0; + } + + virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); +#if CONFIG_AV1_ENCODER + // override test default for tile columns if necessary. + if (GET_PARAM(0) == &libaom_test::kAV1) { + encoder->Control(AV1E_SET_TILE_COLUMNS, 6); + } +#endif + } + frame_flags_ = + (video->frame() == forced_kf_frame_num_) ? 
AOM_EFLAG_FORCE_KF : 0; + } + + virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) { + if (frame_num_ == forced_kf_frame_num_) { + ASSERT_TRUE(!!(pkt->data.frame.flags & AOM_FRAME_IS_KEY)) + << "Frame #" << frame_num_ << " isn't a keyframe!"; + } + ++frame_num_; + } + + ::libaom_test::TestMode encoding_mode_; + int cpu_used_; + unsigned int forced_kf_frame_num_; + unsigned int frame_num_; +}; + +TEST_P(AltRefForcedKeyTestLarge, Frame1IsKey) { + const aom_rational timebase = { 1, 30 }; + const int lag_values[] = { 3, 15, 25, -1 }; + + forced_kf_frame_num_ = 1; + for (int i = 0; lag_values[i] != -1; ++i) { + frame_num_ = 0; + cfg_.g_lag_in_frames = lag_values[i]; + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + timebase.den, timebase.num, 0, 30); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } +} + +TEST_P(AltRefForcedKeyTestLarge, ForcedFrameIsKey) { + const aom_rational timebase = { 1, 30 }; + const int lag_values[] = { 3, 15, 25, -1 }; + + for (int i = 0; lag_values[i] != -1; ++i) { + frame_num_ = 0; + forced_kf_frame_num_ = lag_values[i] - 1; + cfg_.g_lag_in_frames = lag_values[i]; + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + timebase.den, timebase.num, 0, 30); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } +} + +AV1_INSTANTIATE_TEST_CASE(AltRefForcedKeyTestLarge, + ::testing::Values(::libaom_test::kOnePassGood), + ::testing::Range(0, 9)); + +} // namespace diff --git a/third_party/aom/test/android/Android.mk b/third_party/aom/test/android/Android.mk new file mode 100644 index 000000000..74f9d7cba --- /dev/null +++ b/third_party/aom/test/android/Android.mk @@ -0,0 +1,58 @@ +# +# Copyright (c) 2016, Alliance for Open Media. All rights reserved +# +# This source code is subject to the terms of the BSD 2 Clause License and +# the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License +# was not distributed with this source code in the LICENSE file, you can +# obtain it at www.aomedia.org/license/software. If the Alliance for Open +# Media Patent License 1.0 was not distributed with this source code in the +# PATENTS file, you can obtain it at www.aomedia.org/license/patent. +# +# This make file builds aom_test app for android. +# The test app itself runs on the command line through adb shell +# The paths are really messed up as the libaom make file +# expects to be made from a parent directory. +CUR_WD := $(call my-dir) +BINDINGS_DIR := $(CUR_WD)/../../.. +LOCAL_PATH := $(CUR_WD)/../../.. + +#libwebm +include $(CLEAR_VARS) +include $(BINDINGS_DIR)/libaom/third_party/libwebm/Android.mk +LOCAL_PATH := $(CUR_WD)/../../.. + +#libaom +include $(CLEAR_VARS) +LOCAL_STATIC_LIBRARIES := libwebm +include $(BINDINGS_DIR)/libaom/build/make/Android.mk +LOCAL_PATH := $(CUR_WD)/../.. + +#libgtest +include $(CLEAR_VARS) +LOCAL_ARM_MODE := arm +LOCAL_CPP_EXTENSION := .cc +LOCAL_MODULE := gtest +LOCAL_C_INCLUDES := $(LOCAL_PATH)/third_party/googletest/src/googletest/src +LOCAL_C_INCLUDES += $(LOCAL_PATH)/third_party/googletest/src/googletest/include +LOCAL_SRC_FILES := ./third_party/googletest/src/googletest/src/gtest-all.cc +include $(BUILD_STATIC_LIBRARY) + +#libaom_test +include $(CLEAR_VARS) +LOCAL_ARM_MODE := arm +LOCAL_MODULE := libaom_test +LOCAL_STATIC_LIBRARIES := gtest libwebm + +ifeq ($(ENABLE_SHARED),1) + LOCAL_SHARED_LIBRARIES := aom +else + LOCAL_STATIC_LIBRARIES += aom +endif + +include $(LOCAL_PATH)/test/test.mk +LOCAL_C_INCLUDES := $(BINDINGS_DIR) +FILTERED_SRC := $(sort $(filter %.cc %.c, $(LIBAOM_TEST_SRCS-yes))) +LOCAL_SRC_FILES := $(addprefix ./test/, $(FILTERED_SRC)) +# some test files depend on *_rtcd.h, ensure they're generated first. 
+$(eval $(call rtcd_dep_template)) +include $(BUILD_EXECUTABLE) diff --git a/third_party/aom/test/android/README b/third_party/aom/test/android/README new file mode 100644 index 000000000..35c829738 --- /dev/null +++ b/third_party/aom/test/android/README @@ -0,0 +1,32 @@ +Android.mk will build aom unittests on android. +1) Configure libaom from the parent directory: +./libaom/configure --target=armv7-android-gcc --enable-external-build \ + --enable-postproc --disable-install-srcs --enable-multi-res-encoding \ + --enable-temporal-denoising --disable-unit-tests --disable-install-docs \ + --disable-examples --disable-runtime-cpu-detect --sdk-path=$NDK + +2) From the parent directory, invoke ndk-build: +NDK_PROJECT_PATH=. ndk-build APP_BUILD_SCRIPT=./libaom/test/android/Android.mk \ + APP_ABI=armeabi-v7a APP_PLATFORM=android-18 APP_OPTIM=release \ + APP_STL=gnustl_static + +Note: Both adb and ndk-build are available prebuilt at: + https://chromium.googlesource.com/android_tools + +3) Run get_files.py to download the test files: +python get_files.py -i /path/to/test-data.sha1 -o /path/to/put/files \ + -u http://downloads.webmproject.org/test_data/libaom + +4) Transfer files to device using adb. Ensure you have proper permissions for +the target + +adb push /path/to/test_files /data/local/tmp +adb push /path/to/built_libs /data/local/tmp + +NOTE: Built_libs defaults to parent_dir/libs/armeabi-v7a + +5) Run tests: +adb shell +(on device) +cd /data/local/tmp +LD_LIBRARY_PATH=. ./aom_test diff --git a/third_party/aom/test/android/get_files.py b/third_party/aom/test/android/get_files.py new file mode 100644 index 000000000..bdae9a315 --- /dev/null +++ b/third_party/aom/test/android/get_files.py @@ -0,0 +1,120 @@ +# +# Copyright (c) 2016, Alliance for Open Media. All rights reserved +# +# This source code is subject to the terms of the BSD 2 Clause License and +# the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License +# was not distributed with this source code in the LICENSE file, you can +# obtain it at www.aomedia.org/license/software. If the Alliance for Open +# Media Patent License 1.0 was not distributed with this source code in the +# PATENTS file, you can obtain it at www.aomedia.org/license/patent. +# +# This simple script pulls test files from the webm homepage +# It is intelligent enough to only pull files if +# 1) File / test_data folder does not exist +# 2) SHA mismatch + +import pycurl +import csv +import hashlib +import re +import os.path +import time +import itertools +import sys +import getopt + +#globals +url = '' +file_list_path = '' +local_resource_path = '' + +# Helper functions: +# A simple function which returns the sha hash of a file in hex +def get_file_sha(filename): + try: + sha_hash = hashlib.sha1() + with open(filename, 'rb') as file: + buf = file.read(HASH_CHUNK) + while len(buf) > 0: + sha_hash.update(buf) + buf = file.read(HASH_CHUNK) + return sha_hash.hexdigest() + except IOError: + print "Error reading " + filename + +# Downloads a file from a url, and then checks the sha against the passed +# in sha +def download_and_check_sha(url, filename, sha): + path = os.path.join(local_resource_path, filename) + fp = open(path, "wb") + curl = pycurl.Curl() + curl.setopt(pycurl.URL, url + "/" + filename) + curl.setopt(pycurl.WRITEDATA, fp) + curl.perform() + curl.close() + fp.close() + return get_file_sha(path) == sha + +#constants +ftp_retries = 3 + +SHA_COL = 0 +NAME_COL = 1 +EXPECTED_COL = 2 +HASH_CHUNK = 65536 + +# Main script +try: + opts, args = \ + getopt.getopt(sys.argv[1:], \ + "u:i:o:", ["url=", "input_csv=", "output_dir="]) +except: + print 'get_files.py -u <url> -i <input_csv> -o <output_dir>' + sys.exit(2) + +for opt, arg in opts: + if opt == '-u': + url = arg + elif opt in ("-i", "--input_csv"): + file_list_path = os.path.join(arg) + elif opt in ("-o", "--output_dir"): + local_resource_path = os.path.join(arg) + 
+if len(sys.argv) != 7: + print "Expects two paths and a url!" + exit(1) + +if not os.path.isdir(local_resource_path): + os.makedirs(local_resource_path) + +file_list_csv = open(file_list_path, "rb") + +# Our 'csv' file uses multiple spaces as a delimiter, python's +# csv class only uses single character delimiters, so we convert them below +file_list_reader = csv.reader((re.sub(' +', ' ', line) \ + for line in file_list_csv), delimiter = ' ') + +file_shas = [] +file_names = [] + +for row in file_list_reader: + if len(row) != EXPECTED_COL: + continue + file_shas.append(row[SHA_COL]) + file_names.append(row[NAME_COL]) + +file_list_csv.close() + +# Download files, only if they don't already exist and have correct shas +for filename, sha in itertools.izip(file_names, file_shas): + path = os.path.join(local_resource_path, filename) + if os.path.isfile(path) \ + and get_file_sha(path) == sha: + print path + ' exists, skipping' + continue + for retry in range(0, ftp_retries): + print "Downloading " + path + if not download_and_check_sha(url, filename, sha): + print "Sha does not match, retrying..." + else: + break diff --git a/third_party/aom/test/android/scrape_gtest_log.py b/third_party/aom/test/android/scrape_gtest_log.py new file mode 100644 index 000000000..e0c929a5d --- /dev/null +++ b/third_party/aom/test/android/scrape_gtest_log.py @@ -0,0 +1,60 @@ +# +# Copyright (c) 2016, Alliance for Open Media. All rights reserved +# +# This source code is subject to the terms of the BSD 2 Clause License and +# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +# was not distributed with this source code in the LICENSE file, you can +# obtain it at www.aomedia.org/license/software. If the Alliance for Open +# Media Patent License 1.0 was not distributed with this source code in the +# PATENTS file, you can obtain it at www.aomedia.org/license/patent. +# + +"""Standalone script which parses a gtest log for json. 
+ +Json is returned as an array. This script is used by the libaom +waterfall to gather json results mixed in with gtest logs. This is +dubious software engineering. +""" + +import getopt +import json +import os +import re +import sys + + +def main(): + if len(sys.argv) != 3: + print "Expects a file to write json to!" + exit(1) + + try: + opts, _ = \ + getopt.getopt(sys.argv[1:], \ + 'o:', ['output-json=']) + except getopt.GetOptError: + print 'scrape_gtest_log.py -o <output_json>' + sys.exit(2) + + output_json = '' + for opt, arg in opts: + if opt in ('-o', '--output-json'): + output_json = os.path.join(arg) + + blob = sys.stdin.read() + json_string = '[' + ','.join('{' + x + '}' for x in + re.findall(r'{([^}]*.?)}', blob)) + ']' + print blob + + output = json.dumps(json.loads(json_string), indent=4, sort_keys=True) + print output + + path = os.path.dirname(output_json) + if path and not os.path.exists(path): + os.makedirs(path) + + outfile = open(output_json, 'w') + outfile.write(output) + +if __name__ == '__main__': + sys.exit(main()) diff --git a/third_party/aom/test/ans_codec_test.cc b/third_party/aom/test/ans_codec_test.cc new file mode 100644 index 000000000..a1b25fbda --- /dev/null +++ b/third_party/aom/test/ans_codec_test.cc @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+*/ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "aom_dsp/ans.h" +#include "av1/av1_dx_iface.c" + +// A note on ANS_MAX_SYMBOLS == 0: +// Fused gtest doesn't work with EXPECT_FATAL_FAILURE [1]. Just run with a +// single iteration and don't try to check the window size if we are unwindowed. +// [1] https://github.com/google/googletest/issues/356 + +namespace { + +const char kTestVideoName[] = "niklas_1280_720_30.y4m"; +const int kTestVideoFrames = 10; + +class AnsCodecTest : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWithParam<int> { + protected: + AnsCodecTest() + : EncoderTest(GET_PARAM(0)), ans_window_size_log2_(GET_PARAM(1)) {} + + virtual ~AnsCodecTest() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(::libaom_test::kOnePassGood); + cfg_.g_lag_in_frames = 25; + cfg_.rc_end_usage = AOM_CQ; + } + + virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) { + if (video->frame() == 1) { +#if ANS_MAX_SYMBOLS + encoder->Control(AV1E_SET_ANS_WINDOW_SIZE_LOG2, ans_window_size_log2_); +#endif + // Try to push a high symbol count through the codec + encoder->Control(AOME_SET_CQ_LEVEL, 8); + encoder->Control(AOME_SET_CPUUSED, 2); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + encoder->Control(AV1E_SET_TILE_COLUMNS, 0); + encoder->Control(AV1E_SET_TILE_ROWS, 0); + } + } + + virtual bool HandleDecodeResult(const aom_codec_err_t res_dec, + libaom_test::Decoder *decoder) { + aom_codec_ctx_t *const av1_decoder = decoder->GetDecoder(); +#if ANS_MAX_SYMBOLS + aom_codec_alg_priv_t *const priv = + reinterpret_cast<aom_codec_alg_priv_t *>(av1_decoder->priv); + FrameWorkerData *const worker_data = + reinterpret_cast<FrameWorkerData 
*>(priv->frame_workers[0].data1); + AV1_COMMON *const common = &worker_data->pbi->common; + + EXPECT_EQ(ans_window_size_log2_, common->ans_window_size_log2); +#endif + + EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError(); + return AOM_CODEC_OK == res_dec; + } + + private: + int ans_window_size_log2_; +}; + +TEST_P(AnsCodecTest, BitstreamParms) { + testing::internal::scoped_ptr<libaom_test::VideoSource> video( + new libaom_test::Y4mVideoSource(kTestVideoName, 0, kTestVideoFrames)); + ASSERT_TRUE(video.get() != NULL); + + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); +} + +#if ANS_MAX_SYMBOLS +AV1_INSTANTIATE_TEST_CASE(AnsCodecTest, ::testing::Range(8, 24)); +#else +AV1_INSTANTIATE_TEST_CASE(AnsCodecTest, ::testing::Range(0, 1)); +#endif +} // namespace diff --git a/third_party/aom/test/ans_test.cc b/third_party/aom/test/ans_test.cc new file mode 100644 index 000000000..a553a9e84 --- /dev/null +++ b/third_party/aom/test/ans_test.cc @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <assert.h> +#include <math.h> +#include <stdio.h> +#include <ctime> +#include <utility> +#include <vector> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/acm_random.h" +#include "aom_dsp/ansreader.h" +#include "aom_dsp/buf_ans.h" + +namespace { +typedef std::vector<std::pair<uint8_t, bool> > PvVec; + +const int kPrintStats = 0; +// Use a small buffer size to exercise ANS window spills or buffer growth +const int kBufAnsSize = 1 << 8; + +PvVec abs_encode_build_vals(int iters) { + PvVec ret; + libaom_test::ACMRandom gen(0x30317076); + double entropy = 0; + for (int i = 0; i < iters; ++i) { + uint8_t p; + do { + p = gen.Rand8(); + } while (p == 0); // zero is not a valid coding probability + bool b = gen.Rand8() < p; + ret.push_back(std::make_pair(static_cast<uint8_t>(p), b)); + if (kPrintStats) { + double d = p / 256.; + entropy += -d * log2(d) - (1 - d) * log2(1 - d); + } + } + if (kPrintStats) printf("entropy %f\n", entropy); + return ret; +} + +bool check_rabs(const PvVec &pv_vec, uint8_t *buf) { + BufAnsCoder a; + aom_buf_ans_alloc(&a, NULL, kBufAnsSize); + buf_ans_write_init(&a, buf); + + std::clock_t start = std::clock(); + for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) { + buf_rabs_write(&a, it->second, 256 - it->first); + } + aom_buf_ans_flush(&a); + std::clock_t enc_time = std::clock() - start; + int offset = buf_ans_write_end(&a); + aom_buf_ans_free(&a); + bool okay = true; + AnsDecoder d; +#if ANS_MAX_SYMBOLS + d.window_size = kBufAnsSize; +#endif + if (ans_read_init(&d, buf, offset)) return false; + start = std::clock(); + for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) { + okay = okay && (rabs_read(&d, 256 - it->first) != 0) == it->second; + } + std::clock_t dec_time = std::clock() - start; + if (!okay) return false; + if (kPrintStats) + printf("uABS size %d enc_time %f dec_time %f\n", offset, + static_cast<float>(enc_time) / CLOCKS_PER_SEC, 
+ static_cast<float>(dec_time) / CLOCKS_PER_SEC); + return ans_read_end(&d) != 0; +} + +const aom_cdf_prob spareto65[] = { 8320, 6018, 4402, 3254, 4259, + 3919, 2057, 492, 45, 2 }; + +const int kRansSymbols = + static_cast<int>(sizeof(spareto65) / sizeof(spareto65[0])); + +struct rans_sym { + aom_cdf_prob prob; + aom_cdf_prob cum_prob; // not-inclusive +}; + +std::vector<int> ans_encode_build_vals(rans_sym *const tab, int iters) { + aom_cdf_prob sum = 0; + for (int i = 0; i < kRansSymbols; ++i) { + tab[i].cum_prob = sum; + tab[i].prob = spareto65[i]; + sum += spareto65[i]; + } + std::vector<int> p_to_sym; + for (int i = 0; i < kRansSymbols; ++i) { + p_to_sym.insert(p_to_sym.end(), tab[i].prob, i); + } + assert(p_to_sym.size() == RANS_PRECISION); + std::vector<int> ret; + libaom_test::ACMRandom gen(18543637); + for (int i = 0; i < iters; ++i) { + int sym = + p_to_sym[((gen.Rand8() << 8) + gen.Rand8()) & (RANS_PRECISION - 1)]; + ret.push_back(sym); + } + return ret; +} + +void rans_build_dec_tab(const struct rans_sym sym_tab[], + aom_cdf_prob *dec_tab) { + unsigned int sum = 0; + for (int i = 0; sum < RANS_PRECISION; ++i) { + dec_tab[i] = sum += sym_tab[i].prob; + } +} + +bool check_rans(const std::vector<int> &sym_vec, const rans_sym *const tab, + uint8_t *buf) { + BufAnsCoder a; + aom_buf_ans_alloc(&a, NULL, kBufAnsSize); + buf_ans_write_init(&a, buf); + aom_cdf_prob dec_tab[kRansSymbols]; + rans_build_dec_tab(tab, dec_tab); + + std::clock_t start = std::clock(); + for (std::vector<int>::const_iterator it = sym_vec.begin(); + it != sym_vec.end(); ++it) { + buf_rans_write(&a, tab[*it].cum_prob, tab[*it].prob); + } + aom_buf_ans_flush(&a); + std::clock_t enc_time = std::clock() - start; + int offset = buf_ans_write_end(&a); + aom_buf_ans_free(&a); + bool okay = true; + AnsDecoder d; +#if ANS_MAX_SYMBOLS + d.window_size = kBufAnsSize; +#endif + if (ans_read_init(&d, buf, offset)) return false; + start = std::clock(); + for (std::vector<int>::const_iterator it = 
sym_vec.begin(); + it != sym_vec.end(); ++it) { + okay &= rans_read(&d, dec_tab) == *it; + } + std::clock_t dec_time = std::clock() - start; + if (!okay) return false; + if (kPrintStats) + printf("rANS size %d enc_time %f dec_time %f\n", offset, + static_cast<float>(enc_time) / CLOCKS_PER_SEC, + static_cast<float>(dec_time) / CLOCKS_PER_SEC); + return ans_read_end(&d) != 0; +} + +class AbsTestFix : public ::testing::Test { + protected: + static void SetUpTestCase() { pv_vec_ = abs_encode_build_vals(kNumBools); } + virtual void SetUp() { buf_ = new uint8_t[kNumBools / 8]; } + virtual void TearDown() { delete[] buf_; } + static const int kNumBools = 100000000; + static PvVec pv_vec_; + uint8_t *buf_; +}; +PvVec AbsTestFix::pv_vec_; + +class AnsTestFix : public ::testing::Test { + protected: + static void SetUpTestCase() { + sym_vec_ = ans_encode_build_vals(rans_sym_tab_, kNumSyms); + } + virtual void SetUp() { buf_ = new uint8_t[kNumSyms / 2]; } + virtual void TearDown() { delete[] buf_; } + static const int kNumSyms = 25000000; + static std::vector<int> sym_vec_; + static rans_sym rans_sym_tab_[kRansSymbols]; + uint8_t *buf_; +}; +std::vector<int> AnsTestFix::sym_vec_; +rans_sym AnsTestFix::rans_sym_tab_[kRansSymbols]; + +TEST_F(AbsTestFix, Rabs) { EXPECT_TRUE(check_rabs(pv_vec_, buf_)); } +TEST_F(AnsTestFix, Rans) { + EXPECT_TRUE(check_rans(sym_vec_, rans_sym_tab_, buf_)); +} +TEST(AnsTest, FinalStateSerialization) { + for (unsigned i = L_BASE; i < L_BASE * IO_BASE; ++i) { + uint8_t buf[8]; + AnsCoder c; + ans_write_init(&c, buf); + c.state = i; + const int written_size = ans_write_end(&c); + ASSERT_LT(static_cast<size_t>(written_size), sizeof(buf)); + AnsDecoder d; +#if ANS_MAX_SYMBOLS + // There is no real data window here because no symbols are sent through + // ans (only synthetic states), so use a dummy value + d.window_size = 1024; +#endif + const int read_init_status = ans_read_init(&d, buf, written_size); + EXPECT_EQ(read_init_status, 0); + 
EXPECT_EQ(d.state, i); + } +} +} // namespace diff --git a/third_party/aom/test/aomcx_set_ref.sh b/third_party/aom/test/aomcx_set_ref.sh new file mode 100755 index 000000000..f51b73c58 --- /dev/null +++ b/third_party/aom/test/aomcx_set_ref.sh @@ -0,0 +1,58 @@ +#!/bin/sh +## Copyright (c) 2016, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## +## This file tests the libaom aom_cx_set_ref example. To add new tests to this +## file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to aom_cx_set_ref_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +# Environment check: $YUV_RAW_INPUT is required. +aom_cx_set_ref_verify_environment() { + if [ ! -e "${YUV_RAW_INPUT}" ]; then + echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH." + return 1 + fi +} + +# Runs aom_cx_set_ref and updates the reference frame before encoding frame 90. +# $1 is the codec name, which aom_cx_set_ref does not support at present: It's +# currently used only to name the output file. +# TODO(tomfinegan): Pass the codec param once the example is updated to support +# AV1. +aom_set_ref() { + local encoder="${LIBAOM_BIN_PATH}/aom_cx_set_ref${AOM_TEST_EXE_SUFFIX}" + local codec="$1" + local output_file="${AOM_TEST_OUTPUT_DIR}/aom_cx_set_ref_${codec}.ivf" + local ref_frame_num=4 + local limit=10 + if [ ! -x "${encoder}" ]; then + elog "${encoder} does not exist or is not executable." 
+ return 1 + fi + + eval "${AOM_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \ + "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \ + "${ref_frame_num}" "${limit}" ${devnull} + + [ -e "${output_file}" ] || return 1 +} + +aom_cx_set_ref_av1() { + if [ "$(av1_encode_available)" = "yes" ]; then + aom_set_ref av1 || return 1 + fi +} + +aom_cx_set_ref_tests="aom_cx_set_ref_av1" + +run_tests aom_cx_set_ref_verify_environment "${aom_cx_set_ref_tests}" + diff --git a/third_party/aom/test/aomdec.sh b/third_party/aom/test/aomdec.sh new file mode 100755 index 000000000..28901ed1b --- /dev/null +++ b/third_party/aom/test/aomdec.sh @@ -0,0 +1,124 @@ +#!/bin/sh +## Copyright (c) 2016, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## +## This file tests aomdec. To add new tests to this file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to aomdec_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +# Environment check: Make sure input is available. +aomdec_verify_environment() { + if [ "$(av1_encode_available)" != "yes" ] ; then + if [ ! -e "${AV1_WEBM_FILE}" ] || \ + [ ! -e "${AV1_FPM_WEBM_FILE}" ] || \ + [ ! -e "${AV1_LT_50_FRAMES_WEBM_FILE}" ] ; then + elog "Libaom test data must exist in LIBAOM_TEST_DATA_PATH." + return 1 + fi + fi + if [ -z "$(aom_tool_path aomdec)" ]; then + elog "aomdec not found. It must exist in LIBAOM_BIN_PATH or its parent." + return 1 + fi +} + +# Wrapper function for running aomdec with pipe input. 
Requires that +# LIBAOM_BIN_PATH points to the directory containing aomdec. $1 is used as the +# input file path and shifted away. All remaining parameters are passed through +# to aomdec. +aomdec_pipe() { + local readonly input="$1" + shift + if [ ! -e "${input}" ]; then + local file="${AOM_TEST_OUTPUT_DIR}/test_encode.ivf" + encode_yuv_raw_input_av1 "${file}" --ivf + else + local file="${input}" + fi + cat "${file}" | aomdec - "$@" ${devnull} +} + + +# Wrapper function for running aomdec. Requires that LIBAOM_BIN_PATH points to +# the directory containing aomdec. $1 one is used as the input file path and +# shifted away. All remaining parameters are passed through to aomdec. +aomdec() { + local readonly decoder="$(aom_tool_path aomdec)" + local readonly input="$1" + shift + eval "${AOM_TEST_PREFIX}" "${decoder}" "$input" "$@" ${devnull} +} + +aomdec_can_decode_av1() { + if [ "$(av1_decode_available)" = "yes" ]; then + echo yes + fi +} + +aomdec_aom_ivf_pipe_input() { + if [ "$(aomdec_can_decode_av1)" = "yes" ]; then + aomdec_pipe "${AOM_IVF_FILE}" --summary --noblit + fi +} + +aomdec_av1_webm() { + if [ "$(aomdec_can_decode_av1)" = "yes" ] && \ + [ "$(webm_io_available)" = "yes" ]; then + if [ ! -e "${AV1_WEBM_FILE}" ]; then + local file="${AOM_TEST_OUTPUT_DIR}/test_encode.webm" + encode_yuv_raw_input_av1 "${file}" + else + aomdec "${AV1_WEBM_FILE}" --summary --noblit + fi + fi +} + +aomdec_av1_webm_frame_parallel() { + if [ "$(aomdec_can_decode_av1)" = "yes" ] && \ + [ "$(webm_io_available)" = "yes" ]; then + local file + if [ ! -e "${AV1_WEBM_FILE}" ]; then + file="${AOM_TEST_OUTPUT_DIR}/test_encode.webm" + encode_yuv_raw_input_av1 "${file}" "--ivf --error-resilient=1 " + else + file="${AV1_FPM_WEBM_FILE}" + fi + for threads in 2 3 4 5 6 7 8; do + aomdec "${file}" --summary --noblit --threads=$threads \ + --frame-parallel + done + fi +} + +# TODO(vigneshv): Enable or remove this test and associated code. 
+DISABLED_aomdec_av1_webm_less_than_50_frames() { + # ensure that reaching eof in webm_guess_framerate doesn't result in invalid + # frames in actual webm_read_frame calls. + if [ "$(aomdec_can_decode_av1)" = "yes" ] && \ + [ "$(webm_io_available)" = "yes" ]; then + local readonly decoder="$(aom_tool_path aomdec)" + local readonly expected=10 + local readonly num_frames=$(${AOM_TEST_PREFIX} "${decoder}" \ + "${AV1_LT_50_FRAMES_WEBM_FILE}" --summary --noblit 2>&1 \ + | awk '/^[0-9]+ decoded frames/ { print $1 }') + if [ "$num_frames" -ne "$expected" ]; then + elog "Output frames ($num_frames) != expected ($expected)" + return 1 + fi + fi +} + +aomdec_tests="aomdec_av1_webm + aomdec_av1_webm_frame_parallel + aomdec_aom_ivf_pipe_input + DISABLED_aomdec_av1_webm_less_than_50_frames" + +run_tests aomdec_verify_environment "${aomdec_tests}" diff --git a/third_party/aom/test/aomenc.sh b/third_party/aom/test/aomenc.sh new file mode 100755 index 000000000..57a4c28a5 --- /dev/null +++ b/third_party/aom/test/aomenc.sh @@ -0,0 +1,241 @@ +#!/bin/sh +## Copyright (c) 2016, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## +## This file tests aomenc using hantro_collage_w352h288.yuv as input. To add +## new tests to this file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to aomenc_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +readonly TEST_FRAMES=5 + +# Environment check: Make sure input is available. +aomenc_verify_environment() { + if [ ! 
-e "${YUV_RAW_INPUT}" ]; then + elog "The file ${YUV_RAW_INPUT##*/} must exist in LIBAOM_TEST_DATA_PATH." + return 1 + fi + if [ "$(aomenc_can_encode_av1)" = "yes" ]; then + if [ ! -e "${Y4M_NOSQ_PAR_INPUT}" ]; then + elog "The file ${Y4M_NOSQ_PAR_INPUT##*/} must exist in" + elog "LIBAOM_TEST_DATA_PATH." + return 1 + fi + fi + if [ -z "$(aom_tool_path aomenc)" ]; then + elog "aomenc not found. It must exist in LIBAOM_BIN_PATH or its parent." + return 1 + fi +} + +aomenc_can_encode_av1() { + if [ "$(av1_encode_available)" = "yes" ]; then + echo yes + fi +} + +aomenc_can_encode_av1() { + if [ "$(av1_encode_available)" = "yes" ]; then + echo yes + fi +} + +# Utilities that echo aomenc input file parameters. +y4m_input_non_square_par() { + echo ""${Y4M_NOSQ_PAR_INPUT}"" +} + +y4m_input_720p() { + echo ""${Y4M_720P_INPUT}"" +} + +# Echo default aomenc real time encoding params. $1 is the codec, which defaults +# to av1 if unspecified. +aomenc_rt_params() { + local readonly codec="${1:-av1}" + echo "--codec=${codec} + --buf-initial-sz=500 + --buf-optimal-sz=600 + --buf-sz=1000 + --cpu-used=-6 + --end-usage=cbr + --error-resilient=1 + --kf-max-dist=90000 + --lag-in-frames=0 + --max-intra-rate=300 + --max-q=56 + --min-q=2 + --noise-sensitivity=0 + --overshoot-pct=50 + --passes=1 + --profile=0 + --resize-allowed=0 + --rt + --static-thresh=0 + --undershoot-pct=50" +} + +# Wrapper function for running aomenc with pipe input. Requires that +# LIBAOM_BIN_PATH points to the directory containing aomenc. $1 is used as the +# input file path and shifted away. All remaining parameters are passed through +# to aomenc. +aomenc_pipe() { + local readonly encoder="$(aom_tool_path aomenc)" + local readonly input="$1" + shift + cat "${input}" | eval "${AOM_TEST_PREFIX}" "${encoder}" - \ + --test-decode=fatal \ + "$@" ${devnull} +} + +# Wrapper function for running aomenc. Requires that LIBAOM_BIN_PATH points to +# the directory containing aomenc. 
$1 one is used as the input file path and +# shifted away. All remaining parameters are passed through to aomenc. +aomenc() { + local readonly encoder="$(aom_tool_path aomenc)" + local readonly input="$1" + shift + eval "${AOM_TEST_PREFIX}" "${encoder}" "${input}" \ + --test-decode=fatal \ + "$@" ${devnull} +} + +aomenc_av1_ivf() { + if [ "$(aomenc_can_encode_av1)" = "yes" ]; then + local readonly output="${AOM_TEST_OUTPUT_DIR}/av1.ivf" + aomenc $(yuv_raw_input) \ + --codec=av1 \ + --limit="${TEST_FRAMES}" \ + --ivf \ + --output="${output}" + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + fi +} + +aomenc_av1_webm() { + if [ "$(aomenc_can_encode_av1)" = "yes" ] && \ + [ "$(webm_io_available)" = "yes" ]; then + local readonly output="${AOM_TEST_OUTPUT_DIR}/av1.webm" + aomenc $(yuv_raw_input) \ + --codec=av1 \ + --limit="${TEST_FRAMES}" \ + --output="${output}" + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + fi +} + +aomenc_av1_webm_2pass() { + if [ "$(aomenc_can_encode_av1)" = "yes" ] && \ + [ "$(webm_io_available)" = "yes" ]; then + local readonly output="${AOM_TEST_OUTPUT_DIR}/av1.webm" + aomenc $(yuv_raw_input) \ + --codec=av1 \ + --limit="${TEST_FRAMES}" \ + --output="${output}" \ + --passes=2 + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + fi +} + +aomenc_av1_ivf_lossless() { + if [ "$(aomenc_can_encode_av1)" = "yes" ]; then + local readonly output="${AOM_TEST_OUTPUT_DIR}/av1_lossless.ivf" + aomenc $(yuv_raw_input) \ + --codec=av1 \ + --limit="${TEST_FRAMES}" \ + --ivf \ + --output="${output}" \ + --lossless=1 + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." 
+ return 1 + fi + fi +} + +aomenc_av1_ivf_minq0_maxq0() { + if [ "$(aomenc_can_encode_av1)" = "yes" ]; then + local readonly output="${AOM_TEST_OUTPUT_DIR}/av1_lossless_minq0_maxq0.ivf" + aomenc $(yuv_raw_input) \ + --codec=av1 \ + --limit="${TEST_FRAMES}" \ + --ivf \ + --output="${output}" \ + --min-q=0 \ + --max-q=0 + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + fi +} + +aomenc_av1_webm_lag5_frames10() { + if [ "$(aomenc_can_encode_av1)" = "yes" ] && \ + [ "$(webm_io_available)" = "yes" ]; then + local readonly lag_total_frames=10 + local readonly lag_frames=5 + local readonly output="${AOM_TEST_OUTPUT_DIR}/av1_lag5_frames10.webm" + aomenc $(yuv_raw_input) \ + --codec=av1 \ + --limit="${lag_total_frames}" \ + --lag-in-frames="${lag_frames}" \ + --output="${output}" \ + --passes=2 \ + --auto-alt-ref=1 + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + fi +} + +# TODO(fgalligan): Test that DisplayWidth is different than video width. +aomenc_av1_webm_non_square_par() { + if [ "$(aomenc_can_encode_av1)" = "yes" ] && \ + [ "$(webm_io_available)" = "yes" ]; then + local readonly output="${AOM_TEST_OUTPUT_DIR}/av1_non_square_par.webm" + aomenc $(y4m_input_non_square_par) \ + --codec=av1 \ + --limit="${TEST_FRAMES}" \ + --output="${output}" + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + fi +} + +aomenc_tests="aomenc_av1_ivf + aomenc_av1_webm + aomenc_av1_webm_2pass + aomenc_av1_ivf_lossless + aomenc_av1_ivf_minq0_maxq0 + aomenc_av1_webm_lag5_frames10 + aomenc_av1_webm_non_square_par" + +run_tests aomenc_verify_environment "${aomenc_tests}" diff --git a/third_party/aom/test/aq_segment_test.cc b/third_party/aom/test/aq_segment_test.cc new file mode 100644 index 000000000..5dc93ec79 --- /dev/null +++ b/third_party/aom/test/aq_segment_test.cc @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include "./aom_config.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" + +namespace { + +class AqSegmentTest + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int> { + protected: + AqSegmentTest() : EncoderTest(GET_PARAM(0)) {} + virtual ~AqSegmentTest() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(GET_PARAM(1)); + set_cpu_used_ = GET_PARAM(2); + aq_mode_ = 0; + } + + virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) { + if (video->frame() == 1) { + encoder->Control(AOME_SET_CPUUSED, set_cpu_used_); + encoder->Control(AV1E_SET_AQ_MODE, aq_mode_); +#if CONFIG_EXT_DELTA_Q + encoder->Control(AV1E_SET_DELTAQ_MODE, deltaq_mode_); +#endif + encoder->Control(AOME_SET_MAX_INTRA_BITRATE_PCT, 100); + } + } + + void DoTest(int aq_mode) { + aq_mode_ = aq_mode; +#if CONFIG_EXT_DELTA_Q + deltaq_mode_ = 0; +#endif + cfg_.kf_max_dist = 12; + cfg_.rc_min_quantizer = 8; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 6; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_target_bitrate = 300; + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 15); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } + + 
int set_cpu_used_; + int aq_mode_; +#if CONFIG_EXT_DELTA_Q + int deltaq_mode_; +#endif +}; + +// Validate that this AQ segmentation mode (AQ=1, variance_ap) +// encodes and decodes without a mismatch. +TEST_P(AqSegmentTest, TestNoMisMatchAQ1) { DoTest(1); } + +// Validate that this AQ segmentation mode (AQ=2, complexity_aq) +// encodes and decodes without a mismatch. +TEST_P(AqSegmentTest, TestNoMisMatchAQ2) { DoTest(2); } + +// Validate that this AQ segmentation mode (AQ=3, cyclic_refresh_aq) +// encodes and decodes without a mismatch. +TEST_P(AqSegmentTest, TestNoMisMatchAQ3) { DoTest(3); } + +class AqSegmentTestLarge : public AqSegmentTest {}; + +TEST_P(AqSegmentTestLarge, TestNoMisMatchAQ1) { DoTest(1); } + +TEST_P(AqSegmentTestLarge, TestNoMisMatchAQ2) { DoTest(2); } + +TEST_P(AqSegmentTestLarge, TestNoMisMatchAQ3) { DoTest(3); } + +#if CONFIG_DELTA_Q & !CONFIG_EXT_DELTA_Q +// Validate that this AQ mode (AQ=4, delta q) +// encodes and decodes without a mismatch. +TEST_P(AqSegmentTest, TestNoMisMatchAQ4) { + cfg_.rc_end_usage = AOM_CQ; + aq_mode_ = 4; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 100); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} +#endif + +#if CONFIG_EXT_DELTA_Q +// Validate that this delta q mode +// encodes and decodes without a mismatch. 
+TEST_P(AqSegmentTest, TestNoMisMatchExtDeltaQ) { + cfg_.rc_end_usage = AOM_CQ; + aq_mode_ = 0; + deltaq_mode_ = 2; + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 100); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} +#endif + +AV1_INSTANTIATE_TEST_CASE(AqSegmentTest, + ::testing::Values(::libaom_test::kRealTime, + ::libaom_test::kOnePassGood), + ::testing::Range(5, 9)); +AV1_INSTANTIATE_TEST_CASE(AqSegmentTestLarge, + ::testing::Values(::libaom_test::kRealTime, + ::libaom_test::kOnePassGood), + ::testing::Range(3, 5)); +} // namespace diff --git a/third_party/aom/test/arf_freq_test.cc b/third_party/aom/test/arf_freq_test.cc new file mode 100644 index 000000000..bef58b3e8 --- /dev/null +++ b/third_party/aom/test/arf_freq_test.cc @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+*/ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "test/yuv_video_source.h" +#include "av1/encoder/ratectrl.h" + +namespace { + +const unsigned int kFrames = 100; +const int kBitrate = 500; + +#define ARF_NOT_SEEN 1000001 +#define ARF_SEEN_ONCE 1000000 + +typedef struct { + const char *filename; + unsigned int width; + unsigned int height; + unsigned int framerate_num; + unsigned int framerate_den; + unsigned int input_bit_depth; + aom_img_fmt fmt; + aom_bit_depth_t bit_depth; + unsigned int profile; +} TestVideoParam; + +typedef struct { + libaom_test::TestMode mode; + int cpu_used; +} TestEncodeParam; + +const TestVideoParam kTestVectors[] = { + // artificially increase framerate to trigger default check + { "hantro_collage_w352h288.yuv", 352, 288, 5000, 1, 8, AOM_IMG_FMT_I420, + AOM_BITS_8, 0 }, + { "hantro_collage_w352h288.yuv", 352, 288, 30, 1, 8, AOM_IMG_FMT_I420, + AOM_BITS_8, 0 }, + { "rush_hour_444.y4m", 352, 288, 30, 1, 8, AOM_IMG_FMT_I444, AOM_BITS_8, 1 }, +#if CONFIG_HIGHBITDEPTH +// Add list of profile 2/3 test videos here ... +#endif // CONFIG_HIGHBITDEPTH +}; + +const TestEncodeParam kEncodeVectors[] = { + { ::libaom_test::kOnePassGood, 2 }, { ::libaom_test::kOnePassGood, 5 }, + { ::libaom_test::kTwoPassGood, 1 }, { ::libaom_test::kTwoPassGood, 2 }, + { ::libaom_test::kTwoPassGood, 5 }, { ::libaom_test::kRealTime, 5 }, +}; + +const int kMinArfVectors[] = { + // NOTE: 0 refers to the default built-in logic in: + // av1_rc_get_default_min_gf_interval(...) 
+ 0, 4, 8, 12, 15 +}; + +int is_extension_y4m(const char *filename) { + const char *dot = strrchr(filename, '.'); + if (!dot || dot == filename) + return 0; + else + return !strcmp(dot, ".y4m"); +} + +class ArfFreqTestLarge + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWith3Params<TestVideoParam, + TestEncodeParam, int> { + protected: + ArfFreqTestLarge() + : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(1)), + test_encode_param_(GET_PARAM(2)), min_arf_requested_(GET_PARAM(3)) {} + + virtual ~ArfFreqTestLarge() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(test_encode_param_.mode); + if (test_encode_param_.mode != ::libaom_test::kRealTime) { + cfg_.g_lag_in_frames = 25; + cfg_.rc_end_usage = AOM_VBR; + } else { + cfg_.g_lag_in_frames = 0; + cfg_.rc_end_usage = AOM_CBR; + cfg_.rc_buf_sz = 1000; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 600; + } + dec_cfg_.threads = 4; + } + + virtual void BeginPassHook(unsigned int) { + min_run_ = ARF_NOT_SEEN; + run_of_visible_frames_ = 0; + } + + int GetNumFramesInPkt(const aom_codec_cx_pkt_t *pkt) { + const uint8_t *buffer = reinterpret_cast<uint8_t *>(pkt->data.frame.buf); + const uint8_t marker = buffer[pkt->data.frame.sz - 1]; + const int mag = ((marker >> 3) & 3) + 1; + int frames = (marker & 0x7) + 1; + const unsigned int index_sz = 2 + mag * frames; + // Check for superframe or not. + // Assume superframe has only one visible frame, the rest being + // invisible. If superframe index is not found, then there is only + // one frame. 
+ if (!((marker & 0xe0) == 0xc0 && pkt->data.frame.sz >= index_sz && + buffer[pkt->data.frame.sz - index_sz] == marker)) { + frames = 1; + } + return frames; + } + + virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) { + if (pkt->kind != AOM_CODEC_CX_FRAME_PKT) return; + const int frames = GetNumFramesInPkt(pkt); + if (frames == 1) { + run_of_visible_frames_++; + } else if (frames == 2) { + if (min_run_ == ARF_NOT_SEEN) { + min_run_ = ARF_SEEN_ONCE; + } else if (min_run_ == ARF_SEEN_ONCE || + run_of_visible_frames_ < min_run_) { + min_run_ = run_of_visible_frames_; + } + run_of_visible_frames_ = 1; + } else { + min_run_ = 0; + run_of_visible_frames_ = 1; + } + } + + virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) { + if (video->frame() == 0) { + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1); + encoder->Control(AV1E_SET_TILE_COLUMNS, 4); + encoder->Control(AOME_SET_CPUUSED, test_encode_param_.cpu_used); + encoder->Control(AV1E_SET_MIN_GF_INTERVAL, min_arf_requested_); + if (test_encode_param_.mode != ::libaom_test::kRealTime) { + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + } + } + } + + int GetMinVisibleRun() const { return min_run_; } + + int GetMinArfDistanceRequested() const { + if (min_arf_requested_) + return min_arf_requested_; + else + return av1_rc_get_default_min_gf_interval( + test_video_param_.width, test_video_param_.height, + (double)test_video_param_.framerate_num / + test_video_param_.framerate_den); + } + + TestVideoParam test_video_param_; + TestEncodeParam test_encode_param_; + + private: + int min_arf_requested_; + int min_run_; + int run_of_visible_frames_; +}; + +TEST_P(ArfFreqTestLarge, MinArfFreqTest) { + cfg_.rc_target_bitrate = kBitrate; + cfg_.g_error_resilient = 0; + cfg_.g_profile = test_video_param_.profile; + cfg_.g_input_bit_depth = test_video_param_.input_bit_depth; + 
cfg_.g_bit_depth = test_video_param_.bit_depth; + init_flags_ = AOM_CODEC_USE_PSNR; + if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH; + + testing::internal::scoped_ptr<libaom_test::VideoSource> video; + if (is_extension_y4m(test_video_param_.filename)) { + video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0, + kFrames)); + } else { + video.reset(new libaom_test::YUVVideoSource( + test_video_param_.filename, test_video_param_.fmt, + test_video_param_.width, test_video_param_.height, + test_video_param_.framerate_num, test_video_param_.framerate_den, 0, + kFrames)); + } + + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); + const int min_run = GetMinVisibleRun(); + const int min_arf_dist_requested = GetMinArfDistanceRequested(); + if (min_run != ARF_NOT_SEEN && min_run != ARF_SEEN_ONCE) { + const int min_arf_dist = min_run + 1; + EXPECT_GE(min_arf_dist, min_arf_dist_requested); + } +} + +#if CONFIG_HIGHBITDEPTH || CONFIG_EXT_REFS +#if CONFIG_AV1_ENCODER +// TODO(angiebird): 25-29 fail in high bitdepth mode. +// TODO(zoeliu): This ArfFreqTest does not work with BWDREF_FRAME, as +// BWDREF_FRAME is also a non-show frame, and the minimum run between two +// consecutive BWDREF_FRAME's may vary between 1 and any arbitrary positive +// number as long as it does not exceed the gf_group interval. 
+INSTANTIATE_TEST_CASE_P( + DISABLED_AV1, ArfFreqTestLarge, + ::testing::Combine( + ::testing::Values( + static_cast<const libaom_test::CodecFactory *>(&libaom_test::kAV1)), + ::testing::ValuesIn(kTestVectors), ::testing::ValuesIn(kEncodeVectors), + ::testing::ValuesIn(kMinArfVectors))); +#endif // CONFIG_AV1_ENCODER +#else +AV1_INSTANTIATE_TEST_CASE(ArfFreqTestLarge, ::testing::ValuesIn(kTestVectors), + ::testing::ValuesIn(kEncodeVectors), + ::testing::ValuesIn(kMinArfVectors)); +#endif // CONFIG_HIGHBITDEPTH || CONFIG_EXT_REFS +} // namespace diff --git a/third_party/aom/test/av1_convolve_optimz_test.cc b/third_party/aom/test/av1_convolve_optimz_test.cc new file mode 100644 index 000000000..fd0f6dbce --- /dev/null +++ b/third_party/aom/test/av1_convolve_optimz_test.cc @@ -0,0 +1,405 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./av1_rtcd.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/util.h" + +namespace { + +using std::tr1::tuple; +using libaom_test::ACMRandom; + +typedef void (*ConvInit)(); +typedef void (*conv_filter_t)(const uint8_t *, int, uint8_t *, int, int, int, + const InterpFilterParams, int, int, + ConvolveParams *); +#if CONFIG_HIGHBITDEPTH +typedef void (*hbd_conv_filter_t)(const uint16_t *, int, uint16_t *, int, int, + int, const InterpFilterParams, int, int, int, + int); +#endif + +// Test parameter list: +// <convolve_horiz_func, convolve_vert_func, +// <width, height>, filter_params, subpel_x_q4, avg> +typedef tuple<int, int> BlockDimension; +typedef tuple<ConvInit, conv_filter_t, conv_filter_t, BlockDimension, + InterpFilter, int, int> + ConvParams; +#if CONFIG_HIGHBITDEPTH +// Test parameter list: +// <convolve_horiz_func, convolve_vert_func, +// <width, height>, filter_params, subpel_x_q4, avg, bit_dpeth> +typedef tuple<ConvInit, hbd_conv_filter_t, hbd_conv_filter_t, BlockDimension, + InterpFilter, int, int, int> + HbdConvParams; +#endif + +// Note: +// src_ and src_ref_ have special boundary requirement +// dst_ and dst_ref_ don't +const size_t maxWidth = 256; +const size_t maxHeight = 256; +const size_t maxBlockSize = maxWidth * maxHeight; +const int horizOffset = 32; +const int vertiOffset = 32; +const int stride = 128; +const int x_step_q4 = 16; + +class AV1ConvolveOptimzTest : public ::testing::TestWithParam<ConvParams> { + public: + virtual ~AV1ConvolveOptimzTest() {} + virtual void SetUp() { + ConvInit conv_init = GET_PARAM(0); + conv_init(); + conv_horiz_ = GET_PARAM(1); + conv_vert_ = GET_PARAM(2); + BlockDimension block = GET_PARAM(3); + width_ = std::tr1::get<0>(block); + height_ = std::tr1::get<1>(block); + filter_ = GET_PARAM(4); + subpel_ = GET_PARAM(5); + int ref = GET_PARAM(6); + 
const int plane = 0; + conv_params_ = get_conv_params(ref, plane); + + alloc_ = new uint8_t[maxBlockSize * 4]; + src_ = alloc_ + (vertiOffset * maxWidth); + src_ += horizOffset; + src_ref_ = src_ + maxBlockSize; + + dst_ = alloc_ + 2 * maxBlockSize; + dst_ref_ = alloc_ + 3 * maxBlockSize; + } + + virtual void TearDown() { + delete[] alloc_; + libaom_test::ClearSystemState(); + } + + protected: + void RunHorizFilterBitExactCheck(); + void RunVertFilterBitExactCheck(); + + private: + void PrepFilterBuffer(); + void DiffFilterBuffer(); + conv_filter_t conv_horiz_; + conv_filter_t conv_vert_; + uint8_t *alloc_; + uint8_t *src_; + uint8_t *dst_; + uint8_t *src_ref_; + uint8_t *dst_ref_; + int width_; + int height_; + InterpFilter filter_; + int subpel_; + ConvolveParams conv_params_; +}; + +void AV1ConvolveOptimzTest::PrepFilterBuffer() { + int r, c; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + memset(alloc_, 0, 4 * maxBlockSize * sizeof(alloc_[0])); + + uint8_t *src_ptr = src_; + uint8_t *dst_ptr = dst_; + uint8_t *src_ref_ptr = src_ref_; + uint8_t *dst_ref_ptr = dst_ref_; + + for (r = 0; r < height_; ++r) { + for (c = 0; c < width_; ++c) { + src_ptr[c] = rnd.Rand8(); + src_ref_ptr[c] = src_ptr[c]; + dst_ptr[c] = rnd.Rand8(); + dst_ref_ptr[c] = dst_ptr[c]; + } + src_ptr += stride; + src_ref_ptr += stride; + dst_ptr += stride; + dst_ref_ptr += stride; + } +} + +void AV1ConvolveOptimzTest::DiffFilterBuffer() { + int r, c; + const uint8_t *dst_ptr = dst_; + const uint8_t *dst_ref_ptr = dst_ref_; + for (r = 0; r < height_; ++r) { + for (c = 0; c < width_; ++c) { + EXPECT_EQ((uint8_t)dst_ref_ptr[c], (uint8_t)dst_ptr[c]) + << "Error at row: " << r << " col: " << c << " " + << "w = " << width_ << " " + << "h = " << height_ << " " + << "filter group index = " << filter_ << " " + << "filter index = " << subpel_; + } + dst_ptr += stride; + dst_ref_ptr += stride; + } +} + +void AV1ConvolveOptimzTest::RunHorizFilterBitExactCheck() { + PrepFilterBuffer(); + + 
InterpFilterParams filter_params = av1_get_interp_filter_params(filter_); + + av1_convolve_horiz_c(src_ref_, stride, dst_ref_, stride, width_, height_, + filter_params, subpel_, x_step_q4, &conv_params_); + + conv_horiz_(src_, stride, dst_, stride, width_, height_, filter_params, + subpel_, x_step_q4, &conv_params_); + + DiffFilterBuffer(); + + // Note: + // Here we need calculate a height which is different from the specified one + // and test again. + int intermediate_height = + (((height_ - 1) * 16 + subpel_) >> SUBPEL_BITS) + filter_params.taps; + PrepFilterBuffer(); + + av1_convolve_horiz_c(src_ref_, stride, dst_ref_, stride, width_, + intermediate_height, filter_params, subpel_, x_step_q4, + &conv_params_); + + conv_horiz_(src_, stride, dst_, stride, width_, intermediate_height, + filter_params, subpel_, x_step_q4, &conv_params_); + + DiffFilterBuffer(); +} + +void AV1ConvolveOptimzTest::RunVertFilterBitExactCheck() { + PrepFilterBuffer(); + + InterpFilterParams filter_params = av1_get_interp_filter_params(filter_); + + av1_convolve_vert_c(src_ref_, stride, dst_ref_, stride, width_, height_, + filter_params, subpel_, x_step_q4, &conv_params_); + + conv_vert_(src_, stride, dst_, stride, width_, height_, filter_params, + subpel_, x_step_q4, &conv_params_); + + DiffFilterBuffer(); +} + +TEST_P(AV1ConvolveOptimzTest, HorizBitExactCheck) { + RunHorizFilterBitExactCheck(); +} +TEST_P(AV1ConvolveOptimzTest, VerticalBitExactCheck) { + RunVertFilterBitExactCheck(); +} + +using std::tr1::make_tuple; + +#if (HAVE_SSSE3 || HAVE_SSE4_1) && CONFIG_DUAL_FILTER +const BlockDimension kBlockDim[] = { + make_tuple(2, 2), make_tuple(2, 4), make_tuple(4, 4), + make_tuple(4, 8), make_tuple(8, 4), make_tuple(8, 8), + make_tuple(8, 16), make_tuple(16, 8), make_tuple(16, 16), + make_tuple(16, 32), make_tuple(32, 16), make_tuple(32, 32), + make_tuple(32, 64), make_tuple(64, 32), make_tuple(64, 64), + make_tuple(64, 128), make_tuple(128, 64), make_tuple(128, 128), +}; + +// 10/12-tap 
filters +const InterpFilter kFilter[] = { FILTER_REGULAR_UV, BILINEAR, MULTITAP_SHARP }; + +const int kSubpelQ4[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; + +const int kAvg[] = { 0, 1 }; +#endif + +#if HAVE_SSSE3 && CONFIG_DUAL_FILTER +INSTANTIATE_TEST_CASE_P( + SSSE3, AV1ConvolveOptimzTest, + ::testing::Combine(::testing::Values(av1_lowbd_convolve_init_ssse3), + ::testing::Values(av1_convolve_horiz_ssse3), + ::testing::Values(av1_convolve_vert_ssse3), + ::testing::ValuesIn(kBlockDim), + ::testing::ValuesIn(kFilter), + ::testing::ValuesIn(kSubpelQ4), + ::testing::ValuesIn(kAvg))); +#endif // HAVE_SSSE3 && CONFIG_DUAL_FILTER + +#if CONFIG_HIGHBITDEPTH +typedef ::testing::TestWithParam<HbdConvParams> TestWithHbdConvParams; +class AV1HbdConvolveOptimzTest : public TestWithHbdConvParams { + public: + virtual ~AV1HbdConvolveOptimzTest() {} + virtual void SetUp() { + ConvInit conv_init = GET_PARAM(0); + conv_init(); + conv_horiz_ = GET_PARAM(1); + conv_vert_ = GET_PARAM(2); + BlockDimension block = GET_PARAM(3); + width_ = std::tr1::get<0>(block); + height_ = std::tr1::get<1>(block); + filter_ = GET_PARAM(4); + subpel_ = GET_PARAM(5); + avg_ = GET_PARAM(6); + bit_depth_ = GET_PARAM(7); + + alloc_ = new uint16_t[maxBlockSize * 4]; + src_ = alloc_ + (vertiOffset * maxWidth); + src_ += horizOffset; + src_ref_ = src_ + maxBlockSize; + + dst_ = alloc_ + 2 * maxBlockSize; + dst_ref_ = alloc_ + 3 * maxBlockSize; + } + + virtual void TearDown() { + delete[] alloc_; + libaom_test::ClearSystemState(); + } + + protected: + void RunHorizFilterBitExactCheck(); + void RunVertFilterBitExactCheck(); + + private: + void PrepFilterBuffer(); + void DiffFilterBuffer(); + hbd_conv_filter_t conv_horiz_; + hbd_conv_filter_t conv_vert_; + uint16_t *alloc_; + uint16_t *src_; + uint16_t *dst_; + uint16_t *src_ref_; + uint16_t *dst_ref_; + int width_; + int height_; + InterpFilter filter_; + int subpel_; + int avg_; + int bit_depth_; +}; + +void 
AV1HbdConvolveOptimzTest::PrepFilterBuffer() { + int r, c; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + memset(alloc_, 0, 4 * maxBlockSize * sizeof(alloc_[0])); + + uint16_t *src_ptr = src_; + uint16_t *dst_ptr = dst_; + uint16_t *dst_ref_ptr = dst_ref_; + uint16_t hbd_mask = (1 << bit_depth_) - 1; + + for (r = 0; r < height_; ++r) { + for (c = 0; c < width_; ++c) { + src_ptr[c] = rnd.Rand16() & hbd_mask; + dst_ptr[c] = rnd.Rand16() & hbd_mask; + dst_ref_ptr[c] = dst_ptr[c]; + } + src_ptr += stride; + dst_ptr += stride; + dst_ref_ptr += stride; + } +} + +void AV1HbdConvolveOptimzTest::DiffFilterBuffer() { + int r, c; + const uint16_t *dst_ptr = dst_; + const uint16_t *dst_ref_ptr = dst_ref_; + for (r = 0; r < height_; ++r) { + for (c = 0; c < width_; ++c) { + EXPECT_EQ((uint16_t)dst_ref_ptr[c], (uint16_t)dst_ptr[c]) + << "Error at row: " << r << " col: " << c << " " + << "w = " << width_ << " " + << "h = " << height_ << " " + << "filter group index = " << filter_ << " " + << "filter index = " << subpel_ << " " + << "bit depth = " << bit_depth_; + } + dst_ptr += stride; + dst_ref_ptr += stride; + } +} + +void AV1HbdConvolveOptimzTest::RunHorizFilterBitExactCheck() { + PrepFilterBuffer(); + + InterpFilterParams filter_params = av1_get_interp_filter_params(filter_); + + av1_highbd_convolve_horiz_c(src_, stride, dst_ref_, stride, width_, height_, + filter_params, subpel_, x_step_q4, avg_, + bit_depth_); + + conv_horiz_(src_, stride, dst_, stride, width_, height_, filter_params, + subpel_, x_step_q4, avg_, bit_depth_); + + DiffFilterBuffer(); + + // Note: + // Here we need calculate a height which is different from the specified one + // and test again. 
+ int intermediate_height = + (((height_ - 1) * 16 + subpel_) >> SUBPEL_BITS) + filter_params.taps; + PrepFilterBuffer(); + + av1_highbd_convolve_horiz_c(src_, stride, dst_ref_, stride, width_, + intermediate_height, filter_params, subpel_, + x_step_q4, avg_, bit_depth_); + + conv_horiz_(src_, stride, dst_, stride, width_, intermediate_height, + filter_params, subpel_, x_step_q4, avg_, bit_depth_); + + DiffFilterBuffer(); +} + +void AV1HbdConvolveOptimzTest::RunVertFilterBitExactCheck() { + PrepFilterBuffer(); + + InterpFilterParams filter_params = av1_get_interp_filter_params(filter_); + + av1_highbd_convolve_vert_c(src_, stride, dst_ref_, stride, width_, height_, + filter_params, subpel_, x_step_q4, avg_, + bit_depth_); + + conv_vert_(src_, stride, dst_, stride, width_, height_, filter_params, + subpel_, x_step_q4, avg_, bit_depth_); + + DiffFilterBuffer(); +} + +TEST_P(AV1HbdConvolveOptimzTest, HorizBitExactCheck) { + RunHorizFilterBitExactCheck(); +} +TEST_P(AV1HbdConvolveOptimzTest, VertBitExactCheck) { + RunVertFilterBitExactCheck(); +} + +#if HAVE_SSE4_1 && CONFIG_DUAL_FILTER + +const int kBitdepth[] = { 10, 12 }; + +INSTANTIATE_TEST_CASE_P( + SSE4_1, AV1HbdConvolveOptimzTest, + ::testing::Combine(::testing::Values(av1_highbd_convolve_init_sse4_1), + ::testing::Values(av1_highbd_convolve_horiz_sse4_1), + ::testing::Values(av1_highbd_convolve_vert_sse4_1), + ::testing::ValuesIn(kBlockDim), + ::testing::ValuesIn(kFilter), + ::testing::ValuesIn(kSubpelQ4), + ::testing::ValuesIn(kAvg), + ::testing::ValuesIn(kBitdepth))); +#endif // HAVE_SSE4_1 && CONFIG_DUAL_FILTER +#endif // CONFIG_HIGHBITDEPTH +} // namespace diff --git a/third_party/aom/test/av1_convolve_test.cc b/third_party/aom/test/av1_convolve_test.cc new file mode 100644 index 000000000..02ac8e7bb --- /dev/null +++ b/third_party/aom/test/av1_convolve_test.cc @@ -0,0 +1,522 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <algorithm> +#include <vector> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./av1_rtcd.h" +#include "./aom_dsp_rtcd.h" +#include "aom_dsp/aom_dsp_common.h" +#include "aom_ports/mem.h" +#include "av1/common/filter.h" +#include "av1/common/convolve.h" +#include "test/acm_random.h" +#include "test/util.h" + +using libaom_test::ACMRandom; + +namespace { +using std::tr1::tuple; +static void filter_block1d_horiz_c(const uint8_t *src_ptr, int src_stride, + const int16_t *filter, int tap, + uint8_t *dst_ptr, int dst_stride, int w, + int h) { + src_ptr -= tap / 2 - 1; + for (int r = 0; r < h; ++r) { + for (int c = 0; c < w; ++c) { + int sum = 0; + for (int i = 0; i < tap; ++i) { + sum += src_ptr[c + i] * filter[i]; + } + dst_ptr[c] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); + } + src_ptr += src_stride; + dst_ptr += dst_stride; + } +} + +static void filter_block1d_vert_c(const uint8_t *src_ptr, int src_stride, + const int16_t *filter, int tap, + uint8_t *dst_ptr, int dst_stride, int w, + int h) { + src_ptr -= (tap / 2 - 1) * src_stride; + for (int r = 0; r < h; ++r) { + for (int c = 0; c < w; ++c) { + int sum = 0; + for (int i = 0; i < tap; ++i) { + sum += src_ptr[c + i * src_stride] * filter[i]; + } + dst_ptr[c] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); + } + src_ptr += src_stride; + dst_ptr += dst_stride; + } +} + +static int match(const uint8_t *out, int out_stride, const uint8_t *ref_out, + int ref_out_stride, int w, int 
h) { + for (int r = 0; r < h; ++r) { + for (int c = 0; c < w; ++c) { + if (out[r * out_stride + c] != ref_out[r * ref_out_stride + c]) return 0; + } + } + return 1; +} + +typedef void (*ConvolveFunc)(const uint8_t *src, int src_stride, uint8_t *dst, + int dst_stride, int w, int h, + const InterpFilterParams filter_params, + const int subpel_q4, int step_q4, + ConvolveParams *conv_params); + +struct ConvolveFunctions { + ConvolveFunctions(ConvolveFunc hf, ConvolveFunc vf) : hf_(hf), vf_(vf) {} + ConvolveFunc hf_; + ConvolveFunc vf_; +}; + +typedef tuple<ConvolveFunctions *, InterpFilter /*filter_x*/, + InterpFilter /*filter_y*/> + ConvolveParam; + +class Av1ConvolveTest : public ::testing::TestWithParam<ConvolveParam> { + public: + virtual void SetUp() { + // rnd_ is already constructed here, so "rnd_(seed)" would invoke + // operator() (draw and discard one value) instead of seeding. Reset() + // re-seeds the generator so every test starts from the same state. + rnd_.Reset(ACMRandom::DeterministicSeed()); + cfs_ = GET_PARAM(0); + interp_filter_ls_[0] = GET_PARAM(2); + interp_filter_ls_[2] = interp_filter_ls_[0]; + interp_filter_ls_[1] = GET_PARAM(1); + interp_filter_ls_[3] = interp_filter_ls_[1]; + } + virtual void TearDown() { + while (buf_ls_.size() > 0) { + uint8_t *buf = buf_ls_.back(); + aom_free(buf); + buf_ls_.pop_back(); + } + } + virtual uint8_t *add_input(int w, int h, int *stride) { + uint8_t *buf = + reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, kBufferSize)); + buf_ls_.push_back(buf); + *stride = w + MAX_FILTER_TAP - 1; + int offset = MAX_FILTER_TAP / 2 - 1; + for (int r = 0; r < h + MAX_FILTER_TAP - 1; ++r) { + for (int c = 0; c < w + MAX_FILTER_TAP - 1; ++c) { + buf[r * (*stride) + c] = rnd_.Rand8(); + } + } + return buf + offset * (*stride) + offset; + } + virtual uint8_t *add_output(int w, int /*h*/, int *stride) { + uint8_t *buf = + reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, kBufferSize)); + buf_ls_.push_back(buf); + *stride = w; + return buf; + } + virtual void random_init_buf(uint8_t *buf, int w, int h, int stride) { + for (int r = 0; r < h; ++r) { + for (int c = 0; c < w; ++c) { + buf[r * stride + c] = rnd_.Rand8(); + } + } + } + + 
protected: + static const int kDataAlignment = 16; + static const int kOuterBlockSize = MAX_SB_SIZE + MAX_FILTER_TAP - 1; + static const int kBufferSize = kOuterBlockSize * kOuterBlockSize; + std::vector<uint8_t *> buf_ls_; + InterpFilter interp_filter_ls_[4]; + ConvolveFunctions *cfs_; + ACMRandom rnd_; +}; + +int bsize_ls[] = { 1, 2, 4, 8, 16, 32, 64, 3, 7, 15, 31, 63 }; +int bsize_num = sizeof(bsize_ls) / sizeof(bsize_ls[0]); + +TEST_P(Av1ConvolveTest, av1_convolve_vert) { + const int y_step_q4 = 16; + ConvolveParams conv_params = get_conv_params(0, 0); + + int in_stride, out_stride, ref_out_stride, avg_out_stride, ref_avg_out_stride; + uint8_t *in = add_input(MAX_SB_SIZE, MAX_SB_SIZE, &in_stride); + uint8_t *out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &out_stride); + uint8_t *ref_out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &ref_out_stride); + uint8_t *avg_out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &avg_out_stride); + uint8_t *ref_avg_out = + add_output(MAX_SB_SIZE, MAX_SB_SIZE, &ref_avg_out_stride); + for (int hb_idx = 0; hb_idx < bsize_num; ++hb_idx) { + for (int vb_idx = 0; vb_idx < bsize_num; ++vb_idx) { + int w = bsize_ls[hb_idx]; + int h = bsize_ls[vb_idx]; + for (int subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; ++subpel_y_q4) { + InterpFilter filter_y = interp_filter_ls_[0]; + InterpFilterParams param_vert = av1_get_interp_filter_params(filter_y); + const int16_t *filter_vert = + av1_get_interp_filter_subpel_kernel(param_vert, subpel_y_q4); + + filter_block1d_vert_c(in, in_stride, filter_vert, param_vert.taps, + ref_out, ref_out_stride, w, h); + + conv_params.ref = 0; + cfs_->vf_(in, in_stride, out, out_stride, w, h, param_vert, subpel_y_q4, + y_step_q4, &conv_params); + EXPECT_EQ(match(out, out_stride, ref_out, ref_out_stride, w, h), 1) + << " hb_idx " << hb_idx << " vb_idx " << vb_idx << " filter_y " + << filter_y << " subpel_y_q4 " << subpel_y_q4; + + random_init_buf(avg_out, w, h, avg_out_stride); + for (int r = 0; r < h; ++r) { + for (int c = 0; c < 
w; ++c) { + ref_avg_out[r * ref_avg_out_stride + c] = ROUND_POWER_OF_TWO( + avg_out[r * avg_out_stride + c] + out[r * out_stride + c], 1); + } + } + conv_params.ref = 1; + cfs_->vf_(in, in_stride, avg_out, avg_out_stride, w, h, param_vert, + subpel_y_q4, y_step_q4, &conv_params); + EXPECT_EQ(match(avg_out, avg_out_stride, ref_avg_out, + ref_avg_out_stride, w, h), + 1) + << " hb_idx " << hb_idx << " vb_idx " << vb_idx << " filter_y " + << filter_y << " subpel_y_q4 " << subpel_y_q4; + } + } + } +}; + +TEST_P(Av1ConvolveTest, av1_convolve_horiz) { + const int x_step_q4 = 16; + ConvolveParams conv_params = get_conv_params(0, 0); + + int in_stride, out_stride, ref_out_stride, avg_out_stride, ref_avg_out_stride; + uint8_t *in = add_input(MAX_SB_SIZE, MAX_SB_SIZE, &in_stride); + uint8_t *out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &out_stride); + uint8_t *ref_out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &ref_out_stride); + uint8_t *avg_out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &avg_out_stride); + uint8_t *ref_avg_out = + add_output(MAX_SB_SIZE, MAX_SB_SIZE, &ref_avg_out_stride); + for (int hb_idx = 0; hb_idx < bsize_num; ++hb_idx) { + for (int vb_idx = 0; vb_idx < bsize_num; ++vb_idx) { + int w = bsize_ls[hb_idx]; + int h = bsize_ls[vb_idx]; + for (int subpel_x_q4 = 0; subpel_x_q4 < SUBPEL_SHIFTS; ++subpel_x_q4) { + InterpFilter filter_x = interp_filter_ls_[1]; + InterpFilterParams param_horiz = av1_get_interp_filter_params(filter_x); + const int16_t *filter_horiz = + av1_get_interp_filter_subpel_kernel(param_horiz, subpel_x_q4); + + filter_block1d_horiz_c(in, in_stride, filter_horiz, param_horiz.taps, + ref_out, ref_out_stride, w, h); + + conv_params.ref = 0; + cfs_->hf_(in, in_stride, out, out_stride, w, h, param_horiz, + subpel_x_q4, x_step_q4, &conv_params); + EXPECT_EQ(match(out, out_stride, ref_out, ref_out_stride, w, h), 1) + << " hb_idx " << hb_idx << " vb_idx " << vb_idx << " filter_x " + << filter_x << " subpel_x_q4 " << subpel_x_q4; + + random_init_buf(avg_out, 
w, h, avg_out_stride); + for (int r = 0; r < h; ++r) { + for (int c = 0; c < w; ++c) { + ref_avg_out[r * ref_avg_out_stride + c] = ROUND_POWER_OF_TWO( + avg_out[r * avg_out_stride + c] + out[r * out_stride + c], 1); + } + } + conv_params.ref = 1; + cfs_->hf_(in, in_stride, avg_out, avg_out_stride, w, h, param_horiz, + subpel_x_q4, x_step_q4, &conv_params); + EXPECT_EQ(match(avg_out, avg_out_stride, ref_avg_out, + ref_avg_out_stride, w, h), + 1) + << "hb_idx " << hb_idx << "vb_idx" << vb_idx << " filter_x " + << filter_x << "subpel_x_q4 " << subpel_x_q4; + } + } + } +}; + +ConvolveFunctions convolve_functions_c(av1_convolve_horiz_c, + av1_convolve_vert_c); + +InterpFilter filter_ls[] = { EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH, + MULTITAP_SHARP }; + +INSTANTIATE_TEST_CASE_P( + C, Av1ConvolveTest, + ::testing::Combine(::testing::Values(&convolve_functions_c), + ::testing::ValuesIn(filter_ls), + ::testing::ValuesIn(filter_ls))); + +#if CONFIG_HIGHBITDEPTH +TEST(AV1ConvolveTest, av1_highbd_convolve) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); +#if CONFIG_DUAL_FILTER + InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR, + EIGHTTAP_REGULAR, EIGHTTAP_REGULAR }; + InterpFilterParams filter_params = + av1_get_interp_filter_params(interp_filter[0]); +#else + InterpFilter interp_filter = EIGHTTAP_REGULAR; + InterpFilterParams filter_params = + av1_get_interp_filter_params(interp_filter); +#endif + int filter_size = filter_params.taps; + int filter_center = filter_size / 2 - 1; + uint16_t src[12 * 12]; + int src_stride = filter_size; + uint16_t dst[1] = { 0 }; + int dst_stride = 1; + int x_step_q4 = 16; + int y_step_q4 = 16; + int avg = 0; + int bd = 10; + int w = 1; + int h = 1; + + int subpel_x_q4; + int subpel_y_q4; + + for (int i = 0; i < filter_size * filter_size; i++) { + src[i] = rnd.Rand16() % (1 << bd); + } + + for (subpel_x_q4 = 0; subpel_x_q4 < SUBPEL_SHIFTS; subpel_x_q4++) { + for (subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; subpel_y_q4++) { + 
av1_highbd_convolve( + CONVERT_TO_BYTEPTR(src + src_stride * filter_center + filter_center), + src_stride, CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, interp_filter, + subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd); + + const int16_t *x_filter = + av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4); + const int16_t *y_filter = + av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4); + + int temp[12]; + int dst_ref = 0; + for (int r = 0; r < filter_size; r++) { + temp[r] = 0; + for (int c = 0; c < filter_size; c++) { + temp[r] += x_filter[c] * src[r * filter_size + c]; + } + temp[r] = + clip_pixel_highbd(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS), bd); + dst_ref += temp[r] * y_filter[r]; + } + dst_ref = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS), bd); + EXPECT_EQ(dst[0], dst_ref); + } + } +} + +TEST(AV1ConvolveTest, av1_highbd_convolve_avg) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); +#if CONFIG_DUAL_FILTER + InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR, + EIGHTTAP_REGULAR, EIGHTTAP_REGULAR }; + InterpFilterParams filter_params = + av1_get_interp_filter_params(interp_filter[0]); +#else + InterpFilter interp_filter = EIGHTTAP_REGULAR; + InterpFilterParams filter_params = + av1_get_interp_filter_params(interp_filter); +#endif + int filter_size = filter_params.taps; + int filter_center = filter_size / 2 - 1; + uint16_t src0[12 * 12]; + uint16_t src1[12 * 12]; + int src_stride = filter_size; + uint16_t dst0[1] = { 0 }; + uint16_t dst1[1] = { 0 }; + uint16_t dst[1] = { 0 }; + int dst_stride = 1; + int x_step_q4 = 16; + int y_step_q4 = 16; + int avg = 0; + int bd = 10; + + int w = 1; + int h = 1; + + int subpel_x_q4; + int subpel_y_q4; + + for (int i = 0; i < filter_size * filter_size; i++) { + src0[i] = rnd.Rand16() % (1 << bd); + src1[i] = rnd.Rand16() % (1 << bd); + } + + for (subpel_x_q4 = 0; subpel_x_q4 < SUBPEL_SHIFTS; subpel_x_q4++) { + for (subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; 
subpel_y_q4++) { + int offset = filter_size * filter_center + filter_center; + + avg = 0; + av1_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride, + CONVERT_TO_BYTEPTR(dst0), dst_stride, w, h, + interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4, + y_step_q4, avg, bd); + avg = 0; + av1_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride, + CONVERT_TO_BYTEPTR(dst1), dst_stride, w, h, + interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4, + y_step_q4, avg, bd); + + avg = 0; + av1_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride, + CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, + interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4, + y_step_q4, avg, bd); + avg = 1; + av1_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride, + CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, + interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4, + y_step_q4, avg, bd); + + EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1)); + } + } +} +#endif // CONFIG_HIGHBITDEPTH + +#define CONVOLVE_SPEED_TEST 0 +#if CONVOLVE_SPEED_TEST +#define highbd_convolve_speed(func, block_size, frame_size) \ + TEST(AV1ConvolveTest, func##_speed_##block_size##_##frame_size) { \ + ACMRandom rnd(ACMRandom::DeterministicSeed()); \ + InterpFilter interp_filter = EIGHTTAP; \ + InterpFilterParams filter_params = \ + av1_get_interp_filter_params(interp_filter); \ + int filter_size = filter_params.tap; \ + int filter_center = filter_size / 2 - 1; \ + DECLARE_ALIGNED(16, uint16_t, \ + src[(frame_size + 7) * (frame_size + 7)]) = { 0 }; \ + int src_stride = frame_size + 7; \ + DECLARE_ALIGNED(16, uint16_t, dst[frame_size * frame_size]) = { 0 }; \ + int dst_stride = frame_size; \ + int x_step_q4 = 16; \ + int y_step_q4 = 16; \ + int subpel_x_q4 = 8; \ + int subpel_y_q4 = 6; \ + int bd = 10; \ + \ + int w = block_size; \ + int h = block_size; \ + \ + const int16_t *filter_x = \ + av1_get_interp_filter_kernel(filter_params, subpel_x_q4); \ + const int16_t *filter_y = \ + 
av1_get_interp_filter_kernel(filter_params, subpel_y_q4); \ + \ + for (int i = 0; i < src_stride * src_stride; i++) { \ + src[i] = rnd.Rand16() % (1 << bd); \ + } \ + \ + int offset = filter_center * src_stride + filter_center; \ + int row_offset = 0; \ + int col_offset = 0; \ + for (int i = 0; i < 100000; i++) { \ + int src_total_offset = offset + col_offset * src_stride + row_offset; \ + int dst_total_offset = col_offset * dst_stride + row_offset; \ + func(CONVERT_TO_BYTEPTR(src + src_total_offset), src_stride, \ + CONVERT_TO_BYTEPTR(dst + dst_total_offset), dst_stride, filter_x, \ + x_step_q4, filter_y, y_step_q4, w, h, bd); \ + if (offset + w + w < frame_size) { \ + row_offset += w; \ + } else { \ + row_offset = 0; \ + col_offset += h; \ + } \ + if (col_offset + h >= frame_size) { \ + col_offset = 0; \ + } \ + } \ + } + +#define lowbd_convolve_speed(func, block_size, frame_size) \ + TEST(AV1ConvolveTest, func##_speed_l_##block_size##_##frame_size) { \ + ACMRandom rnd(ACMRandom::DeterministicSeed()); \ + InterpFilter interp_filter = EIGHTTAP; \ + InterpFilterParams filter_params = \ + av1_get_interp_filter_params(interp_filter); \ + int filter_size = filter_params.tap; \ + int filter_center = filter_size / 2 - 1; \ + DECLARE_ALIGNED(16, uint8_t, src[(frame_size + 7) * (frame_size + 7)]); \ + int src_stride = frame_size + 7; \ + DECLARE_ALIGNED(16, uint8_t, dst[frame_size * frame_size]); \ + int dst_stride = frame_size; \ + int x_step_q4 = 16; \ + int y_step_q4 = 16; \ + int subpel_x_q4 = 8; \ + int subpel_y_q4 = 6; \ + int bd = 8; \ + \ + int w = block_size; \ + int h = block_size; \ + \ + const int16_t *filter_x = \ + av1_get_interp_filter_kernel(filter_params, subpel_x_q4); \ + const int16_t *filter_y = \ + av1_get_interp_filter_kernel(filter_params, subpel_y_q4); \ + \ + for (int i = 0; i < src_stride * src_stride; i++) { \ + src[i] = rnd.Rand16() % (1 << bd); \ + } \ + \ + int offset = filter_center * src_stride + filter_center; \ + int row_offset = 0; \ + 
int col_offset = 0; \ + for (int i = 0; i < 100000; i++) { \ + func(src + offset, src_stride, dst, dst_stride, filter_x, x_step_q4, \ + filter_y, y_step_q4, w, h); \ + if (offset + w + w < frame_size) { \ + row_offset += w; \ + } else { \ + row_offset = 0; \ + col_offset += h; \ + } \ + if (col_offset + h >= frame_size) { \ + col_offset = 0; \ + } \ + } \ + } + +// This experiment shows that when frame size is 64x64 +// aom_highbd_convolve8_sse2 and aom_convolve8_sse2's speed are similar. +// However when frame size becomes 1024x1024 +// aom_highbd_convolve8_sse2 is around 50% slower than aom_convolve8_sse2 +// we think the bottleneck is from memory IO +highbd_convolve_speed(aom_highbd_convolve8_sse2, 8, 64); +highbd_convolve_speed(aom_highbd_convolve8_sse2, 16, 64); +highbd_convolve_speed(aom_highbd_convolve8_sse2, 32, 64); +highbd_convolve_speed(aom_highbd_convolve8_sse2, 64, 64); + +lowbd_convolve_speed(aom_convolve8_sse2, 8, 64); +lowbd_convolve_speed(aom_convolve8_sse2, 16, 64); +lowbd_convolve_speed(aom_convolve8_sse2, 32, 64); +lowbd_convolve_speed(aom_convolve8_sse2, 64, 64); + +highbd_convolve_speed(aom_highbd_convolve8_sse2, 8, 1024); +highbd_convolve_speed(aom_highbd_convolve8_sse2, 16, 1024); +highbd_convolve_speed(aom_highbd_convolve8_sse2, 32, 1024); +highbd_convolve_speed(aom_highbd_convolve8_sse2, 64, 1024); + +lowbd_convolve_speed(aom_convolve8_sse2, 8, 1024); +lowbd_convolve_speed(aom_convolve8_sse2, 16, 1024); +lowbd_convolve_speed(aom_convolve8_sse2, 32, 1024); +lowbd_convolve_speed(aom_convolve8_sse2, 64, 1024); +#endif // CONVOLVE_SPEED_TEST +} // namespace diff --git a/third_party/aom/test/av1_dct_test.cc b/third_party/aom/test/av1_dct_test.cc new file mode 100644 index 000000000..691cc8b79 --- /dev/null +++ b/third_party/aom/test/av1_dct_test.cc @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include <math.h> +#include <stdlib.h> +#include <new> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/acm_random.h" +#include "test/util.h" +#include "./aom_config.h" +#include "aom_ports/msvc.h" + +#undef CONFIG_COEFFICIENT_RANGE_CHECKING +#define CONFIG_COEFFICIENT_RANGE_CHECKING 1 +#define AV1_DCT_GTEST +#include "av1/encoder/dct.c" + +using libaom_test::ACMRandom; + +namespace { +void reference_dct_1d(const double *in, double *out, int size) { + const double kInvSqrt2 = 0.707106781186547524400844362104; + for (int k = 0; k < size; ++k) { + out[k] = 0; + for (int n = 0; n < size; ++n) { + out[k] += in[n] * cos(PI * (2 * n + 1) * k / (2 * size)); + } + if (k == 0) out[k] = out[k] * kInvSqrt2; + } +} + +typedef void (*FdctFuncRef)(const double *in, double *out, int size); +typedef void (*IdctFuncRef)(const double *in, double *out, int size); +typedef void (*FdctFunc)(const tran_low_t *in, tran_low_t *out); +typedef void (*IdctFunc)(const tran_low_t *in, tran_low_t *out); + +class TransTestBase { + public: + virtual ~TransTestBase() {} + + protected: + void RunFwdAccuracyCheck() { + tran_low_t *input = new tran_low_t[txfm_size_]; + tran_low_t *output = new tran_low_t[txfm_size_]; + double *ref_input = new double[txfm_size_]; + double *ref_output = new double[txfm_size_]; + + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 5000; + for (int ti = 0; ti < count_test_block; ++ti) { + for (int ni = 0; ni < txfm_size_; 
++ni) { + input[ni] = rnd.Rand8() - rnd.Rand8(); + ref_input[ni] = static_cast<double>(input[ni]); + } + + fwd_txfm_(input, output); + fwd_txfm_ref_(ref_input, ref_output, txfm_size_); + + for (int ni = 0; ni < txfm_size_; ++ni) { + EXPECT_LE( + abs(output[ni] - static_cast<tran_low_t>(round(ref_output[ni]))), + max_error_); + } + } + + delete[] input; + delete[] output; + delete[] ref_input; + delete[] ref_output; + } + + double max_error_; + int txfm_size_; + FdctFunc fwd_txfm_; + FdctFuncRef fwd_txfm_ref_; +}; + +typedef std::tr1::tuple<FdctFunc, FdctFuncRef, int, int> FdctParam; +class AV1FwdTxfm : public TransTestBase, + public ::testing::TestWithParam<FdctParam> { + public: + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + fwd_txfm_ref_ = GET_PARAM(1); + txfm_size_ = GET_PARAM(2); + max_error_ = GET_PARAM(3); + } + virtual void TearDown() {} +}; + +TEST_P(AV1FwdTxfm, RunFwdAccuracyCheck) { RunFwdAccuracyCheck(); } + +INSTANTIATE_TEST_CASE_P( + C, AV1FwdTxfm, + ::testing::Values(FdctParam(&fdct4, &reference_dct_1d, 4, 1), + FdctParam(&fdct8, &reference_dct_1d, 8, 1), + FdctParam(&fdct16, &reference_dct_1d, 16, 2), + FdctParam(&fdct32, &reference_dct_1d, 32, 3))); +} // namespace diff --git a/third_party/aom/test/av1_ext_tile_test.cc b/third_party/aom/test/av1_ext_tile_test.cc new file mode 100644 index 000000000..f96447965 --- /dev/null +++ b/third_party/aom/test/av1_ext_tile_test.cc @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <assert.h> +#include <string> +#include <vector> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/md5_helper.h" +#include "test/util.h" + +namespace { +// The number of frames to be encoded/decoded +const int kLimit = 8; +// Skip 1 frame to check the frame decoding independency. +const int kSkip = 5; +const int kTileSize = 1; +const int kTIleSizeInPixels = (kTileSize << 6); +// Fake width and height so that they can be multiples of the tile size. +const int kImgWidth = 704; +const int kImgHeight = 576; + +// This test tests "tile_encoding_mode = TILE_VR" case. The TILE_NORMAL case is +// tested by the tile_independence test. +class AV1ExtTileTest + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int> { + protected: + AV1ExtTileTest() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + set_cpu_used_(GET_PARAM(2)) { + init_flags_ = AOM_CODEC_USE_PSNR; + aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t(); + cfg.w = kImgWidth; + cfg.h = kImgHeight; + + decoder_ = codec_->CreateDecoder(cfg, 0); + decoder_->Control(AV1_SET_DECODE_TILE_ROW, -1); + decoder_->Control(AV1_SET_DECODE_TILE_COL, -1); + + // Allocate buffer to store tile image. 
+ aom_img_alloc(&tile_img_, AOM_IMG_FMT_I420, kImgWidth, kImgHeight, 32); + + md5_.clear(); + tile_md5_.clear(); + } + + virtual ~AV1ExtTileTest() { + aom_img_free(&tile_img_); + delete decoder_; + } + + virtual void SetUp() { + InitializeConfig(); + SetMode(encoding_mode_); + + cfg_.g_lag_in_frames = 0; + cfg_.rc_end_usage = AOM_VBR; + cfg_.g_error_resilient = 1; + + cfg_.rc_max_quantizer = 56; + cfg_.rc_min_quantizer = 0; + } + + virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) { + if (video->frame() == 0) { + // Encode setting + encoder->Control(AOME_SET_CPUUSED, set_cpu_used_); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0); + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1); + + // The tile size is 64x64. + encoder->Control(AV1E_SET_TILE_COLUMNS, kTileSize); + encoder->Control(AV1E_SET_TILE_ROWS, kTileSize); + encoder->Control(AV1E_SET_TILE_ENCODING_MODE, 1); // TILE_VR +#if CONFIG_EXT_PARTITION + // Always use 64x64 max partition. + encoder->Control(AV1E_SET_SUPERBLOCK_SIZE, AOM_SUPERBLOCK_SIZE_64X64); +#endif + } + + if (video->frame() == 1) { + frame_flags_ = + AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF; + } + } + + virtual void DecompressedFrameHook(const aom_image_t &img, + aom_codec_pts_t pts) { + // Skip 1 already decoded frame to be consistent with the decoder in this + // test. + if (pts == (aom_codec_pts_t)kSkip) return; + + // Calculate MD5 as the reference. + ::libaom_test::MD5 md5_res; + md5_res.Add(&img); + md5_.push_back(md5_res.Get()); + } + + virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) { + // Skip decoding 1 frame. + if (pkt->data.frame.pts == (aom_codec_pts_t)kSkip) return; + + bool IsLastFrame = (pkt->data.frame.pts == (aom_codec_pts_t)(kLimit - 1)); + + // Decode the first (kLimit - 1) frames as whole frame, and decode the last + // frame in single tiles. 
+ for (int r = 0; r < kImgHeight / kTIleSizeInPixels; ++r) { + for (int c = 0; c < kImgWidth / kTIleSizeInPixels; ++c) { + if (!IsLastFrame) { + decoder_->Control(AV1_SET_DECODE_TILE_ROW, -1); + decoder_->Control(AV1_SET_DECODE_TILE_COL, -1); + } else { + decoder_->Control(AV1_SET_DECODE_TILE_ROW, r); + decoder_->Control(AV1_SET_DECODE_TILE_COL, c); + } + + const aom_codec_err_t res = decoder_->DecodeFrame( + reinterpret_cast<uint8_t *>(pkt->data.frame.buf), + pkt->data.frame.sz); + if (res != AOM_CODEC_OK) { + abort_ = true; + ASSERT_EQ(AOM_CODEC_OK, res); + } + const aom_image_t *img = decoder_->GetDxData().Next(); + + if (!IsLastFrame) { + if (img) { + ::libaom_test::MD5 md5_res; + md5_res.Add(img); + tile_md5_.push_back(md5_res.Get()); + } + break; + } + + const int kMaxMBPlane = 3; + for (int plane = 0; plane < kMaxMBPlane; ++plane) { + const int shift = (plane == 0) ? 0 : 1; + int tile_height = kTIleSizeInPixels >> shift; + int tile_width = kTIleSizeInPixels >> shift; + + for (int tr = 0; tr < tile_height; ++tr) { + memcpy(tile_img_.planes[plane] + + tile_img_.stride[plane] * (r * tile_height + tr) + + c * tile_width, + img->planes[plane] + img->stride[plane] * tr, tile_width); + } + } + } + + if (!IsLastFrame) break; + } + + if (IsLastFrame) { + ::libaom_test::MD5 md5_res; + md5_res.Add(&tile_img_); + tile_md5_.push_back(md5_res.Get()); + } + } + + ::libaom_test::TestMode encoding_mode_; + int set_cpu_used_; + ::libaom_test::Decoder *decoder_; + aom_image_t tile_img_; + std::vector<std::string> md5_; + std::vector<std::string> tile_md5_; +}; + +TEST_P(AV1ExtTileTest, DecoderResultTest) { + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", kImgWidth, + kImgHeight, 30, 1, 0, kLimit); + cfg_.rc_target_bitrate = 500; + cfg_.g_error_resilient = AOM_ERROR_RESILIENT_DEFAULT; + cfg_.g_lag_in_frames = 0; + cfg_.g_threads = 1; + + // Tile encoding + init_flags_ = AOM_CODEC_USE_PSNR; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + // Compare to check 
if two vectors are equal. + ASSERT_EQ(md5_, tile_md5_); +} + +AV1_INSTANTIATE_TEST_CASE( + // Now only test 2-pass mode. + AV1ExtTileTest, ::testing::Values(::libaom_test::kTwoPassGood), + ::testing::Range(0, 4)); +} // namespace diff --git a/third_party/aom/test/av1_fht16x16_test.cc b/third_party/aom/test/av1_fht16x16_test.cc new file mode 100644 index 000000000..e1032ef24 --- /dev/null +++ b/third_party/aom/test/av1_fht16x16_test.cc @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./av1_rtcd.h" +#include "./aom_dsp_rtcd.h" + +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/transform_test_base.h" +#include "test/util.h" +#include "aom_ports/mem.h" + +using libaom_test::ACMRandom; + +namespace { +typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride, + int tx_type); +using std::tr1::tuple; +using libaom_test::FhtFunc; +typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht16x16Param; + +void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) { + av1_fht16x16_c(in, out, stride, tx_type); +} + +void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride, + int tx_type) { + av1_iht16x16_256_add_c(in, dest, stride, tx_type); +} + +#if CONFIG_HIGHBITDEPTH +typedef void (*IHbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride, + int tx_type, int bd); 
+typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride, + int tx_type, int bd); + +// Target optimized function, tx_type, bit depth +typedef tuple<HbdHtFunc, int, int> HighbdHt16x16Param; + +void highbd_fht16x16_ref(const int16_t *in, int32_t *out, int stride, + int tx_type, int bd) { + av1_fwd_txfm2d_16x16_c(in, out, stride, tx_type, bd); +} +#endif // CONFIG_HIGHBITDEPTH + +class AV1Trans16x16HT : public libaom_test::TransformTestBase, + public ::testing::TestWithParam<Ht16x16Param> { + public: + virtual ~AV1Trans16x16HT() {} + + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + tx_type_ = GET_PARAM(2); + pitch_ = 16; + height_ = 16; + fwd_txfm_ref = fht16x16_ref; + inv_txfm_ref = iht16x16_ref; + bit_depth_ = GET_PARAM(3); + mask_ = (1 << bit_depth_) - 1; + num_coeffs_ = GET_PARAM(4); + } + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) { + fwd_txfm_(in, out, stride, tx_type_); + } + + void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) { + inv_txfm_(out, dst, stride, tx_type_); + } + + FhtFunc fwd_txfm_; + IhtFunc inv_txfm_; +}; + +TEST_P(AV1Trans16x16HT, MemCheck) { RunMemCheck(); } +TEST_P(AV1Trans16x16HT, AccuracyCheck) { RunAccuracyCheck(1, 0.001); } +TEST_P(AV1Trans16x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); } +TEST_P(AV1Trans16x16HT, CoeffCheck) { RunCoeffCheck(); } +TEST_P(AV1Trans16x16HT, InvCoeffCheck) { RunInvCoeffCheck(); } + +#if CONFIG_HIGHBITDEPTH +class AV1HighbdTrans16x16HT + : public ::testing::TestWithParam<HighbdHt16x16Param> { + public: + virtual ~AV1HighbdTrans16x16HT() {} + + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + fwd_txfm_ref_ = highbd_fht16x16_ref; + tx_type_ = GET_PARAM(1); + bit_depth_ = GET_PARAM(2); + mask_ = (1 << bit_depth_) - 1; + num_coeffs_ = 256; + + input_ = reinterpret_cast<int16_t *>( + aom_memalign(32, sizeof(int16_t) * num_coeffs_)); + output_ 
= reinterpret_cast<int32_t *>( + aom_memalign(32, sizeof(int32_t) * num_coeffs_)); + output_ref_ = reinterpret_cast<int32_t *>( + aom_memalign(32, sizeof(int32_t) * num_coeffs_)); + } + + virtual void TearDown() { + aom_free(input_); + aom_free(output_); + aom_free(output_ref_); + libaom_test::ClearSystemState(); + } + + protected: + void RunBitexactCheck(); + + private: + HbdHtFunc fwd_txfm_; + HbdHtFunc fwd_txfm_ref_; + int tx_type_; + int bit_depth_; + int mask_; + int num_coeffs_; + int16_t *input_; + int32_t *output_; + int32_t *output_ref_; +}; + +void AV1HighbdTrans16x16HT::RunBitexactCheck() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + int i, j; + const int stride = 16; + const int num_tests = 1000; + + for (i = 0; i < num_tests; ++i) { + for (j = 0; j < num_coeffs_; ++j) { + input_[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_); + } + + fwd_txfm_ref_(input_, output_ref_, stride, tx_type_, bit_depth_); + ASM_REGISTER_STATE_CHECK( + fwd_txfm_(input_, output_, stride, tx_type_, bit_depth_)); + + for (j = 0; j < num_coeffs_; ++j) { + EXPECT_EQ(output_ref_[j], output_[j]) + << "Not bit-exact result at index: " << j << " at test block: " << i; + } + } +} + +TEST_P(AV1HighbdTrans16x16HT, HighbdCoeffCheck) { RunBitexactCheck(); } +#endif // CONFIG_HIGHBITDEPTH + +using std::tr1::make_tuple; + +#if HAVE_SSE2 +const Ht16x16Param kArrayHt16x16Param_sse2[] = { + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 0, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 1, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 2, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 3, AOM_BITS_8, + 256), +#if CONFIG_EXT_TX + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 4, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 5, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 6, AOM_BITS_8, + 
256), + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 7, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 8, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 9, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 10, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 11, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 12, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 13, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 14, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 15, AOM_BITS_8, + 256) +#endif // CONFIG_EXT_TX +}; +INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans16x16HT, + ::testing::ValuesIn(kArrayHt16x16Param_sse2)); +#endif // HAVE_SSE2 + +#if HAVE_AVX2 +const Ht16x16Param kArrayHt16x16Param_avx2[] = { + make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 0, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 1, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 2, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 3, AOM_BITS_8, + 256), +#if CONFIG_EXT_TX + make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 4, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 5, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 6, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 7, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 8, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 9, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 10, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 11, 
AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 12, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 13, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 14, AOM_BITS_8, + 256), + make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 15, AOM_BITS_8, + 256) +#endif // CONFIG_EXT_TX +}; +INSTANTIATE_TEST_CASE_P(AVX2, AV1Trans16x16HT, + ::testing::ValuesIn(kArrayHt16x16Param_avx2)); +#endif // HAVE_AVX2 + +#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH +const HighbdHt16x16Param kArrayHBDHt16x16Param_sse4_1[] = { + make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 0, 10), + make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 0, 12), + make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 1, 10), + make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 1, 12), + make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 2, 10), + make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 2, 12), + make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 3, 10), + make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 3, 12), +#if CONFIG_EXT_TX + make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 4, 10), + make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 4, 12), + make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 5, 10), + make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 5, 12), + make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 6, 10), + make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 6, 12), + make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 7, 10), + make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 7, 12), + make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 8, 10), + make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 8, 12), +#endif // CONFIG_EXT_TX +}; +INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdTrans16x16HT, + ::testing::ValuesIn(kArrayHBDHt16x16Param_sse4_1)); +#endif // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH + +} // namespace diff --git a/third_party/aom/test/av1_fht16x32_test.cc b/third_party/aom/test/av1_fht16x32_test.cc new file mode 100644 index 000000000..43d025327 --- /dev/null +++ b/third_party/aom/test/av1_fht16x32_test.cc @@ -0,0 +1,144 @@ +/* + * Copyright (c) 
2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_dsp_rtcd.h" +#include "./av1_rtcd.h" + +#include "aom_ports/mem.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/transform_test_base.h" +#include "test/util.h" + +using libaom_test::ACMRandom; + +namespace { +typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride, + int tx_type); +using std::tr1::tuple; +using libaom_test::FhtFunc; +typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht16x32Param; + +void fht16x32_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) { + av1_fht16x32_c(in, out, stride, tx_type); +} + +void iht16x32_ref(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { + av1_iht16x32_512_add_c(in, out, stride, tx_type); +} + +class AV1Trans16x32HT : public libaom_test::TransformTestBase, + public ::testing::TestWithParam<Ht16x32Param> { + public: + virtual ~AV1Trans16x32HT() {} + + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + tx_type_ = GET_PARAM(2); + pitch_ = 16; + height_ = 32; + fwd_txfm_ref = fht16x32_ref; + inv_txfm_ref = iht16x32_ref; + bit_depth_ = GET_PARAM(3); + mask_ = (1 << bit_depth_) - 1; + num_coeffs_ = GET_PARAM(4); + } + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) { + 
fwd_txfm_(in, out, stride, tx_type_); + } + + void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) { + inv_txfm_(out, dst, stride, tx_type_); + } + + FhtFunc fwd_txfm_; + IhtFunc inv_txfm_; +}; + +TEST_P(AV1Trans16x32HT, AccuracyCheck) { RunAccuracyCheck(4, 0.2); } +TEST_P(AV1Trans16x32HT, CoeffCheck) { RunCoeffCheck(); } +TEST_P(AV1Trans16x32HT, MemCheck) { RunMemCheck(); } +TEST_P(AV1Trans16x32HT, InvCoeffCheck) { RunInvCoeffCheck(); } +TEST_P(AV1Trans16x32HT, InvAccuracyCheck) { RunInvAccuracyCheck(4); } + +using std::tr1::make_tuple; +const Ht16x32Param kArrayHt16x32Param_c[] = { + make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 0, AOM_BITS_8, 512), + make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 1, AOM_BITS_8, 512), + make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 2, AOM_BITS_8, 512), + make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 3, AOM_BITS_8, 512), +#if CONFIG_EXT_TX + make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 4, AOM_BITS_8, 512), + make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 5, AOM_BITS_8, 512), + make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 6, AOM_BITS_8, 512), + make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 7, AOM_BITS_8, 512), + make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 8, AOM_BITS_8, 512), + make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 9, AOM_BITS_8, 512), + make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 10, AOM_BITS_8, 512), + make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 11, AOM_BITS_8, 512), + make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 12, AOM_BITS_8, 512), + make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 13, AOM_BITS_8, 512), + make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 14, AOM_BITS_8, 512), + make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 15, AOM_BITS_8, 512) +#endif // CONFIG_EXT_TX +}; +INSTANTIATE_TEST_CASE_P(C, AV1Trans16x32HT, + ::testing::ValuesIn(kArrayHt16x32Param_c)); + +#if HAVE_SSE2 +const Ht16x32Param 
kArrayHt16x32Param_sse2[] = { + make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 0, AOM_BITS_8, + 512), + make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 1, AOM_BITS_8, + 512), + make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 2, AOM_BITS_8, + 512), + make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 3, AOM_BITS_8, + 512), +#if CONFIG_EXT_TX + make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 4, AOM_BITS_8, + 512), + make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 5, AOM_BITS_8, + 512), + make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 6, AOM_BITS_8, + 512), + make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 7, AOM_BITS_8, + 512), + make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 8, AOM_BITS_8, + 512), + make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 9, AOM_BITS_8, + 512), + make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 10, AOM_BITS_8, + 512), + make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 11, AOM_BITS_8, + 512), + make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 12, AOM_BITS_8, + 512), + make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 13, AOM_BITS_8, + 512), + make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 14, AOM_BITS_8, + 512), + make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 15, AOM_BITS_8, + 512) +#endif // CONFIG_EXT_TX +}; +INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans16x32HT, + ::testing::ValuesIn(kArrayHt16x32Param_sse2)); +#endif // HAVE_SSE2 + +} // namespace diff --git a/third_party/aom/test/av1_fht16x8_test.cc b/third_party/aom/test/av1_fht16x8_test.cc new file mode 100644 index 000000000..d99bec5eb --- /dev/null +++ b/third_party/aom/test/av1_fht16x8_test.cc @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_dsp_rtcd.h" +#include "./av1_rtcd.h" + +#include "aom_ports/mem.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/transform_test_base.h" +#include "test/util.h" + +using libaom_test::ACMRandom; + +namespace { +typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride, + int tx_type); +using std::tr1::tuple; +using libaom_test::FhtFunc; +typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht16x8Param; + +void fht16x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) { + av1_fht16x8_c(in, out, stride, tx_type); +} + +void iht16x8_ref(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { + av1_iht16x8_128_add_c(in, out, stride, tx_type); +} + +class AV1Trans16x8HT : public libaom_test::TransformTestBase, + public ::testing::TestWithParam<Ht16x8Param> { + public: + virtual ~AV1Trans16x8HT() {} + + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + tx_type_ = GET_PARAM(2); + pitch_ = 16; + height_ = 8; + inv_txfm_ref = iht16x8_ref; + fwd_txfm_ref = fht16x8_ref; + bit_depth_ = GET_PARAM(3); + mask_ = (1 << bit_depth_) - 1; + num_coeffs_ = GET_PARAM(4); + } + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) { + fwd_txfm_(in, out, stride, tx_type_); + } + + void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) { + inv_txfm_(out, dst, stride, tx_type_); + } + + FhtFunc fwd_txfm_; + IhtFunc inv_txfm_; +}; + 
+TEST_P(AV1Trans16x8HT, AccuracyCheck) { RunAccuracyCheck(1, 0.001); } +TEST_P(AV1Trans16x8HT, CoeffCheck) { RunCoeffCheck(); } +TEST_P(AV1Trans16x8HT, MemCheck) { RunMemCheck(); } +TEST_P(AV1Trans16x8HT, InvCoeffCheck) { RunInvCoeffCheck(); } +TEST_P(AV1Trans16x8HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); } + +using std::tr1::make_tuple; + +const Ht16x8Param kArrayHt16x8Param_c[] = { + make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 0, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 1, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 2, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 3, AOM_BITS_8, 128), +#if CONFIG_EXT_TX + make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 4, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 5, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 6, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 7, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 8, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 9, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 10, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 11, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 12, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 13, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 14, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 15, AOM_BITS_8, 128) +#endif // CONFIG_EXT_TX +}; +INSTANTIATE_TEST_CASE_P(C, AV1Trans16x8HT, + ::testing::ValuesIn(kArrayHt16x8Param_c)); + +#if HAVE_SSE2 +const Ht16x8Param kArrayHt16x8Param_sse2[] = { + make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 0, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 1, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_sse2, 
&av1_iht16x8_128_add_sse2, 2, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 3, AOM_BITS_8, 128), +#if CONFIG_EXT_TX + make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 4, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 5, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 6, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 7, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 8, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 9, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 10, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 11, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 12, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 13, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 14, AOM_BITS_8, 128), + make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 15, AOM_BITS_8, 128) +#endif // CONFIG_EXT_TX +}; +INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans16x8HT, + ::testing::ValuesIn(kArrayHt16x8Param_sse2)); +#endif // HAVE_SSE2 + +} // namespace diff --git a/third_party/aom/test/av1_fht32x16_test.cc b/third_party/aom/test/av1_fht32x16_test.cc new file mode 100644 index 000000000..e38283f86 --- /dev/null +++ b/third_party/aom/test/av1_fht32x16_test.cc @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_dsp_rtcd.h" +#include "./av1_rtcd.h" + +#include "aom_ports/mem.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/transform_test_base.h" +#include "test/util.h" + +using libaom_test::ACMRandom; + +namespace { +typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride, + int tx_type); +using std::tr1::tuple; +using libaom_test::FhtFunc; +typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht32x16Param; + +void fht32x16_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) { + av1_fht32x16_c(in, out, stride, tx_type); +} + +void iht32x16_ref(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { + av1_iht32x16_512_add_c(in, out, stride, tx_type); +} + +class AV1Trans32x16HT : public libaom_test::TransformTestBase, + public ::testing::TestWithParam<Ht32x16Param> { + public: + virtual ~AV1Trans32x16HT() {} + + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + tx_type_ = GET_PARAM(2); + pitch_ = 32; + height_ = 16; + fwd_txfm_ref = fht32x16_ref; + inv_txfm_ref = iht32x16_ref; + bit_depth_ = GET_PARAM(3); + mask_ = (1 << bit_depth_) - 1; + num_coeffs_ = GET_PARAM(4); + } + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) { + fwd_txfm_(in, out, stride, tx_type_); + } + + void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) { + inv_txfm_(out, dst, stride, tx_type_); + } + + FhtFunc fwd_txfm_; + IhtFunc inv_txfm_; +}; + +TEST_P(AV1Trans32x16HT, MemCheck) { RunMemCheck(); } +TEST_P(AV1Trans32x16HT, AccuracyCheck) { RunAccuracyCheck(4, 0.2); } 
+TEST_P(AV1Trans32x16HT, CoeffCheck) { RunCoeffCheck(); } +TEST_P(AV1Trans32x16HT, InvCoeffCheck) { RunInvCoeffCheck(); } +TEST_P(AV1Trans32x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(4); } + +using std::tr1::make_tuple; +const Ht32x16Param kArrayHt32x16Param_c[] = { + make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 0, AOM_BITS_8, 512), + make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 1, AOM_BITS_8, 512), + make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 2, AOM_BITS_8, 512), + make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 3, AOM_BITS_8, 512), +#if CONFIG_EXT_TX + make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 4, AOM_BITS_8, 512), + make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 5, AOM_BITS_8, 512), + make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 6, AOM_BITS_8, 512), + make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 7, AOM_BITS_8, 512), + make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 8, AOM_BITS_8, 512), + make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 9, AOM_BITS_8, 512), + make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 10, AOM_BITS_8, 512), + make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 11, AOM_BITS_8, 512), + make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 12, AOM_BITS_8, 512), + make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 13, AOM_BITS_8, 512), + make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 14, AOM_BITS_8, 512), + make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 15, AOM_BITS_8, 512) +#endif // CONFIG_EXT_TX +}; +INSTANTIATE_TEST_CASE_P(C, AV1Trans32x16HT, + ::testing::ValuesIn(kArrayHt32x16Param_c)); + +#if HAVE_SSE2 +const Ht32x16Param kArrayHt32x16Param_sse2[] = { + make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 0, AOM_BITS_8, + 512), + make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 1, AOM_BITS_8, + 512), + make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 2, AOM_BITS_8, + 512), + make_tuple(&av1_fht32x16_sse2, 
&av1_iht32x16_512_add_sse2, 3, AOM_BITS_8, + 512), +#if CONFIG_EXT_TX + make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 4, AOM_BITS_8, + 512), + make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 5, AOM_BITS_8, + 512), + make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 6, AOM_BITS_8, + 512), + make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 7, AOM_BITS_8, + 512), + make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 8, AOM_BITS_8, + 512), + make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 9, AOM_BITS_8, + 512), + make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 10, AOM_BITS_8, + 512), + make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 11, AOM_BITS_8, + 512), + make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 12, AOM_BITS_8, + 512), + make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 13, AOM_BITS_8, + 512), + make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 14, AOM_BITS_8, + 512), + make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 15, AOM_BITS_8, + 512) +#endif // CONFIG_EXT_TX +}; +INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans32x16HT, + ::testing::ValuesIn(kArrayHt32x16Param_sse2)); +#endif // HAVE_SSE2 + +} // namespace diff --git a/third_party/aom/test/av1_fht4x4_test.cc b/third_party/aom/test/av1_fht4x4_test.cc new file mode 100644 index 000000000..42837d3a4 --- /dev/null +++ b/third_party/aom/test/av1_fht4x4_test.cc @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./av1_rtcd.h" +#include "./aom_dsp_rtcd.h" + +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/transform_test_base.h" +#include "test/util.h" +#include "aom_ports/mem.h" + +using libaom_test::ACMRandom; + +namespace { +typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride, + int tx_type); +using std::tr1::tuple; +using libaom_test::FhtFunc; +typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht4x4Param; + +void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) { + av1_fht4x4_c(in, out, stride, tx_type); +} + +void iht4x4_ref(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { + av1_iht4x4_16_add_c(in, out, stride, tx_type); +} + +#if CONFIG_HIGHBITDEPTH +typedef void (*IhighbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride, + int tx_type, int bd); +typedef void (*HBDFhtFunc)(const int16_t *input, int32_t *output, int stride, + int tx_type, int bd); + +// HighbdHt4x4Param argument list: +// <Target optimized function, tx_type, bit depth> +typedef tuple<HBDFhtFunc, int, int> HighbdHt4x4Param; + +void highbe_fht4x4_ref(const int16_t *in, int32_t *out, int stride, int tx_type, + int bd) { + av1_fwd_txfm2d_4x4_c(in, out, stride, tx_type, bd); +} +#endif // CONFIG_HIGHBITDEPTH + +class AV1Trans4x4HT : public libaom_test::TransformTestBase, + public ::testing::TestWithParam<Ht4x4Param> { + public: + virtual ~AV1Trans4x4HT() {} + + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + tx_type_ = GET_PARAM(2); + pitch_ = 4; + height_ = 4; + fwd_txfm_ref = fht4x4_ref; + inv_txfm_ref = iht4x4_ref; + bit_depth_ = GET_PARAM(3); + mask_ = (1 << 
bit_depth_) - 1; + num_coeffs_ = GET_PARAM(4); + } + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) { + fwd_txfm_(in, out, stride, tx_type_); + } + + void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) { + inv_txfm_(out, dst, stride, tx_type_); + } + + FhtFunc fwd_txfm_; + IhtFunc inv_txfm_; +}; + +TEST_P(AV1Trans4x4HT, MemCheck) { RunMemCheck(); } +TEST_P(AV1Trans4x4HT, CoeffCheck) { RunCoeffCheck(); } +// Note: +// TODO(luoyi): Add tx_type, 9-15 for inverse transform. +// Need cleanup since same tests may be done in fdct4x4_test.cc +// TEST_P(AV1Trans4x4HT, AccuracyCheck) { RunAccuracyCheck(0); } +// TEST_P(AV1Trans4x4HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); } +// TEST_P(AV1Trans4x4HT, InvCoeffCheck) { RunInvCoeffCheck(); } + +#if CONFIG_HIGHBITDEPTH +class AV1HighbdTrans4x4HT : public ::testing::TestWithParam<HighbdHt4x4Param> { + public: + virtual ~AV1HighbdTrans4x4HT() {} + + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + fwd_txfm_ref_ = highbe_fht4x4_ref; + tx_type_ = GET_PARAM(1); + bit_depth_ = GET_PARAM(2); + mask_ = (1 << bit_depth_) - 1; + num_coeffs_ = 16; + + input_ = reinterpret_cast<int16_t *>( + aom_memalign(16, sizeof(int16_t) * num_coeffs_)); + output_ = reinterpret_cast<int32_t *>( + aom_memalign(16, sizeof(int32_t) * num_coeffs_)); + output_ref_ = reinterpret_cast<int32_t *>( + aom_memalign(16, sizeof(int32_t) * num_coeffs_)); + } + + virtual void TearDown() { + aom_free(input_); + aom_free(output_); + aom_free(output_ref_); + libaom_test::ClearSystemState(); + } + + protected: + void RunBitexactCheck(); + + private: + HBDFhtFunc fwd_txfm_; + HBDFhtFunc fwd_txfm_ref_; + int tx_type_; + int bit_depth_; + int mask_; + int num_coeffs_; + int16_t *input_; + int32_t *output_; + int32_t *output_ref_; +}; + +void AV1HighbdTrans4x4HT::RunBitexactCheck() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + int i, j; + const int stride 
= 4; + const int num_tests = 1000; + const int num_coeffs = 16; + + for (i = 0; i < num_tests; ++i) { + for (j = 0; j < num_coeffs; ++j) { + input_[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_); + } + + fwd_txfm_ref_(input_, output_ref_, stride, tx_type_, bit_depth_); + fwd_txfm_(input_, output_, stride, tx_type_, bit_depth_); + + for (j = 0; j < num_coeffs; ++j) { + EXPECT_EQ(output_[j], output_ref_[j]) + << "Not bit-exact result at index: " << j << " at test block: " << i; + } + } +} + +TEST_P(AV1HighbdTrans4x4HT, HighbdCoeffCheck) { RunBitexactCheck(); } +#endif // CONFIG_HIGHBITDEPTH + +using std::tr1::make_tuple; + +#if HAVE_SSE2 +const Ht4x4Param kArrayHt4x4Param_sse2[] = { + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 0, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 1, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 2, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 3, AOM_BITS_8, 16), +#if CONFIG_EXT_TX + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 4, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 5, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 6, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 7, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 8, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 9, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 10, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 11, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 12, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 13, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 14, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 15, AOM_BITS_8, 16) +#endif // CONFIG_EXT_TX +}; +INSTANTIATE_TEST_CASE_P(SSE2, 
AV1Trans4x4HT, + ::testing::ValuesIn(kArrayHt4x4Param_sse2)); +#endif // HAVE_SSE2 + +#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH +const HighbdHt4x4Param kArrayHighbdHt4x4Param[] = { + make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 0, 10), + make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 0, 12), + make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 1, 10), + make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 1, 12), + make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 2, 10), + make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 2, 12), + make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 3, 10), + make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 3, 12), +#if CONFIG_EXT_TX + make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 4, 10), + make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 4, 12), + make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 5, 10), + make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 5, 12), + make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 6, 10), + make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 6, 12), + make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 7, 10), + make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 7, 12), + make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 8, 10), + make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 8, 12), +#endif // CONFIG_EXT_TX +}; + +INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdTrans4x4HT, + ::testing::ValuesIn(kArrayHighbdHt4x4Param)); + +#endif // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH + +} // namespace diff --git a/third_party/aom/test/av1_fht4x8_test.cc b/third_party/aom/test/av1_fht4x8_test.cc new file mode 100644 index 000000000..a899c8739 --- /dev/null +++ b/third_party/aom/test/av1_fht4x8_test.cc @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_dsp_rtcd.h" +#include "./av1_rtcd.h" + +#include "aom_ports/mem.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/transform_test_base.h" +#include "test/util.h" + +using libaom_test::ACMRandom; + +namespace { +typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride, + int tx_type); +using std::tr1::tuple; +using libaom_test::FhtFunc; +typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht4x8Param; + +void fht4x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) { + av1_fht4x8_c(in, out, stride, tx_type); +} + +void iht4x8_ref(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { + av1_iht4x8_32_add_c(in, out, stride, tx_type); +} + +class AV1Trans4x8HT : public libaom_test::TransformTestBase, + public ::testing::TestWithParam<Ht4x8Param> { + public: + virtual ~AV1Trans4x8HT() {} + + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + tx_type_ = GET_PARAM(2); + pitch_ = 4; + height_ = 8; + fwd_txfm_ref = fht4x8_ref; + inv_txfm_ref = iht4x8_ref; + bit_depth_ = GET_PARAM(3); + mask_ = (1 << bit_depth_) - 1; + num_coeffs_ = GET_PARAM(4); + } + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) { + fwd_txfm_(in, out, stride, tx_type_); + } + + void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) { + inv_txfm_(out, dst, stride, tx_type_); + } + + FhtFunc fwd_txfm_; + IhtFunc inv_txfm_; +}; + +TEST_P(AV1Trans4x8HT, AccuracyCheck) { RunAccuracyCheck(0, 0.00001); } +TEST_P(AV1Trans4x8HT, CoeffCheck) { RunCoeffCheck(); } +TEST_P(AV1Trans4x8HT, MemCheck) { 
RunMemCheck(); } +TEST_P(AV1Trans4x8HT, InvCoeffCheck) { RunInvCoeffCheck(); } +TEST_P(AV1Trans4x8HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); } + +using std::tr1::make_tuple; + +const Ht4x8Param kArrayHt4x8Param_c[] = { + make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 0, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 1, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 2, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 3, AOM_BITS_8, 32), +#if CONFIG_EXT_TX + make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 4, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 5, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 6, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 7, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 8, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 9, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 10, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 11, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 12, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 13, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 14, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 15, AOM_BITS_8, 32) +#endif // CONFIG_EXT_TX +}; +INSTANTIATE_TEST_CASE_P(C, AV1Trans4x8HT, + ::testing::ValuesIn(kArrayHt4x8Param_c)); + +#if HAVE_SSE2 +const Ht4x8Param kArrayHt4x8Param_sse2[] = { + make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 0, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 1, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 2, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 3, AOM_BITS_8, 32), +#if CONFIG_EXT_TX + make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 4, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_sse2, 
&av1_iht4x8_32_add_sse2, 5, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 6, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 7, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 8, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 9, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 10, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 11, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 12, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 13, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 14, AOM_BITS_8, 32), + make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 15, AOM_BITS_8, 32) +#endif // CONFIG_EXT_TX +}; +INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans4x8HT, + ::testing::ValuesIn(kArrayHt4x8Param_sse2)); +#endif // HAVE_SSE2 + +} // namespace diff --git a/third_party/aom/test/av1_fht8x16_test.cc b/third_party/aom/test/av1_fht8x16_test.cc new file mode 100644 index 000000000..ace9a8f47 --- /dev/null +++ b/third_party/aom/test/av1_fht8x16_test.cc @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_dsp_rtcd.h" +#include "./av1_rtcd.h" + +#include "aom_ports/mem.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/transform_test_base.h" +#include "test/util.h" + +using libaom_test::ACMRandom; + +namespace { +typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride, + int tx_type); +using std::tr1::tuple; +using libaom_test::FhtFunc; +typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht8x16Param; + +void fht8x16_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) { + av1_fht8x16_c(in, out, stride, tx_type); +} + +void iht8x16_ref(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { + av1_iht8x16_128_add_c(in, out, stride, tx_type); +} + +class AV1Trans8x16HT : public libaom_test::TransformTestBase, + public ::testing::TestWithParam<Ht8x16Param> { + public: + virtual ~AV1Trans8x16HT() {} + + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + tx_type_ = GET_PARAM(2); + pitch_ = 8; + height_ = 16; + inv_txfm_ref = iht8x16_ref; + fwd_txfm_ref = fht8x16_ref; + bit_depth_ = GET_PARAM(3); + mask_ = (1 << bit_depth_) - 1; + num_coeffs_ = GET_PARAM(4); + } + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) { + fwd_txfm_(in, out, stride, tx_type_); + } + + void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) { + inv_txfm_(out, dst, stride, tx_type_); + } + + FhtFunc fwd_txfm_; + IhtFunc inv_txfm_; +}; + +TEST_P(AV1Trans8x16HT, AccuracyCheck) { RunAccuracyCheck(1, 0.001); } +TEST_P(AV1Trans8x16HT, MemCheck) { RunMemCheck(); } +TEST_P(AV1Trans8x16HT, CoeffCheck) { RunCoeffCheck(); } +TEST_P(AV1Trans8x16HT, InvCoeffCheck) { RunInvCoeffCheck(); } +TEST_P(AV1Trans8x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); } + +using 
std::tr1::make_tuple; + +const Ht8x16Param kArrayHt8x16Param_c[] = { + make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 0, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 1, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 2, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 3, AOM_BITS_8, 128), +#if CONFIG_EXT_TX + make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 4, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 5, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 6, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 7, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 8, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 9, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 10, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 11, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 12, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 13, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 14, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 15, AOM_BITS_8, 128) +#endif // CONFIG_EXT_TX +}; +INSTANTIATE_TEST_CASE_P(C, AV1Trans8x16HT, + ::testing::ValuesIn(kArrayHt8x16Param_c)); + +#if HAVE_SSE2 +const Ht8x16Param kArrayHt8x16Param_sse2[] = { + make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 0, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 1, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 2, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 3, AOM_BITS_8, 128), +#if CONFIG_EXT_TX + make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 4, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 5, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_sse2, 
&av1_iht8x16_128_add_sse2, 6, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 7, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 8, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 9, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 10, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 11, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 12, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 13, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 14, AOM_BITS_8, 128), + make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 15, AOM_BITS_8, 128) +#endif // CONFIG_EXT_TX +}; +INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans8x16HT, + ::testing::ValuesIn(kArrayHt8x16Param_sse2)); +#endif // HAVE_SSE2 + +} // namespace diff --git a/third_party/aom/test/av1_fht8x4_test.cc b/third_party/aom/test/av1_fht8x4_test.cc new file mode 100644 index 000000000..9bf4ff647 --- /dev/null +++ b/third_party/aom/test/av1_fht8x4_test.cc @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_dsp_rtcd.h" +#include "./av1_rtcd.h" + +#include "aom_ports/mem.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/transform_test_base.h" +#include "test/util.h" + +using libaom_test::ACMRandom; + +namespace { +typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride, + int tx_type); +using std::tr1::tuple; +using libaom_test::FhtFunc; +typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht8x4Param; + +void fht8x4_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) { + av1_fht8x4_c(in, out, stride, tx_type); +} + +void iht8x4_ref(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { + av1_iht8x4_32_add_c(in, out, stride, tx_type); +} + +class AV1Trans8x4HT : public libaom_test::TransformTestBase, + public ::testing::TestWithParam<Ht8x4Param> { + public: + virtual ~AV1Trans8x4HT() {} + + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + tx_type_ = GET_PARAM(2); + pitch_ = 8; + height_ = 4; + fwd_txfm_ref = fht8x4_ref; + inv_txfm_ref = iht8x4_ref; + bit_depth_ = GET_PARAM(3); + mask_ = (1 << bit_depth_) - 1; + num_coeffs_ = GET_PARAM(4); + } + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) { + fwd_txfm_(in, out, stride, tx_type_); + } + + void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) { + inv_txfm_(out, dst, stride, tx_type_); + } + + FhtFunc fwd_txfm_; + IhtFunc inv_txfm_; +}; + +TEST_P(AV1Trans8x4HT, AccuracyCheck) { RunAccuracyCheck(0, 0.00001); } +TEST_P(AV1Trans8x4HT, CoeffCheck) { RunCoeffCheck(); } +TEST_P(AV1Trans8x4HT, MemCheck) { RunMemCheck(); } +TEST_P(AV1Trans8x4HT, InvCoeffCheck) { RunInvCoeffCheck(); } +TEST_P(AV1Trans8x4HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); } + +using std::tr1::make_tuple; 
+ +const Ht8x4Param kArrayHt8x4Param_c[] = { + make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 0, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 1, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 2, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 3, AOM_BITS_8, 32), +#if CONFIG_EXT_TX + make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 4, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 5, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 6, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 7, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 8, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 9, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 10, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 11, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 12, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 13, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 14, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 15, AOM_BITS_8, 32) +#endif // CONFIG_EXT_TX +}; +INSTANTIATE_TEST_CASE_P(C, AV1Trans8x4HT, + ::testing::ValuesIn(kArrayHt8x4Param_c)); + +#if HAVE_SSE2 +const Ht8x4Param kArrayHt8x4Param_sse2[] = { + make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 0, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 1, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 2, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 3, AOM_BITS_8, 32), +#if CONFIG_EXT_TX + make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 4, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 5, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 6, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 7, AOM_BITS_8, 32), + 
make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 8, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 9, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 10, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 11, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 12, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 13, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 14, AOM_BITS_8, 32), + make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 15, AOM_BITS_8, 32) +#endif // CONFIG_EXT_TX +}; +INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans8x4HT, + ::testing::ValuesIn(kArrayHt8x4Param_sse2)); +#endif // HAVE_SSE2 + +} // namespace diff --git a/third_party/aom/test/av1_fht8x8_test.cc b/third_party/aom/test/av1_fht8x8_test.cc new file mode 100644 index 000000000..99cff1014 --- /dev/null +++ b/third_party/aom/test/av1_fht8x8_test.cc @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./av1_rtcd.h" +#include "./aom_dsp_rtcd.h" + +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/transform_test_base.h" +#include "test/util.h" +#include "aom_ports/mem.h" + +using libaom_test::ACMRandom; + +namespace { +typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride, + int tx_type); + +using libaom_test::FhtFunc; +using std::tr1::tuple; +typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht8x8Param; + +void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) { + av1_fht8x8_c(in, out, stride, tx_type); +} + +void iht8x8_ref(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { + av1_iht8x8_64_add_c(in, out, stride, tx_type); +} + +#if CONFIG_HIGHBITDEPTH +typedef void (*IHbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride, + int tx_type, int bd); +typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride, + int tx_type, int bd); +// Target optimized function, tx_type, bit depth +typedef tuple<HbdHtFunc, int, int> HighbdHt8x8Param; + +void highbd_fht8x8_ref(const int16_t *in, int32_t *out, int stride, int tx_type, + int bd) { + av1_fwd_txfm2d_8x8_c(in, out, stride, tx_type, bd); +} +#endif // CONFIG_HIGHBITDEPTH + +class AV1Trans8x8HT : public libaom_test::TransformTestBase, + public ::testing::TestWithParam<Ht8x8Param> { + public: + virtual ~AV1Trans8x8HT() {} + + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + tx_type_ = GET_PARAM(2); + pitch_ = 8; + height_ = 8; + fwd_txfm_ref = fht8x8_ref; + inv_txfm_ref = iht8x8_ref; + bit_depth_ = GET_PARAM(3); + mask_ = (1 << bit_depth_) - 1; + num_coeffs_ = GET_PARAM(4); + } + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) { + fwd_txfm_(in, out, stride, 
tx_type_); + } + + void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) { + inv_txfm_(out, dst, stride, tx_type_); + } + + FhtFunc fwd_txfm_; + IhtFunc inv_txfm_; +}; + +TEST_P(AV1Trans8x8HT, MemCheck) { RunMemCheck(); } +TEST_P(AV1Trans8x8HT, CoeffCheck) { RunCoeffCheck(); } +// Note: +// TODO(luoyi): Add tx_type, 9-15 for inverse transform. +// Need cleanup since same tests may be done in fdct8x8_test.cc +// TEST_P(AV1Trans8x8HT, AccuracyCheck) { RunAccuracyCheck(0); } +// TEST_P(AV1Trans8x8HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); } +// TEST_P(AV1Trans8x8HT, InvCoeffCheck) { RunInvCoeffCheck(); } + +#if CONFIG_HIGHBITDEPTH +class AV1HighbdTrans8x8HT : public ::testing::TestWithParam<HighbdHt8x8Param> { + public: + virtual ~AV1HighbdTrans8x8HT() {} + + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + fwd_txfm_ref_ = highbd_fht8x8_ref; + tx_type_ = GET_PARAM(1); + bit_depth_ = GET_PARAM(2); + mask_ = (1 << bit_depth_) - 1; + num_coeffs_ = 64; + + input_ = reinterpret_cast<int16_t *>( + aom_memalign(16, sizeof(int16_t) * num_coeffs_)); + output_ = reinterpret_cast<int32_t *>( + aom_memalign(16, sizeof(int32_t) * num_coeffs_)); + output_ref_ = reinterpret_cast<int32_t *>( + aom_memalign(16, sizeof(int32_t) * num_coeffs_)); + } + + virtual void TearDown() { + aom_free(input_); + aom_free(output_); + aom_free(output_ref_); + libaom_test::ClearSystemState(); + } + + protected: + void RunBitexactCheck(); + + private: + HbdHtFunc fwd_txfm_; + HbdHtFunc fwd_txfm_ref_; + int tx_type_; + int bit_depth_; + int mask_; + int num_coeffs_; + int16_t *input_; + int32_t *output_; + int32_t *output_ref_; +}; + +void AV1HighbdTrans8x8HT::RunBitexactCheck() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + int i, j; + const int stride = 8; + const int num_tests = 1000; + const int num_coeffs = 64; + + for (i = 0; i < num_tests; ++i) { + for (j = 0; j < num_coeffs; ++j) { + input_[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_); + } + + 
fwd_txfm_ref_(input_, output_ref_, stride, tx_type_, bit_depth_); + ASM_REGISTER_STATE_CHECK( + fwd_txfm_(input_, output_, stride, tx_type_, bit_depth_)); + + for (j = 0; j < num_coeffs; ++j) { + EXPECT_EQ(output_ref_[j], output_[j]) + << "Not bit-exact result at index: " << j << " at test block: " << i; + } + } +} + +TEST_P(AV1HighbdTrans8x8HT, HighbdCoeffCheck) { RunBitexactCheck(); } +#endif // CONFIG_HIGHBITDEPTH + +using std::tr1::make_tuple; + +#if HAVE_SSE2 +const Ht8x8Param kArrayHt8x8Param_sse2[] = { + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 0, AOM_BITS_8, 64), + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 1, AOM_BITS_8, 64), + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 2, AOM_BITS_8, 64), + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 3, AOM_BITS_8, 64), +#if CONFIG_EXT_TX + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 4, AOM_BITS_8, 64), + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 5, AOM_BITS_8, 64), + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 6, AOM_BITS_8, 64), + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 7, AOM_BITS_8, 64), + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 8, AOM_BITS_8, 64), + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 9, AOM_BITS_8, 64), + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 10, AOM_BITS_8, 64), + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 11, AOM_BITS_8, 64), + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 12, AOM_BITS_8, 64), + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 13, AOM_BITS_8, 64), + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 14, AOM_BITS_8, 64), + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 15, AOM_BITS_8, 64) +#endif // CONFIG_EXT_TX +}; +INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans8x8HT, + ::testing::ValuesIn(kArrayHt8x8Param_sse2)); +#endif // HAVE_SSE2 + +#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH +const HighbdHt8x8Param kArrayHBDHt8x8Param_sse4_1[] = { + 
make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 0, 10), + make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 0, 12), + make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 1, 10), + make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 1, 12), + make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 2, 10), + make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 2, 12), + make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 3, 10), + make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 3, 12), +#if CONFIG_EXT_TX + make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 4, 10), + make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 4, 12), + make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 5, 10), + make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 5, 12), + make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 6, 10), + make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 6, 12), + make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 7, 10), + make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 7, 12), + make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 8, 10), + make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 8, 12), +#endif // CONFIG_EXT_TX +}; +INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdTrans8x8HT, + ::testing::ValuesIn(kArrayHBDHt8x8Param_sse4_1)); +#endif // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH + +} // namespace diff --git a/third_party/aom/test/av1_fwd_txfm1d_test.cc b/third_party/aom/test/av1_fwd_txfm1d_test.cc new file mode 100644 index 000000000..a9b3f8e40 --- /dev/null +++ b/third_party/aom/test/av1_fwd_txfm1d_test.cc @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include "av1/common/av1_fwd_txfm1d.h" +#include "test/av1_txfm_test.h" + +using libaom_test::ACMRandom; +using libaom_test::input_base; +using libaom_test::reference_hybrid_1d; +using libaom_test::TYPE_TXFM; +using libaom_test::TYPE_DCT; +using libaom_test::TYPE_ADST; + +namespace { +const int txfm_type_num = 2; +const TYPE_TXFM txfm_type_ls[2] = { TYPE_DCT, TYPE_ADST }; + +const int txfm_size_num = 5; +const int txfm_size_ls[5] = { 4, 8, 16, 32, 64 }; + +const TxfmFunc fwd_txfm_func_ls[2][5] = { +#if CONFIG_TX64X64 + { av1_fdct4_new, av1_fdct8_new, av1_fdct16_new, av1_fdct32_new, + av1_fdct64_new }, +#else + { av1_fdct4_new, av1_fdct8_new, av1_fdct16_new, av1_fdct32_new, NULL }, +#endif + { av1_fadst4_new, av1_fadst8_new, av1_fadst16_new, av1_fadst32_new, NULL } +}; + +// the maximum stage number of fwd/inv 1d dct/adst txfm is 12 +const int8_t cos_bit[12] = { 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14 }; +const int8_t range_bit[12] = { 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 }; + +TEST(av1_fwd_txfm1d, round_shift) { + EXPECT_EQ(round_shift(7, 1), 4); + EXPECT_EQ(round_shift(-7, 1), -3); + + EXPECT_EQ(round_shift(7, 2), 2); + EXPECT_EQ(round_shift(-7, 2), -2); + + EXPECT_EQ(round_shift(8, 2), 2); + EXPECT_EQ(round_shift(-8, 2), -2); +} + +TEST(av1_fwd_txfm1d, get_max_bit) { + int max_bit = get_max_bit(8); + EXPECT_EQ(max_bit, 3); +} + +TEST(av1_fwd_txfm1d, cospi_arr) { + for (int i = 0; i < 7; i++) { + for (int j = 0; j < 64; j++) { + EXPECT_EQ(cospi_arr[i][j], + (int32_t)round(cos(M_PI * j / 128) * (1 << (cos_bit_min + i)))); + } + } +} + +TEST(av1_fwd_txfm1d, clamp_block) { + int16_t block[5][5] = { { 7, -5, 6, -3, 9 }, + { 7, -5, 6, -3, 9 }, + { 7, -5, 6, -3, 9 }, + { 7, -5, 6, -3, 9 }, + { 7, -5, 6, -3, 9 } }; + + int16_t ref_block[5][5] = { { 7, -5, 6, -3, 9 }, + { 7, -5, 6, -3, 9 }, + { 7, -4, 2, -3, 9 }, + { 7, -4, 2, -3, 9 }, + { 7, -4, 2, -3, 9 } }; + + int row = 2; + int col = 1; + int block_size = 3; + int stride = 5; + 
clamp_block(block[row] + col, block_size, stride, -4, 2); + for (int r = 0; r < stride; r++) { + for (int c = 0; c < stride; c++) { + EXPECT_EQ(block[r][c], ref_block[r][c]); + } + } +} + +TEST(av1_fwd_txfm1d, accuracy) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + for (int si = 0; si < txfm_size_num; ++si) { + int txfm_size = txfm_size_ls[si]; + int32_t *input = new int32_t[txfm_size]; + int32_t *output = new int32_t[txfm_size]; + double *ref_input = new double[txfm_size]; + double *ref_output = new double[txfm_size]; + + for (int ti = 0; ti < txfm_type_num; ++ti) { + TYPE_TXFM txfm_type = txfm_type_ls[ti]; + TxfmFunc fwd_txfm_func = fwd_txfm_func_ls[ti][si]; + int max_error = 7; + + const int count_test_block = 5000; + if (fwd_txfm_func != NULL) { + for (int ti = 0; ti < count_test_block; ++ti) { + for (int ni = 0; ni < txfm_size; ++ni) { + input[ni] = rnd.Rand16() % input_base - rnd.Rand16() % input_base; + ref_input[ni] = static_cast<double>(input[ni]); + } + + fwd_txfm_func(input, output, cos_bit, range_bit); + reference_hybrid_1d(ref_input, ref_output, txfm_size, txfm_type); + + for (int ni = 0; ni < txfm_size; ++ni) { + EXPECT_LE( + abs(output[ni] - static_cast<int32_t>(round(ref_output[ni]))), + max_error); + } + } + } + } + + delete[] input; + delete[] output; + delete[] ref_input; + delete[] ref_output; + } +} +} // namespace diff --git a/third_party/aom/test/av1_fwd_txfm2d_test.cc b/third_party/aom/test/av1_fwd_txfm2d_test.cc new file mode 100644 index 000000000..25cf5ad53 --- /dev/null +++ b/third_party/aom/test/av1_fwd_txfm2d_test.cc @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "test/acm_random.h"
#include "test/util.h"
#include "test/av1_txfm_test.h"
#include "av1/common/av1_txfm.h"
#include "./av1_rtcd.h"

using libaom_test::ACMRandom;
using libaom_test::input_base;
using libaom_test::bd;
using libaom_test::compute_avg_abs_error;
using libaom_test::Fwd_Txfm2d_Func;
using libaom_test::TYPE_TXFM;

namespace {
#if CONFIG_HIGHBITDEPTH
// tx_type_, tx_size_, max_error_, max_avg_error_
typedef std::tr1::tuple<TX_TYPE, TX_SIZE, double, double> AV1FwdTxfm2dParam;

// Parameterized accuracy test for the forward 2-D transforms: each kernel
// from libaom_test::fwd_txfm_func_ls is compared against
// reference_hybrid_2d() on random input, with a per-coefficient bound
// (max_error_) and a bound on the average absolute error (max_avg_error_).
class AV1FwdTxfm2d : public ::testing::TestWithParam<AV1FwdTxfm2dParam> {
 public:
  // Reads the test parameters, derives the scaling and flip settings from the
  // forward transform configuration, and allocates the working buffers.
  virtual void SetUp() {
    tx_type_ = GET_PARAM(0);
    tx_size_ = GET_PARAM(1);
    max_error_ = GET_PARAM(2);
    max_avg_error_ = GET_PARAM(3);
    count_ = 500;
    TXFM_2D_FLIP_CFG fwd_txfm_flip_cfg =
        av1_get_fwd_txfm_cfg(tx_type_, tx_size_);
    const TXFM_2D_CFG *fwd_txfm_cfg = fwd_txfm_flip_cfg.cfg;
    // Sum of the three configured shifts; it may be negative, in which case
    // the factor below becomes a fraction.
    int amplify_bit = fwd_txfm_cfg->shift[0] + fwd_txfm_cfg->shift[1] +
                      fwd_txfm_cfg->shift[2];
    ud_flip_ = fwd_txfm_flip_cfg.ud_flip;
    lr_flip_ = fwd_txfm_flip_cfg.lr_flip;
    amplify_factor_ =
        amplify_bit >= 0 ? (1 << amplify_bit) : (1.0 / (1 << -amplify_bit));

    fwd_txfm_ = libaom_test::fwd_txfm_func_ls[tx_size_];
    txfm1d_size_ = libaom_test::get_txfm1d_size(tx_size_);
    txfm2d_size_ = txfm1d_size_ * txfm1d_size_;
    get_txfm1d_type(tx_type_, &type0_, &type1_);
    // All four buffers hold txfm2d_size_ elements and are 16-byte aligned.
    input_ = reinterpret_cast<int16_t *>(
        aom_memalign(16, sizeof(input_[0]) * txfm2d_size_));
    output_ = reinterpret_cast<int32_t *>(
        aom_memalign(16, sizeof(output_[0]) * txfm2d_size_));
    ref_input_ = reinterpret_cast<double *>(
        aom_memalign(16, sizeof(ref_input_[0]) * txfm2d_size_));
    ref_output_ = reinterpret_cast<double *>(
        aom_memalign(16, sizeof(ref_output_[0]) * txfm2d_size_));
  }

  // Runs count_ random blocks through the kernel and the reference and checks
  // both error bounds. Errors are divided by amplify_factor_ before being
  // compared with the bounds.
  void RunFwdAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    double avg_abs_error = 0;
    for (int ci = 0; ci < count_; ci++) {
      for (int ni = 0; ni < txfm2d_size_; ++ni) {
        input_[ni] = rnd.Rand16() % input_base;
        ref_input_[ni] = static_cast<double>(input_[ni]);
        output_[ni] = 0;
        ref_output_[ni] = 0;
      }

      fwd_txfm_(input_, output_, txfm1d_size_, tx_type_, bd);

      // The kernel is given the unflipped input, so the flips requested by
      // the configuration are applied to the reference input here.
      if (lr_flip_ && ud_flip_)
        libaom_test::fliplrud(ref_input_, txfm1d_size_, txfm1d_size_);
      else if (lr_flip_)
        libaom_test::fliplr(ref_input_, txfm1d_size_, txfm1d_size_);
      else if (ud_flip_)
        libaom_test::flipud(ref_input_, txfm1d_size_, txfm1d_size_);

      reference_hybrid_2d(ref_input_, ref_output_, txfm1d_size_, type0_,
                          type1_);

      for (int ni = 0; ni < txfm2d_size_; ++ni) {
        // Scale the reference by amplify_factor_ before comparing it with the
        // kernel output.
        ref_output_[ni] = round(ref_output_[ni] * amplify_factor_);
        EXPECT_GE(max_error_,
                  fabs(output_[ni] - ref_output_[ni]) / amplify_factor_);
      }
      avg_abs_error += compute_avg_abs_error<int32_t, double>(
          output_, ref_output_, txfm2d_size_);
    }

    avg_abs_error /= amplify_factor_;
    avg_abs_error /= count_;
    // max_abs_avg_error comes from upper bound of avg_abs_error
    // printf("type0: %d type1: %d txfm_size: %d accuracy_avg_abs_error:
    // %f\n", type0_, type1_, txfm1d_size_, avg_abs_error);
    EXPECT_GE(max_avg_error_, avg_abs_error);
  }

  // Releases the buffers allocated in SetUp().
  virtual void TearDown() {
    aom_free(input_);
    aom_free(output_);
    aom_free(ref_input_);
    aom_free(ref_output_);
  }

 private:
  double max_error_;      // bound on the scaled per-coefficient error
  double max_avg_error_;  // bound on the scaled average absolute error
  int count_;             // number of random blocks per test
  double amplify_factor_;  // 2^(sum of configured shifts)
  TX_TYPE tx_type_;
  TX_SIZE tx_size_;
  int txfm1d_size_;  // side length returned by get_txfm1d_size()
  int txfm2d_size_;  // txfm1d_size_ squared
  Fwd_Txfm2d_Func fwd_txfm_;
  TYPE_TXFM type0_;  // 1-D types filled in by get_txfm1d_type()
  TYPE_TXFM type1_;
  int16_t *input_;
  int32_t *output_;
  double *ref_input_;
  double *ref_output_;
  int ud_flip_;  // flip upside down
  int lr_flip_;  // flip left to right
};

TEST_P(AV1FwdTxfm2d, RunFwdAccuracyCheck) { RunFwdAccuracyCheck(); }
// One entry per (tx_type, tx_size) pair: max_error, max_avg_error.
const AV1FwdTxfm2dParam av1_fwd_txfm2d_param_c[] = {
#if CONFIG_EXT_TX
  AV1FwdTxfm2dParam(FLIPADST_DCT, TX_4X4, 2, 0.2),
  AV1FwdTxfm2dParam(DCT_FLIPADST, TX_4X4, 2, 0.2),
  AV1FwdTxfm2dParam(FLIPADST_FLIPADST, TX_4X4, 2, 0.2),
  AV1FwdTxfm2dParam(ADST_FLIPADST, TX_4X4, 2, 0.2),
  AV1FwdTxfm2dParam(FLIPADST_ADST, TX_4X4, 2, 0.2),
  AV1FwdTxfm2dParam(FLIPADST_DCT, TX_8X8, 5, 0.6),
  AV1FwdTxfm2dParam(DCT_FLIPADST, TX_8X8, 5, 0.6),
  AV1FwdTxfm2dParam(FLIPADST_FLIPADST, TX_8X8, 5, 0.6),
  AV1FwdTxfm2dParam(ADST_FLIPADST, TX_8X8, 5, 0.6),
  AV1FwdTxfm2dParam(FLIPADST_ADST, TX_8X8, 5, 0.6),
  AV1FwdTxfm2dParam(FLIPADST_DCT, TX_16X16, 11, 1.5),
  AV1FwdTxfm2dParam(DCT_FLIPADST, TX_16X16, 11, 1.5),
  AV1FwdTxfm2dParam(FLIPADST_FLIPADST, TX_16X16, 11, 1.5),
  AV1FwdTxfm2dParam(ADST_FLIPADST, TX_16X16, 11, 1.5),
  AV1FwdTxfm2dParam(FLIPADST_ADST, TX_16X16, 11, 1.5),
  AV1FwdTxfm2dParam(FLIPADST_DCT, TX_32X32, 70, 7),
  AV1FwdTxfm2dParam(DCT_FLIPADST, TX_32X32, 70, 7),
  AV1FwdTxfm2dParam(FLIPADST_FLIPADST, TX_32X32, 70, 7),
  AV1FwdTxfm2dParam(ADST_FLIPADST, TX_32X32, 70, 7),
  AV1FwdTxfm2dParam(FLIPADST_ADST, TX_32X32, 70, 7),
#endif
  AV1FwdTxfm2dParam(DCT_DCT, TX_4X4, 2, 0.2),
  AV1FwdTxfm2dParam(ADST_DCT, TX_4X4, 2, 0.2),
  AV1FwdTxfm2dParam(DCT_ADST, TX_4X4, 2, 0.2),
  AV1FwdTxfm2dParam(ADST_ADST, TX_4X4, 2, 0.2),
  AV1FwdTxfm2dParam(DCT_DCT, TX_8X8, 5, 0.6),
  AV1FwdTxfm2dParam(ADST_DCT, TX_8X8, 5, 0.6),
  AV1FwdTxfm2dParam(DCT_ADST, TX_8X8, 5, 0.6),
  AV1FwdTxfm2dParam(ADST_ADST, TX_8X8, 5, 0.6),
  AV1FwdTxfm2dParam(DCT_DCT, TX_16X16, 11, 1.5),
  AV1FwdTxfm2dParam(ADST_DCT, TX_16X16, 11, 1.5),
  AV1FwdTxfm2dParam(DCT_ADST, TX_16X16, 11, 1.5),
  AV1FwdTxfm2dParam(ADST_ADST, TX_16X16, 11, 1.5),
  AV1FwdTxfm2dParam(DCT_DCT, TX_32X32, 70, 7),
  AV1FwdTxfm2dParam(ADST_DCT, TX_32X32, 70, 7),
  AV1FwdTxfm2dParam(DCT_ADST, TX_32X32, 70, 7),
  AV1FwdTxfm2dParam(ADST_ADST, TX_32X32, 70, 7)
};

INSTANTIATE_TEST_CASE_P(C, AV1FwdTxfm2d,
                        ::testing::ValuesIn(av1_fwd_txfm2d_param_c));

#endif  // CONFIG_HIGHBITDEPTH
}  // namespace
diff --git a/third_party/aom/test/av1_highbd_iht_test.cc b/third_party/aom/test/av1_highbd_iht_test.cc
new file mode 100644
index 000000000..3b263638f
--- /dev/null
+++ b/third_party/aom/test/av1_highbd_iht_test.cc
@@ -0,0 +1,236 @@
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include "third_party/googletest/src/googletest/include/gtest/gtest.h"

#include "./av1_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "av1/common/enums.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_ports/mem.h"

namespace {

using std::tr1::tuple;
using libaom_test::ACMRandom;

// Forward transform signature used to produce the coefficients fed to the
// inverse transforms.
typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
                          int tx_type, int bd);

// Inverse transform signature shared by the function under test and its
// reference.
typedef void (*IHbdHtFunc)(const int32_t *coeff, uint16_t *output, int stride,
                           int tx_type, int bd);

// Test parameter argument list:
//   <transform reference function,
//    optimized inverse transform function,
//    inverse transform reference function,
//    num_coeffs,
//    tx_type,
//    bit_depth>
typedef tuple<HbdHtFunc, IHbdHtFunc, IHbdHtFunc, int, int, int> IHbdHtParam;

// Bit-exactness test: the optimized inverse transform must produce exactly
// the same output as the reference inverse transform for the same
// coefficients and the same initial destination contents.
class AV1HighbdInvHTNxN : public ::testing::TestWithParam<IHbdHtParam> {
 public:
  virtual ~AV1HighbdInvHTNxN() {}

  // Reads the test parameters and allocates the four working buffers, each
  // holding num_coeffs_ elements.
  virtual void SetUp() {
    txfm_ref_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    inv_txfm_ref_ = GET_PARAM(2);
    num_coeffs_ = GET_PARAM(3);
    tx_type_ = GET_PARAM(4);
    bit_depth_ = GET_PARAM(5);

    input_ = reinterpret_cast<int16_t *>(
        aom_memalign(16, sizeof(input_[0]) * num_coeffs_));

    // Note:
    // Inverse transform input buffer is 32-byte aligned
    // Refer to <root>/av1/encoder/context_tree.c, function,
    // void alloc_mode_context().
    coeffs_ = reinterpret_cast<int32_t *>(
        aom_memalign(32, sizeof(coeffs_[0]) * num_coeffs_));
    output_ = reinterpret_cast<uint16_t *>(
        aom_memalign(32, sizeof(output_[0]) * num_coeffs_));
    output_ref_ = reinterpret_cast<uint16_t *>(
        aom_memalign(32, sizeof(output_ref_[0]) * num_coeffs_));
  }

  // Releases the buffers allocated in SetUp().
  virtual void TearDown() {
    aom_free(input_);
    aom_free(coeffs_);
    aom_free(output_);
    aom_free(output_ref_);
    libaom_test::ClearSystemState();
  }

 protected:
  void RunBitexactCheck();

 private:
  // Maps the coefficient count of a square block (16, 64, 256, 1024) to its
  // side length; returns 0 for any other count.
  int GetStride() const {
    if (16 == num_coeffs_) {
      return 4;
    } else if (64 == num_coeffs_) {
      return 8;
    } else if (256 == num_coeffs_) {
      return 16;
    } else if (1024 == num_coeffs_) {
      return 32;
    } else {
      return 0;
    }
  }

  HbdHtFunc txfm_ref_;       // forward transform producing coeffs_
  IHbdHtFunc inv_txfm_;      // inverse transform under test
  IHbdHtFunc inv_txfm_ref_;  // reference inverse transform
  int num_coeffs_;
  int tx_type_;
  int bit_depth_;

  int16_t *input_;
  int32_t *coeffs_;
  uint16_t *output_;      // destination written by inv_txfm_
  uint16_t *output_ref_;  // destination written by inv_txfm_ref_
};

// For each of num_tests random blocks: builds a residual from the difference
// of two bit_depth_-masked values, seeds both destinations with the same
// masked random pixels, transforms forward with txfm_ref_, then inverts with
// both implementations and requires identical destinations.
void AV1HighbdInvHTNxN::RunBitexactCheck() {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int stride = GetStride();
  const int num_tests = 20000;
  const uint16_t mask = (1 << bit_depth_) - 1;

  for (int i = 0; i < num_tests; ++i) {
    for (int j = 0; j < num_coeffs_; ++j) {
      input_[j] = (rnd.Rand16() & mask) - (rnd.Rand16() & mask);
      output_ref_[j] = rnd.Rand16() & mask;
      output_[j] = output_ref_[j];
    }

    txfm_ref_(input_, coeffs_, stride, tx_type_, bit_depth_);
    inv_txfm_ref_(coeffs_, output_ref_, stride, tx_type_, bit_depth_);
    ASM_REGISTER_STATE_CHECK(
        inv_txfm_(coeffs_, output_, stride, tx_type_, bit_depth_));

    for (int j = 0; j < num_coeffs_; ++j) {
      EXPECT_EQ(output_ref_[j], output_[j])
          << "Not bit-exact result at index: " << j << " At test block: " << i;
    }
  }
}

TEST_P(AV1HighbdInvHTNxN, InvTransResultCheck) { RunBitexactCheck(); }

using std::tr1::make_tuple;

#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
// Each PARAM_LIST_* macro expands to the first four tuple fields:
// forward reference, optimized inverse, reference inverse, num_coeffs.
#define PARAM_LIST_4X4                                   \
  &av1_fwd_txfm2d_4x4_c, &av1_inv_txfm2d_add_4x4_sse4_1, \
      &av1_inv_txfm2d_add_4x4_c, 16

#define PARAM_LIST_8X8                                   \
  &av1_fwd_txfm2d_8x8_c, &av1_inv_txfm2d_add_8x8_sse4_1, \
      &av1_inv_txfm2d_add_8x8_c, 64

#define PARAM_LIST_16X16                                     \
  &av1_fwd_txfm2d_16x16_c, &av1_inv_txfm2d_add_16x16_sse4_1, \
      &av1_inv_txfm2d_add_16x16_c, 256

// Every transform type is listed at bit depths 10 and 12.
const IHbdHtParam kArrayIhtParam[] = {
  // 16x16
  make_tuple(PARAM_LIST_16X16, DCT_DCT, 10),
  make_tuple(PARAM_LIST_16X16, DCT_DCT, 12),
  make_tuple(PARAM_LIST_16X16, ADST_DCT, 10),
  make_tuple(PARAM_LIST_16X16, ADST_DCT, 12),
  make_tuple(PARAM_LIST_16X16, DCT_ADST, 10),
  make_tuple(PARAM_LIST_16X16, DCT_ADST, 12),
  make_tuple(PARAM_LIST_16X16, ADST_ADST, 10),
  make_tuple(PARAM_LIST_16X16, ADST_ADST, 12),
#if CONFIG_EXT_TX
  make_tuple(PARAM_LIST_16X16, FLIPADST_DCT, 10),
  make_tuple(PARAM_LIST_16X16, FLIPADST_DCT, 12),
  make_tuple(PARAM_LIST_16X16, DCT_FLIPADST, 10),
  make_tuple(PARAM_LIST_16X16, DCT_FLIPADST, 12),
  make_tuple(PARAM_LIST_16X16, FLIPADST_FLIPADST, 10),
  make_tuple(PARAM_LIST_16X16, FLIPADST_FLIPADST, 12),
  make_tuple(PARAM_LIST_16X16, ADST_FLIPADST, 10),
  make_tuple(PARAM_LIST_16X16, ADST_FLIPADST, 12),
  make_tuple(PARAM_LIST_16X16, FLIPADST_ADST, 10),
  make_tuple(PARAM_LIST_16X16, FLIPADST_ADST, 12),
#endif
  // 8x8
  make_tuple(PARAM_LIST_8X8, DCT_DCT, 10),
  make_tuple(PARAM_LIST_8X8, DCT_DCT, 12),
  make_tuple(PARAM_LIST_8X8, ADST_DCT, 10),
  make_tuple(PARAM_LIST_8X8, ADST_DCT, 12),
  make_tuple(PARAM_LIST_8X8, DCT_ADST, 10),
  make_tuple(PARAM_LIST_8X8, DCT_ADST, 12),
  make_tuple(PARAM_LIST_8X8, ADST_ADST, 10),
  make_tuple(PARAM_LIST_8X8, ADST_ADST, 12),
#if CONFIG_EXT_TX
  make_tuple(PARAM_LIST_8X8, FLIPADST_DCT, 10),
  make_tuple(PARAM_LIST_8X8, FLIPADST_DCT, 12),
  make_tuple(PARAM_LIST_8X8, DCT_FLIPADST, 10),
  make_tuple(PARAM_LIST_8X8, DCT_FLIPADST, 12),
  make_tuple(PARAM_LIST_8X8, FLIPADST_FLIPADST, 10),
  make_tuple(PARAM_LIST_8X8, FLIPADST_FLIPADST, 12),
  make_tuple(PARAM_LIST_8X8, ADST_FLIPADST, 10),
  make_tuple(PARAM_LIST_8X8, ADST_FLIPADST, 12),
  make_tuple(PARAM_LIST_8X8, FLIPADST_ADST, 10),
  make_tuple(PARAM_LIST_8X8, FLIPADST_ADST, 12),
#endif
  // 4x4
  make_tuple(PARAM_LIST_4X4, DCT_DCT, 10),
  make_tuple(PARAM_LIST_4X4, DCT_DCT, 12),
  make_tuple(PARAM_LIST_4X4, ADST_DCT, 10),
  make_tuple(PARAM_LIST_4X4, ADST_DCT, 12),
  make_tuple(PARAM_LIST_4X4, DCT_ADST, 10),
  make_tuple(PARAM_LIST_4X4, DCT_ADST, 12),
  make_tuple(PARAM_LIST_4X4, ADST_ADST, 10),
  make_tuple(PARAM_LIST_4X4, ADST_ADST, 12),
#if CONFIG_EXT_TX
  make_tuple(PARAM_LIST_4X4, FLIPADST_DCT, 10),
  make_tuple(PARAM_LIST_4X4, FLIPADST_DCT, 12),
  make_tuple(PARAM_LIST_4X4, DCT_FLIPADST, 10),
  make_tuple(PARAM_LIST_4X4, DCT_FLIPADST, 12),
  make_tuple(PARAM_LIST_4X4, FLIPADST_FLIPADST, 10),
  make_tuple(PARAM_LIST_4X4, FLIPADST_FLIPADST, 12),
  make_tuple(PARAM_LIST_4X4, ADST_FLIPADST, 10),
  make_tuple(PARAM_LIST_4X4, ADST_FLIPADST, 12),
  make_tuple(PARAM_LIST_4X4, FLIPADST_ADST, 10),
  make_tuple(PARAM_LIST_4X4, FLIPADST_ADST, 12),
#endif
};

INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdInvHTNxN,
                        ::testing::ValuesIn(kArrayIhtParam));
#endif  // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH

#if HAVE_AVX2 && CONFIG_HIGHBITDEPTH
#define PARAM_LIST_32X32                                   \
  &av1_fwd_txfm2d_32x32_c, &av1_inv_txfm2d_add_32x32_avx2, \
      &av1_inv_txfm2d_add_32x32_c, 1024

// Only DCT_DCT is listed for the AVX2 32x32 inverse transform.
const IHbdHtParam kArrayIhtParam32x32[] = {
  // 32x32
  make_tuple(PARAM_LIST_32X32, DCT_DCT, 10),
  make_tuple(PARAM_LIST_32X32, DCT_DCT, 12),
};

INSTANTIATE_TEST_CASE_P(AVX2, AV1HighbdInvHTNxN,
                        ::testing::ValuesIn(kArrayIhtParam32x32));

#endif  // HAVE_AVX2 && CONFIG_HIGHBITDEPTH
}  // namespace
diff --git a/third_party/aom/test/av1_inv_txfm1d_test.cc b/third_party/aom/test/av1_inv_txfm1d_test.cc
new file mode 100644
index 000000000..9cf33a2fd
--- /dev/null
+++ b/third_party/aom/test/av1_inv_txfm1d_test.cc
@@ -0,0 +1,87 @@
/*
 * Copyright (c) 2016, Alliance for Open Media.
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "test/av1_txfm_test.h" +#include "av1/common/av1_fwd_txfm1d.h" +#include "av1/common/av1_inv_txfm1d.h" + +using libaom_test::ACMRandom; +using libaom_test::input_base; + +namespace { +const int txfm_type_num = 2; +const int txfm_size_ls[5] = { 4, 8, 16, 32, 64 }; + +const TxfmFunc fwd_txfm_func_ls[][2] = { + { av1_fdct4_new, av1_fadst4_new }, + { av1_fdct8_new, av1_fadst8_new }, + { av1_fdct16_new, av1_fadst16_new }, + { av1_fdct32_new, av1_fadst32_new }, +#if CONFIG_TX64X64 + { av1_fdct64_new, NULL }, +#endif +}; + +const TxfmFunc inv_txfm_func_ls[][2] = { + { av1_idct4_new, av1_iadst4_new }, + { av1_idct8_new, av1_iadst8_new }, + { av1_idct16_new, av1_iadst16_new }, + { av1_idct32_new, av1_iadst32_new }, +#if CONFIG_TX64X64 + { av1_idct64_new, NULL }, +#endif +}; + +// the maximum stage number of fwd/inv 1d dct/adst txfm is 12 +const int8_t cos_bit[12] = { 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14 }; +const int8_t range_bit[12] = { 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 }; + +#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof(x[0])) + +TEST(av1_inv_txfm1d, round_trip) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + for (int si = 0; si < ARRAY_SIZE(fwd_txfm_func_ls); ++si) { + int txfm_size = txfm_size_ls[si]; + + for (int ti = 0; ti < txfm_type_num; ++ti) { + TxfmFunc fwd_txfm_func = fwd_txfm_func_ls[si][ti]; + TxfmFunc inv_txfm_func = inv_txfm_func_ls[si][ti]; + int max_error = 2; + + if (!fwd_txfm_func) continue; + + const int count_test_block = 5000; + for 
(int ci = 0; ci < count_test_block; ++ci) { + int32_t input[64]; + int32_t output[64]; + int32_t round_trip_output[64]; + + assert(txfm_size <= ARRAY_SIZE(input)); + + for (int ni = 0; ni < txfm_size; ++ni) { + input[ni] = rnd.Rand16() % input_base - rnd.Rand16() % input_base; + } + + fwd_txfm_func(input, output, cos_bit, range_bit); + inv_txfm_func(output, round_trip_output, cos_bit, range_bit); + + for (int ni = 0; ni < txfm_size; ++ni) { + int node_err = + abs(input[ni] - round_shift(round_trip_output[ni], + get_max_bit(txfm_size) - 1)); + EXPECT_LE(node_err, max_error); + } + } + } + } +} + +} // namespace diff --git a/third_party/aom/test/av1_inv_txfm2d_test.cc b/third_party/aom/test/av1_inv_txfm2d_test.cc new file mode 100644 index 000000000..bb2743af1 --- /dev/null +++ b/third_party/aom/test/av1_inv_txfm2d_test.cc @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
 */

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "./av1_rtcd.h"
#include "test/acm_random.h"
#include "test/util.h"
#include "test/av1_txfm_test.h"
#include "av1/common/av1_inv_txfm2d_cfg.h"

using libaom_test::ACMRandom;
using libaom_test::input_base;
using libaom_test::bd;
using libaom_test::compute_avg_abs_error;
using libaom_test::Fwd_Txfm2d_Func;
using libaom_test::Inv_Txfm2d_Func;

namespace {

#if CONFIG_HIGHBITDEPTH
// AV1InvTxfm2dParam argument list:
// tx_type_, tx_size_, max_error_, max_avg_error_
typedef std::tr1::tuple<TX_TYPE, TX_SIZE, int, double> AV1InvTxfm2dParam;

// Parameterized round-trip test for the 2-D transforms: a block is sent
// through the forward kernel and then the inverse kernel, and the result is
// compared with the original block.
class AV1InvTxfm2d : public ::testing::TestWithParam<AV1InvTxfm2dParam> {
 public:
  // Reads the test parameters and allocates the three working buffers, each
  // holding txfm2d_size_ elements.
  virtual void SetUp() {
    tx_type_ = GET_PARAM(0);
    tx_size_ = GET_PARAM(1);
    max_error_ = GET_PARAM(2);
    max_avg_error_ = GET_PARAM(3);
    txfm1d_size_ = libaom_test::get_txfm1d_size(tx_size_);
    txfm2d_size_ = txfm1d_size_ * txfm1d_size_;
    count_ = 500;

    input_ = reinterpret_cast<int16_t *>(
        aom_memalign(16, sizeof(int16_t) * txfm2d_size_));
    ref_input_ = reinterpret_cast<uint16_t *>(
        aom_memalign(16, sizeof(uint16_t) * txfm2d_size_));
    output_ = reinterpret_cast<int32_t *>(
        aom_memalign(16, sizeof(int32_t) * txfm2d_size_));
  }

  // Runs count_ blocks through forward + inverse and checks the
  // per-sample bound (max_error_) and the average bound (max_avg_error_).
  void RunRoundtripCheck() {
    const Fwd_Txfm2d_Func fwd_txfm_func =
        libaom_test::fwd_txfm_func_ls[tx_size_];
    const Inv_Txfm2d_Func inv_txfm_func =
        libaom_test::inv_txfm_func_ls[tx_size_];
    double avg_abs_error = 0;
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    for (int ci = 0; ci < count_; ci++) {
      for (int ni = 0; ni < txfm2d_size_; ++ni) {
        // The first block is all input_base - 1; later blocks are random.
        // ref_input_ is zeroed before every block; it is the destination the
        // inverse kernel writes to.
        if (ci == 0) {
          int extreme_input = input_base - 1;
          input_[ni] = extreme_input;  // extreme case
          ref_input_[ni] = 0;
        } else {
          input_[ni] = rnd.Rand16() % input_base;
          ref_input_[ni] = 0;
        }
      }

      fwd_txfm_func(input_, output_, txfm1d_size_, tx_type_, bd);
      inv_txfm_func(output_, ref_input_, txfm1d_size_, tx_type_, bd);

      for (int ni = 0; ni < txfm2d_size_; ++ni) {
        EXPECT_GE(max_error_, abs(input_[ni] - ref_input_[ni]));
      }
      avg_abs_error += compute_avg_abs_error<int16_t, uint16_t>(
          input_, ref_input_, txfm2d_size_);
    }

    avg_abs_error /= count_;
    // max_avg_error_ comes from the upper bound of avg_abs_error
    // printf("txfm1d_size: %d accuracy_avg_abs_error: %f\n",
    // txfm1d_size_, avg_abs_error);
    EXPECT_GE(max_avg_error_, avg_abs_error);
  }

  // Releases the buffers allocated in SetUp().
  virtual void TearDown() {
    aom_free(input_);
    aom_free(output_);
    aom_free(ref_input_);
  }

 private:
  int count_;             // number of blocks per test
  int max_error_;         // bound on the per-sample round-trip error
  double max_avg_error_;  // bound on the average absolute round-trip error
  TX_TYPE tx_type_;
  TX_SIZE tx_size_;
  int txfm1d_size_;  // side length returned by get_txfm1d_size()
  int txfm2d_size_;  // txfm1d_size_ squared
  int16_t *input_;       // block fed to the forward kernel
  uint16_t *ref_input_;  // round-trip result written by the inverse kernel
  int32_t *output_;      // forward-transform coefficients
};

TEST_P(AV1InvTxfm2d, RunRoundtripCheck) { RunRoundtripCheck(); }

// One entry per (tx_type, tx_size) pair: max_error, max_avg_error.
const AV1InvTxfm2dParam av1_inv_txfm2d_param[] = {
#if CONFIG_EXT_TX
  AV1InvTxfm2dParam(FLIPADST_DCT, TX_4X4, 2, 0.002),
  AV1InvTxfm2dParam(DCT_FLIPADST, TX_4X4, 2, 0.002),
  AV1InvTxfm2dParam(FLIPADST_FLIPADST, TX_4X4, 2, 0.002),
  AV1InvTxfm2dParam(ADST_FLIPADST, TX_4X4, 2, 0.002),
  AV1InvTxfm2dParam(FLIPADST_ADST, TX_4X4, 2, 0.002),
  AV1InvTxfm2dParam(FLIPADST_DCT, TX_8X8, 2, 0.02),
  AV1InvTxfm2dParam(DCT_FLIPADST, TX_8X8, 2, 0.02),
  AV1InvTxfm2dParam(FLIPADST_FLIPADST, TX_8X8, 2, 0.02),
  AV1InvTxfm2dParam(ADST_FLIPADST, TX_8X8, 2, 0.02),
  AV1InvTxfm2dParam(FLIPADST_ADST, TX_8X8, 2, 0.02),
  AV1InvTxfm2dParam(FLIPADST_DCT, TX_16X16, 2, 0.04),
  AV1InvTxfm2dParam(DCT_FLIPADST, TX_16X16, 2, 0.04),
  // NOTE(review): max_error is 11 here but 2 for every other 16x16 entry in
  // this table -- confirm this looser bound is intentional.
  AV1InvTxfm2dParam(FLIPADST_FLIPADST, TX_16X16, 11, 0.04),
  AV1InvTxfm2dParam(ADST_FLIPADST, TX_16X16, 2, 0.04),
  AV1InvTxfm2dParam(FLIPADST_ADST, TX_16X16, 2, 0.04),
  AV1InvTxfm2dParam(FLIPADST_DCT, TX_32X32, 4, 0.4),
  AV1InvTxfm2dParam(DCT_FLIPADST, TX_32X32, 4, 0.4),
  AV1InvTxfm2dParam(FLIPADST_FLIPADST, TX_32X32, 4, 0.4),
  AV1InvTxfm2dParam(ADST_FLIPADST, TX_32X32, 4, 0.4),
  AV1InvTxfm2dParam(FLIPADST_ADST, TX_32X32, 4, 0.4),
#endif
  AV1InvTxfm2dParam(DCT_DCT, TX_4X4, 2, 0.002),
  AV1InvTxfm2dParam(ADST_DCT, TX_4X4, 2, 0.002),
  AV1InvTxfm2dParam(DCT_ADST, TX_4X4, 2, 0.002),
  AV1InvTxfm2dParam(ADST_ADST, TX_4X4, 2, 0.002),
  AV1InvTxfm2dParam(DCT_DCT, TX_8X8, 2, 0.02),
  AV1InvTxfm2dParam(ADST_DCT, TX_8X8, 2, 0.02),
  AV1InvTxfm2dParam(DCT_ADST, TX_8X8, 2, 0.02),
  AV1InvTxfm2dParam(ADST_ADST, TX_8X8, 2, 0.02),
  AV1InvTxfm2dParam(DCT_DCT, TX_16X16, 2, 0.04),
  AV1InvTxfm2dParam(ADST_DCT, TX_16X16, 2, 0.04),
  AV1InvTxfm2dParam(DCT_ADST, TX_16X16, 2, 0.04),
  AV1InvTxfm2dParam(ADST_ADST, TX_16X16, 2, 0.04),
  AV1InvTxfm2dParam(DCT_DCT, TX_32X32, 4, 0.4),
  AV1InvTxfm2dParam(ADST_DCT, TX_32X32, 4, 0.4),
  AV1InvTxfm2dParam(DCT_ADST, TX_32X32, 4, 0.4),
  AV1InvTxfm2dParam(ADST_ADST, TX_32X32, 4, 0.4)
};

INSTANTIATE_TEST_CASE_P(C, AV1InvTxfm2d,
                        ::testing::ValuesIn(av1_inv_txfm2d_param));

#endif  // CONFIG_HIGHBITDEPTH

}  // namespace
diff --git a/third_party/aom/test/av1_inv_txfm_test.cc b/third_party/aom/test/av1_inv_txfm_test.cc
new file mode 100644
index 000000000..af3fee872
--- /dev/null
+++ b/third_party/aom/test/av1_inv_txfm_test.cc
@@ -0,0 +1,282 @@
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/

#include <math.h>
#include <stdlib.h>
#include <string.h>

#include "third_party/googletest/src/googletest/include/gtest/gtest.h"

#include "./av1_rtcd.h"
#include "./aom_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "av1/common/blockd.h"
#include "av1/common/scan.h"
#include "aom/aom_integer.h"
#include "aom_dsp/inv_txfm.h"

using libaom_test::ACMRandom;

namespace {
const double kInvSqrt2 = 0.707106781186547524400844362104;

// Floating-point reference for the 1-D inverse DCT:
//   out[n] = sum_k c(k) * in[k] * cos(PI * (2n + 1) * k / (2 * size))
// with c(0) = 1/sqrt(2) and c(k) = 1 otherwise. No further normalization is
// applied.
void reference_idct_1d(const double *in, double *out, int size) {
  for (int n = 0; n < size; ++n) {
    out[n] = 0;
    for (int k = 0; k < size; ++k) {
      if (k == 0)
        out[n] += kInvSqrt2 * in[k] * cos(PI * (2 * n + 1) * k / (2 * size));
      else
        out[n] += in[k] * cos(PI * (2 * n + 1) * k / (2 * size));
    }
  }
}

typedef void (*IdctFuncRef)(const double *in, double *out, int size);
typedef void (*IdctFunc)(const tran_low_t *in, tran_low_t *out);

// Shared accuracy check for 1-D inverse transforms.
// NOTE(review): the members are named fwd_txfm_ / fwd_txfm_ref_ although the
// instantiation below stores inverse-DCT functions in them.
class TransTestBase {
 public:
  virtual ~TransTestBase() {}

 protected:
  // Feeds 5000 random vectors to the transform under test and to the
  // floating-point reference and requires each output sample to be within
  // max_error_ of the rounded reference value.
  void RunInvAccuracyCheck() {
    tran_low_t *input = new tran_low_t[txfm_size_];
    tran_low_t *output = new tran_low_t[txfm_size_];
    double *ref_input = new double[txfm_size_];
    double *ref_output = new double[txfm_size_];

    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 5000;
    for (int ti = 0; ti < count_test_block; ++ti) {
      for (int ni = 0; ni < txfm_size_; ++ni) {
        input[ni] = rnd.Rand8() - rnd.Rand8();
        ref_input[ni] = static_cast<double>(input[ni]);
      }

      fwd_txfm_(input, output);
      fwd_txfm_ref_(ref_input, ref_output, txfm_size_);

      for (int ni = 0; ni < txfm_size_; ++ni) {
        EXPECT_LE(
            abs(output[ni] - static_cast<tran_low_t>(round(ref_output[ni]))),
            max_error_);
      }
    }

    delete[] input;
    delete[] output;
    delete[] ref_input;
    delete[] ref_output;
  }

  double max_error_;          // per-sample tolerance
  int txfm_size_;             // transform length
  IdctFunc fwd_txfm_;         // transform under test
  IdctFuncRef fwd_txfm_ref_;  // floating-point reference
};

// Parameters: transform under test, reference, transform length, max error.
typedef std::tr1::tuple<IdctFunc, IdctFuncRef, int, int> IdctParam;
class AV1InvTxfm : public TransTestBase,
                   public ::testing::TestWithParam<IdctParam> {
 public:
  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    fwd_txfm_ref_ = GET_PARAM(1);
    txfm_size_ = GET_PARAM(2);
    max_error_ = GET_PARAM(3);
  }
  virtual void TearDown() {}
};

TEST_P(AV1InvTxfm, RunInvAccuracyCheck) { RunInvAccuracyCheck(); }

INSTANTIATE_TEST_CASE_P(
    C, AV1InvTxfm,
    ::testing::Values(IdctParam(&aom_idct4_c, &reference_idct_1d, 4, 1),
                      IdctParam(&aom_idct8_c, &reference_idct_1d, 8, 2),
                      IdctParam(&aom_idct16_c, &reference_idct_1d, 16, 4),
                      IdctParam(&aom_idct32_c, &reference_idct_1d, 32, 6)));

#if CONFIG_AV1_ENCODER
typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
// Parameters: forward transform, full inverse, partial inverse, transform
// size, and the number of leading scan positions that may be non-zero.
typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, InvTxfmFunc, TX_SIZE, int>
    PartialInvTxfmParam;
#if !CONFIG_ADAPT_SCAN
const int kMaxNumCoeffs = 1024;
#endif
// Checks that a partial inverse transform gives the same pixels as the full
// inverse transform when only the first last_nonzero_ scan positions are
// populated.
class AV1PartialIDctTest
    : public ::testing::TestWithParam<PartialInvTxfmParam> {
 public:
  virtual ~AV1PartialIDctTest() {}
  virtual void SetUp() {
    ftxfm_ = GET_PARAM(0);
    full_itxfm_ = GET_PARAM(1);
    partial_itxfm_ = GET_PARAM(2);
    tx_size_ = GET_PARAM(3);
    last_nonzero_ = GET_PARAM(4);
  }

  virtual void TearDown() { libaom_test::ClearSystemState(); }

 protected:
  int last_nonzero_;
  TX_SIZE tx_size_;
  FwdTxfmFunc ftxfm_;
  InvTxfmFunc full_itxfm_;
  InvTxfmFunc partial_itxfm_;
};

#if !CONFIG_ADAPT_SCAN
// Builds coefficients by forward-transforming +/-255 input blocks and
// quantizing the first last_nonzero_ positions, then compares the full and
// partial inverse transforms on them.
TEST_P(AV1PartialIDctTest, RunQuantCheck) {
  int size;
  switch (tx_size_) {
    case TX_4X4: size = 4; break;
    case TX_8X8: size = 8; break;
    case TX_16X16: size = 16; break;
    case TX_32X32: size = 32; break;
    default: FAIL() << "Wrong Size!"; break;
  }
  DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
  DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
  DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);

  const int count_test_block = 1000;
  const int block_size = size * size;

  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);

  int max_error = 0;
  for (int m = 0; m < count_test_block; ++m) {
    // clear out destination buffer
    memset(dst1, 0, sizeof(*dst1) * block_size);
    memset(dst2, 0, sizeof(*dst2) * block_size);
    memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size);
    memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size);

    // NOTE(review): the generator is re-created from DeterministicSeed() on
    // every outer iteration, so presumably each outer iteration sees the
    // same input sequence -- confirm whether that is intended.
    ACMRandom rnd(ACMRandom::DeterministicSeed());

    // NOTE(review): each pass of this inner loop overwrites the same
    // test_coef_block1 entries, and the inverse transforms only run after
    // the loop, so only the final pass's coefficients appear to be checked.
    for (int n = 0; n < count_test_block; ++n) {
      // Initialize a test block with input range [-255, 255].
      if (n == 0) {
        for (int j = 0; j < block_size; ++j) input_extreme_block[j] = 255;
      } else if (n == 1) {
        for (int j = 0; j < block_size; ++j) input_extreme_block[j] = -255;
      } else {
        for (int j = 0; j < block_size; ++j) {
          input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
        }
      }

      ftxfm_(input_extreme_block, output_ref_block, size);

      // quantization with maximum allowed step sizes
      test_coef_block1[0] = (output_ref_block[0] / 1336) * 1336;
      for (int j = 1; j < last_nonzero_; ++j)
        test_coef_block1[get_scan((const AV1_COMMON *)NULL, tx_size_, DCT_DCT,
                                  0)
                             ->scan[j]] = (output_ref_block[j] / 1828) * 1828;
    }

    // Both inverse transforms read the same coefficient block here.
    ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
    ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block1, dst2, size));

    // Track the largest squared pixel difference seen so far.
    for (int j = 0; j < block_size; ++j) {
      const int diff = dst1[j] - dst2[j];
      const int error = diff * diff;
      if (max_error < error) max_error = error;
    }
  }

  EXPECT_EQ(0, max_error)
      << "Error: partial inverse transform produces different results";
}

// Fills the first last_nonzero_ scan positions with random coefficients
// under a shrinking energy budget and compares the full and partial inverse
// transforms on identical copies of the block.
TEST_P(AV1PartialIDctTest, ResultsMatch) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  int size;
  switch (tx_size_) {
    case TX_4X4: size = 4; break;
    case TX_8X8: size = 8; break;
    case TX_16X16: size = 16; break;
    case TX_32X32: size = 32; break;
    default: FAIL() << "Wrong Size!"; break;
  }
  DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
  DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
  DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
  const int count_test_block = 1000;
  const int max_coeff = 32766 / 4;
  const int block_size = size * size;
  int max_error = 0;
  for (int i = 0; i < count_test_block; ++i) {
    // clear out destination buffer
    memset(dst1, 0, sizeof(*dst1) * block_size);
    memset(dst2, 0, sizeof(*dst2) * block_size);
    memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size);
    memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size);
    // Remaining squared-magnitude budget for this block; each coefficient
    // drawn below is scaled by the square root of what is left.
    int max_energy_leftover = max_coeff * max_coeff;
    for (int j = 0; j < last_nonzero_; ++j) {
      int16_t coef = static_cast<int16_t>(sqrt(1.0 * max_energy_leftover) *
                                          (rnd.Rand16() - 32768) / 65536);
      max_energy_leftover -= coef * coef;
      // Budget exhausted: zero this coefficient and stop spending.
      if (max_energy_leftover < 0) {
        max_energy_leftover = 0;
        coef = 0;
      }
      test_coef_block1[get_scan((const AV1_COMMON *)NULL, tx_size_, DCT_DCT, 0)
                           ->scan[j]] = coef;
    }

    memcpy(test_coef_block2, test_coef_block1,
           sizeof(*test_coef_block2) * block_size);

    ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
    ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block2, dst2, size));

    // Track the largest squared pixel difference seen so far.
    for (int j = 0; j < block_size; ++j) {
      const int diff = dst1[j] - dst2[j];
      const int error = diff * diff;
      if (max_error < error) max_error = error;
    }
  }

  EXPECT_EQ(0, max_error)
      << "Error: partial inverse transform produces different results";
}
#endif
using std::tr1::make_tuple;

// Tuple fields: forward transform, full inverse, partial inverse, size,
// last_nonzero.
INSTANTIATE_TEST_CASE_P(
    C, AV1PartialIDctTest,
    ::testing::Values(make_tuple(&aom_fdct32x32_c, &aom_idct32x32_1024_add_c,
                                 &aom_idct32x32_34_add_c, TX_32X32, 34),
                      make_tuple(&aom_fdct32x32_c, &aom_idct32x32_1024_add_c,
                                 &aom_idct32x32_1_add_c, TX_32X32, 1),
                      make_tuple(&aom_fdct16x16_c, &aom_idct16x16_256_add_c,
                                 &aom_idct16x16_10_add_c, TX_16X16, 10),
                      make_tuple(&aom_fdct16x16_c, &aom_idct16x16_256_add_c,
                                 &aom_idct16x16_1_add_c, TX_16X16, 1),
                      make_tuple(&aom_fdct8x8_c, &aom_idct8x8_64_add_c,
                                 &aom_idct8x8_12_add_c, TX_8X8, 12),
                      make_tuple(&aom_fdct8x8_c, &aom_idct8x8_64_add_c,
                                 &aom_idct8x8_1_add_c, TX_8X8, 1),
                      make_tuple(&aom_fdct4x4_c, &aom_idct4x4_16_add_c,
                                 &aom_idct4x4_1_add_c, TX_4X4, 1)));
#endif  // CONFIG_AV1_ENCODER
}  // namespace
diff --git a/third_party/aom/test/av1_quantize_test.cc b/third_party/aom/test/av1_quantize_test.cc
new file mode 100644
index 000000000..b5d1531f5
--- /dev/null
+++ b/third_party/aom/test/av1_quantize_test.cc
@@ -0,0 +1,211 @@
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0.
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ +#include <stdlib.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_config.h" +#include "./av1_rtcd.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "av1/common/scan.h" + +namespace { + +typedef void (*QuantizeFpFunc)( + const tran_low_t *coeff_ptr, intptr_t count, int skip_block, + const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan, int log_scale); + +struct QuantizeFuncParams { + QuantizeFuncParams(QuantizeFpFunc qF = NULL, QuantizeFpFunc qRefF = NULL, + int count = 16) + : qFunc(qF), qFuncRef(qRefF), coeffCount(count) {} + QuantizeFpFunc qFunc; + QuantizeFpFunc qFuncRef; + int coeffCount; +}; + +using libaom_test::ACMRandom; + +const int numTests = 1000; +const int maxSize = 1024; +const int roundFactorRange = 127; +const int dequantRange = 32768; +const int coeffRange = (1 << 20) - 1; + +class AV1QuantizeTest : public ::testing::TestWithParam<QuantizeFuncParams> { + public: + void RunQuantizeTest() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[maxSize]); + DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]); + DECLARE_ALIGNED(16, int16_t, round_ptr[2]); + DECLARE_ALIGNED(16, int16_t, quant_ptr[2]); + DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]); + DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[maxSize]); + DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[maxSize]); + DECLARE_ALIGNED(16, 
tran_low_t, ref_qcoeff_ptr[maxSize]); + DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[maxSize]); + DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]); + uint16_t eob; + uint16_t ref_eob; + int err_count_total = 0; + int first_failure = -1; + int skip_block = 0; + int count = params_.coeffCount; + const TX_SIZE txSize = getTxSize(count); + int log_scale = (txSize == TX_32X32); + QuantizeFpFunc quanFunc = params_.qFunc; + QuantizeFpFunc quanFuncRef = params_.qFuncRef; + + const SCAN_ORDER scanOrder = av1_default_scan_orders[txSize]; + for (int i = 0; i < numTests; i++) { + int err_count = 0; + ref_eob = eob = -1; + for (int j = 0; j < count; j++) { + coeff_ptr[j] = rnd(coeffRange); + } + + for (int j = 0; j < 2; j++) { + zbin_ptr[j] = rnd.Rand16(); + quant_shift_ptr[j] = rnd.Rand16(); + // int16_t positive + dequant_ptr[j] = abs(rnd(dequantRange)); + quant_ptr[j] = (1 << 16) / dequant_ptr[j]; + round_ptr[j] = (abs(rnd(roundFactorRange)) * dequant_ptr[j]) >> 7; + } + + quanFuncRef(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr, + quant_shift_ptr, ref_qcoeff_ptr, ref_dqcoeff_ptr, dequant_ptr, + &ref_eob, scanOrder.scan, scanOrder.iscan, log_scale); + + ASM_REGISTER_STATE_CHECK( + quanFunc(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr, + quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, &eob, + scanOrder.scan, scanOrder.iscan, log_scale)); + + for (int j = 0; j < count; ++j) { + err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) | + (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]); + EXPECT_EQ(ref_qcoeff_ptr[j], qcoeff_ptr[j]) << "qcoeff error: i = " << i + << " j = " << j << "\n"; + EXPECT_EQ(ref_dqcoeff_ptr[j], dqcoeff_ptr[j]) + << "dqcoeff error: i = " << i << " j = " << j << "\n"; + } + EXPECT_EQ(ref_eob, eob) << "eob error: " + << "i = " << i << "\n"; + err_count += (ref_eob != eob); + if (err_count && !err_count_total) { + first_failure = i; + } + err_count_total += err_count; + } + EXPECT_EQ(0, err_count_total) + << "Error: Quantization Test, C 
output doesn't match SSE2 output. " + << "First failed at test case " << first_failure; + } + + void RunEobTest() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[maxSize]); + DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]); + DECLARE_ALIGNED(16, int16_t, round_ptr[2]); + DECLARE_ALIGNED(16, int16_t, quant_ptr[2]); + DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]); + DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[maxSize]); + DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[maxSize]); + DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[maxSize]); + DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[maxSize]); + DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]); + uint16_t eob; + uint16_t ref_eob; + int skip_block = 0; + int count = params_.coeffCount; + const TX_SIZE txSize = getTxSize(count); + int log_scale = (txSize == TX_32X32); + QuantizeFpFunc quanFunc = params_.qFunc; + QuantizeFpFunc quanFuncRef = params_.qFuncRef; + const SCAN_ORDER scanOrder = av1_default_scan_orders[txSize]; + + for (int i = 0; i < numTests; i++) { + ref_eob = eob = -1; + for (int j = 0; j < count; j++) { + coeff_ptr[j] = 0; + } + + coeff_ptr[rnd(count)] = rnd(coeffRange); + coeff_ptr[rnd(count)] = rnd(coeffRange); + coeff_ptr[rnd(count)] = rnd(coeffRange); + + for (int j = 0; j < 2; j++) { + zbin_ptr[j] = rnd.Rand16(); + quant_shift_ptr[j] = rnd.Rand16(); + // int16_t positive + dequant_ptr[j] = abs(rnd(dequantRange)); + quant_ptr[j] = (1 << 16) / dequant_ptr[j]; + round_ptr[j] = (abs(rnd(roundFactorRange)) * dequant_ptr[j]) >> 7; + } + + quanFuncRef(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr, + quant_shift_ptr, ref_qcoeff_ptr, ref_dqcoeff_ptr, dequant_ptr, + &ref_eob, scanOrder.scan, scanOrder.iscan, log_scale); + + ASM_REGISTER_STATE_CHECK( + quanFunc(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr, + quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, &eob, + scanOrder.scan, scanOrder.iscan, log_scale)); + EXPECT_EQ(ref_eob, eob) 
<< "eob error: " + << "i = " << i << "\n"; + } + } + + virtual void SetUp() { params_ = GetParam(); } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + virtual ~AV1QuantizeTest() {} + + private: + TX_SIZE getTxSize(int count) { + switch (count) { + case 16: return TX_4X4; + case 64: return TX_8X8; + case 256: return TX_16X16; + case 1024: return TX_32X32; + default: return TX_4X4; + } + } + + QuantizeFuncParams params_; +}; + +TEST_P(AV1QuantizeTest, BitExactCheck) { RunQuantizeTest(); } +TEST_P(AV1QuantizeTest, EobVerify) { RunEobTest(); } + +#if HAVE_SSE4_1 +#if !CONFIG_AOM_QM +INSTANTIATE_TEST_CASE_P( + SSE4_1, AV1QuantizeTest, + ::testing::Values(QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, + &av1_highbd_quantize_fp_c, 16), + QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, + &av1_highbd_quantize_fp_c, 64), + QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, + &av1_highbd_quantize_fp_c, 256), + QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, + &av1_highbd_quantize_fp_c, 1024))); +#endif // !CONFIG_AOM_QM +#endif // HAVE_SSE4_1 +} // namespace diff --git a/third_party/aom/test/av1_txfm_test.cc b/third_party/aom/test/av1_txfm_test.cc new file mode 100644 index 000000000..1e473b304 --- /dev/null +++ b/third_party/aom/test/av1_txfm_test.cc @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <stdio.h> +#include "test/av1_txfm_test.h" + +namespace libaom_test { + +int get_txfm1d_size(TX_SIZE tx_size) { return tx_size_wide[tx_size]; } + +void get_txfm1d_type(TX_TYPE txfm2d_type, TYPE_TXFM *type0, TYPE_TXFM *type1) { + switch (txfm2d_type) { + case DCT_DCT: + *type0 = TYPE_DCT; + *type1 = TYPE_DCT; + break; + case ADST_DCT: + *type0 = TYPE_ADST; + *type1 = TYPE_DCT; + break; + case DCT_ADST: + *type0 = TYPE_DCT; + *type1 = TYPE_ADST; + break; + case ADST_ADST: + *type0 = TYPE_ADST; + *type1 = TYPE_ADST; + break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + *type0 = TYPE_ADST; + *type1 = TYPE_DCT; + break; + case DCT_FLIPADST: + *type0 = TYPE_DCT; + *type1 = TYPE_ADST; + break; + case FLIPADST_FLIPADST: + *type0 = TYPE_ADST; + *type1 = TYPE_ADST; + break; + case ADST_FLIPADST: + *type0 = TYPE_ADST; + *type1 = TYPE_ADST; + break; + case FLIPADST_ADST: + *type0 = TYPE_ADST; + *type1 = TYPE_ADST; + break; +#endif // CONFIG_EXT_TX + default: + *type0 = TYPE_DCT; + *type1 = TYPE_DCT; + assert(0); + break; + } +} + +double invSqrt2 = 1 / pow(2, 0.5); + +void reference_dct_1d(const double *in, double *out, int size) { + for (int k = 0; k < size; ++k) { + out[k] = 0; + for (int n = 0; n < size; ++n) { + out[k] += in[n] * cos(M_PI * (2 * n + 1) * k / (2 * size)); + } + if (k == 0) out[k] = out[k] * invSqrt2; + } +} + +void reference_adst_1d(const double *in, double *out, int size) { + for (int k = 0; k < size; ++k) { + out[k] = 0; + for (int n = 0; n < size; ++n) { + out[k] += in[n] * sin(M_PI * (2 * n + 1) * (2 * k + 1) / (4 * size)); + } + } +} + +void reference_hybrid_1d(double *in, double *out, int size, int type) { + if (type == TYPE_DCT) + reference_dct_1d(in, out, size); + else + reference_adst_1d(in, out, size); +} + +void reference_hybrid_2d(double *in, double *out, int size, int type0, + int type1) { + double *tempOut = new double[size * size]; + + for (int r = 0; r < size; r++) { + // out ->tempOut + for (int c = 0; c < size; c++) { + 
tempOut[r * size + c] = in[c * size + r]; + } + } + + // dct each row: in -> out + for (int r = 0; r < size; r++) { + reference_hybrid_1d(tempOut + r * size, out + r * size, size, type0); + } + + for (int r = 0; r < size; r++) { + // out ->tempOut + for (int c = 0; c < size; c++) { + tempOut[r * size + c] = out[c * size + r]; + } + } + + for (int r = 0; r < size; r++) { + reference_hybrid_1d(tempOut + r * size, out + r * size, size, type1); + } + delete[] tempOut; +} + +template <typename Type> +void fliplr(Type *dest, int stride, int length) { + int i, j; + for (i = 0; i < length; ++i) { + for (j = 0; j < length / 2; ++j) { + const Type tmp = dest[i * stride + j]; + dest[i * stride + j] = dest[i * stride + length - 1 - j]; + dest[i * stride + length - 1 - j] = tmp; + } + } +} + +template <typename Type> +void flipud(Type *dest, int stride, int length) { + int i, j; + for (j = 0; j < length; ++j) { + for (i = 0; i < length / 2; ++i) { + const Type tmp = dest[i * stride + j]; + dest[i * stride + j] = dest[(length - 1 - i) * stride + j]; + dest[(length - 1 - i) * stride + j] = tmp; + } + } +} + +template <typename Type> +void fliplrud(Type *dest, int stride, int length) { + int i, j; + for (i = 0; i < length / 2; ++i) { + for (j = 0; j < length; ++j) { + const Type tmp = dest[i * stride + j]; + dest[i * stride + j] = dest[(length - 1 - i) * stride + length - 1 - j]; + dest[(length - 1 - i) * stride + length - 1 - j] = tmp; + } + } +} + +template void fliplr<double>(double *dest, int stride, int length); +template void flipud<double>(double *dest, int stride, int length); +template void fliplrud<double>(double *dest, int stride, int length); + +} // namespace libaom_test diff --git a/third_party/aom/test/av1_txfm_test.h b/third_party/aom/test/av1_txfm_test.h new file mode 100644 index 000000000..70f971d09 --- /dev/null +++ b/third_party/aom/test/av1_txfm_test.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#ifndef AV1_TXFM_TEST_H_ +#define AV1_TXFM_TEST_H_ + +#include <stdio.h> +#include <stdlib.h> +#ifdef _MSC_VER +#define _USE_MATH_DEFINES +#endif +#include <math.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/acm_random.h" +#include "av1/common/enums.h" +#include "av1/common/av1_txfm.h" +#include "./av1_rtcd.h" + +namespace libaom_test { +typedef enum { + TYPE_DCT = 0, + TYPE_ADST, + TYPE_IDCT, + TYPE_IADST, + TYPE_LAST +} TYPE_TXFM; + +int get_txfm1d_size(TX_SIZE tx_size); + +void get_txfm1d_type(TX_TYPE txfm2d_type, TYPE_TXFM *type0, TYPE_TXFM *type1); + +void reference_dct_1d(const double *in, double *out, int size); + +void reference_adst_1d(const double *in, double *out, int size); + +void reference_hybrid_1d(double *in, double *out, int size, int type); + +void reference_hybrid_2d(double *in, double *out, int size, int type0, + int type1); +template <typename Type1, typename Type2> +static double compute_avg_abs_error(const Type1 *a, const Type2 *b, + const int size) { + double error = 0; + for (int i = 0; i < size; i++) { + error += fabs(static_cast<double>(a[i]) - static_cast<double>(b[i])); + } + error = error / size; + return error; +} + +template <typename Type> +void fliplr(Type *dest, int stride, int length); + +template <typename Type> +void flipud(Type *dest, int stride, int length); + +template <typename Type> +void fliplrud(Type *dest, int stride, int length); + +typedef void (*TxfmFunc)(const int32_t *in, int32_t *out, 
const int8_t *cos_bit, + const int8_t *range_bit); + +typedef void (*Fwd_Txfm2d_Func)(const int16_t *, int32_t *, int, int, int); +typedef void (*Inv_Txfm2d_Func)(const int32_t *, uint16_t *, int, int, int); + +static const int bd = 10; +static const int input_base = (1 << bd); + +#if CONFIG_HIGHBITDEPTH +#if CONFIG_AV1_ENCODER +static const Fwd_Txfm2d_Func fwd_txfm_func_ls[TX_SIZES] = { +#if CONFIG_CB4X4 + NULL, +#endif + av1_fwd_txfm2d_4x4_c, av1_fwd_txfm2d_8x8_c, av1_fwd_txfm2d_16x16_c, + av1_fwd_txfm2d_32x32_c +}; +#endif + +static const Inv_Txfm2d_Func inv_txfm_func_ls[TX_SIZES] = { +#if CONFIG_CB4X4 + NULL, +#endif + av1_inv_txfm2d_add_4x4_c, av1_inv_txfm2d_add_8x8_c, + av1_inv_txfm2d_add_16x16_c, av1_inv_txfm2d_add_32x32_c +}; +#endif // CONFIG_HIGHBITDEPTH + +} // namespace libaom_test +#endif // AV1_TXFM_TEST_H_ diff --git a/third_party/aom/test/av1_wedge_utils_test.cc b/third_party/aom/test/av1_wedge_utils_test.cc new file mode 100644 index 000000000..d4b560fc1 --- /dev/null +++ b/third_party/aom/test/av1_wedge_utils_test.cc @@ -0,0 +1,383 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_config.h" + +#include "./aom_dsp_rtcd.h" +#include "./av1_rtcd.h" + +#include "aom_dsp/aom_dsp_common.h" + +#include "av1/common/enums.h" + +#include "test/acm_random.h" +#include "test/function_equivalence_test.h" +#include "test/register_state_check.h" + +#define WEDGE_WEIGHT_BITS 6 +#define MAX_MASK_VALUE (1 << (WEDGE_WEIGHT_BITS)) + +using libaom_test::ACMRandom; +using libaom_test::FunctionEquivalenceTest; + +namespace { + +static const int16_t kInt13Max = (1 << 12) - 1; + +////////////////////////////////////////////////////////////////////////////// +// av1_wedge_sse_from_residuals - functionality +////////////////////////////////////////////////////////////////////////////// + +class WedgeUtilsSSEFuncTest : public testing::Test { + protected: + WedgeUtilsSSEFuncTest() : rng_(ACMRandom::DeterministicSeed()) {} + + static const int kIterations = 1000; + + ACMRandom rng_; +}; + +static void equiv_blend_residuals(int16_t *r, const int16_t *r0, + const int16_t *r1, const uint8_t *m, int N) { + for (int i = 0; i < N; i++) { + const int32_t m0 = m[i]; + const int32_t m1 = MAX_MASK_VALUE - m0; + const int16_t R = m0 * r0[i] + m1 * r1[i]; + // Note that this rounding is designed to match the result + // you would get when actually blending the 2 predictors and computing + // the residuals. 
+ r[i] = ROUND_POWER_OF_TWO(R - 1, WEDGE_WEIGHT_BITS); + } +} + +static uint64_t equiv_sse_from_residuals(const int16_t *r0, const int16_t *r1, + const uint8_t *m, int N) { + uint64_t acc = 0; + for (int i = 0; i < N; i++) { + const int32_t m0 = m[i]; + const int32_t m1 = MAX_MASK_VALUE - m0; + const int16_t R = m0 * r0[i] + m1 * r1[i]; + const int32_t r = ROUND_POWER_OF_TWO(R - 1, WEDGE_WEIGHT_BITS); + acc += r * r; + } + return acc; +} + +TEST_F(WedgeUtilsSSEFuncTest, ResidualBlendingEquiv) { + DECLARE_ALIGNED(32, uint8_t, s[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint8_t, p0[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint8_t, p1[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint8_t, p[MAX_SB_SQUARE]); + + DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, r_ref[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, r_tst[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + s[i] = rng_.Rand8(); + m[i] = rng_(MAX_MASK_VALUE + 1); + } + + const int w = 1 << (rng_(MAX_SB_SIZE_LOG2 + 1 - 3) + 3); + const int h = 1 << (rng_(MAX_SB_SIZE_LOG2 + 1 - 3) + 3); + const int N = w * h; + + for (int j = 0; j < N; j++) { + p0[j] = clamp(s[j] + rng_(33) - 16, 0, UINT8_MAX); + p1[j] = clamp(s[j] + rng_(33) - 16, 0, UINT8_MAX); + } + + aom_blend_a64_mask(p, w, p0, w, p1, w, m, w, h, w, 0, 0); + + aom_subtract_block(h, w, r0, w, s, w, p0, w); + aom_subtract_block(h, w, r1, w, s, w, p1, w); + + aom_subtract_block(h, w, r_ref, w, s, w, p, w); + equiv_blend_residuals(r_tst, r0, r1, m, N); + + for (int i = 0; i < N; ++i) ASSERT_EQ(r_ref[i], r_tst[i]); + + uint64_t ref_sse = aom_sum_squares_i16(r_ref, N); + uint64_t tst_sse = equiv_sse_from_residuals(r0, r1, m, N); + + ASSERT_EQ(ref_sse, tst_sse); + } +} + +static uint64_t sse_from_residuals(const int16_t *r0, const int16_t *r1, + const uint8_t 
*m, int N) { + uint64_t acc = 0; + for (int i = 0; i < N; i++) { + const int32_t m0 = m[i]; + const int32_t m1 = MAX_MASK_VALUE - m0; + const int32_t r = m0 * r0[i] + m1 * r1[i]; + acc += r * r; + } + return ROUND_POWER_OF_TWO(acc, 2 * WEDGE_WEIGHT_BITS); +} + +TEST_F(WedgeUtilsSSEFuncTest, ResidualBlendingMethod) { + DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, d[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + r1[i] = rng_(2 * INT8_MAX - 2 * INT8_MIN + 1) + 2 * INT8_MIN; + d[i] = rng_(2 * INT8_MAX - 2 * INT8_MIN + 1) + 2 * INT8_MIN; + m[i] = rng_(MAX_MASK_VALUE + 1); + } + + const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1); + + for (int i = 0; i < N; i++) r0[i] = r1[i] + d[i]; + + const uint64_t ref_res = sse_from_residuals(r0, r1, m, N); + const uint64_t tst_res = av1_wedge_sse_from_residuals(r1, d, m, N); + + ASSERT_EQ(ref_res, tst_res); + } +} + +////////////////////////////////////////////////////////////////////////////// +// av1_wedge_sse_from_residuals - optimizations +////////////////////////////////////////////////////////////////////////////// + +typedef uint64_t (*FSSE)(const int16_t *r1, const int16_t *d, const uint8_t *m, + int N); +typedef libaom_test::FuncParam<FSSE> TestFuncsFSSE; + +class WedgeUtilsSSEOptTest : public FunctionEquivalenceTest<FSSE> { + protected: + static const int kIterations = 10000; +}; + +TEST_P(WedgeUtilsSSEOptTest, RandomValues) { + DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, d[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + r1[i] = rng_(2 * kInt13Max + 1) - kInt13Max; + d[i] = rng_(2 * kInt13Max + 1) - kInt13Max; + m[i] = 
rng_(MAX_MASK_VALUE + 1); + } + + const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1); + + const uint64_t ref_res = params_.ref_func(r1, d, m, N); + uint64_t tst_res; + ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(r1, d, m, N)); + + ASSERT_EQ(ref_res, tst_res); + } +} + +TEST_P(WedgeUtilsSSEOptTest, ExtremeValues) { + DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, d[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + if (rng_(2)) { + for (int i = 0; i < MAX_SB_SQUARE; ++i) r1[i] = kInt13Max; + } else { + for (int i = 0; i < MAX_SB_SQUARE; ++i) r1[i] = -kInt13Max; + } + + if (rng_(2)) { + for (int i = 0; i < MAX_SB_SQUARE; ++i) d[i] = kInt13Max; + } else { + for (int i = 0; i < MAX_SB_SQUARE; ++i) d[i] = -kInt13Max; + } + + for (int i = 0; i < MAX_SB_SQUARE; ++i) m[i] = MAX_MASK_VALUE; + + const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1); + + const uint64_t ref_res = params_.ref_func(r1, d, m, N); + uint64_t tst_res; + ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(r1, d, m, N)); + + ASSERT_EQ(ref_res, tst_res); + } +} + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P( + SSE2, WedgeUtilsSSEOptTest, + ::testing::Values(TestFuncsFSSE(av1_wedge_sse_from_residuals_c, + av1_wedge_sse_from_residuals_sse2))); + +#endif // HAVE_SSE2 + +////////////////////////////////////////////////////////////////////////////// +// av1_wedge_sign_from_residuals +////////////////////////////////////////////////////////////////////////////// + +typedef int (*FSign)(const int16_t *ds, const uint8_t *m, int N, int64_t limit); +typedef libaom_test::FuncParam<FSign> TestFuncsFSign; + +class WedgeUtilsSignOptTest : public FunctionEquivalenceTest<FSign> { + protected: + static const int kIterations = 10000; + static const int kMaxSize = 8196; // Size limited by SIMD implementation. 
+}; + +TEST_P(WedgeUtilsSignOptTest, RandomValues) { + DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, ds[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + r0[i] = rng_(2 * kInt13Max + 1) - kInt13Max; + r1[i] = rng_(2 * kInt13Max + 1) - kInt13Max; + m[i] = rng_(MAX_MASK_VALUE + 1); + } + + const int maxN = AOMMIN(kMaxSize, MAX_SB_SQUARE); + const int N = 64 * (rng_(maxN / 64 - 1) + 1); + + int64_t limit; + limit = (int64_t)aom_sum_squares_i16(r0, N); + limit -= (int64_t)aom_sum_squares_i16(r1, N); + limit *= (1 << WEDGE_WEIGHT_BITS) / 2; + + for (int i = 0; i < N; i++) + ds[i] = clamp(r0[i] * r0[i] - r1[i] * r1[i], INT16_MIN, INT16_MAX); + + const int ref_res = params_.ref_func(ds, m, N, limit); + int tst_res; + ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(ds, m, N, limit)); + + ASSERT_EQ(ref_res, tst_res); + } +} + +TEST_P(WedgeUtilsSignOptTest, ExtremeValues) { + DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, ds[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + switch (rng_(4)) { + case 0: + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + r0[i] = 0; + r1[i] = kInt13Max; + } + break; + case 1: + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + r0[i] = kInt13Max; + r1[i] = 0; + } + break; + case 2: + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + r0[i] = 0; + r1[i] = -kInt13Max; + } + break; + default: + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + r0[i] = -kInt13Max; + r1[i] = 0; + } + break; + } + + for (int i = 0; i < MAX_SB_SQUARE; ++i) m[i] = MAX_MASK_VALUE; + + const int maxN = AOMMIN(kMaxSize, MAX_SB_SQUARE); + const int N = 64 * (rng_(maxN / 64 - 1) + 1); + + int64_t 
limit; + limit = (int64_t)aom_sum_squares_i16(r0, N); + limit -= (int64_t)aom_sum_squares_i16(r1, N); + limit *= (1 << WEDGE_WEIGHT_BITS) / 2; + + for (int i = 0; i < N; i++) + ds[i] = clamp(r0[i] * r0[i] - r1[i] * r1[i], INT16_MIN, INT16_MAX); + + const int ref_res = params_.ref_func(ds, m, N, limit); + int tst_res; + ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(ds, m, N, limit)); + + ASSERT_EQ(ref_res, tst_res); + } +} + +#if HAVE_SSE2 + +INSTANTIATE_TEST_CASE_P( + SSE2, WedgeUtilsSignOptTest, + ::testing::Values(TestFuncsFSign(av1_wedge_sign_from_residuals_c, + av1_wedge_sign_from_residuals_sse2))); + +#endif // HAVE_SSE2 + +////////////////////////////////////////////////////////////////////////////// +// av1_wedge_compute_delta_squares +////////////////////////////////////////////////////////////////////////////// + +typedef void (*FDS)(int16_t *d, const int16_t *a, const int16_t *b, int N); +typedef libaom_test::FuncParam<FDS> TestFuncsFDS; + +class WedgeUtilsDeltaSquaresOptTest : public FunctionEquivalenceTest<FDS> { + protected: + static const int kIterations = 10000; +}; + +TEST_P(WedgeUtilsDeltaSquaresOptTest, RandomValues) { + DECLARE_ALIGNED(32, int16_t, a[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, b[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, d_ref[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, d_tst[MAX_SB_SQUARE]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + a[i] = rng_.Rand16(); + b[i] = rng_(2 * INT16_MAX + 1) - INT16_MAX; + } + + const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1); + + memset(&d_ref, INT16_MAX, sizeof(d_ref)); + memset(&d_tst, INT16_MAX, sizeof(d_tst)); + + params_.ref_func(d_ref, a, b, N); + ASM_REGISTER_STATE_CHECK(params_.tst_func(d_tst, a, b, N)); + + for (int i = 0; i < MAX_SB_SQUARE; ++i) ASSERT_EQ(d_ref[i], d_tst[i]); + } +} + +#if HAVE_SSE2 + +INSTANTIATE_TEST_CASE_P( + SSE2, WedgeUtilsDeltaSquaresOptTest, + 
::testing::Values(TestFuncsFDS(av1_wedge_compute_delta_squares_c, + av1_wedge_compute_delta_squares_sse2))); + +#endif // HAVE_SSE2 + +} // namespace diff --git a/third_party/aom/test/avg_test.cc b/third_party/aom/test/avg_test.cc new file mode 100644 index 000000000..b040f6a34 --- /dev/null +++ b/third_party/aom/test/avg_test.cc @@ -0,0 +1,396 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include <limits.h> +#include <stdio.h> +#include <string.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_config.h" +#include "./aom_dsp_rtcd.h" + +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "aom_mem/aom_mem.h" + +using libaom_test::ACMRandom; + +namespace { +class AverageTestBase : public ::testing::Test { + public: + AverageTestBase(int width, int height) : width_(width), height_(height) {} + + static void SetUpTestCase() { + source_data_ = reinterpret_cast<uint8_t *>( + aom_memalign(kDataAlignment, kDataBlockSize)); + } + + static void TearDownTestCase() { + aom_free(source_data_); + source_data_ = NULL; + } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + // Handle blocks up to 4 blocks 64x64 with stride up to 128 + static const int kDataAlignment = 16; + static const int kDataBlockSize = 64 * 128; + + virtual void SetUp() { + source_stride_ = (width_ + 31) & ~31; + rnd_.Reset(ACMRandom::DeterministicSeed()); + 
} + + // Sum Pixels + static unsigned int ReferenceAverage8x8(const uint8_t *source, int pitch) { + unsigned int average = 0; + for (int h = 0; h < 8; ++h) + for (int w = 0; w < 8; ++w) average += source[h * pitch + w]; + return ((average + 32) >> 6); + } + + static unsigned int ReferenceAverage4x4(const uint8_t *source, int pitch) { + unsigned int average = 0; + for (int h = 0; h < 4; ++h) + for (int w = 0; w < 4; ++w) average += source[h * pitch + w]; + return ((average + 8) >> 4); + } + + void FillConstant(uint8_t fill_constant) { + for (int i = 0; i < width_ * height_; ++i) { + source_data_[i] = fill_constant; + } + } + + void FillRandom() { + for (int i = 0; i < width_ * height_; ++i) { + source_data_[i] = rnd_.Rand8(); + } + } + + int width_, height_; + static uint8_t *source_data_; + int source_stride_; + + ACMRandom rnd_; +}; +typedef unsigned int (*AverageFunction)(const uint8_t *s, int pitch); + +typedef std::tr1::tuple<int, int, int, int, AverageFunction> AvgFunc; + +class AverageTest : public AverageTestBase, + public ::testing::WithParamInterface<AvgFunc> { + public: + AverageTest() : AverageTestBase(GET_PARAM(0), GET_PARAM(1)) {} + + protected: + void CheckAverages() { + const int block_size = GET_PARAM(3); + unsigned int expected = 0; + if (block_size == 8) { + expected = + ReferenceAverage8x8(source_data_ + GET_PARAM(2), source_stride_); + } else if (block_size == 4) { + expected = + ReferenceAverage4x4(source_data_ + GET_PARAM(2), source_stride_); + } + + ASM_REGISTER_STATE_CHECK( + GET_PARAM(4)(source_data_ + GET_PARAM(2), source_stride_)); + unsigned int actual = + GET_PARAM(4)(source_data_ + GET_PARAM(2), source_stride_); + + EXPECT_EQ(expected, actual); + } +}; + +typedef void (*IntProRowFunc)(int16_t hbuf[16], uint8_t const *ref, + const int ref_stride, const int height); + +typedef std::tr1::tuple<int, IntProRowFunc, IntProRowFunc> IntProRowParam; + +class IntProRowTest : public AverageTestBase, + public 
::testing::WithParamInterface<IntProRowParam> { + public: + IntProRowTest() + : AverageTestBase(16, GET_PARAM(0)), hbuf_asm_(NULL), hbuf_c_(NULL) { + asm_func_ = GET_PARAM(1); + c_func_ = GET_PARAM(2); + } + + protected: + virtual void SetUp() { + hbuf_asm_ = reinterpret_cast<int16_t *>( + aom_memalign(kDataAlignment, sizeof(*hbuf_asm_) * 16)); + hbuf_c_ = reinterpret_cast<int16_t *>( + aom_memalign(kDataAlignment, sizeof(*hbuf_c_) * 16)); + } + + virtual void TearDown() { + aom_free(hbuf_c_); + hbuf_c_ = NULL; + aom_free(hbuf_asm_); + hbuf_asm_ = NULL; + } + + void RunComparison() { + ASM_REGISTER_STATE_CHECK(c_func_(hbuf_c_, source_data_, 0, height_)); + ASM_REGISTER_STATE_CHECK(asm_func_(hbuf_asm_, source_data_, 0, height_)); + EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * 16)) + << "Output mismatch"; + } + + private: + IntProRowFunc asm_func_; + IntProRowFunc c_func_; + int16_t *hbuf_asm_; + int16_t *hbuf_c_; +}; + +typedef int16_t (*IntProColFunc)(uint8_t const *ref, const int width); + +typedef std::tr1::tuple<int, IntProColFunc, IntProColFunc> IntProColParam; + +class IntProColTest : public AverageTestBase, + public ::testing::WithParamInterface<IntProColParam> { + public: + IntProColTest() : AverageTestBase(GET_PARAM(0), 1), sum_asm_(0), sum_c_(0) { + asm_func_ = GET_PARAM(1); + c_func_ = GET_PARAM(2); + } + + protected: + void RunComparison() { + ASM_REGISTER_STATE_CHECK(sum_c_ = c_func_(source_data_, width_)); + ASM_REGISTER_STATE_CHECK(sum_asm_ = asm_func_(source_data_, width_)); + EXPECT_EQ(sum_c_, sum_asm_) << "Output mismatch"; + } + + private: + IntProColFunc asm_func_; + IntProColFunc c_func_; + int16_t sum_asm_; + int16_t sum_c_; +}; + +typedef int (*SatdFunc)(const int16_t *coeffs, int length); +typedef std::tr1::tuple<int, SatdFunc> SatdTestParam; + +class SatdTest : public ::testing::Test, + public ::testing::WithParamInterface<SatdTestParam> { + protected: + virtual void SetUp() { + satd_size_ = GET_PARAM(0); + satd_func_ = 
GET_PARAM(1); + rnd_.Reset(ACMRandom::DeterministicSeed()); + src_ = reinterpret_cast<int16_t *>( + aom_memalign(16, sizeof(*src_) * satd_size_)); + ASSERT_TRUE(src_ != NULL); + } + + virtual void TearDown() { + libaom_test::ClearSystemState(); + aom_free(src_); + } + + void FillConstant(const int16_t val) { + for (int i = 0; i < satd_size_; ++i) src_[i] = val; + } + + void FillRandom() { + for (int i = 0; i < satd_size_; ++i) src_[i] = rnd_.Rand16(); + } + + void Check(int expected) { + int total; + ASM_REGISTER_STATE_CHECK(total = satd_func_(src_, satd_size_)); + EXPECT_EQ(expected, total); + } + + int satd_size_; + + private: + int16_t *src_; + SatdFunc satd_func_; + ACMRandom rnd_; +}; + +uint8_t *AverageTestBase::source_data_ = NULL; + +TEST_P(AverageTest, MinValue) { + FillConstant(0); + CheckAverages(); +} + +TEST_P(AverageTest, MaxValue) { + FillConstant(255); + CheckAverages(); +} + +TEST_P(AverageTest, Random) { + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches. 
+ for (int i = 0; i < 1000; i++) { + FillRandom(); + CheckAverages(); + } +} + +TEST_P(IntProRowTest, MinValue) { + FillConstant(0); + RunComparison(); +} + +TEST_P(IntProRowTest, MaxValue) { + FillConstant(255); + RunComparison(); +} + +TEST_P(IntProRowTest, Random) { + FillRandom(); + RunComparison(); +} + +TEST_P(IntProColTest, MinValue) { + FillConstant(0); + RunComparison(); +} + +TEST_P(IntProColTest, MaxValue) { + FillConstant(255); + RunComparison(); +} + +TEST_P(IntProColTest, Random) { + FillRandom(); + RunComparison(); +} + +TEST_P(SatdTest, MinValue) { + const int kMin = -32640; + const int expected = -kMin * satd_size_; + FillConstant(kMin); + Check(expected); +} + +TEST_P(SatdTest, MaxValue) { + const int kMax = 32640; + const int expected = kMax * satd_size_; + FillConstant(kMax); + Check(expected); +} + +TEST_P(SatdTest, Random) { + int expected; + switch (satd_size_) { + case 16: expected = 205298; break; + case 64: expected = 1113950; break; + case 256: expected = 4268415; break; + case 1024: expected = 16954082; break; + default: + FAIL() << "Invalid satd size (" << satd_size_ + << ") valid: 16/64/256/1024"; + } + FillRandom(); + Check(expected); +} + +using std::tr1::make_tuple; + +INSTANTIATE_TEST_CASE_P( + C, AverageTest, + ::testing::Values(make_tuple(16, 16, 1, 8, &aom_avg_8x8_c), + make_tuple(16, 16, 1, 4, &aom_avg_4x4_c))); + +INSTANTIATE_TEST_CASE_P(C, SatdTest, + ::testing::Values(make_tuple(16, &aom_satd_c), + make_tuple(64, &aom_satd_c), + make_tuple(256, &aom_satd_c), + make_tuple(1024, &aom_satd_c))); + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P( + SSE2, AverageTest, + ::testing::Values(make_tuple(16, 16, 0, 8, &aom_avg_8x8_sse2), + make_tuple(16, 16, 5, 8, &aom_avg_8x8_sse2), + make_tuple(32, 32, 15, 8, &aom_avg_8x8_sse2), + make_tuple(16, 16, 0, 4, &aom_avg_4x4_sse2), + make_tuple(16, 16, 5, 4, &aom_avg_4x4_sse2), + make_tuple(32, 32, 15, 4, &aom_avg_4x4_sse2))); + +INSTANTIATE_TEST_CASE_P( + SSE2, IntProRowTest, + 
::testing::Values(make_tuple(16, &aom_int_pro_row_sse2, &aom_int_pro_row_c), + make_tuple(32, &aom_int_pro_row_sse2, &aom_int_pro_row_c), + make_tuple(64, &aom_int_pro_row_sse2, + &aom_int_pro_row_c))); + +INSTANTIATE_TEST_CASE_P( + SSE2, IntProColTest, + ::testing::Values(make_tuple(16, &aom_int_pro_col_sse2, &aom_int_pro_col_c), + make_tuple(32, &aom_int_pro_col_sse2, &aom_int_pro_col_c), + make_tuple(64, &aom_int_pro_col_sse2, + &aom_int_pro_col_c))); + +INSTANTIATE_TEST_CASE_P(SSE2, SatdTest, + ::testing::Values(make_tuple(16, &aom_satd_sse2), + make_tuple(64, &aom_satd_sse2), + make_tuple(256, &aom_satd_sse2), + make_tuple(1024, &aom_satd_sse2))); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_CASE_P( + NEON, AverageTest, + ::testing::Values(make_tuple(16, 16, 0, 8, &aom_avg_8x8_neon), + make_tuple(16, 16, 5, 8, &aom_avg_8x8_neon), + make_tuple(32, 32, 15, 8, &aom_avg_8x8_neon), + make_tuple(16, 16, 0, 4, &aom_avg_4x4_neon), + make_tuple(16, 16, 5, 4, &aom_avg_4x4_neon), + make_tuple(32, 32, 15, 4, &aom_avg_4x4_neon))); + +INSTANTIATE_TEST_CASE_P( + NEON, IntProRowTest, + ::testing::Values(make_tuple(16, &aom_int_pro_row_neon, &aom_int_pro_row_c), + make_tuple(32, &aom_int_pro_row_neon, &aom_int_pro_row_c), + make_tuple(64, &aom_int_pro_row_neon, + &aom_int_pro_row_c))); + +INSTANTIATE_TEST_CASE_P( + NEON, IntProColTest, + ::testing::Values(make_tuple(16, &aom_int_pro_col_neon, &aom_int_pro_col_c), + make_tuple(32, &aom_int_pro_col_neon, &aom_int_pro_col_c), + make_tuple(64, &aom_int_pro_col_neon, + &aom_int_pro_col_c))); + +INSTANTIATE_TEST_CASE_P(NEON, SatdTest, + ::testing::Values(make_tuple(16, &aom_satd_neon), + make_tuple(64, &aom_satd_neon), + make_tuple(256, &aom_satd_neon), + make_tuple(1024, &aom_satd_neon))); +#endif + +#if HAVE_MSA +INSTANTIATE_TEST_CASE_P( + MSA, AverageTest, + ::testing::Values(make_tuple(16, 16, 0, 8, &aom_avg_8x8_msa), + make_tuple(16, 16, 5, 8, &aom_avg_8x8_msa), + make_tuple(32, 32, 15, 8, &aom_avg_8x8_msa), + make_tuple(16, 16, 
0, 4, &aom_avg_4x4_msa), + make_tuple(16, 16, 5, 4, &aom_avg_4x4_msa), + make_tuple(32, 32, 15, 4, &aom_avg_4x4_msa))); +#endif + +} // namespace diff --git a/third_party/aom/test/binary_codes_test.cc b/third_party/aom/test/binary_codes_test.cc new file mode 100644 index 000000000..385ec7687 --- /dev/null +++ b/third_party/aom/test/binary_codes_test.cc @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/acm_random.h" +#include "aom/aom_integer.h" +#include "aom_dsp/bitreader.h" +#include "aom_dsp/bitwriter.h" +#include "aom_dsp/binary_codes_reader.h" +#include "aom_dsp/binary_codes_writer.h" + +using libaom_test::ACMRandom; + +namespace { + +// Test for Bilevel code with reference +TEST(AV1, TestPrimitiveRefbilivel) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int kBufferSize = 65536; + aom_writer bw; + uint8_t bw_buffer[kBufferSize]; + const uint16_t kRanges = 8; + const uint16_t kNearRanges = 8; + const uint16_t kReferences = 8; + const uint16_t kValues = 16; + const uint16_t range_vals[kRanges] = { 1, 13, 64, 120, 230, 420, 1100, 8000 }; + uint16_t enc_values[kRanges][kNearRanges][kReferences][kValues][4]; + aom_start_encode(&bw, bw_buffer); + for (int n = 0; n < kRanges; ++n) { + const uint16_t range = range_vals[n]; + for (int p = 0; p < kNearRanges; ++p) { + const uint16_t near_range = 1 + 
rnd(range); + for (int r = 0; r < kReferences; ++r) { + const uint16_t ref = rnd(range); + for (int v = 0; v < kValues; ++v) { + const uint16_t value = rnd(range); + enc_values[n][p][r][v][0] = range; + enc_values[n][p][r][v][1] = near_range; + enc_values[n][p][r][v][2] = ref; + enc_values[n][p][r][v][3] = value; + aom_write_primitive_refbilevel(&bw, range, near_range, ref, value); + } + } + } + } + aom_stop_encode(&bw); + aom_reader br; + aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL); + GTEST_ASSERT_GE(aom_reader_tell(&br), 0u); + GTEST_ASSERT_LE(aom_reader_tell(&br), 1u); + for (int n = 0; n < kRanges; ++n) { + for (int p = 0; p < kNearRanges; ++p) { + for (int r = 0; r < kReferences; ++r) { + for (int v = 0; v < kValues; ++v) { + const uint16_t range = enc_values[n][p][r][v][0]; + const uint16_t near_range = enc_values[n][p][r][v][1]; + const uint16_t ref = enc_values[n][p][r][v][2]; + const uint16_t value = + aom_read_primitive_refbilevel(&br, range, near_range, ref); + GTEST_ASSERT_EQ(value, enc_values[n][p][r][v][3]); + } + } + } + } +} + +// Test for Finite subexponential code with reference +TEST(AV1, TestPrimitiveRefsubexpfin) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int kBufferSize = 65536; + aom_writer bw; + uint8_t bw_buffer[kBufferSize]; + const uint16_t kRanges = 8; + const uint16_t kSubexpParams = 6; + const uint16_t kReferences = 8; + const uint16_t kValues = 16; + uint16_t enc_values[kRanges][kSubexpParams][kReferences][kValues][4]; + const uint16_t range_vals[kRanges] = { 1, 13, 64, 120, 230, 420, 1100, 8000 }; + aom_start_encode(&bw, bw_buffer); + for (int n = 0; n < kRanges; ++n) { + const uint16_t range = range_vals[n]; + for (int k = 0; k < kSubexpParams; ++k) { + for (int r = 0; r < kReferences; ++r) { + const uint16_t ref = rnd(range); + for (int v = 0; v < kValues; ++v) { + const uint16_t value = rnd(range); + enc_values[n][k][r][v][0] = range; + enc_values[n][k][r][v][1] = k; + enc_values[n][k][r][v][2] = ref; + 
enc_values[n][k][r][v][3] = value; + aom_write_primitive_refsubexpfin(&bw, range, k, ref, value); + } + } + } + } + aom_stop_encode(&bw); + aom_reader br; + aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL); + GTEST_ASSERT_GE(aom_reader_tell(&br), 0u); + GTEST_ASSERT_LE(aom_reader_tell(&br), 1u); + for (int n = 0; n < kRanges; ++n) { + for (int k = 0; k < kSubexpParams; ++k) { + for (int r = 0; r < kReferences; ++r) { + for (int v = 0; v < kValues; ++v) { + const uint16_t range = enc_values[n][k][r][v][0]; + assert(k == enc_values[n][k][r][v][1]); + const uint16_t ref = enc_values[n][k][r][v][2]; + const uint16_t value = + aom_read_primitive_refsubexpfin(&br, range, k, ref); + GTEST_ASSERT_EQ(value, enc_values[n][k][r][v][3]); + } + } + } + } +} +// TODO(debargha): Adds tests for other primitives +} // namespace diff --git a/third_party/aom/test/blend_a64_mask_1d_test.cc b/third_party/aom/test/blend_a64_mask_1d_test.cc new file mode 100644 index 000000000..66e741a74 --- /dev/null +++ b/third_party/aom/test/blend_a64_mask_1d_test.cc @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/register_state_check.h" +#include "test/function_equivalence_test.h" + +#include "./aom_config.h" +#include "./aom_dsp_rtcd.h" +#include "aom/aom_integer.h" + +#include "./av1_rtcd.h" + +#include "av1/common/enums.h" + +#include "aom_dsp/blend.h" + +using libaom_test::FunctionEquivalenceTest; + +namespace { + +template <typename F, typename T> +class BlendA64Mask1DTest : public FunctionEquivalenceTest<F> { + public: + static const int kIterations = 10000; + static const int kMaxWidth = MAX_SB_SIZE * 5; // * 5 to cover longer strides + static const int kMaxHeight = MAX_SB_SIZE; + static const int kBufSize = kMaxWidth * kMaxHeight; + static const int kMaxMaskWidth = 2 * MAX_SB_SIZE; + static const int kMaxMaskSize = kMaxMaskWidth; + + virtual ~BlendA64Mask1DTest() {} + + virtual void Execute(const T *p_src0, const T *p_src1) = 0; + + void Common() { + w_ = 1 << this->rng_(MAX_SB_SIZE_LOG2 + 1); + h_ = 1 << this->rng_(MAX_SB_SIZE_LOG2 + 1); + + dst_offset_ = this->rng_(33); + dst_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_; + + src0_offset_ = this->rng_(33); + src0_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_; + + src1_offset_ = this->rng_(33); + src1_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_; + + T *p_src0; + T *p_src1; + + switch (this->rng_(3)) { + case 0: // Separate sources + p_src0 = src0_; + p_src1 = src1_; + break; + case 1: // src0 == dst + p_src0 = dst_tst_; + src0_stride_ = dst_stride_; + src0_offset_ = dst_offset_; + p_src1 = src1_; + break; + case 2: // src1 == dst + p_src0 = src0_; + p_src1 = dst_tst_; + src1_stride_ = dst_stride_; + src1_offset_ = dst_offset_; + break; + default: FAIL(); + } + + Execute(p_src0, p_src1); + + for (int r = 0; r < h_; ++r) { + for (int c = 0; c < w_; ++c) { + ASSERT_EQ(dst_ref_[dst_offset_ + r * dst_stride_ + c], + dst_tst_[dst_offset_ + r * dst_stride_ + c]); + 
} + } + } + + T dst_ref_[kBufSize]; + T dst_tst_[kBufSize]; + uint32_t dst_stride_; + uint32_t dst_offset_; + + T src0_[kBufSize]; + uint32_t src0_stride_; + uint32_t src0_offset_; + + T src1_[kBufSize]; + uint32_t src1_stride_; + uint32_t src1_offset_; + + uint8_t mask_[kMaxMaskSize]; + + int w_; + int h_; +}; + +////////////////////////////////////////////////////////////////////////////// +// 8 bit version +////////////////////////////////////////////////////////////////////////////// + +typedef void (*F8B)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, + uint32_t src0_stride, const uint8_t *src1, + uint32_t src1_stride, const uint8_t *mask, int h, int w); +typedef libaom_test::FuncParam<F8B> TestFuncs; + +class BlendA64Mask1DTest8B : public BlendA64Mask1DTest<F8B, uint8_t> { + protected: + void Execute(const uint8_t *p_src0, const uint8_t *p_src1) { + params_.ref_func(dst_ref_ + dst_offset_, dst_stride_, p_src0 + src0_offset_, + src0_stride_, p_src1 + src1_offset_, src1_stride_, mask_, + h_, w_); + ASM_REGISTER_STATE_CHECK(params_.tst_func( + dst_tst_ + dst_offset_, dst_stride_, p_src0 + src0_offset_, + src0_stride_, p_src1 + src1_offset_, src1_stride_, mask_, h_, w_)); + } +}; + +TEST_P(BlendA64Mask1DTest8B, RandomValues) { + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + for (int i = 0; i < kBufSize; ++i) { + dst_ref_[i] = rng_.Rand8(); + dst_tst_[i] = rng_.Rand8(); + + src0_[i] = rng_.Rand8(); + src1_[i] = rng_.Rand8(); + } + + for (int i = 0; i < kMaxMaskSize; ++i) + mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1); + + Common(); + } +} + +TEST_P(BlendA64Mask1DTest8B, ExtremeValues) { + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + for (int i = 0; i < kBufSize; ++i) { + dst_ref_[i] = rng_(2) + 254; + dst_tst_[i] = rng_(2) + 254; + src0_[i] = rng_(2) + 254; + src1_[i] = rng_(2) + 254; + } + + for (int i = 0; i < kMaxMaskSize; ++i) + mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1; + + Common(); + } 
+} + +static void blend_a64_hmask_ref(uint8_t *dst, uint32_t dst_stride, + const uint8_t *src0, uint32_t src0_stride, + const uint8_t *src1, uint32_t src1_stride, + const uint8_t *mask, int h, int w) { + uint8_t mask2d[BlendA64Mask1DTest8B::kMaxMaskSize] + [BlendA64Mask1DTest8B::kMaxMaskSize]; + + for (int row = 0; row < h; ++row) + for (int col = 0; col < w; ++col) mask2d[row][col] = mask[col]; + + aom_blend_a64_mask_c(dst, dst_stride, src0, src0_stride, src1, src1_stride, + &mask2d[0][0], BlendA64Mask1DTest8B::kMaxMaskSize, h, w, + 0, 0); +} + +static void blend_a64_vmask_ref(uint8_t *dst, uint32_t dst_stride, + const uint8_t *src0, uint32_t src0_stride, + const uint8_t *src1, uint32_t src1_stride, + const uint8_t *mask, int h, int w) { + uint8_t mask2d[BlendA64Mask1DTest8B::kMaxMaskSize] + [BlendA64Mask1DTest8B::kMaxMaskSize]; + + for (int row = 0; row < h; ++row) + for (int col = 0; col < w; ++col) mask2d[row][col] = mask[row]; + + aom_blend_a64_mask_c(dst, dst_stride, src0, src0_stride, src1, src1_stride, + &mask2d[0][0], BlendA64Mask1DTest8B::kMaxMaskSize, h, w, + 0, 0); +} + +INSTANTIATE_TEST_CASE_P( + C, BlendA64Mask1DTest8B, + ::testing::Values(TestFuncs(blend_a64_hmask_ref, aom_blend_a64_hmask_c), + TestFuncs(blend_a64_vmask_ref, aom_blend_a64_vmask_c))); + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_CASE_P( + SSE4_1, BlendA64Mask1DTest8B, + ::testing::Values( + TestFuncs(blend_a64_hmask_ref, aom_blend_a64_hmask_sse4_1), + TestFuncs(blend_a64_vmask_ref, aom_blend_a64_vmask_sse4_1))); +#endif // HAVE_SSE4_1 + +#if CONFIG_HIGHBITDEPTH +////////////////////////////////////////////////////////////////////////////// +// High bit-depth version +////////////////////////////////////////////////////////////////////////////// + +typedef void (*FHBD)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, + uint32_t src0_stride, const uint8_t *src1, + uint32_t src1_stride, const uint8_t *mask, int h, int w, + int bd); +typedef libaom_test::FuncParam<FHBD> TestFuncsHBD; + 
+class BlendA64Mask1DTestHBD : public BlendA64Mask1DTest<FHBD, uint16_t> { + protected: + void Execute(const uint16_t *p_src0, const uint16_t *p_src1) { + params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_, + CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_, + CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_, + mask_, h_, w_, bit_depth_); + ASM_REGISTER_STATE_CHECK(params_.tst_func( + CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_), dst_stride_, + CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_, + CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_, mask_, h_, w_, + bit_depth_)); + } + + int bit_depth_; +}; + +TEST_P(BlendA64Mask1DTestHBD, RandomValues) { + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + switch (rng_(3)) { + case 0: bit_depth_ = 8; break; + case 1: bit_depth_ = 10; break; + default: bit_depth_ = 12; break; + } + + const int hi = 1 << bit_depth_; + + for (int i = 0; i < kBufSize; ++i) { + dst_ref_[i] = rng_(hi); + dst_tst_[i] = rng_(hi); + src0_[i] = rng_(hi); + src1_[i] = rng_(hi); + } + + for (int i = 0; i < kMaxMaskSize; ++i) + mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1); + + Common(); + } +} + +TEST_P(BlendA64Mask1DTestHBD, ExtremeValues) { + for (int iter = 0; iter < 1000 && !HasFatalFailure(); ++iter) { + switch (rng_(3)) { + case 0: bit_depth_ = 8; break; + case 1: bit_depth_ = 10; break; + default: bit_depth_ = 12; break; + } + + const int hi = 1 << bit_depth_; + const int lo = hi - 2; + + for (int i = 0; i < kBufSize; ++i) { + dst_ref_[i] = rng_(hi - lo) + lo; + dst_tst_[i] = rng_(hi - lo) + lo; + src0_[i] = rng_(hi - lo) + lo; + src1_[i] = rng_(hi - lo) + lo; + } + + for (int i = 0; i < kMaxMaskSize; ++i) + mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1; + + Common(); + } +} + +static void highbd_blend_a64_hmask_ref( + uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, + uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, + const uint8_t *mask, int h, 
int w, int bd) { + uint8_t mask2d[BlendA64Mask1DTestHBD::kMaxMaskSize] + [BlendA64Mask1DTestHBD::kMaxMaskSize]; + + for (int row = 0; row < h; ++row) + for (int col = 0; col < w; ++col) mask2d[row][col] = mask[col]; + + aom_highbd_blend_a64_mask_c( + dst, dst_stride, src0, src0_stride, src1, src1_stride, &mask2d[0][0], + BlendA64Mask1DTestHBD::kMaxMaskSize, h, w, 0, 0, bd); +} + +static void highbd_blend_a64_vmask_ref( + uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, + uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, + const uint8_t *mask, int h, int w, int bd) { + uint8_t mask2d[BlendA64Mask1DTestHBD::kMaxMaskSize] + [BlendA64Mask1DTestHBD::kMaxMaskSize]; + + for (int row = 0; row < h; ++row) + for (int col = 0; col < w; ++col) mask2d[row][col] = mask[row]; + + aom_highbd_blend_a64_mask_c( + dst, dst_stride, src0, src0_stride, src1, src1_stride, &mask2d[0][0], + BlendA64Mask1DTestHBD::kMaxMaskSize, h, w, 0, 0, bd); +} + +INSTANTIATE_TEST_CASE_P( + C, BlendA64Mask1DTestHBD, + ::testing::Values(TestFuncsHBD(highbd_blend_a64_hmask_ref, + aom_highbd_blend_a64_hmask_c), + TestFuncsHBD(highbd_blend_a64_vmask_ref, + aom_highbd_blend_a64_vmask_c))); + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_CASE_P( + SSE4_1, BlendA64Mask1DTestHBD, + ::testing::Values(TestFuncsHBD(highbd_blend_a64_hmask_ref, + aom_highbd_blend_a64_hmask_sse4_1), + TestFuncsHBD(highbd_blend_a64_vmask_ref, + aom_highbd_blend_a64_vmask_sse4_1))); +#endif // HAVE_SSE4_1 + +#endif // CONFIG_HIGHBITDEPTH +} // namespace diff --git a/third_party/aom/test/blend_a64_mask_test.cc b/third_party/aom/test/blend_a64_mask_test.cc new file mode 100644 index 000000000..fef124d34 --- /dev/null +++ b/third_party/aom/test/blend_a64_mask_test.cc @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/register_state_check.h" +#include "test/function_equivalence_test.h" + +#include "./aom_config.h" +#include "./aom_dsp_rtcd.h" +#include "aom/aom_integer.h" + +#include "./av1_rtcd.h" + +#include "av1/common/enums.h" + +#include "aom_dsp/blend.h" + +using libaom_test::FunctionEquivalenceTest; + +namespace { + +template <typename F, typename T> +class BlendA64MaskTest : public FunctionEquivalenceTest<F> { + protected: + static const int kIterations = 10000; + static const int kMaxWidth = MAX_SB_SIZE * 5; // * 5 to cover longer strides + static const int kMaxHeight = MAX_SB_SIZE; + static const int kBufSize = kMaxWidth * kMaxHeight; + static const int kMaxMaskWidth = 2 * MAX_SB_SIZE; + static const int kMaxMaskSize = kMaxMaskWidth * kMaxMaskWidth; + + virtual ~BlendA64MaskTest() {} + + virtual void Execute(const T *p_src0, const T *p_src1) = 0; + + void Common() { + w_ = 1 << this->rng_(MAX_SB_SIZE_LOG2 + 1); + h_ = 1 << this->rng_(MAX_SB_SIZE_LOG2 + 1); + + subx_ = this->rng_(2); + suby_ = this->rng_(2); + + dst_offset_ = this->rng_(33); + dst_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_; + + src0_offset_ = this->rng_(33); + src0_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_; + + src1_offset_ = this->rng_(33); + src1_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_; + + mask_stride_ = + this->rng_(kMaxWidth + 1 - w_ * (subx_ ? 2 : 1)) + w_ * (subx_ ? 
2 : 1); + + T *p_src0; + T *p_src1; + + switch (this->rng_(3)) { + case 0: // Separate sources + p_src0 = src0_; + p_src1 = src1_; + break; + case 1: // src0 == dst + p_src0 = dst_tst_; + src0_stride_ = dst_stride_; + src0_offset_ = dst_offset_; + p_src1 = src1_; + break; + case 2: // src1 == dst + p_src0 = src0_; + p_src1 = dst_tst_; + src1_stride_ = dst_stride_; + src1_offset_ = dst_offset_; + break; + default: FAIL(); + } + + Execute(p_src0, p_src1); + + for (int r = 0; r < h_; ++r) { + for (int c = 0; c < w_; ++c) { + ASSERT_EQ(dst_ref_[dst_offset_ + r * dst_stride_ + c], + dst_tst_[dst_offset_ + r * dst_stride_ + c]); + } + } + } + + T dst_ref_[kBufSize]; + T dst_tst_[kBufSize]; + uint32_t dst_stride_; + uint32_t dst_offset_; + + T src0_[kBufSize]; + uint32_t src0_stride_; + uint32_t src0_offset_; + + T src1_[kBufSize]; + uint32_t src1_stride_; + uint32_t src1_offset_; + + uint8_t mask_[kMaxMaskSize]; + size_t mask_stride_; + + int w_; + int h_; + + int suby_; + int subx_; +}; + +////////////////////////////////////////////////////////////////////////////// +// 8 bit version +////////////////////////////////////////////////////////////////////////////// + +typedef void (*F8B)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, + uint32_t src0_stride, const uint8_t *src1, + uint32_t src1_stride, const uint8_t *mask, + uint32_t mask_stride, int h, int w, int suby, int subx); +typedef libaom_test::FuncParam<F8B> TestFuncs; + +class BlendA64MaskTest8B : public BlendA64MaskTest<F8B, uint8_t> { + protected: + void Execute(const uint8_t *p_src0, const uint8_t *p_src1) { + params_.ref_func(dst_ref_ + dst_offset_, dst_stride_, p_src0 + src0_offset_, + src0_stride_, p_src1 + src1_offset_, src1_stride_, mask_, + kMaxMaskWidth, h_, w_, suby_, subx_); + ASM_REGISTER_STATE_CHECK(params_.tst_func( + dst_tst_ + dst_offset_, dst_stride_, p_src0 + src0_offset_, + src0_stride_, p_src1 + src1_offset_, src1_stride_, mask_, kMaxMaskWidth, + h_, w_, suby_, subx_)); + } +}; + 
+TEST_P(BlendA64MaskTest8B, RandomValues) { + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + for (int i = 0; i < kBufSize; ++i) { + dst_ref_[i] = rng_.Rand8(); + dst_tst_[i] = rng_.Rand8(); + + src0_[i] = rng_.Rand8(); + src1_[i] = rng_.Rand8(); + } + + for (int i = 0; i < kMaxMaskSize; ++i) + mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1); + + Common(); + } +} + +TEST_P(BlendA64MaskTest8B, ExtremeValues) { + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + for (int i = 0; i < kBufSize; ++i) { + dst_ref_[i] = rng_(2) + 254; + dst_tst_[i] = rng_(2) + 254; + src0_[i] = rng_(2) + 254; + src1_[i] = rng_(2) + 254; + } + + for (int i = 0; i < kMaxMaskSize; ++i) + mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1; + + Common(); + } +} + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_CASE_P(SSE4_1, BlendA64MaskTest8B, + ::testing::Values(TestFuncs( + aom_blend_a64_mask_c, aom_blend_a64_mask_sse4_1))); +#endif // HAVE_SSE4_1 + +#if CONFIG_HIGHBITDEPTH +////////////////////////////////////////////////////////////////////////////// +// High bit-depth version +////////////////////////////////////////////////////////////////////////////// + +typedef void (*FHBD)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, + uint32_t src0_stride, const uint8_t *src1, + uint32_t src1_stride, const uint8_t *mask, + uint32_t mask_stride, int h, int w, int suby, int subx, + int bd); +typedef libaom_test::FuncParam<FHBD> TestFuncsHBD; + +class BlendA64MaskTestHBD : public BlendA64MaskTest<FHBD, uint16_t> { + protected: + void Execute(const uint16_t *p_src0, const uint16_t *p_src1) { + params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_, + CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_, + CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_, + mask_, kMaxMaskWidth, h_, w_, suby_, subx_, bit_depth_); + ASM_REGISTER_STATE_CHECK(params_.tst_func( + CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_), dst_stride_, + 
CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_, + CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_, mask_, + kMaxMaskWidth, h_, w_, suby_, subx_, bit_depth_)); + } + + int bit_depth_; +}; + +TEST_P(BlendA64MaskTestHBD, RandomValues) { + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + switch (rng_(3)) { + case 0: bit_depth_ = 8; break; + case 1: bit_depth_ = 10; break; + default: bit_depth_ = 12; break; + } + + const int hi = 1 << bit_depth_; + + for (int i = 0; i < kBufSize; ++i) { + dst_ref_[i] = rng_(hi); + dst_tst_[i] = rng_(hi); + src0_[i] = rng_(hi); + src1_[i] = rng_(hi); + } + + for (int i = 0; i < kMaxMaskSize; ++i) + mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1); + + Common(); + } +} + +TEST_P(BlendA64MaskTestHBD, ExtremeValues) { + for (int iter = 0; iter < 1000 && !HasFatalFailure(); ++iter) { + switch (rng_(3)) { + case 0: bit_depth_ = 8; break; + case 1: bit_depth_ = 10; break; + default: bit_depth_ = 12; break; + } + + const int hi = 1 << bit_depth_; + const int lo = hi - 2; + + for (int i = 0; i < kBufSize; ++i) { + dst_ref_[i] = rng_(hi - lo) + lo; + dst_tst_[i] = rng_(hi - lo) + lo; + src0_[i] = rng_(hi - lo) + lo; + src1_[i] = rng_(hi - lo) + lo; + } + + for (int i = 0; i < kMaxMaskSize; ++i) + mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1; + + Common(); + } +} + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_CASE_P( + SSE4_1, BlendA64MaskTestHBD, + ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c, + aom_highbd_blend_a64_mask_sse4_1))); +#endif // HAVE_SSE4_1 +#endif // CONFIG_HIGHBITDEPTH +} // namespace diff --git a/third_party/aom/test/boolcoder_test.cc b/third_party/aom/test/boolcoder_test.cc new file mode 100644 index 000000000..4d9d7aaf4 --- /dev/null +++ b/third_party/aom/test/boolcoder_test.cc @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/bitreader.h"
+#include "aom_dsp/bitwriter.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+// Number of outer repetitions of the whole TestBitIO sweep.
+const int num_tests = 10;
+}  // namespace
+
+// Round-trip test for the bit writer/reader pair. For each of 8 probability
+// patterns (`method`) and 4 bit patterns (`bit_method`) it writes kBitsToTest
+// bits with aom_write(), reads them back with aom_read() using the same
+// per-bit probabilities, and asserts that every decoded bit matches.
+TEST(AV1, TestBitIO) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  for (int n = 0; n < num_tests; ++n) {
+    for (int method = 0; method <= 7; ++method) {  // we generate various proba
+      const int kBitsToTest = 1000;
+      uint8_t probas[kBitsToTest];
+
+      // Probability patterns: 0 -> all 0, 1 -> all 255, 2 -> all 128,
+      // 3 -> rnd.Rand8(), 4 -> alternating 255/0, 5..7 -> alternating
+      // high/low values drawn with rnd(128), rnd(64) and rnd(32).
+      for (int i = 0; i < kBitsToTest; ++i) {
+        const int parity = i & 1;
+        /* clang-format off */
+        probas[i] =
+          (method == 0) ? 0 : (method == 1) ? 255 :
+          (method == 2) ? 128 :
+          (method == 3) ? rnd.Rand8() :
+          (method == 4) ? (parity ? 0 : 255) :
+          // alternate between low and high proba:
+          (method == 5) ? (parity ? rnd(128) : 255 - rnd(128)) :
+          (method == 6) ?
+          (parity ? rnd(64) : 255 - rnd(64)) :
+          (parity ? rnd(32) : 255 - rnd(32));
+        /* clang-format on */
+      }
+      // Bit patterns: 0 -> all zeros, 1 -> all ones, 2 -> alternating
+      // (i & 1), 3 -> bit_rnd(2) from a generator seeded with random_seed.
+      for (int bit_method = 0; bit_method <= 3; ++bit_method) {
+        const int random_seed = 6432;
+        const int kBufferSize = 10000;
+        ACMRandom bit_rnd(random_seed);
+        aom_writer bw;
+        uint8_t bw_buffer[kBufferSize];
+        aom_start_encode(&bw, bw_buffer);
+
+        int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;
+        for (int i = 0; i < kBitsToTest; ++i) {
+          if (bit_method == 2) {
+            bit = (i & 1);
+          } else if (bit_method == 3) {
+            bit = bit_rnd(2);
+          }
+          aom_write(&bw, bit, static_cast<int>(probas[i]));
+        }
+
+        aom_stop_encode(&bw);
+
+#if !CONFIG_DAALA_EC
+        // First bit should be zero
+        GTEST_ASSERT_EQ(bw_buffer[0] & 0x80, 0);
+#endif
+
+        aom_reader br;
+        aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);
+        // Re-seed with the seed used while encoding; presumably this replays
+        // the same bit sequence for bit_method == 3 -- confirm in ACMRandom.
+        bit_rnd.Reset(random_seed);
+        for (int i = 0; i < kBitsToTest; ++i) {
+          if (bit_method == 2) {
+            bit = (i & 1);
+          } else if (bit_method == 3) {
+            bit = bit_rnd(2);
+          }
+          GTEST_ASSERT_EQ(aom_read(&br, probas[i], NULL), bit)
+              << "pos: " << i << " / " << kBitsToTest
+              << " bit_method: " << bit_method << " method: " << method;
+        }
+      }
+    }
+  }
+}
+
+// Upper bound on the per-symbol average of |frac-bit delta / 8 + log2(p)|
+// accepted by TestTell; the bound is tighter when CONFIG_DAALA_EC is set.
+#if CONFIG_DAALA_EC
+#define FRAC_DIFF_TOTAL_ERROR 0.07
+#else
+#define FRAC_DIFF_TOTAL_ERROR 0.2
+#endif
+
+// Checks the reader's bit-position reporting. For every p in [4, 255] it
+// encodes kSymbols zero bits with aom_write(&bw, 0, p), then decodes them
+// and asserts that:
+//   * aom_reader_tell() and aom_reader_tell_frac() never decrease,
+//   * tell == (tell_frac + 7) >> 3 after every symbol,
+//   * the final tell is at most -kSymbols * log2(p / 256) + 20,
+//   * the mean per-symbol fractional error is <= FRAC_DIFF_TOTAL_ERROR.
+TEST(AV1, TestTell) {
+  const int kBufferSize = 10000;
+  aom_writer bw;
+  uint8_t bw_buffer[kBufferSize];
+  const int kSymbols = 1024;
+  // Coders are noisier at low probabilities, so we start at p = 4.
+  for (int p = 4; p < 256; p++) {
+    double probability = p / 256.;
+    aom_start_encode(&bw, bw_buffer);
+    for (int i = 0; i < kSymbols; i++) {
+      aom_write(&bw, 0, p);
+    }
+    aom_stop_encode(&bw);
+    aom_reader br;
+    aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);
+    uint32_t last_tell = aom_reader_tell(&br);
+    uint32_t last_tell_frac = aom_reader_tell_frac(&br);
+    double frac_diff_total = 0;
+    // Before any symbol is read the reported position must be 0 or 1.
+    GTEST_ASSERT_GE(aom_reader_tell(&br), 0u);
+    GTEST_ASSERT_LE(aom_reader_tell(&br), 1u);
+    for (int i = 0; i < kSymbols; i++) {
+      aom_read(&br, p, NULL);
+      uint32_t tell = aom_reader_tell(&br);
+      uint32_t tell_frac = aom_reader_tell_frac(&br);
+      GTEST_ASSERT_GE(tell, last_tell) << "tell: " << tell
+                                       << ", last_tell: " << last_tell;
+      GTEST_ASSERT_GE(tell_frac, last_tell_frac)
+          << "tell_frac: " << tell_frac
+          << ", last_tell_frac: " << last_tell_frac;
+      // Frac tell should round up to tell.
+      GTEST_ASSERT_EQ(tell, (tell_frac + 7) >> 3);
+      last_tell = tell;
+      // (tell_frac delta) / 8 is the measured cost of this symbol;
+      // log2(probability) is negative, so the sum is the signed deviation.
+      frac_diff_total +=
+          fabs(((tell_frac - last_tell_frac) / 8.0) + log2(probability));
+      last_tell_frac = tell_frac;
+    }
+    const uint32_t expected = (uint32_t)(-kSymbols * log2(probability));
+    // Last tell should be close to the expected value.
+    GTEST_ASSERT_LE(last_tell, expected + 20) << " last_tell: " << last_tell;
+    // The average frac_diff error should be pretty small.
+    GTEST_ASSERT_LE(frac_diff_total / kSymbols, FRAC_DIFF_TOTAL_ERROR)
+        << " frac_diff_total: " << frac_diff_total;
+  }
+}
diff --git a/third_party/aom/test/borders_test.cc b/third_party/aom/test/borders_test.cc
new file mode 100644
index 000000000..076f91404
--- /dev/null
+++ b/third_party/aom/test/borders_test.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <climits>
+#include <vector>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+// Encoder fixture parameterized on (codec factory, test mode). Parameter 0
+// is handed to EncoderTest, parameter 1 is applied with SetMode().
+class BordersTest
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWithParam<libaom_test::TestMode> {
+ protected:
+  BordersTest() : EncoderTest(GET_PARAM(0)) {}
+  virtual ~BordersTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+  }
+
+  // Applies the encoder controls once, when video->frame() == 1.
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      encoder->Control(AOME_SET_CPUUSED, 1);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+    }
+  }
+
+  // NOTE(review): the key-frame check has an empty body, so this hook is
+  // currently a no-op.
+  virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
+    if (pkt->data.frame.flags & AOM_FRAME_IS_KEY) {
+    }
+  }
+};
+
+TEST_P(BordersTest, TestEncodeHighBitrate) {
+  // Validate that this non multiple of 64 wide clip encodes and decodes
+  // without a mismatch when passing in a very low max q.  This pushes
+  // the encoder to producing lots of big partitions which will likely
+  // extend into the border and test the border condition.
+  cfg_.g_lag_in_frames = 25;
+  cfg_.rc_2pass_vbr_minsection_pct = 5;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_target_bitrate = 2000;
+  cfg_.rc_max_quantizer = 10;
+
+  // NOTE(review): arguments are presumably (file, width, height, rate
+  // numerator, rate denominator, start frame, frame limit) -- confirm
+  // against I420VideoSource.
+  ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+                                       40);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+TEST_P(BordersTest, TestLowBitrate) {
+  // Validate that this clip encodes and decodes without a mismatch
+  // when passing in a very high min q.  This pushes the encoder to producing
+  // lots of small partitions, which will test the other condition.
+
+  cfg_.g_lag_in_frames = 25;
+  cfg_.rc_2pass_vbr_minsection_pct = 5;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_target_bitrate = 200;
+  cfg_.rc_min_quantizer = 40;
+
+  ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+                                       40);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+// Only the two-pass "good" mode is instantiated for this fixture.
+AV1_INSTANTIATE_TEST_CASE(BordersTest,
+                          ::testing::Values(::libaom_test::kTwoPassGood));
+}  // namespace
diff --git a/third_party/aom/test/clear_system_state.h b/third_party/aom/test/clear_system_state.h
new file mode 100644
index 000000000..4f3c1eed0
--- /dev/null
+++ b/third_party/aom/test/clear_system_state.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef TEST_CLEAR_SYSTEM_STATE_H_
+#define TEST_CLEAR_SYSTEM_STATE_H_
+
+#include "./aom_config.h"
+#if ARCH_X86 || ARCH_X86_64
+#include "aom_ports/x86.h"
+#endif
+
+namespace libaom_test {
+
+// Reset system to a known state. This function should be used for all non-API
+// test cases.
+// On x86 / x86-64 builds it calls aom_reset_mmx_state(); on every other
+// architecture the body is empty.
+inline void ClearSystemState() {
+#if ARCH_X86 || ARCH_X86_64
+  aom_reset_mmx_state();
+#endif
+}
+
+}  // namespace libaom_test
+#endif  // TEST_CLEAR_SYSTEM_STATE_H_
diff --git a/third_party/aom/test/clpf_test.cc b/third_party/aom/test/clpf_test.cc
new file mode 100644
index 000000000..2c0f8cf7f
--- /dev/null
+++ b/third_party/aom/test/clpf_test.cc
@@ -0,0 +1,437 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <cstdlib>
+#include <string>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "./av1_rtcd.h"
+#include "aom_ports/aom_timer.h"
+#include "av1/common/od_dering.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+
+// Signature shared by the 8-bit-output filter under test and its reference.
+typedef void (*clpf_block_t)(uint8_t *dst, const uint16_t *src, int dstride,
+                             int sstride, int sizex, int sizey,
+                             unsigned int strength, unsigned int bitdepth);
+
+// Test parameter: (function under test, reference function, sizex, sizey).
+typedef std::tr1::tuple<clpf_block_t, clpf_block_t, int, int>
+    clpf_block_param_t;
+
+// Fixture that unpacks the parameter tuple into members in SetUp() and calls
+// ClearSystemState() in TearDown().
+class CDEFClpfBlockTest : public ::testing::TestWithParam<clpf_block_param_t> {
+ public:
+  virtual ~CDEFClpfBlockTest() {}
+  virtual void SetUp() {
+    clpf = GET_PARAM(0);
+    ref_clpf = GET_PARAM(1);
+    sizex = GET_PARAM(2);
+    sizey = GET_PARAM(3);
+  }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  int sizex;
+  int sizey;
+  clpf_block_t clpf;
+  clpf_block_t ref_clpf;
+};
+
+// Same fixture under a second name so speed tests are instantiated separately.
+typedef CDEFClpfBlockTest CDEFClpfSpeedTest;
+
+#if CONFIG_HIGHBITDEPTH
+// High-bit-depth variant: identical signature except dst is uint16_t.
+typedef void (*clpf_block_hbd_t)(uint16_t *dst, const uint16_t *src,
+                                 int dstride, int sstride, int sizex, int sizey,
+                                 unsigned int strength, unsigned int bitdepth);
+
+typedef std::tr1::tuple<clpf_block_hbd_t, clpf_block_hbd_t, int, int>
+    clpf_block_hbd_param_t;
+
+// High-bit-depth counterpart of CDEFClpfBlockTest.
+class CDEFClpfBlockHbdTest
+    : public ::testing::TestWithParam<clpf_block_hbd_param_t> {
+ public:
+  virtual ~CDEFClpfBlockHbdTest() {}
+  virtual void SetUp() {
+    clpf = GET_PARAM(0);
+    ref_clpf = GET_PARAM(1);
+    sizex = GET_PARAM(2);
+    sizey = GET_PARAM(3);
+  }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  int sizex;
+  int sizey;
+  clpf_block_hbd_t clpf;
+  clpf_block_hbd_t ref_clpf;
+};
+
+typedef CDEFClpfBlockHbdTest ClpfHbdSpeedTest;
+#endif
+
+// Runs ref_clpf (and clpf, when it is a different function) on a w x h block
+// placed at (8, 8) inside a 24 x 24 source buffer, sweeping boundary masks,
+// noise levels, noise bit widths, dampings and strengths, and compares the
+// two full 24 x 24 output buffers after every call. Reports the first
+// mismatch through EXPECT_EQ.
+//
+// When clpf == ref_clpf only the ref_clpf call is made and nothing is
+// compared, so the function merely exercises (times) that one filter.
+//
+// Note that the last argument of the function-pointer type is named
+// `bitdepth`, but this driver passes the damping value in that position.
+template <typename pixel>
+void test_clpf(int w, int h, unsigned int depth, unsigned int iterations,
+               void (*clpf)(pixel *dst, const uint16_t *src, int dstride,
+                            int sstride, int sizex, int sizey,
+                            unsigned int strength, unsigned int bitdepth),
+               void (*ref_clpf)(pixel *dst, const uint16_t *src, int dstride,
+                                int sstride, int sizex, int sizey,
+                                unsigned int strength, unsigned int bitdepth)) {
+  const int size = 24;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, uint16_t, s[size * size]);
+  DECLARE_ALIGNED(16, pixel, d[size * size]);
+  DECLARE_ALIGNED(16, pixel, ref_d[size * size]);
+  memset(ref_d, 0, size * size * sizeof(*ref_d));
+  memset(d, 0, size * size * sizeof(*d));
+
+  int error = 0, pos = 0, xpos = 8, ypos = 8;
+  unsigned int strength = 0, bits, level, count, damp = 0, boundary = 0;
+
+  assert(size >= w + 16 && size >= h + 16);
+  assert(depth >= 8);
+
+  // Test every combination of:
+  // * Input with up to <depth> bits of noise
+  // * Noise level around every value from 0 to (1<<depth)-1
+  // * All strengths
+  // * All dampings
+  // * Boundaries
+  // If clpf and ref_clpf are the same, we're just testing speed
+  for (boundary = 0; boundary < 16; boundary++) {
+    for (count = 0; count < iterations; count++) {
+      // The level step is five times coarser whenever a boundary is set.
+      for (level = 0; level < (1U << depth) && !error;
+           level += (1 + 4 * !!boundary) << (depth - 8)) {
+        for (bits = 1; bits <= depth && !error; bits++) {
+          for (damp = 4 + depth - 8; damp < depth - 1 && !error; damp++) {
+            for (int i = 0; i < size * size; i++)
+              s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
+                           (1 << depth) - 1);
+            // `boundary` is a bit mask; each set bit overwrites the source
+            // samples on that side of the block with OD_DERING_VERY_LARGE.
+            if (boundary) {
+              if (boundary & 1) {  // Left
+                for (int i = 0; i < size; i++)
+                  for (int j = 0; j < xpos; j++)
+                    s[i * size + j] = OD_DERING_VERY_LARGE;
+              }
+              if (boundary & 2) {  // Right
+                for (int i = 0; i < size; i++)
+                  for (int j = xpos + w; j < size; j++)
+                    s[i * size + j] = OD_DERING_VERY_LARGE;
+              }
+              if (boundary & 4) {  // Above
+                for (int i = 0; i < ypos; i++)
+                  for (int j = 0; j < size; j++)
+                    s[i * size + j] = OD_DERING_VERY_LARGE;
+              }
+              if (boundary & 8) {  // Below
+                for (int i = ypos + h; i < size; i++)
+                  for (int j = 0; j < size; j++)
+                    s[i * size + j] = OD_DERING_VERY_LARGE;
+              }
+            }
+            // `strength` is an exponent; the filters receive 1 << strength.
+            // It stops advancing once an error is seen, so the value printed
+            // below is the one that failed.
+            for (strength = depth - 8; strength < depth - 5 && !error;
+                 strength += !error) {
+              ref_clpf(ref_d + ypos * size + xpos, s + ypos * size + xpos, size,
+                       size, w, h, 1 << strength, damp);
+              if (clpf != ref_clpf)
+                ASM_REGISTER_STATE_CHECK(clpf(d + ypos * size + xpos,
+                                              s + ypos * size + xpos, size,
+                                              size, w, h, 1 << strength, damp));
+              if (ref_clpf != clpf) {
+                for (pos = 0; pos < size * size && !error; pos++) {
+                  error = ref_d[pos] != d[pos];
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // The compare loop increments pos once more after recording a mismatch;
+  // step back so pos indexes the first differing sample.
+  pos--;
+  EXPECT_EQ(0, error)
+      << "Error: CDEFClpfBlockTest, SIMD and C mismatch." << std::endl
+      << "First error at " << pos % size << "," << pos / size << " ("
+      << (int16_t)ref_d[pos] << " != " << (int16_t)d[pos] << ") " << std::endl
+      << "strength: " << (1 << strength) << std::endl
+      << "damping: " << damp << std::endl
+      << "depth: " << depth << std::endl
+      << "boundary: " << boundary << std::endl
+      << "w: " << w << std::endl
+      << "h: " << h << std::endl
+      << "A=" << (pos > 2 * size ? (int16_t)s[pos - 2 * size] : -1) << std::endl
+      << "B=" << (pos > size ? (int16_t)s[pos - size] : -1) << std::endl
+      << "C=" << (pos % size - 2 >= 0 ? (int16_t)s[pos - 2] : -1) << std::endl
+      << "D=" << (pos % size - 1 >= 0 ? (int16_t)s[pos - 1] : -1) << std::endl
+      << "X=" << (int16_t)s[pos] << std::endl
+      << "E=" << (pos % size + 1 < size ? (int16_t)s[pos + 1] : -1) << std::endl
+      << "F=" << (pos % size + 2 < size ? (int16_t)s[pos + 2] : -1) << std::endl
+      << "G=" << (pos + size < size * size ? (int16_t)s[pos + size] : -1)
+      << std::endl
+      << "H="
+      << (pos + 2 * size < size * size ? (int16_t)s[pos + 2 * size] : -1)
+      << std::endl;
+}
+
+// Times test_clpf() once with ref_clpf in both slots and once with clpf in
+// both slots (so each run executes a single filter and compares nothing),
+// then expects the clpf run to have taken less time than the reference run.
+template <typename pixel>
+void test_clpf_speed(int w, int h, unsigned int depth, unsigned int iterations,
+                     void (*clpf)(pixel *dst, const uint16_t *src, int dstride,
+                                  int sstride, int sizex, int sizey,
+                                  unsigned int strength, unsigned int bitdepth),
+                     void (*ref_clpf)(pixel *dst, const uint16_t *src,
+                                      int dstride, int sstride, int sizex,
+                                      int sizey, unsigned int strength,
+                                      unsigned int bitdepth)) {
+  aom_usec_timer ref_timer;
+  aom_usec_timer timer;
+
+  aom_usec_timer_start(&ref_timer);
+  test_clpf(w, h, depth, iterations, ref_clpf, ref_clpf);
+  aom_usec_timer_mark(&ref_timer);
+  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
+
+  aom_usec_timer_start(&timer);
+  test_clpf(w, h, depth, iterations, clpf, clpf);
+  aom_usec_timer_mark(&timer);
+  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
+
+#if 0
+  std::cout << "[ ] C time = " << ref_elapsed_time / 1000
+            << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl;
+#endif
+
+  EXPECT_GT(ref_elapsed_time, elapsed_time)
+      << "Error: CDEFClpfSpeedTest, SIMD slower than C." << std::endl
+      << "C time: " << ref_elapsed_time << " us" << std::endl
+      << "SIMD time: " << elapsed_time << " us" << std::endl;
+}
+
+// Correctness: depth 8, one iteration.
+TEST_P(CDEFClpfBlockTest, TestSIMDNoMismatch) {
+  test_clpf(sizex, sizey, 8, 1, clpf, ref_clpf);
+}
+
+// Speed: depth 8, 16 iterations. Disabled by default (DISABLED_ prefix).
+TEST_P(CDEFClpfSpeedTest, DISABLED_TestSpeed) {
+  test_clpf_speed(sizex, sizey, 8, 16, clpf, ref_clpf);
+}
+
+#if CONFIG_HIGHBITDEPTH
+// Correctness: depth 12, one iteration.
+TEST_P(CDEFClpfBlockHbdTest, TestSIMDNoMismatch) {
+  test_clpf(sizex, sizey, 12, 1, clpf, ref_clpf);
+}
+
+// Speed: depth 12, 4 iterations. Disabled by default (DISABLED_ prefix).
+TEST_P(ClpfHbdSpeedTest, DISABLED_TestSpeed) {
+  test_clpf_speed(sizex, sizey, 12, 4, clpf, ref_clpf);
+}
+#endif
+
+using std::tr1::make_tuple;
+
+// VS compiling for 32 bit targets does not support vector types in
+// structs as arguments, which makes the v256 type of the intrinsics
+// hard to support, so optimizations for this target are disabled.
+#if defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
+// Test all supported architectures and block sizes
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, CDEFClpfBlockTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 8, 8),
+        make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 8, 4),
+        make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 4, 8),
+        make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 4, 4),
+        make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 8, 4),
+        make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 4, 8),
+        make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 4, 4)));
+#endif
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, CDEFClpfBlockTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 8, 8),
+        make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 8, 4),
+        make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 4, 8),
+        make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 4, 4),
+        make_tuple(&aom_clpf_hblock_ssse3, &aom_clpf_hblock_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_ssse3, &aom_clpf_hblock_c, 8, 4),
+        make_tuple(&aom_clpf_hblock_ssse3, &aom_clpf_hblock_c, 4, 8),
+        make_tuple(&aom_clpf_hblock_ssse3, &aom_clpf_hblock_c, 4, 4)));
+#endif
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, CDEFClpfBlockTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 8, 8),
+        make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 8, 4),
+        make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 4, 8),
+        make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 4, 4),
+        make_tuple(&aom_clpf_hblock_sse4_1, &aom_clpf_hblock_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_sse4_1, &aom_clpf_hblock_c, 8, 4),
+        make_tuple(&aom_clpf_hblock_sse4_1, &aom_clpf_hblock_c, 4, 8),
+        make_tuple(&aom_clpf_hblock_sse4_1, &aom_clpf_hblock_c, 4, 4)));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, CDEFClpfBlockTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 8, 8),
+        make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 8, 4),
+        make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 4, 8),
+        make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 4, 4),
+        make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 8, 4),
+        make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 4, 8),
+        make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 4, 4)));
+#endif
+
+#if CONFIG_HIGHBITDEPTH
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, CDEFClpfBlockHbdTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 8, 4),
+        make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 4, 8),
+        make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 4, 4),
+        make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 8, 4),
+        make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 4, 8),
+        make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 4, 4)));
+#endif
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, CDEFClpfBlockHbdTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 8, 4),
+        make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 4, 8),
+        make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 4, 4),
+        make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 8, 4),
+        make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 4, 8),
+        make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 4, 4)));
+#endif
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, CDEFClpfBlockHbdTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 8, 4),
+        make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 4, 8),
+        make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 4, 4),
+        make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 8, 4),
+        make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 4, 8),
+        make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 4, 4)));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, CDEFClpfBlockHbdTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 8, 4),
+        make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 4, 8),
+        make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 4, 4),
+        make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 8, 4),
+        make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 4, 8),
+        make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 4, 4)));
+#endif
+#endif  // CONFIG_HIGHBITDEPTH
+
+// Test speed for all supported architectures
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, CDEFClpfSpeedTest,
+    ::testing::Values(make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 8, 8),
+                      make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 8,
+                                 8)));
+#endif
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(SSSE3, CDEFClpfSpeedTest,
+                        ::testing::Values(make_tuple(&aom_clpf_block_ssse3,
+                                                     &aom_clpf_block_c, 8, 8),
+                                          make_tuple(&aom_clpf_hblock_ssse3,
+                                                     &aom_clpf_hblock_c, 8,
+                                                     8)));
+#endif
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFClpfSpeedTest,
+                        ::testing::Values(make_tuple(&aom_clpf_block_sse4_1,
+                                                     &aom_clpf_block_c, 8, 8),
+                                          make_tuple(&aom_clpf_hblock_sse4_1,
+                                                     &aom_clpf_hblock_c, 8,
+                                                     8)));
+
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, CDEFClpfSpeedTest,
+    ::testing::Values(make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 8, 8),
+                      make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 8,
+                                 8)));
+#endif
+
+#if CONFIG_HIGHBITDEPTH
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, ClpfHbdSpeedTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 8, 8)));
+#endif
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, ClpfHbdSpeedTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 8, 8)));
+#endif
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, ClpfHbdSpeedTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 8, 8)));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, ClpfHbdSpeedTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 8, 8)));
+#endif
+#endif  // CONFIG_HIGHBITDEPTH
+#endif  // defined(_WIN64) || !defined(_MSC_VER)
+
+}  // namespace
diff --git a/third_party/aom/test/codec_factory.h b/third_party/aom/test/codec_factory.h
new file mode 100644
index 000000000..d2f20b832
--- /dev/null
+++ b/third_party/aom/test/codec_factory.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef TEST_CODEC_FACTORY_H_
+#define TEST_CODEC_FACTORY_H_
+
+#include "./aom_config.h"
+#include "aom/aom_decoder.h"
+#include "aom/aom_encoder.h"
+#if CONFIG_AV1_ENCODER
+#include "aom/aomcx.h"
+#endif
+#if CONFIG_AV1_DECODER
+#include "aom/aomdx.h"
+#endif
+
+#include "test/decode_test_driver.h"
+#include "test/encode_test_driver.h"
+namespace libaom_test {
+
+const int kCodecFactoryParam = 0;
+
+// Abstract factory for the Decoder / Encoder test wrappers and for the
+// default encoder configuration. Every member is pure virtual.
+class CodecFactory {
+ public:
+  CodecFactory() {}
+
+  virtual ~CodecFactory() {}
+
+  virtual Decoder *CreateDecoder(aom_codec_dec_cfg_t cfg) const = 0;
+
+  virtual Decoder *CreateDecoder(aom_codec_dec_cfg_t cfg,
+                                 const aom_codec_flags_t flags) const = 0;
+
+  virtual Encoder *CreateEncoder(aom_codec_enc_cfg_t cfg,
+                                 unsigned long deadline,
+                                 const unsigned long init_flags,
+                                 TwopassStatsStore *stats) const = 0;
+
+  virtual aom_codec_err_t DefaultEncoderConfig(aom_codec_enc_cfg_t *cfg,
+                                               int usage) const = 0;
+};
+
+/* Provide CodecTestWith<n>Params classes for a variable number of parameters
+ * to avoid having to include a 
pointer to the CodecFactory in every test
+ * definition.
+ */
+template <class T1>
+class CodecTestWithParam
+    : public ::testing::TestWithParam<
+          std::tr1::tuple<const libaom_test::CodecFactory *, T1> > {};
+
+template <class T1, class T2>
+class CodecTestWith2Params
+    : public ::testing::TestWithParam<
+          std::tr1::tuple<const libaom_test::CodecFactory *, T1, T2> > {};
+
+template <class T1, class T2, class T3>
+class CodecTestWith3Params
+    : public ::testing::TestWithParam<
+          std::tr1::tuple<const libaom_test::CodecFactory *, T1, T2, T3> > {};
+
+/*
+ * AV1 Codec Definitions
+ */
+#if CONFIG_AV1
+// Decoder wrapper whose codec interface is aom_codec_av1_dx_algo, or NULL
+// when the build has no AV1 decoder (CONFIG_AV1_DECODER is 0).
+class AV1Decoder : public Decoder {
+ public:
+  explicit AV1Decoder(aom_codec_dec_cfg_t cfg) : Decoder(cfg) {}
+
+  AV1Decoder(aom_codec_dec_cfg_t cfg, const aom_codec_flags_t flag)
+      : Decoder(cfg, flag) {}
+
+ protected:
+  virtual aom_codec_iface_t *CodecInterface() const {
+#if CONFIG_AV1_DECODER
+    return &aom_codec_av1_dx_algo;
+#else
+    return NULL;
+#endif
+  }
+};
+
+// Encoder wrapper whose codec interface is aom_codec_av1_cx_algo, or NULL
+// when the build has no AV1 encoder (CONFIG_AV1_ENCODER is 0).
+class AV1Encoder : public Encoder {
+ public:
+  AV1Encoder(aom_codec_enc_cfg_t cfg, unsigned long deadline,
+             const unsigned long init_flags, TwopassStatsStore *stats)
+      : Encoder(cfg, deadline, init_flags, stats) {}
+
+ protected:
+  virtual aom_codec_iface_t *CodecInterface() const {
+#if CONFIG_AV1_ENCODER
+    return &aom_codec_av1_cx_algo;
+#else
+    return NULL;
+#endif
+  }
+};
+
+// CodecFactory implementation for AV1. Each Create* method returns a
+// heap-allocated wrapper when the matching CONFIG_AV1_* option is enabled and
+// NULL otherwise (the unused arguments are then cast to void).
+// NOTE(review): the wrappers are created with `new`; ownership presumably
+// passes to the caller -- confirm at the call sites.
+class AV1CodecFactory : public CodecFactory {
+ public:
+  AV1CodecFactory() : CodecFactory() {}
+
+  // Convenience overload: forwards to the two-argument version with flags 0.
+  virtual Decoder *CreateDecoder(aom_codec_dec_cfg_t cfg) const {
+    return CreateDecoder(cfg, 0);
+  }
+
+  virtual Decoder *CreateDecoder(aom_codec_dec_cfg_t cfg,
+                                 const aom_codec_flags_t flags) const {
+#if CONFIG_AV1_DECODER
+    return new AV1Decoder(cfg, flags);
+#else
+    (void)cfg;
+    (void)flags;
+    return NULL;
+#endif
+  }
+
+  virtual Encoder *CreateEncoder(aom_codec_enc_cfg_t cfg,
+                                 unsigned long deadline,
+                                 const unsigned long init_flags,
+                                 TwopassStatsStore *stats) const {
+#if CONFIG_AV1_ENCODER
+    return new AV1Encoder(cfg, deadline, init_flags, stats);
+#else
+    (void)cfg;
+    (void)deadline;
+    (void)init_flags;
+    (void)stats;
+    return NULL;
+#endif
+  }
+
+  // Fills *cfg through aom_codec_enc_config_default() for the AV1 encoder
+  // interface; returns AOM_CODEC_INCAPABLE when no AV1 encoder is built.
+  virtual aom_codec_err_t DefaultEncoderConfig(aom_codec_enc_cfg_t *cfg,
+                                               int usage) const {
+#if CONFIG_AV1_ENCODER
+    return aom_codec_enc_config_default(&aom_codec_av1_cx_algo, cfg, usage);
+#else
+    (void)cfg;
+    (void)usage;
+    return AOM_CODEC_INCAPABLE;
+#endif
+  }
+};
+
+// Factory instance referenced by AV1_INSTANTIATE_TEST_CASE below. As a
+// namespace-scope const object defined in a header it has internal linkage,
+// so every translation unit that includes this file gets its own copy.
+const libaom_test::AV1CodecFactory kAV1;
+
+// Instantiates `test` under the "AV1" prefix, combining &kAV1 (as the first
+// tuple element) with the caller-supplied parameter generators. Expands to
+// nothing when CONFIG_AV1 is 0.
+#define AV1_INSTANTIATE_TEST_CASE(test, ...)                                \
+  INSTANTIATE_TEST_CASE_P(                                                  \
+      AV1, test,                                                            \
+      ::testing::Combine(                                                   \
+          ::testing::Values(static_cast<const libaom_test::CodecFactory *>( \
+              &libaom_test::kAV1)),                                         \
+          __VA_ARGS__))
+#else
+#define AV1_INSTANTIATE_TEST_CASE(test, ...)
+#endif  // CONFIG_AV1
+
+}  // namespace libaom_test
+#endif  // TEST_CODEC_FACTORY_H_
diff --git a/third_party/aom/test/convolve_test.cc b/third_party/aom/test/convolve_test.cc
new file mode 100644
index 000000000..a84ef4ec8
--- /dev/null
+++ b/third_party/aom/test/convolve_test.cc
@@ -0,0 +1,1345 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/ + +#include <string.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_config.h" +#include "./aom_dsp_rtcd.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "aom_dsp/aom_dsp_common.h" +#include "aom_dsp/aom_filter.h" +#include "aom_mem/aom_mem.h" +#include "aom_ports/mem.h" +#include "aom_ports/aom_timer.h" +#include "av1/common/filter.h" + +namespace { + +static const unsigned int kMaxDimension = MAX_SB_SIZE; + +typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int filter_x_stride, + const int16_t *filter_y, int filter_y_stride, + int w, int h); + +struct ConvolveFunctions { + ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg, ConvolveFunc h8, + ConvolveFunc h8_avg, ConvolveFunc v8, ConvolveFunc v8_avg, + ConvolveFunc hv8, ConvolveFunc hv8_avg, ConvolveFunc sh8, + ConvolveFunc sh8_avg, ConvolveFunc sv8, + ConvolveFunc sv8_avg, ConvolveFunc shv8, + ConvolveFunc shv8_avg, int bd) + : copy_(copy), avg_(avg), h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg), + v8_avg_(v8_avg), hv8_avg_(hv8_avg), sh8_(sh8), sv8_(sv8), shv8_(shv8), + sh8_avg_(sh8_avg), sv8_avg_(sv8_avg), shv8_avg_(shv8_avg), + use_highbd_(bd) {} + + ConvolveFunc copy_; + ConvolveFunc avg_; + ConvolveFunc h8_; + ConvolveFunc v8_; + ConvolveFunc hv8_; + ConvolveFunc h8_avg_; + ConvolveFunc v8_avg_; + ConvolveFunc hv8_avg_; + ConvolveFunc sh8_; // scaled horiz + ConvolveFunc sv8_; // scaled vert + ConvolveFunc shv8_; // scaled horiz/vert + ConvolveFunc sh8_avg_; // scaled avg horiz + ConvolveFunc sv8_avg_; // scaled avg vert + ConvolveFunc shv8_avg_; // scaled avg horiz/vert + int use_highbd_; // 0 if high bitdepth not used, else the actual bit depth. 
+}; + +typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam; + +#if CONFIG_AV1 && CONFIG_EXT_PARTITION +#define ALL_SIZES(convolve_fn) \ + make_tuple(128, 64, &convolve_fn), make_tuple(64, 128, &convolve_fn), \ + make_tuple(128, 128, &convolve_fn), make_tuple(4, 4, &convolve_fn), \ + make_tuple(8, 4, &convolve_fn), make_tuple(4, 8, &convolve_fn), \ + make_tuple(8, 8, &convolve_fn), make_tuple(16, 8, &convolve_fn), \ + make_tuple(8, 16, &convolve_fn), make_tuple(16, 16, &convolve_fn), \ + make_tuple(32, 16, &convolve_fn), make_tuple(16, 32, &convolve_fn), \ + make_tuple(32, 32, &convolve_fn), make_tuple(64, 32, &convolve_fn), \ + make_tuple(32, 64, &convolve_fn), make_tuple(64, 64, &convolve_fn) +#else +#define ALL_SIZES(convolve_fn) \ + make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn), \ + make_tuple(4, 8, &convolve_fn), make_tuple(8, 8, &convolve_fn), \ + make_tuple(16, 8, &convolve_fn), make_tuple(8, 16, &convolve_fn), \ + make_tuple(16, 16, &convolve_fn), make_tuple(32, 16, &convolve_fn), \ + make_tuple(16, 32, &convolve_fn), make_tuple(32, 32, &convolve_fn), \ + make_tuple(64, 32, &convolve_fn), make_tuple(32, 64, &convolve_fn), \ + make_tuple(64, 64, &convolve_fn) +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + +// Reference 8-tap subpixel filter, slightly modified to fit into this test. +#define AV1_FILTER_WEIGHT 128 +#define AV1_FILTER_SHIFT 7 +uint8_t clip_pixel(int x) { return x < 0 ? 0 : x > 255 ? 255 : x; } + +void filter_block2d_8_c(const uint8_t *src_ptr, unsigned int src_stride, + const int16_t *HFilter, const int16_t *VFilter, + uint8_t *dst_ptr, unsigned int dst_stride, + unsigned int output_width, unsigned int output_height) { + // Between passes, we use an intermediate buffer whose height is extended to + // have enough horizontally filtered values as input for the vertical pass. + // This buffer is allocated to be big enough for the largest block type we + // support. 
+ const int kInterp_Extend = 4; + const unsigned int intermediate_height = + (kInterp_Extend - 1) + output_height + kInterp_Extend; + unsigned int i, j; + + assert(intermediate_height > 7); + + // Size of intermediate_buffer is max_intermediate_height * filter_max_width, + // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height + // + kInterp_Extend + // = 3 + 16 + 4 + // = 23 + // and filter_max_width = 16 + // + uint8_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension]; + const int intermediate_next_stride = + 1 - static_cast<int>(intermediate_height * output_width); + + // Horizontal pass (src -> transposed intermediate). + uint8_t *output_ptr = intermediate_buffer; + const int src_next_row_stride = src_stride - output_width; + src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); + for (i = 0; i < intermediate_height; ++i) { + for (j = 0; j < output_width; ++j) { + // Apply filter... + const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) + + (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) + + (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) + + (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) + + (AV1_FILTER_WEIGHT >> 1); // Rounding + + // Normalize back to 0-255... + *output_ptr = clip_pixel(temp >> AV1_FILTER_SHIFT); + ++src_ptr; + output_ptr += intermediate_height; + } + src_ptr += src_next_row_stride; + output_ptr += intermediate_next_stride; + } + + // Vertical pass (transposed intermediate -> dst). + src_ptr = intermediate_buffer; + const int dst_next_row_stride = dst_stride - output_width; + for (i = 0; i < output_height; ++i) { + for (j = 0; j < output_width; ++j) { + // Apply filter... 
+ const int temp = (src_ptr[0] * VFilter[0]) + (src_ptr[1] * VFilter[1]) + + (src_ptr[2] * VFilter[2]) + (src_ptr[3] * VFilter[3]) + + (src_ptr[4] * VFilter[4]) + (src_ptr[5] * VFilter[5]) + + (src_ptr[6] * VFilter[6]) + (src_ptr[7] * VFilter[7]) + + (AV1_FILTER_WEIGHT >> 1); // Rounding + + // Normalize back to 0-255... + *dst_ptr++ = clip_pixel(temp >> AV1_FILTER_SHIFT); + src_ptr += intermediate_height; + } + src_ptr += intermediate_next_stride; + dst_ptr += dst_next_row_stride; + } +} + +void block2d_average_c(uint8_t *src, unsigned int src_stride, + uint8_t *output_ptr, unsigned int output_stride, + unsigned int output_width, unsigned int output_height) { + unsigned int i, j; + for (i = 0; i < output_height; ++i) { + for (j = 0; j < output_width; ++j) { + output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1; + } + output_ptr += output_stride; + } +} + +void filter_average_block2d_8_c(const uint8_t *src_ptr, + const unsigned int src_stride, + const int16_t *HFilter, const int16_t *VFilter, + uint8_t *dst_ptr, unsigned int dst_stride, + unsigned int output_width, + unsigned int output_height) { + uint8_t tmp[kMaxDimension * kMaxDimension]; + + assert(output_width <= kMaxDimension); + assert(output_height <= kMaxDimension); + filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, kMaxDimension, + output_width, output_height); + block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride, output_width, + output_height); +} + +#if CONFIG_HIGHBITDEPTH +void highbd_filter_block2d_8_c(const uint16_t *src_ptr, + const unsigned int src_stride, + const int16_t *HFilter, const int16_t *VFilter, + uint16_t *dst_ptr, unsigned int dst_stride, + unsigned int output_width, + unsigned int output_height, int bd) { + // Between passes, we use an intermediate buffer whose height is extended to + // have enough horizontally filtered values as input for the vertical pass. + // This buffer is allocated to be big enough for the largest block type we + // support. 
+ const int kInterp_Extend = 4; + const unsigned int intermediate_height = + (kInterp_Extend - 1) + output_height + kInterp_Extend; + + /* Size of intermediate_buffer is max_intermediate_height * filter_max_width, + * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height + * + kInterp_Extend + * = 3 + 16 + 4 + * = 23 + * and filter_max_width = 16 + */ + uint16_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension]; + const int intermediate_next_stride = + 1 - static_cast<int>(intermediate_height * output_width); + + // Horizontal pass (src -> transposed intermediate). + { + uint16_t *output_ptr = intermediate_buffer; + const int src_next_row_stride = src_stride - output_width; + unsigned int i, j; + src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); + for (i = 0; i < intermediate_height; ++i) { + for (j = 0; j < output_width; ++j) { + // Apply filter... + const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) + + (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) + + (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) + + (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) + + (AV1_FILTER_WEIGHT >> 1); // Rounding + + // Normalize back to 0-255... + *output_ptr = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd); + ++src_ptr; + output_ptr += intermediate_height; + } + src_ptr += src_next_row_stride; + output_ptr += intermediate_next_stride; + } + } + + // Vertical pass (transposed intermediate -> dst). + { + const uint16_t *interm_ptr = intermediate_buffer; + const int dst_next_row_stride = dst_stride - output_width; + unsigned int i, j; + for (i = 0; i < output_height; ++i) { + for (j = 0; j < output_width; ++j) { + // Apply filter... 
+ const int temp = + (interm_ptr[0] * VFilter[0]) + (interm_ptr[1] * VFilter[1]) + + (interm_ptr[2] * VFilter[2]) + (interm_ptr[3] * VFilter[3]) + + (interm_ptr[4] * VFilter[4]) + (interm_ptr[5] * VFilter[5]) + + (interm_ptr[6] * VFilter[6]) + (interm_ptr[7] * VFilter[7]) + + (AV1_FILTER_WEIGHT >> 1); // Rounding + + // Normalize back to 0-255... + *dst_ptr++ = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd); + interm_ptr += intermediate_height; + } + interm_ptr += intermediate_next_stride; + dst_ptr += dst_next_row_stride; + } + } +} + +void highbd_block2d_average_c(uint16_t *src, unsigned int src_stride, + uint16_t *output_ptr, unsigned int output_stride, + unsigned int output_width, + unsigned int output_height) { + unsigned int i, j; + for (i = 0; i < output_height; ++i) { + for (j = 0; j < output_width; ++j) { + output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1; + } + output_ptr += output_stride; + } +} + +void highbd_filter_average_block2d_8_c( + const uint16_t *src_ptr, unsigned int src_stride, const int16_t *HFilter, + const int16_t *VFilter, uint16_t *dst_ptr, unsigned int dst_stride, + unsigned int output_width, unsigned int output_height, int bd) { + uint16_t tmp[kMaxDimension * kMaxDimension]; + + assert(output_width <= kMaxDimension); + assert(output_height <= kMaxDimension); + highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, + kMaxDimension, output_width, output_height, bd); + highbd_block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride, + output_width, output_height); +} +#endif // CONFIG_HIGHBITDEPTH + +class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> { + public: + static void SetUpTestCase() { + // Force input_ to be unaligned, output to be 16 byte aligned. 
+ input_ = reinterpret_cast<uint8_t *>( + aom_memalign(kDataAlignment, kInputBufferSize + 1)) + + 1; + output_ = reinterpret_cast<uint8_t *>( + aom_memalign(kDataAlignment, kOutputBufferSize)); + output_ref_ = reinterpret_cast<uint8_t *>( + aom_memalign(kDataAlignment, kOutputBufferSize)); +#if CONFIG_HIGHBITDEPTH + input16_ = reinterpret_cast<uint16_t *>(aom_memalign( + kDataAlignment, (kInputBufferSize + 1) * sizeof(uint16_t))) + + 1; + output16_ = reinterpret_cast<uint16_t *>( + aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t))); + output16_ref_ = reinterpret_cast<uint16_t *>( + aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t))); +#endif + } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + static void TearDownTestCase() { + aom_free(input_ - 1); + input_ = NULL; + aom_free(output_); + output_ = NULL; + aom_free(output_ref_); + output_ref_ = NULL; +#if CONFIG_HIGHBITDEPTH + aom_free(input16_ - 1); + input16_ = NULL; + aom_free(output16_); + output16_ = NULL; + aom_free(output16_ref_); + output16_ref_ = NULL; +#endif + } + + protected: + static const int kDataAlignment = 16; + static const int kOuterBlockSize = 4 * kMaxDimension; + static const int kInputStride = kOuterBlockSize; + static const int kOutputStride = kOuterBlockSize; + static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize; + static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize; + + int Width() const { return GET_PARAM(0); } + int Height() const { return GET_PARAM(1); } + int BorderLeft() const { + const int center = (kOuterBlockSize - Width()) / 2; + return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1); + } + int BorderTop() const { return (kOuterBlockSize - Height()) / 2; } + + bool IsIndexInBorder(int i) { + return (i < BorderTop() * kOuterBlockSize || + i >= (BorderTop() + Height()) * kOuterBlockSize || + i % kOuterBlockSize < BorderLeft() || + i % kOuterBlockSize >= (BorderLeft() + Width())); + } 
+ + virtual void SetUp() { + UUT_ = GET_PARAM(2); +#if CONFIG_HIGHBITDEPTH + if (UUT_->use_highbd_ != 0) + mask_ = (1 << UUT_->use_highbd_) - 1; + else + mask_ = 255; +#endif + /* Set up guard blocks for an inner block centered in the outer block */ + for (int i = 0; i < kOutputBufferSize; ++i) { + if (IsIndexInBorder(i)) + output_[i] = 255; + else + output_[i] = 0; + } + + ::libaom_test::ACMRandom prng; + for (int i = 0; i < kInputBufferSize; ++i) { + if (i & 1) { + input_[i] = 255; +#if CONFIG_HIGHBITDEPTH + input16_[i] = mask_; +#endif + } else { + input_[i] = prng.Rand8Extremes(); +#if CONFIG_HIGHBITDEPTH + input16_[i] = prng.Rand16() & mask_; +#endif + } + } + } + + void SetConstantInput(int value) { + memset(input_, value, kInputBufferSize); +#if CONFIG_HIGHBITDEPTH + aom_memset16(input16_, value, kInputBufferSize); +#endif + } + + void CopyOutputToRef() { + memcpy(output_ref_, output_, kOutputBufferSize); +#if CONFIG_HIGHBITDEPTH + // Copy 16-bit pixels values. The effective number of bytes is double. 
+ memcpy(output16_ref_, output16_, sizeof(output16_[0]) * kOutputBufferSize); +#endif + } + + void CheckGuardBlocks() { + for (int i = 0; i < kOutputBufferSize; ++i) { + if (IsIndexInBorder(i)) EXPECT_EQ(255, output_[i]); + } + } + + uint8_t *input() const { + const int offset = BorderTop() * kOuterBlockSize + BorderLeft(); +#if CONFIG_HIGHBITDEPTH + if (UUT_->use_highbd_ == 0) { + return input_ + offset; + } else { + return CONVERT_TO_BYTEPTR(input16_) + offset; + } +#else + return input_ + offset; +#endif + } + + uint8_t *output() const { + const int offset = BorderTop() * kOuterBlockSize + BorderLeft(); +#if CONFIG_HIGHBITDEPTH + if (UUT_->use_highbd_ == 0) { + return output_ + offset; + } else { + return CONVERT_TO_BYTEPTR(output16_) + offset; + } +#else + return output_ + offset; +#endif + } + + uint8_t *output_ref() const { + const int offset = BorderTop() * kOuterBlockSize + BorderLeft(); +#if CONFIG_HIGHBITDEPTH + if (UUT_->use_highbd_ == 0) { + return output_ref_ + offset; + } else { + return CONVERT_TO_BYTEPTR(output16_ref_) + offset; + } +#else + return output_ref_ + offset; +#endif + } + + uint16_t lookup(uint8_t *list, int index) const { +#if CONFIG_HIGHBITDEPTH + if (UUT_->use_highbd_ == 0) { + return list[index]; + } else { + return CONVERT_TO_SHORTPTR(list)[index]; + } +#else + return list[index]; +#endif + } + + void assign_val(uint8_t *list, int index, uint16_t val) const { +#if CONFIG_HIGHBITDEPTH + if (UUT_->use_highbd_ == 0) { + list[index] = (uint8_t)val; + } else { + CONVERT_TO_SHORTPTR(list)[index] = val; + } +#else + list[index] = (uint8_t)val; +#endif + } + + void wrapper_filter_average_block2d_8_c( + const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter, + const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride, + unsigned int output_width, unsigned int output_height) { +#if CONFIG_HIGHBITDEPTH + if (UUT_->use_highbd_ == 0) { + filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr, + 
dst_stride, output_width, output_height); + } else { + highbd_filter_average_block2d_8_c( + CONVERT_TO_SHORTPTR(src_ptr), src_stride, HFilter, VFilter, + CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, output_width, output_height, + UUT_->use_highbd_); + } +#else + filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr, + dst_stride, output_width, output_height); +#endif + } + + void wrapper_filter_block2d_8_c( + const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter, + const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride, + unsigned int output_width, unsigned int output_height) { +#if CONFIG_HIGHBITDEPTH + if (UUT_->use_highbd_ == 0) { + filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr, + dst_stride, output_width, output_height); + } else { + highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride, + HFilter, VFilter, CONVERT_TO_SHORTPTR(dst_ptr), + dst_stride, output_width, output_height, + UUT_->use_highbd_); + } +#else + filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr, + dst_stride, output_width, output_height); +#endif + } + + const ConvolveFunctions *UUT_; + static uint8_t *input_; + static uint8_t *output_; + static uint8_t *output_ref_; +#if CONFIG_HIGHBITDEPTH + static uint16_t *input16_; + static uint16_t *output16_; + static uint16_t *output16_ref_; + int mask_; +#endif +}; + +uint8_t *ConvolveTest::input_ = NULL; +uint8_t *ConvolveTest::output_ = NULL; +uint8_t *ConvolveTest::output_ref_ = NULL; +#if CONFIG_HIGHBITDEPTH +uint16_t *ConvolveTest::input16_ = NULL; +uint16_t *ConvolveTest::output16_ = NULL; +uint16_t *ConvolveTest::output16_ref_ = NULL; +#endif + +TEST_P(ConvolveTest, GuardBlocks) { CheckGuardBlocks(); } + +TEST_P(ConvolveTest, Copy) { + uint8_t *const in = input(); + uint8_t *const out = output(); + + ASM_REGISTER_STATE_CHECK(UUT_->copy_(in, kInputStride, out, kOutputStride, + NULL, 0, NULL, 0, Width(), Height())); + + CheckGuardBlocks(); + + for (int 
y = 0; y < Height(); ++y) + for (int x = 0; x < Width(); ++x) + ASSERT_EQ(lookup(out, y * kOutputStride + x), + lookup(in, y * kInputStride + x)) + << "(" << x << "," << y << ")"; +} + +TEST_P(ConvolveTest, Avg) { + uint8_t *const in = input(); + uint8_t *const out = output(); + uint8_t *const out_ref = output_ref(); + CopyOutputToRef(); + + ASM_REGISTER_STATE_CHECK(UUT_->avg_(in, kInputStride, out, kOutputStride, + NULL, 0, NULL, 0, Width(), Height())); + + CheckGuardBlocks(); + + for (int y = 0; y < Height(); ++y) + for (int x = 0; x < Width(); ++x) + ASSERT_EQ(lookup(out, y * kOutputStride + x), + ROUND_POWER_OF_TWO(lookup(in, y * kInputStride + x) + + lookup(out_ref, y * kOutputStride + x), + 1)) + << "(" << x << "," << y << ")"; +} + +TEST_P(ConvolveTest, CopyHoriz) { + uint8_t *const in = input(); + uint8_t *const out = output(); + DECLARE_ALIGNED(256, const int16_t, + filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 }; + + ASM_REGISTER_STATE_CHECK(UUT_->sh8_(in, kInputStride, out, kOutputStride, + filter8, 16, filter8, 16, Width(), + Height())); + + CheckGuardBlocks(); + + for (int y = 0; y < Height(); ++y) + for (int x = 0; x < Width(); ++x) + ASSERT_EQ(lookup(out, y * kOutputStride + x), + lookup(in, y * kInputStride + x)) + << "(" << x << "," << y << ")"; +} + +TEST_P(ConvolveTest, CopyVert) { + uint8_t *const in = input(); + uint8_t *const out = output(); + DECLARE_ALIGNED(256, const int16_t, + filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 }; + + ASM_REGISTER_STATE_CHECK(UUT_->sv8_(in, kInputStride, out, kOutputStride, + filter8, 16, filter8, 16, Width(), + Height())); + + CheckGuardBlocks(); + + for (int y = 0; y < Height(); ++y) + for (int x = 0; x < Width(); ++x) + ASSERT_EQ(lookup(out, y * kOutputStride + x), + lookup(in, y * kInputStride + x)) + << "(" << x << "," << y << ")"; +} + +TEST_P(ConvolveTest, Copy2D) { + uint8_t *const in = input(); + uint8_t *const out = output(); + DECLARE_ALIGNED(256, const int16_t, + filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 }; + + 
ASM_REGISTER_STATE_CHECK(UUT_->shv8_(in, kInputStride, out, kOutputStride, + filter8, 16, filter8, 16, Width(), + Height())); + + CheckGuardBlocks(); + + for (int y = 0; y < Height(); ++y) + for (int x = 0; x < Width(); ++x) + ASSERT_EQ(lookup(out, y * kOutputStride + x), + lookup(in, y * kInputStride + x)) + << "(" << x << "," << y << ")"; +} + +const int kNumFilterBanks = SWITCHABLE_FILTERS; +const int kNumFilters = 16; + +TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) { + for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { + const InterpFilter filter = (InterpFilter)filter_bank; + const InterpKernel *filters = + (const InterpKernel *)av1_get_interp_filter_kernel(filter); +#if CONFIG_DUAL_FILTER + const InterpFilterParams filter_params = + av1_get_interp_filter_params(filter); + if (filter_params.taps != SUBPEL_TAPS) continue; +#endif + for (int i = 0; i < kNumFilters; i++) { + const int p0 = filters[i][0] + filters[i][1]; + const int p1 = filters[i][2] + filters[i][3]; + const int p2 = filters[i][4] + filters[i][5]; + const int p3 = filters[i][6] + filters[i][7]; + EXPECT_LE(p0, 128); + EXPECT_LE(p1, 128); + EXPECT_LE(p2, 128); + EXPECT_LE(p3, 128); + EXPECT_LE(p0 + p3, 128); + EXPECT_LE(p0 + p3 + p1, 128); + EXPECT_LE(p0 + p3 + p1 + p2, 128); + EXPECT_EQ(p0 + p1 + p2 + p3, 128); + } + } +} + +const int16_t kInvalidFilter[8] = { 0 }; + +TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) { + uint8_t *const in = input(); + uint8_t *const out = output(); +#if CONFIG_HIGHBITDEPTH + uint8_t ref8[kOutputStride * kMaxDimension]; + uint16_t ref16[kOutputStride * kMaxDimension]; + uint8_t *ref; + if (UUT_->use_highbd_ == 0) { + ref = ref8; + } else { + ref = CONVERT_TO_BYTEPTR(ref16); + } +#else + uint8_t ref[kOutputStride * kMaxDimension]; +#endif + + for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { + const InterpFilter filter = (InterpFilter)filter_bank; + const InterpKernel *filters = + (const InterpKernel 
*)av1_get_interp_filter_kernel(filter); +#if CONFIG_DUAL_FILTER + const InterpFilterParams filter_params = + av1_get_interp_filter_params(filter); + if (filter_params.taps != SUBPEL_TAPS) continue; +#endif + + for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { + for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { + wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x], + filters[filter_y], ref, kOutputStride, + Width(), Height()); + + if (filter_x && filter_y) + ASM_REGISTER_STATE_CHECK(UUT_->hv8_( + in, kInputStride, out, kOutputStride, filters[filter_x], 16, + filters[filter_y], 16, Width(), Height())); + else if (filter_y) + ASM_REGISTER_STATE_CHECK( + UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter, + 16, filters[filter_y], 16, Width(), Height())); + else if (filter_x) + ASM_REGISTER_STATE_CHECK( + UUT_->h8_(in, kInputStride, out, kOutputStride, filters[filter_x], + 16, kInvalidFilter, 16, Width(), Height())); + else + ASM_REGISTER_STATE_CHECK( + UUT_->copy_(in, kInputStride, out, kOutputStride, kInvalidFilter, + 0, kInvalidFilter, 0, Width(), Height())); + + CheckGuardBlocks(); + + for (int y = 0; y < Height(); ++y) + for (int x = 0; x < Width(); ++x) + ASSERT_EQ(lookup(ref, y * kOutputStride + x), + lookup(out, y * kOutputStride + x)) + << "mismatch at (" << x << "," << y << "), " + << "filters (" << filter_bank << "," << filter_x << "," + << filter_y << ")"; + } + } + } +} + +TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) { + uint8_t *const in = input(); + uint8_t *const out = output(); +#if CONFIG_HIGHBITDEPTH + uint8_t ref8[kOutputStride * kMaxDimension]; + uint16_t ref16[kOutputStride * kMaxDimension]; + uint8_t *ref; + if (UUT_->use_highbd_ == 0) { + ref = ref8; + } else { + ref = CONVERT_TO_BYTEPTR(ref16); + } +#else + uint8_t ref[kOutputStride * kMaxDimension]; +#endif + + // Populate ref and out with some random data + ::libaom_test::ACMRandom prng; + for (int y = 0; y < Height(); ++y) { + for 
(int x = 0; x < Width(); ++x) { + uint16_t r; +#if CONFIG_HIGHBITDEPTH + if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) { + r = prng.Rand8Extremes(); + } else { + r = prng.Rand16() & mask_; + } +#else + r = prng.Rand8Extremes(); +#endif + + assign_val(out, y * kOutputStride + x, r); + assign_val(ref, y * kOutputStride + x, r); + } + } + + for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { + const InterpFilter filter = (InterpFilter)filter_bank; + const InterpKernel *filters = + (const InterpKernel *)av1_get_interp_filter_kernel(filter); +#if CONFIG_DUAL_FILTER + const InterpFilterParams filter_params = + av1_get_interp_filter_params(filter); + if (filter_params.taps != SUBPEL_TAPS) continue; +#endif + + for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { + for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { + wrapper_filter_average_block2d_8_c(in, kInputStride, filters[filter_x], + filters[filter_y], ref, + kOutputStride, Width(), Height()); + + if (filter_x && filter_y) + ASM_REGISTER_STATE_CHECK(UUT_->hv8_avg_( + in, kInputStride, out, kOutputStride, filters[filter_x], 16, + filters[filter_y], 16, Width(), Height())); + else if (filter_y) + ASM_REGISTER_STATE_CHECK(UUT_->v8_avg_( + in, kInputStride, out, kOutputStride, kInvalidFilter, 16, + filters[filter_y], 16, Width(), Height())); + else if (filter_x) + ASM_REGISTER_STATE_CHECK(UUT_->h8_avg_( + in, kInputStride, out, kOutputStride, filters[filter_x], 16, + kInvalidFilter, 16, Width(), Height())); + else + ASM_REGISTER_STATE_CHECK( + UUT_->avg_(in, kInputStride, out, kOutputStride, kInvalidFilter, + 0, kInvalidFilter, 0, Width(), Height())); + + CheckGuardBlocks(); + + for (int y = 0; y < Height(); ++y) + for (int x = 0; x < Width(); ++x) + ASSERT_EQ(lookup(ref, y * kOutputStride + x), + lookup(out, y * kOutputStride + x)) + << "mismatch at (" << x << "," << y << "), " + << "filters (" << filter_bank << "," << filter_x << "," + << filter_y << ")"; + } + } + } +} + 
+TEST_P(ConvolveTest, FilterExtremes) { + uint8_t *const in = input(); + uint8_t *const out = output(); +#if CONFIG_HIGHBITDEPTH + uint8_t ref8[kOutputStride * kMaxDimension]; + uint16_t ref16[kOutputStride * kMaxDimension]; + uint8_t *ref; + if (UUT_->use_highbd_ == 0) { + ref = ref8; + } else { + ref = CONVERT_TO_BYTEPTR(ref16); + } +#else + uint8_t ref[kOutputStride * kMaxDimension]; +#endif + + // Populate ref and out with some random data + ::libaom_test::ACMRandom prng; + for (int y = 0; y < Height(); ++y) { + for (int x = 0; x < Width(); ++x) { + uint16_t r; +#if CONFIG_HIGHBITDEPTH + if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) { + r = prng.Rand8Extremes(); + } else { + r = prng.Rand16() & mask_; + } +#else + r = prng.Rand8Extremes(); +#endif + assign_val(out, y * kOutputStride + x, r); + assign_val(ref, y * kOutputStride + x, r); + } + } + + for (int axis = 0; axis < 2; axis++) { + int seed_val = 0; + while (seed_val < 256) { + for (int y = 0; y < 8; ++y) { + for (int x = 0; x < 8; ++x) { +#if CONFIG_HIGHBITDEPTH + assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1, + ((seed_val >> (axis ? y : x)) & 1) * mask_); +#else + assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1, + ((seed_val >> (axis ? 
y : x)) & 1) * 255); +#endif + if (axis) seed_val++; + } + if (axis) + seed_val -= 8; + else + seed_val++; + } + if (axis) seed_val += 8; + + for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { + const InterpFilter filter = (InterpFilter)filter_bank; + const InterpKernel *filters = + (const InterpKernel *)av1_get_interp_filter_kernel(filter); +#if CONFIG_DUAL_FILTER + const InterpFilterParams filter_params = + av1_get_interp_filter_params(filter); + if (filter_params.taps != SUBPEL_TAPS) continue; +#endif + for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { + for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { + wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x], + filters[filter_y], ref, kOutputStride, + Width(), Height()); + if (filter_x && filter_y) + ASM_REGISTER_STATE_CHECK(UUT_->hv8_( + in, kInputStride, out, kOutputStride, filters[filter_x], 16, + filters[filter_y], 16, Width(), Height())); + else if (filter_y) + ASM_REGISTER_STATE_CHECK(UUT_->v8_( + in, kInputStride, out, kOutputStride, kInvalidFilter, 16, + filters[filter_y], 16, Width(), Height())); + else if (filter_x) + ASM_REGISTER_STATE_CHECK(UUT_->h8_( + in, kInputStride, out, kOutputStride, filters[filter_x], 16, + kInvalidFilter, 16, Width(), Height())); + else + ASM_REGISTER_STATE_CHECK(UUT_->copy_( + in, kInputStride, out, kOutputStride, kInvalidFilter, 0, + kInvalidFilter, 0, Width(), Height())); + + for (int y = 0; y < Height(); ++y) + for (int x = 0; x < Width(); ++x) + ASSERT_EQ(lookup(ref, y * kOutputStride + x), + lookup(out, y * kOutputStride + x)) + << "mismatch at (" << x << "," << y << "), " + << "filters (" << filter_bank << "," << filter_x << "," + << filter_y << ")"; + } + } + } + } + } +} + +/* This test exercises that enough rows and columns are filtered with every + possible initial fractional positions and scaling steps. 
*/ +TEST_P(ConvolveTest, CheckScalingFiltering) { + uint8_t *const in = input(); + uint8_t *const out = output(); + const InterpKernel *const eighttap = + (const InterpKernel *)av1_get_interp_filter_kernel(EIGHTTAP_REGULAR); + + SetConstantInput(127); + + for (int frac = 0; frac < 16; ++frac) { + for (int step = 1; step <= 32; ++step) { + /* Test the horizontal and vertical filters in combination. */ + ASM_REGISTER_STATE_CHECK(UUT_->shv8_(in, kInputStride, out, kOutputStride, + eighttap[frac], step, eighttap[frac], + step, Width(), Height())); + + CheckGuardBlocks(); + + for (int y = 0; y < Height(); ++y) { + for (int x = 0; x < Width(); ++x) { + ASSERT_EQ(lookup(in, y * kInputStride + x), + lookup(out, y * kOutputStride + x)) + << "x == " << x << ", y == " << y << ", frac == " << frac + << ", step == " << step; + } + } + } + } +} + +TEST_P(ConvolveTest, DISABLED_Copy_Speed) { + const uint8_t *const in = input(); + uint8_t *const out = output(); + const int kNumTests = 5000000; + const int width = Width(); + const int height = Height(); + aom_usec_timer timer; + + aom_usec_timer_start(&timer); + for (int n = 0; n < kNumTests; ++n) { + UUT_->copy_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0, width, + height); + } + aom_usec_timer_mark(&timer); + + const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); + printf("convolve_copy_%dx%d_%d: %d us\n", width, height, + UUT_->use_highbd_ ? 
UUT_->use_highbd_ : 8, elapsed_time); +} + +TEST_P(ConvolveTest, DISABLED_Avg_Speed) { + const uint8_t *const in = input(); + uint8_t *const out = output(); + const int kNumTests = 5000000; + const int width = Width(); + const int height = Height(); + aom_usec_timer timer; + + aom_usec_timer_start(&timer); + for (int n = 0; n < kNumTests; ++n) { + UUT_->avg_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0, width, + height); + } + aom_usec_timer_mark(&timer); + + const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); + printf("convolve_avg_%dx%d_%d: %d us\n", width, height, + UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time); +} + +TEST_P(ConvolveTest, DISABLED_Speed) { + uint8_t *const in = input(); + uint8_t *const out = output(); +#if CONFIG_HIGHBITDEPTH + uint8_t ref8[kOutputStride * kMaxDimension]; + uint16_t ref16[kOutputStride * kMaxDimension]; + uint8_t *ref; + if (UUT_->use_highbd_ == 0) { + ref = ref8; + } else { + ref = CONVERT_TO_BYTEPTR(ref16); + } +#else + uint8_t ref[kOutputStride * kMaxDimension]; +#endif + + // Populate ref and out with some random data + ::libaom_test::ACMRandom prng; + for (int y = 0; y < Height(); ++y) { + for (int x = 0; x < Width(); ++x) { + uint16_t r; +#if CONFIG_HIGHBITDEPTH + if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) { + r = prng.Rand8Extremes(); + } else { + r = prng.Rand16() & mask_; + } +#else + r = prng.Rand8Extremes(); +#endif + + assign_val(out, y * kOutputStride + x, r); + assign_val(ref, y * kOutputStride + x, r); + } + } + + const InterpFilter filter = (InterpFilter)1; + const InterpKernel *filters = + (const InterpKernel *)av1_get_interp_filter_kernel(filter); + wrapper_filter_average_block2d_8_c(in, kInputStride, filters[1], filters[1], + out, kOutputStride, Width(), Height()); + + aom_usec_timer timer; + int tests_num = 1000; + + aom_usec_timer_start(&timer); + while (tests_num > 0) { + for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { + const 
InterpFilter filter = (InterpFilter)filter_bank; + const InterpKernel *filters = + (const InterpKernel *)av1_get_interp_filter_kernel(filter); +#if CONFIG_DUAL_FILTER + const InterpFilterParams filter_params = + av1_get_interp_filter_params(filter); + if (filter_params.taps != SUBPEL_TAPS) continue; +#endif + + for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { + for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { + if (filter_x && filter_y) + ASM_REGISTER_STATE_CHECK(UUT_->hv8_( + in, kInputStride, out, kOutputStride, filters[filter_x], 16, + filters[filter_y], 16, Width(), Height())); + if (filter_y) + ASM_REGISTER_STATE_CHECK( + UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter, + 16, filters[filter_y], 16, Width(), Height())); + else if (filter_x) + ASM_REGISTER_STATE_CHECK(UUT_->h8_( + in, kInputStride, out, kOutputStride, filters[filter_x], 16, + kInvalidFilter, 16, Width(), Height())); + } + } + } + tests_num--; + } + aom_usec_timer_mark(&timer); + + const int elapsed_time = + static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000); + printf("%dx%d (bitdepth %d) time: %5d ms\n", Width(), Height(), + UUT_->use_highbd_, elapsed_time); +} + +using std::tr1::make_tuple; + +#if CONFIG_HIGHBITDEPTH +#define WRAP(func, bd) \ + void wrap_##func##_##bd( \ + const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ + ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride, \ + const int16_t *filter_y, int filter_y_stride, int w, int h) { \ + aom_highbd_##func(src, src_stride, dst, dst_stride, filter_x, \ + filter_x_stride, filter_y, filter_y_stride, w, h, bd); \ + } +#if HAVE_SSE2 && ARCH_X86_64 +WRAP(convolve_copy_sse2, 8) +WRAP(convolve_avg_sse2, 8) +WRAP(convolve_copy_sse2, 10) +WRAP(convolve_avg_sse2, 10) +WRAP(convolve_copy_sse2, 12) +WRAP(convolve_avg_sse2, 12) +WRAP(convolve8_horiz_sse2, 8) +WRAP(convolve8_avg_horiz_sse2, 8) +WRAP(convolve8_vert_sse2, 8) +WRAP(convolve8_avg_vert_sse2, 8) +WRAP(convolve8_sse2, 8) 
+WRAP(convolve8_avg_sse2, 8) +WRAP(convolve8_horiz_sse2, 10) +WRAP(convolve8_avg_horiz_sse2, 10) +WRAP(convolve8_vert_sse2, 10) +WRAP(convolve8_avg_vert_sse2, 10) +WRAP(convolve8_sse2, 10) +WRAP(convolve8_avg_sse2, 10) +WRAP(convolve8_horiz_sse2, 12) +WRAP(convolve8_avg_horiz_sse2, 12) +WRAP(convolve8_vert_sse2, 12) +WRAP(convolve8_avg_vert_sse2, 12) +WRAP(convolve8_sse2, 12) +WRAP(convolve8_avg_sse2, 12) +#endif // HAVE_SSE2 && ARCH_X86_64 + +WRAP(convolve_copy_c, 8) +WRAP(convolve_avg_c, 8) +WRAP(convolve8_horiz_c, 8) +WRAP(convolve8_avg_horiz_c, 8) +WRAP(convolve8_vert_c, 8) +WRAP(convolve8_avg_vert_c, 8) +WRAP(convolve8_c, 8) +WRAP(convolve8_avg_c, 8) +WRAP(convolve_copy_c, 10) +WRAP(convolve_avg_c, 10) +WRAP(convolve8_horiz_c, 10) +WRAP(convolve8_avg_horiz_c, 10) +WRAP(convolve8_vert_c, 10) +WRAP(convolve8_avg_vert_c, 10) +WRAP(convolve8_c, 10) +WRAP(convolve8_avg_c, 10) +WRAP(convolve_copy_c, 12) +WRAP(convolve_avg_c, 12) +WRAP(convolve8_horiz_c, 12) +WRAP(convolve8_avg_horiz_c, 12) +WRAP(convolve8_vert_c, 12) +WRAP(convolve8_avg_vert_c, 12) +WRAP(convolve8_c, 12) +WRAP(convolve8_avg_c, 12) + +#if HAVE_AVX2 +WRAP(convolve_copy_avx2, 8) +WRAP(convolve_avg_avx2, 8) +WRAP(convolve8_horiz_avx2, 8) +WRAP(convolve8_avg_horiz_avx2, 8) +WRAP(convolve8_vert_avx2, 8) +WRAP(convolve8_avg_vert_avx2, 8) +WRAP(convolve8_avx2, 8) +WRAP(convolve8_avg_avx2, 8) + +WRAP(convolve_copy_avx2, 10) +WRAP(convolve_avg_avx2, 10) +WRAP(convolve8_avx2, 10) +WRAP(convolve8_horiz_avx2, 10) +WRAP(convolve8_vert_avx2, 10) +WRAP(convolve8_avg_avx2, 10) +WRAP(convolve8_avg_horiz_avx2, 10) +WRAP(convolve8_avg_vert_avx2, 10) + +WRAP(convolve_copy_avx2, 12) +WRAP(convolve_avg_avx2, 12) +WRAP(convolve8_avx2, 12) +WRAP(convolve8_horiz_avx2, 12) +WRAP(convolve8_vert_avx2, 12) +WRAP(convolve8_avg_avx2, 12) +WRAP(convolve8_avg_horiz_avx2, 12) +WRAP(convolve8_avg_vert_avx2, 12) +#endif // HAVE_AVX2 + +#undef WRAP + +const ConvolveFunctions convolve8_c( + wrap_convolve_copy_c_8, wrap_convolve_avg_c_8, 
wrap_convolve8_horiz_c_8, + wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8, + wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8, + wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8, + wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, + wrap_convolve8_avg_c_8, 8); +const ConvolveFunctions convolve10_c( + wrap_convolve_copy_c_10, wrap_convolve_avg_c_10, wrap_convolve8_horiz_c_10, + wrap_convolve8_avg_horiz_c_10, wrap_convolve8_vert_c_10, + wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10, wrap_convolve8_avg_c_10, + wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10, + wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10, + wrap_convolve8_avg_c_10, 10); +const ConvolveFunctions convolve12_c( + wrap_convolve_copy_c_12, wrap_convolve_avg_c_12, wrap_convolve8_horiz_c_12, + wrap_convolve8_avg_horiz_c_12, wrap_convolve8_vert_c_12, + wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12, wrap_convolve8_avg_c_12, + wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12, + wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12, + wrap_convolve8_avg_c_12, 12); +const ConvolveParam kArrayConvolve_c[] = { + ALL_SIZES(convolve8_c), ALL_SIZES(convolve10_c), ALL_SIZES(convolve12_c) +}; + +#else +const ConvolveFunctions convolve8_c( + aom_convolve_copy_c, aom_convolve_avg_c, aom_convolve8_horiz_c, + aom_convolve8_avg_horiz_c, aom_convolve8_vert_c, aom_convolve8_avg_vert_c, + aom_convolve8_c, aom_convolve8_avg_c, aom_scaled_horiz_c, + aom_scaled_avg_horiz_c, aom_scaled_vert_c, aom_scaled_avg_vert_c, + aom_scaled_2d_c, aom_scaled_avg_2d_c, 0); +const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) }; +#endif +INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_c)); + +#if HAVE_SSE2 && ARCH_X86_64 +#if CONFIG_HIGHBITDEPTH +const ConvolveFunctions convolve8_sse2( + wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8, + 
wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8, + wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8, + wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, + wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8, + wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8, + wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8); +const ConvolveFunctions convolve10_sse2( + wrap_convolve_copy_sse2_10, wrap_convolve_avg_sse2_10, + wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10, + wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10, + wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, + wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10, + wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10, + wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10); +const ConvolveFunctions convolve12_sse2( + wrap_convolve_copy_sse2_12, wrap_convolve_avg_sse2_12, + wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12, + wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12, + wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, + wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12, + wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12, + wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12); +const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2), + ALL_SIZES(convolve10_sse2), + ALL_SIZES(convolve12_sse2) }; +#else +const ConvolveFunctions convolve8_sse2( + aom_convolve_copy_sse2, aom_convolve_avg_sse2, aom_convolve8_horiz_sse2, + aom_convolve8_avg_horiz_sse2, aom_convolve8_vert_sse2, + aom_convolve8_avg_vert_sse2, aom_convolve8_sse2, aom_convolve8_avg_sse2, + aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c, + aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0); + +const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2) }; +#endif // CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, + 
::testing::ValuesIn(kArrayConvolve_sse2)); +#endif + +#if HAVE_SSSE3 +const ConvolveFunctions convolve8_ssse3( + aom_convolve_copy_c, aom_convolve_avg_c, aom_convolve8_horiz_ssse3, + aom_convolve8_avg_horiz_ssse3, aom_convolve8_vert_ssse3, + aom_convolve8_avg_vert_ssse3, aom_convolve8_ssse3, aom_convolve8_avg_ssse3, + aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c, + aom_scaled_avg_vert_c, aom_scaled_2d_ssse3, aom_scaled_avg_2d_c, 0); + +const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) }; +INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, + ::testing::ValuesIn(kArrayConvolve8_ssse3)); +#endif + +#if HAVE_AVX2 +#if CONFIG_HIGHBITDEPTH +const ConvolveFunctions convolve8_avx2( + wrap_convolve_copy_avx2_8, wrap_convolve_avg_avx2_8, + wrap_convolve8_horiz_avx2_8, wrap_convolve8_avg_horiz_avx2_8, + wrap_convolve8_vert_avx2_8, wrap_convolve8_avg_vert_avx2_8, + wrap_convolve8_avx2_8, wrap_convolve8_avg_avx2_8, wrap_convolve8_horiz_c_8, + wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8, + wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8); +const ConvolveFunctions convolve10_avx2( + wrap_convolve_copy_avx2_10, wrap_convolve_avg_avx2_10, + wrap_convolve8_horiz_avx2_10, wrap_convolve8_avg_horiz_avx2_10, + wrap_convolve8_vert_avx2_10, wrap_convolve8_avg_vert_avx2_10, + wrap_convolve8_avx2_10, wrap_convolve8_avg_avx2_10, + wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10, + wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10, + wrap_convolve8_avg_c_10, 10); +const ConvolveFunctions convolve12_avx2( + wrap_convolve_copy_avx2_12, wrap_convolve_avg_avx2_12, + wrap_convolve8_horiz_avx2_12, wrap_convolve8_avg_horiz_avx2_12, + wrap_convolve8_vert_avx2_12, wrap_convolve8_avg_vert_avx2_12, + wrap_convolve8_avx2_12, wrap_convolve8_avg_avx2_12, + wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12, + wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12, + 
wrap_convolve8_avg_c_12, 12); +const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2), + ALL_SIZES(convolve10_avx2), + ALL_SIZES(convolve12_avx2) }; +#else +const ConvolveFunctions convolve8_avx2( + aom_convolve_copy_c, aom_convolve_avg_c, aom_convolve8_horiz_avx2, + aom_convolve8_avg_horiz_ssse3, aom_convolve8_vert_avx2, + aom_convolve8_avg_vert_ssse3, aom_convolve8_avx2, aom_convolve8_avg_ssse3, + aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c, + aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0); + +const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2) }; +#endif // CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, + ::testing::ValuesIn(kArrayConvolve8_avx2)); +#endif // HAVE_AVX2 + +// TODO(any): Make NEON versions support 128x128 128x64 64x128 block sizes +#if HAVE_NEON && !(CONFIG_AV1 && CONFIG_EXT_PARTITION) +// NOTE(review): the HAVE_NEON_ASM and non-ASM tables below list exactly the +// same kernels, so the inner conditional currently selects nothing different. +#if HAVE_NEON_ASM +const ConvolveFunctions convolve8_neon( + aom_convolve_copy_neon, aom_convolve_avg_neon, aom_convolve8_horiz_neon, + aom_convolve8_avg_horiz_neon, aom_convolve8_vert_neon, + aom_convolve8_avg_vert_neon, aom_convolve8_neon, aom_convolve8_avg_neon, + aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c, + aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0); +#else // !HAVE_NEON_ASM +const ConvolveFunctions convolve8_neon( + aom_convolve_copy_neon, aom_convolve_avg_neon, aom_convolve8_horiz_neon, + aom_convolve8_avg_horiz_neon, aom_convolve8_vert_neon, + aom_convolve8_avg_vert_neon, aom_convolve8_neon, aom_convolve8_avg_neon, + aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c, + aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0); +#endif // HAVE_NEON_ASM + +const ConvolveParam kArrayConvolve8_neon[] = { ALL_SIZES(convolve8_neon) }; +INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, + ::testing::ValuesIn(kArrayConvolve8_neon)); +#endif // HAVE_NEON + +// TODO(any): Make DSPR2 versions support 128x128 128x64 64x128 block
sizes +#if HAVE_DSPR2 && !(CONFIG_AV1 && CONFIG_EXT_PARTITION) +const ConvolveFunctions convolve8_dspr2( + aom_convolve_copy_dspr2, aom_convolve_avg_dspr2, aom_convolve8_horiz_dspr2, + aom_convolve8_avg_horiz_dspr2, aom_convolve8_vert_dspr2, + aom_convolve8_avg_vert_dspr2, aom_convolve8_dspr2, aom_convolve8_avg_dspr2, + aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c, + aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0); + +const ConvolveParam kArrayConvolve8_dspr2[] = { ALL_SIZES(convolve8_dspr2) }; +INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, + ::testing::ValuesIn(kArrayConvolve8_dspr2)); +#endif // HAVE_DSPR2 + +// TODO(any): Make MSA versions support 128x128 128x64 64x128 block sizes +#if HAVE_MSA && !(CONFIG_AV1 && CONFIG_EXT_PARTITION) +const ConvolveFunctions convolve8_msa( + aom_convolve_copy_msa, aom_convolve_avg_msa, aom_convolve8_horiz_msa, + aom_convolve8_avg_horiz_msa, aom_convolve8_vert_msa, + aom_convolve8_avg_vert_msa, aom_convolve8_msa, aom_convolve8_avg_msa, + aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c, + aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0); + +const ConvolveParam kArrayConvolve8_msa[] = { ALL_SIZES(convolve8_msa) }; +INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, + ::testing::ValuesIn(kArrayConvolve8_msa)); +#endif // HAVE_MSA +} // namespace diff --git a/third_party/aom/test/cpu_speed_test.cc b/third_party/aom/test/cpu_speed_test.cc new file mode 100644 index 000000000..9b7966462 --- /dev/null +++ b/third_party/aom/test/cpu_speed_test.cc @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +#include "test/y4m_video_source.h" + +namespace { + +const int kMaxPSNR = 100; + +class CpuSpeedTest + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int> { + protected: + CpuSpeedTest() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + set_cpu_used_(GET_PARAM(2)), min_psnr_(kMaxPSNR), + tune_content_(AOM_CONTENT_DEFAULT) {} + virtual ~CpuSpeedTest() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(encoding_mode_); + if (encoding_mode_ != ::libaom_test::kRealTime) { + cfg_.g_lag_in_frames = 25; + cfg_.rc_end_usage = AOM_VBR; + } else { + cfg_.g_lag_in_frames = 0; + cfg_.rc_end_usage = AOM_CBR; + } + } + + virtual void BeginPassHook(unsigned int /*pass*/) { min_psnr_ = kMaxPSNR; } + + virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) { + if (video->frame() == 1) { + encoder->Control(AOME_SET_CPUUSED, set_cpu_used_); + encoder->Control(AV1E_SET_TUNE_CONTENT, tune_content_); + if (encoding_mode_ != ::libaom_test::kRealTime) { + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + } + } + } + + virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) { + if (pkt->data.psnr.psnr[0] < min_psnr_) min_psnr_ = pkt->data.psnr.psnr[0]; + } + + void TestQ0(); + void TestScreencastQ0(); + void TestTuneScreen(); + void TestEncodeHighBitrate(); + void TestLowBitrate(); + + ::libaom_test::TestMode encoding_mode_; + int set_cpu_used_; + double min_psnr_; + int tune_content_; +}; + 
+void CpuSpeedTest::TestQ0() { + // Validate that this non multiple of 64 wide clip encodes and decodes + // without a mismatch when passing in a very low max q. This pushes + // the encoder to producing lots of big partitions which will likely + // extend into the border and test the border condition. + cfg_.rc_2pass_vbr_minsection_pct = 5; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; + cfg_.rc_target_bitrate = 400; + cfg_.rc_max_quantizer = 0; + cfg_.rc_min_quantizer = 0; + + ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, + 10); + + init_flags_ = AOM_CODEC_USE_PSNR; + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + // With min and max q both 0, no frame may report a PSNR below kMaxPSNR. + EXPECT_GE(min_psnr_, kMaxPSNR); +} + +void CpuSpeedTest::TestScreencastQ0() { + // Same q == 0 check as TestQ0, run on the screendata.y4m clip using the + // timebase reported by the clip itself. + ::libaom_test::Y4mVideoSource video("screendata.y4m", 0, 10); + cfg_.g_timebase = video.timebase(); + cfg_.rc_2pass_vbr_minsection_pct = 5; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; + cfg_.rc_target_bitrate = 400; + cfg_.rc_max_quantizer = 0; + cfg_.rc_min_quantizer = 0; + + init_flags_ = AOM_CODEC_USE_PSNR; + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + EXPECT_GE(min_psnr_, kMaxPSNR); +} + +void CpuSpeedTest::TestTuneScreen() { + // Encodes the screendata.y4m clip with tune_content_ set to + // AOM_CONTENT_SCREEN over the full quantizer range. Only checks that the + // encode loop finishes without a fatal failure; no PSNR bound is asserted. + ::libaom_test::Y4mVideoSource video("screendata.y4m", 0, 10); + cfg_.g_timebase = video.timebase(); + cfg_.rc_2pass_vbr_minsection_pct = 5; + // Was a second write to rc_2pass_vbr_minsection_pct, which overwrote the 5 + // above and left the max section limit unset; use the same min/max pair as + // the other tests in this file. + cfg_.rc_2pass_vbr_maxsection_pct = 2000; + cfg_.rc_target_bitrate = 2000; + cfg_.rc_max_quantizer = 63; + cfg_.rc_min_quantizer = 0; + tune_content_ = AOM_CONTENT_SCREEN; + + init_flags_ = AOM_CODEC_USE_PSNR; + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +void CpuSpeedTest::TestEncodeHighBitrate() { + // Validate that this non multiple of 64 wide clip encodes and decodes + // without a mismatch when passing in a very low max q. This pushes + // the encoder to producing lots of big partitions which will likely + // extend into the border and test the border condition.
+ cfg_.rc_2pass_vbr_minsection_pct = 5; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; + cfg_.rc_target_bitrate = 12000; + cfg_.rc_max_quantizer = 10; + cfg_.rc_min_quantizer = 0; + + ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, + 10); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +void CpuSpeedTest::TestLowBitrate() { + // Validate that this clip encodes and decodes without a mismatch + // when passing in a very high min q. This pushes the encoder to producing + // lots of small partitions, which will likely test the other condition. + cfg_.rc_2pass_vbr_minsection_pct = 5; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; + cfg_.rc_target_bitrate = 200; + cfg_.rc_min_quantizer = 40; + + ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, + 10); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +// Register each scenario as a parameterized test on the base fixture. +TEST_P(CpuSpeedTest, TestQ0) { TestQ0(); } +TEST_P(CpuSpeedTest, TestScreencastQ0) { TestScreencastQ0(); } +TEST_P(CpuSpeedTest, TestTuneScreen) { TestTuneScreen(); } +TEST_P(CpuSpeedTest, TestEncodeHighBitrate) { TestEncodeHighBitrate(); } +TEST_P(CpuSpeedTest, TestLowBitrate) { TestLowBitrate(); } + +// Same test bodies under a second fixture name, so they can be instantiated +// with a different cpu-used range below. +class CpuSpeedTestLarge : public CpuSpeedTest {}; + +TEST_P(CpuSpeedTestLarge, TestQ0) { TestQ0(); } +TEST_P(CpuSpeedTestLarge, TestScreencastQ0) { TestScreencastQ0(); } +TEST_P(CpuSpeedTestLarge, TestTuneScreen) { TestTuneScreen(); } +TEST_P(CpuSpeedTestLarge, TestEncodeHighBitrate) { TestEncodeHighBitrate(); } +TEST_P(CpuSpeedTestLarge, TestLowBitrate) { TestLowBitrate(); } + +// The last parameter is read by the fixture as set_cpu_used_. Range(1, 3) +// yields 1 and 2 for CpuSpeedTest; Range(0, 1) yields only 0 for the Large +// variant. +AV1_INSTANTIATE_TEST_CASE(CpuSpeedTest, + ::testing::Values(::libaom_test::kTwoPassGood, + ::libaom_test::kOnePassGood), + ::testing::Range(1, 3)); +AV1_INSTANTIATE_TEST_CASE(CpuSpeedTestLarge, + ::testing::Values(::libaom_test::kTwoPassGood, + ::libaom_test::kOnePassGood), + ::testing::Range(0, 1)); +} // namespace diff --git a/third_party/aom/test/datarate_test.cc b/third_party/aom/test/datarate_test.cc new file mode 100644 index 000000000..48be4a46d ---
/dev/null +++ b/third_party/aom/test/datarate_test.cc @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include "./aom_config.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "aom/aom_codec.h" + +namespace { + +class DatarateTestLarge + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int> { + public: + DatarateTestLarge() : EncoderTest(GET_PARAM(0)) {} + + protected: + virtual ~DatarateTestLarge() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(GET_PARAM(1)); + set_cpu_used_ = GET_PARAM(2); + ResetModel(); + } + + virtual void ResetModel() { + last_pts_ = 0; + bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz; + frame_number_ = 0; + tot_frame_number_ = 0; + first_drop_ = 0; + num_drops_ = 0; + // Denoiser is off by default. 
+ denoiser_on_ = 0; + bits_total_ = 0; + denoiser_offon_test_ = 0; + denoiser_offon_period_ = -1; + } + + virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) { + if (video->frame() == 0) encoder->Control(AOME_SET_CPUUSED, set_cpu_used_); + + if (denoiser_offon_test_) { + ASSERT_GT(denoiser_offon_period_, 0) + << "denoiser_offon_period_ is not positive."; + if ((video->frame() + 1) % denoiser_offon_period_ == 0) { + // Flip denoiser_on_ periodically + denoiser_on_ ^= 1; + } + } + + encoder->Control(AV1E_SET_NOISE_SENSITIVITY, denoiser_on_); + + const aom_rational_t tb = video->timebase(); + timebase_ = static_cast<double>(tb.num) / tb.den; + duration_ = 0; + } + + virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) { + // Time since last timestamp = duration. + aom_codec_pts_t duration = pkt->data.frame.pts - last_pts_; + + if (duration > 1) { + // If first drop not set and we have a drop set it to this time. + if (!first_drop_) first_drop_ = last_pts_ + 1; + // Update the number of frame drops. + num_drops_ += static_cast<int>(duration - 1); + // Update counter for total number of frames (#frames input to encoder). + // Needed for setting the proper layer_id below. + tot_frame_number_ += static_cast<int>(duration - 1); + } + + // Add to the buffer the bits we'd expect from a constant bitrate server. + bits_in_buffer_model_ += static_cast<int64_t>( + duration * timebase_ * cfg_.rc_target_bitrate * 1000); + + // Buffer should not go negative. + ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame " + << pkt->data.frame.pts; + + const size_t frame_size_in_bits = pkt->data.frame.sz * 8; + + // Update the total encoded bits. + bits_total_ += frame_size_in_bits; + + // Update the most recent pts. 
+ last_pts_ = pkt->data.frame.pts; + ++frame_number_; + ++tot_frame_number_; + } + + virtual void EndPassHook(void) { + duration_ = (last_pts_ + 1) * timebase_; + // Effective file datarate: + effective_datarate_ = (bits_total_ / 1000.0) / duration_; + } + + aom_codec_pts_t last_pts_; + double timebase_; + int frame_number_; // Counter for number of non-dropped/encoded frames. + int tot_frame_number_; // Counter for total number of input frames. + int64_t bits_total_; + double duration_; + double effective_datarate_; + int set_cpu_used_; + int64_t bits_in_buffer_model_; + aom_codec_pts_t first_drop_; + int num_drops_; + int denoiser_on_; + int denoiser_offon_test_; + int denoiser_offon_period_; +}; + +// Check basic rate targeting for VBR mode. +TEST_P(DatarateTestLarge, BasicRateTargetingVBR) { + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.g_error_resilient = 0; + cfg_.rc_end_usage = AOM_VBR; + cfg_.g_lag_in_frames = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 140); + for (int i = 400; i <= 800; i += 400) { + cfg_.rc_target_bitrate = i; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.75) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.25) + << " The datarate for the file is greater than target by too much!"; + } +} + +// Check basic rate targeting for CBR, +TEST_P(DatarateTestLarge, BasicRateTargeting) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 1; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 140); + for (int i = 150; i < 800; i += 400) { + cfg_.rc_target_bitrate = i; + ResetModel(); + 
ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.15) + << " The datarate for the file is greater than target by too much!"; + } +} + +// Check basic rate targeting for CBR. +TEST_P(DatarateTestLarge, BasicRateTargeting444) { + ::libaom_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140); + + cfg_.g_profile = 1; + cfg_.g_timebase = video.timebase(); + + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 1; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + + for (int i = 250; i < 900; i += 400) { + cfg_.rc_target_bitrate = i; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(static_cast<double>(cfg_.rc_target_bitrate), + effective_datarate_ * 0.85) + << " The datarate for the file exceeds the target by too much!"; + ASSERT_LE(static_cast<double>(cfg_.rc_target_bitrate), + effective_datarate_ * 1.15) + << " The datarate for the file missed the target!" + << cfg_.rc_target_bitrate << " " << effective_datarate_; + } +} + +// Check that (1) the first dropped frame gets earlier and earlier +// as the drop frame threshold is increased, and (2) that the total number of +// frame drops does not decrease as we increase frame drop threshold. +// Use a lower qp-max to force some frame drops. 
+TEST_P(DatarateTestLarge, ChangingDropFrameThresh) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + // NOTE(review): this assignment used to appear twice in a row; the second + // copy was presumably meant to set rc_overshoot_pct. Only the redundant + // store is dropped here so the encoder configuration stays unchanged. + cfg_.rc_undershoot_pct = 20; + // Nominal starting threshold; the loop below overwrites it before every + // encode. + cfg_.rc_dropframe_thresh = 10; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 50; + cfg_.rc_end_usage = AOM_CBR; + cfg_.rc_target_bitrate = 200; + cfg_.g_lag_in_frames = 0; + // TODO(marpan): Investigate datarate target failures with a smaller keyframe + // interval (128). + cfg_.kf_max_dist = 9999; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 100); + + const int kDropFrameThreshTestStep = 30; + // State carried from the previous threshold's run, seeded so that the first + // iteration's comparisons have something to compare against. + aom_codec_pts_t last_drop = 140; + int last_num_drops = 0; + // Runs the encode with drop-frame thresholds 40 and 70. + for (int i = 40; i < 100; i += kDropFrameThreshTestStep) { + cfg_.rc_dropframe_thresh = i; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.15) + << " The datarate for the file is greater than target by too much!"; + // The first drop must not come later than it did at the previous threshold. + ASSERT_LE(first_drop_, last_drop) + << " The first dropped frame for drop_thresh " << i + << " > first dropped frame for drop_thresh " + << i - kDropFrameThreshTestStep; + // The drop count may shrink by at most 15% relative to the previous run. + ASSERT_GE(num_drops_, last_num_drops * 0.85) + << " The number of dropped frames for drop_thresh " << i + << " < number of dropped frames for drop_thresh " + << i - kDropFrameThreshTestStep; + last_drop = first_drop_; + last_num_drops = num_drops_; + } +} + +AV1_INSTANTIATE_TEST_CASE(DatarateTestLarge, + ::testing::Values(::libaom_test::kOnePassGood, + ::libaom_test::kRealTime), + ::testing::Range(2, 9, 2)); +} // namespace diff --git a/third_party/aom/test/dct16x16_test.cc b/third_party/aom/test/dct16x16_test.cc new file mode 100644 index 000000000..89263ce89 --- /dev/null +++ b/third_party/aom/test/dct16x16_test.cc @@ -0,0 +1,876 @@ +/* + * Copyright (c) 2016, Alliance
for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./av1_rtcd.h" +#include "./aom_dsp_rtcd.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "av1/common/entropy.h" +#include "av1/common/scan.h" +#include "aom/aom_codec.h" +#include "aom/aom_integer.h" +#include "aom_ports/mem.h" +#include "aom_ports/msvc.h" // for round() + +using libaom_test::ACMRandom; + +namespace { + +const int kNumCoeffs = 256; +const double C1 = 0.995184726672197; +const double C2 = 0.98078528040323; +const double C3 = 0.956940335732209; +const double C4 = 0.923879532511287; +const double C5 = 0.881921264348355; +const double C6 = 0.831469612302545; +const double C7 = 0.773010453362737; +const double C8 = 0.707106781186548; +const double C9 = 0.634393284163646; +const double C10 = 0.555570233019602; +const double C11 = 0.471396736825998; +const double C12 = 0.38268343236509; +const double C13 = 0.290284677254462; +const double C14 = 0.195090322016128; +const double C15 = 0.098017140329561; + +void butterfly_16x16_dct_1d(double input[16], double output[16]) { + double step[16]; + double intermediate[16]; + double temp1, temp2; + + // step 1 + step[0] = input[0] + input[15]; + step[1] = input[1] + input[14]; + step[2] = input[2] + input[13]; + step[3] = input[3] + input[12]; + step[4] = input[4] + input[11]; + 
step[5] = input[5] + input[10]; + step[6] = input[6] + input[9]; + step[7] = input[7] + input[8]; + step[8] = input[7] - input[8]; + step[9] = input[6] - input[9]; + step[10] = input[5] - input[10]; + step[11] = input[4] - input[11]; + step[12] = input[3] - input[12]; + step[13] = input[2] - input[13]; + step[14] = input[1] - input[14]; + step[15] = input[0] - input[15]; + + // step 2 + output[0] = step[0] + step[7]; + output[1] = step[1] + step[6]; + output[2] = step[2] + step[5]; + output[3] = step[3] + step[4]; + output[4] = step[3] - step[4]; + output[5] = step[2] - step[5]; + output[6] = step[1] - step[6]; + output[7] = step[0] - step[7]; + + temp1 = step[8] * C7; + temp2 = step[15] * C9; + output[8] = temp1 + temp2; + + temp1 = step[9] * C11; + temp2 = step[14] * C5; + output[9] = temp1 - temp2; + + temp1 = step[10] * C3; + temp2 = step[13] * C13; + output[10] = temp1 + temp2; + + temp1 = step[11] * C15; + temp2 = step[12] * C1; + output[11] = temp1 - temp2; + + temp1 = step[11] * C1; + temp2 = step[12] * C15; + output[12] = temp2 + temp1; + + temp1 = step[10] * C13; + temp2 = step[13] * C3; + output[13] = temp2 - temp1; + + temp1 = step[9] * C5; + temp2 = step[14] * C11; + output[14] = temp2 + temp1; + + temp1 = step[8] * C9; + temp2 = step[15] * C7; + output[15] = temp2 - temp1; + + // step 3 + step[0] = output[0] + output[3]; + step[1] = output[1] + output[2]; + step[2] = output[1] - output[2]; + step[3] = output[0] - output[3]; + + temp1 = output[4] * C14; + temp2 = output[7] * C2; + step[4] = temp1 + temp2; + + temp1 = output[5] * C10; + temp2 = output[6] * C6; + step[5] = temp1 + temp2; + + temp1 = output[5] * C6; + temp2 = output[6] * C10; + step[6] = temp2 - temp1; + + temp1 = output[4] * C2; + temp2 = output[7] * C14; + step[7] = temp2 - temp1; + + step[8] = output[8] + output[11]; + step[9] = output[9] + output[10]; + step[10] = output[9] - output[10]; + step[11] = output[8] - output[11]; + + step[12] = output[12] + output[15]; + step[13] = 
output[13] + output[14]; + step[14] = output[13] - output[14]; + step[15] = output[12] - output[15]; + + // step 4 + output[0] = (step[0] + step[1]); + output[8] = (step[0] - step[1]); + + temp1 = step[2] * C12; + temp2 = step[3] * C4; + temp1 = temp1 + temp2; + output[4] = 2 * (temp1 * C8); + + temp1 = step[2] * C4; + temp2 = step[3] * C12; + temp1 = temp2 - temp1; + output[12] = 2 * (temp1 * C8); + + output[2] = 2 * ((step[4] + step[5]) * C8); + output[14] = 2 * ((step[7] - step[6]) * C8); + + temp1 = step[4] - step[5]; + temp2 = step[6] + step[7]; + output[6] = (temp1 + temp2); + output[10] = (temp1 - temp2); + + intermediate[8] = step[8] + step[14]; + intermediate[9] = step[9] + step[15]; + + temp1 = intermediate[8] * C12; + temp2 = intermediate[9] * C4; + temp1 = temp1 - temp2; + output[3] = 2 * (temp1 * C8); + + temp1 = intermediate[8] * C4; + temp2 = intermediate[9] * C12; + temp1 = temp2 + temp1; + output[13] = 2 * (temp1 * C8); + + output[9] = 2 * ((step[10] + step[11]) * C8); + + intermediate[11] = step[10] - step[11]; + intermediate[12] = step[12] + step[13]; + intermediate[13] = step[12] - step[13]; + intermediate[14] = step[8] - step[14]; + intermediate[15] = step[9] - step[15]; + + output[15] = (intermediate[11] + intermediate[12]); + output[1] = -(intermediate[11] - intermediate[12]); + + output[7] = 2 * (intermediate[13] * C8); + + temp1 = intermediate[14] * C12; + temp2 = intermediate[15] * C4; + temp1 = temp1 - temp2; + output[11] = -2 * (temp1 * C8); + + temp1 = intermediate[14] * C4; + temp2 = intermediate[15] * C12; + temp1 = temp2 + temp1; + output[5] = 2 * (temp1 * C8); +} + +void reference_16x16_dct_2d(int16_t input[256], double output[256]) { + // First transform columns + for (int i = 0; i < 16; ++i) { + double temp_in[16], temp_out[16]; + for (int j = 0; j < 16; ++j) temp_in[j] = input[j * 16 + i]; + butterfly_16x16_dct_1d(temp_in, temp_out); + for (int j = 0; j < 16; ++j) output[j * 16 + i] = temp_out[j]; + } + // Then transform rows + 
for (int i = 0; i < 16; ++i) { + double temp_in[16], temp_out[16]; + for (int j = 0; j < 16; ++j) temp_in[j] = output[j + i * 16]; + butterfly_16x16_dct_1d(temp_in, temp_out); + // Scale by some magic number + for (int j = 0; j < 16; ++j) output[j + i * 16] = temp_out[j] / 2; + } +} + +typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride); +typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride); +typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride, + int tx_type); +typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride, + int tx_type); + +typedef std::tr1::tuple<FdctFunc, IdctFunc, int, aom_bit_depth_t> Dct16x16Param; +typedef std::tr1::tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t> Ht16x16Param; +typedef std::tr1::tuple<IdctFunc, IdctFunc, int, aom_bit_depth_t> + Idct16x16Param; + +void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride, + int /*tx_type*/) { + aom_fdct16x16_c(in, out, stride); +} + +void idct16x16_ref(const tran_low_t *in, uint8_t *dest, int stride, + int /*tx_type*/) { + aom_idct16x16_256_add_c(in, dest, stride); +} + +void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) { + av1_fht16x16_c(in, out, stride, tx_type); +} + +void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride, + int tx_type) { + av1_iht16x16_256_add_c(in, dest, stride, tx_type); +} + +#if CONFIG_HIGHBITDEPTH +void iht16x16_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { + av1_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 10); +} + +void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { + av1_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 12); +} +#endif // CONFIG_HIGHBITDEPTH + +class Trans16x16TestBase { + public: + virtual ~Trans16x16TestBase() {} + + protected: + virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0; + + virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0; 
+ + void RunAccuracyCheck() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + uint32_t max_error = 0; + int64_t total_error = 0; + const int count_test_block = 10000; + for (int i = 0; i < count_test_block; ++i) { + DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]); + DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]); + DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]); +#if CONFIG_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]); + DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]); +#endif + + // Initialize a test block with input range [-mask_, mask_]. + for (int j = 0; j < kNumCoeffs; ++j) { + if (bit_depth_ == AOM_BITS_8) { + src[j] = rnd.Rand8(); + dst[j] = rnd.Rand8(); + test_input_block[j] = src[j] - dst[j]; +#if CONFIG_HIGHBITDEPTH + } else { + src16[j] = rnd.Rand16() & mask_; + dst16[j] = rnd.Rand16() & mask_; + test_input_block[j] = src16[j] - dst16[j]; +#endif + } + } + + ASM_REGISTER_STATE_CHECK( + RunFwdTxfm(test_input_block, test_temp_block, pitch_)); + if (bit_depth_ == AOM_BITS_8) { + ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_)); +#if CONFIG_HIGHBITDEPTH + } else { + ASM_REGISTER_STATE_CHECK( + RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_)); +#endif + } + + for (int j = 0; j < kNumCoeffs; ++j) { +#if CONFIG_HIGHBITDEPTH + const int32_t diff = + bit_depth_ == AOM_BITS_8 ? 
dst[j] - src[j] : dst16[j] - src16[j]; +#else + const int32_t diff = dst[j] - src[j]; +#endif + const uint32_t error = diff * diff; + if (max_error < error) max_error = error; + total_error += error; + } + } + + EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error) + << "Error: 16x16 FHT/IHT has an individual round trip error > 1"; + + EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error) + << "Error: 16x16 FHT/IHT has average round trip error > 1 per block"; + } + + void RunCoeffCheck() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 1000; + DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]); + + for (int i = 0; i < count_test_block; ++i) { + // Initialize a test block with input range [-mask_, mask_]. + for (int j = 0; j < kNumCoeffs; ++j) + input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_); + + fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_); + ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_)); + + // The minimum quant value is 4. + for (int j = 0; j < kNumCoeffs; ++j) + EXPECT_EQ(output_block[j], output_ref_block[j]); + } + } + + void RunMemCheck() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 1000; + DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]); + + for (int i = 0; i < count_test_block; ++i) { + // Initialize a test block with input range [-mask_, mask_]. + for (int j = 0; j < kNumCoeffs; ++j) { + input_extreme_block[j] = rnd.Rand8() % 2 ? 
mask_ : -mask_; + } + if (i == 0) { + for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_; + } else if (i == 1) { + for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_; + } + + fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_); + ASM_REGISTER_STATE_CHECK( + RunFwdTxfm(input_extreme_block, output_block, pitch_)); + + // The minimum quant value is 4. + for (int j = 0; j < kNumCoeffs; ++j) { + EXPECT_EQ(output_block[j], output_ref_block[j]); + EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j])) + << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE"; + } + } + } + + void RunQuantCheck(int dc_thred, int ac_thred) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 100000; + DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]); + + DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]); + DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]); +#if CONFIG_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]); + DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]); +#endif + + for (int i = 0; i < count_test_block; ++i) { + // Initialize a test block with input range [-mask_, mask_]. + for (int j = 0; j < kNumCoeffs; ++j) { + input_extreme_block[j] = rnd.Rand8() % 2 ? 
mask_ : -mask_; + } + if (i == 0) + for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_; + if (i == 1) + for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_; + + fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_); + + // clear reconstructed pixel buffers + memset(dst, 0, kNumCoeffs * sizeof(uint8_t)); + memset(ref, 0, kNumCoeffs * sizeof(uint8_t)); +#if CONFIG_HIGHBITDEPTH + memset(dst16, 0, kNumCoeffs * sizeof(uint16_t)); + memset(ref16, 0, kNumCoeffs * sizeof(uint16_t)); +#endif + + // quantization with maximum allowed step sizes + output_ref_block[0] = (output_ref_block[0] / dc_thred) * dc_thred; + for (int j = 1; j < kNumCoeffs; ++j) + output_ref_block[j] = (output_ref_block[j] / ac_thred) * ac_thred; + if (bit_depth_ == AOM_BITS_8) { + inv_txfm_ref(output_ref_block, ref, pitch_, tx_type_); + ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_)); +#if CONFIG_HIGHBITDEPTH + } else { + inv_txfm_ref(output_ref_block, CONVERT_TO_BYTEPTR(ref16), pitch_, + tx_type_); + ASM_REGISTER_STATE_CHECK( + RunInvTxfm(output_ref_block, CONVERT_TO_BYTEPTR(dst16), pitch_)); +#endif + } + if (bit_depth_ == AOM_BITS_8) { + for (int j = 0; j < kNumCoeffs; ++j) EXPECT_EQ(ref[j], dst[j]); +#if CONFIG_HIGHBITDEPTH + } else { + for (int j = 0; j < kNumCoeffs; ++j) EXPECT_EQ(ref16[j], dst16[j]); +#endif + } + } + } + + void RunInvAccuracyCheck() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 1000; + DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]); + DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]); + DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]); +#if CONFIG_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]); + DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]); +#endif // CONFIG_HIGHBITDEPTH + + for (int i = 0; i < count_test_block; ++i) { + double out_r[kNumCoeffs]; + + // Initialize a test block with input range [-255, 255]. 
+ for (int j = 0; j < kNumCoeffs; ++j) { + if (bit_depth_ == AOM_BITS_8) { + src[j] = rnd.Rand8(); + dst[j] = rnd.Rand8(); + in[j] = src[j] - dst[j]; +#if CONFIG_HIGHBITDEPTH + } else { + src16[j] = rnd.Rand16() & mask_; + dst16[j] = rnd.Rand16() & mask_; + in[j] = src16[j] - dst16[j]; +#endif // CONFIG_HIGHBITDEPTH + } + } + + reference_16x16_dct_2d(in, out_r); + for (int j = 0; j < kNumCoeffs; ++j) + coeff[j] = static_cast<tran_low_t>(round(out_r[j])); + + if (bit_depth_ == AOM_BITS_8) { + ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16)); +#if CONFIG_HIGHBITDEPTH + } else { + ASM_REGISTER_STATE_CHECK( + RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), 16)); +#endif // CONFIG_HIGHBITDEPTH + } + + for (int j = 0; j < kNumCoeffs; ++j) { +#if CONFIG_HIGHBITDEPTH + const int diff = + bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j]; +#else + const int diff = dst[j] - src[j]; +#endif // CONFIG_HIGHBITDEPTH + const uint32_t error = diff * diff; + EXPECT_GE(1u, error) << "Error: 16x16 IDCT has error " << error + << " at index " << j; + } + } + } + + void CompareInvReference(IdctFunc ref_txfm, int thresh) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 10000; + const int eob = 10; + const int16_t *scan = av1_default_scan_orders[TX_16X16].scan; + DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]); + DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]); + DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]); +#if CONFIG_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]); + DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]); +#endif // CONFIG_HIGHBITDEPTH + + for (int i = 0; i < count_test_block; ++i) { + for (int j = 0; j < kNumCoeffs; ++j) { + if (j < eob) { + // Random values less than the threshold, either positive or negative + coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2)); + } else { + coeff[scan[j]] = 0; + } + if (bit_depth_ == AOM_BITS_8) { + dst[j] = 0; + ref[j] = 0; +#if CONFIG_HIGHBITDEPTH + } else { + dst16[j] = 
0; + ref16[j] = 0; +#endif // CONFIG_HIGHBITDEPTH + } + } + if (bit_depth_ == AOM_BITS_8) { + ref_txfm(coeff, ref, pitch_); + ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_)); + } else { +#if CONFIG_HIGHBITDEPTH + ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_); + ASM_REGISTER_STATE_CHECK( + RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_)); +#endif // CONFIG_HIGHBITDEPTH + } + + for (int j = 0; j < kNumCoeffs; ++j) { +#if CONFIG_HIGHBITDEPTH + const int diff = + bit_depth_ == AOM_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j]; +#else + const int diff = dst[j] - ref[j]; +#endif // CONFIG_HIGHBITDEPTH + const uint32_t error = diff * diff; + EXPECT_EQ(0u, error) << "Error: 16x16 IDCT Comparison has error " + << error << " at index " << j; + } + } + } + + int pitch_; + int tx_type_; + aom_bit_depth_t bit_depth_; + int mask_; + FhtFunc fwd_txfm_ref; + IhtFunc inv_txfm_ref; +}; + +class Trans16x16DCT : public Trans16x16TestBase, + public ::testing::TestWithParam<Dct16x16Param> { + public: + virtual ~Trans16x16DCT() {} + + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + tx_type_ = GET_PARAM(2); + bit_depth_ = GET_PARAM(3); + pitch_ = 16; + fwd_txfm_ref = fdct16x16_ref; + inv_txfm_ref = idct16x16_ref; + mask_ = (1 << bit_depth_) - 1; + inv_txfm_ref = idct16x16_ref; + } + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) { + fwd_txfm_(in, out, stride); + } + void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) { + inv_txfm_(out, dst, stride); + } + + FdctFunc fwd_txfm_; + IdctFunc inv_txfm_; +}; + +TEST_P(Trans16x16DCT, AccuracyCheck) { RunAccuracyCheck(); } + +TEST_P(Trans16x16DCT, CoeffCheck) { RunCoeffCheck(); } + +TEST_P(Trans16x16DCT, MemCheck) { RunMemCheck(); } + +TEST_P(Trans16x16DCT, QuantCheck) { + // Use maximally allowed quantization step sizes for DC and AC + // coefficients respectively. 
+ RunQuantCheck(1336, 1828); +} + +TEST_P(Trans16x16DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); } + +class Trans16x16HT : public Trans16x16TestBase, + public ::testing::TestWithParam<Ht16x16Param> { + public: + virtual ~Trans16x16HT() {} + + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + tx_type_ = GET_PARAM(2); + bit_depth_ = GET_PARAM(3); + pitch_ = 16; + fwd_txfm_ref = fht16x16_ref; + inv_txfm_ref = iht16x16_ref; + mask_ = (1 << bit_depth_) - 1; +#if CONFIG_HIGHBITDEPTH + switch (bit_depth_) { + case AOM_BITS_10: inv_txfm_ref = iht16x16_10; break; + case AOM_BITS_12: inv_txfm_ref = iht16x16_12; break; + default: inv_txfm_ref = iht16x16_ref; break; + } +#else + inv_txfm_ref = iht16x16_ref; +#endif + } + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) { + fwd_txfm_(in, out, stride, tx_type_); + } + void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) { + inv_txfm_(out, dst, stride, tx_type_); + } + + FhtFunc fwd_txfm_; + IhtFunc inv_txfm_; +}; + +TEST_P(Trans16x16HT, AccuracyCheck) { RunAccuracyCheck(); } + +TEST_P(Trans16x16HT, CoeffCheck) { RunCoeffCheck(); } + +TEST_P(Trans16x16HT, MemCheck) { RunMemCheck(); } + +TEST_P(Trans16x16HT, QuantCheck) { + // The encoder skips any non-DC intra prediction modes, + // when the quantization step size goes beyond 988. 
+ RunQuantCheck(429, 729); +} + +class InvTrans16x16DCT : public Trans16x16TestBase, + public ::testing::TestWithParam<Idct16x16Param> { + public: + virtual ~InvTrans16x16DCT() {} + + virtual void SetUp() { + ref_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + thresh_ = GET_PARAM(2); + bit_depth_ = GET_PARAM(3); + pitch_ = 16; + mask_ = (1 << bit_depth_) - 1; + } + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + void RunFwdTxfm(int16_t * /*in*/, tran_low_t * /*out*/, int /*stride*/) {} + void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) { + inv_txfm_(out, dst, stride); + } + + IdctFunc ref_txfm_; + IdctFunc inv_txfm_; + int thresh_; +}; + +TEST_P(InvTrans16x16DCT, CompareReference) { + CompareInvReference(ref_txfm_, thresh_); +} + +class PartialTrans16x16Test : public ::testing::TestWithParam< + std::tr1::tuple<FdctFunc, aom_bit_depth_t> > { + public: + virtual ~PartialTrans16x16Test() {} + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + bit_depth_ = GET_PARAM(1); + } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + aom_bit_depth_t bit_depth_; + FdctFunc fwd_txfm_; +}; + +TEST_P(PartialTrans16x16Test, Extremes) { +#if CONFIG_HIGHBITDEPTH + const int16_t maxval = + static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_)); +#else + const int16_t maxval = 255; +#endif + const int minval = -maxval; + DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]); + + for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval; + output[0] = 0; + ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16)); + EXPECT_EQ((maxval * kNumCoeffs) >> 1, output[0]); + + for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval; + output[0] = 0; + ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16)); + EXPECT_EQ((minval * kNumCoeffs) >> 1, output[0]); +} + +TEST_P(PartialTrans16x16Test, Random) { +#if CONFIG_HIGHBITDEPTH + const int16_t maxval = + 
static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_)); +#else + const int16_t maxval = 255; +#endif + DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]); + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + int sum = 0; + for (int i = 0; i < kNumCoeffs; ++i) { + const int val = (i & 1) ? -rnd(maxval + 1) : rnd(maxval + 1); + input[i] = val; + sum += val; + } + output[0] = 0; + ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16)); + EXPECT_EQ(sum >> 1, output[0]); +} + +using std::tr1::make_tuple; + +#if CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P(C, Trans16x16DCT, + ::testing::Values(make_tuple(&aom_fdct16x16_c, + &aom_idct16x16_256_add_c, + 0, AOM_BITS_8))); +#else +INSTANTIATE_TEST_CASE_P(C, Trans16x16DCT, + ::testing::Values(make_tuple(&aom_fdct16x16_c, + &aom_idct16x16_256_add_c, + 0, AOM_BITS_8))); +#endif // CONFIG_HIGHBITDEPTH + +#if CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + C, Trans16x16HT, + ::testing::Values( + make_tuple(&av1_highbd_fht16x16_c, &iht16x16_10, 0, AOM_BITS_10), + make_tuple(&av1_highbd_fht16x16_c, &iht16x16_10, 1, AOM_BITS_10), + make_tuple(&av1_highbd_fht16x16_c, &iht16x16_10, 2, AOM_BITS_10), + make_tuple(&av1_highbd_fht16x16_c, &iht16x16_10, 3, AOM_BITS_10), + make_tuple(&av1_highbd_fht16x16_c, &iht16x16_12, 0, AOM_BITS_12), + make_tuple(&av1_highbd_fht16x16_c, &iht16x16_12, 1, AOM_BITS_12), + make_tuple(&av1_highbd_fht16x16_c, &iht16x16_12, 2, AOM_BITS_12), + make_tuple(&av1_highbd_fht16x16_c, &iht16x16_12, 3, AOM_BITS_12), + make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 0, AOM_BITS_8), + make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 1, AOM_BITS_8), + make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 2, AOM_BITS_8), + make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 3, AOM_BITS_8))); +INSTANTIATE_TEST_CASE_P( + C, PartialTrans16x16Test, + ::testing::Values(make_tuple(&aom_highbd_fdct16x16_1_c, AOM_BITS_8), + make_tuple(&aom_highbd_fdct16x16_1_c, 
AOM_BITS_10), + make_tuple(&aom_highbd_fdct16x16_1_c, AOM_BITS_12))); +#else +INSTANTIATE_TEST_CASE_P( + C, Trans16x16HT, + ::testing::Values( + make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 0, AOM_BITS_8), + make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 1, AOM_BITS_8), + make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 2, AOM_BITS_8), + make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 3, AOM_BITS_8))); +INSTANTIATE_TEST_CASE_P(C, PartialTrans16x16Test, + ::testing::Values(make_tuple(&aom_fdct16x16_1_c, + AOM_BITS_8))); +#endif // CONFIG_HIGHBITDEPTH + +#if HAVE_NEON_ASM && !CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + NEON, Trans16x16DCT, + ::testing::Values(make_tuple(&aom_fdct16x16_c, &aom_idct16x16_256_add_neon, + 0, AOM_BITS_8))); +#endif + +#if HAVE_SSE2 && !CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + SSE2, Trans16x16DCT, + ::testing::Values(make_tuple(&aom_fdct16x16_sse2, + &aom_idct16x16_256_add_sse2, 0, AOM_BITS_8))); +INSTANTIATE_TEST_CASE_P( + SSE2, Trans16x16HT, + ::testing::Values(make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, + 0, AOM_BITS_8), + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, + 1, AOM_BITS_8), + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, + 2, AOM_BITS_8), + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, + 3, AOM_BITS_8))); +INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test, + ::testing::Values(make_tuple(&aom_fdct16x16_1_sse2, + AOM_BITS_8))); +#endif // HAVE_SSE2 && !CONFIG_HIGHBITDEPTH + +#if HAVE_AVX2 && !CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P(AVX2, PartialTrans16x16Test, + ::testing::Values(make_tuple(&aom_fdct16x16_1_avx2, + AOM_BITS_8))); +#endif // HAVE_AVX2 && !CONFIG_HIGHBITDEPTH + +#if HAVE_SSE2 && CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P(SSE2, Trans16x16DCT, + ::testing::Values(make_tuple(&aom_fdct16x16_sse2, + &aom_idct16x16_256_add_c, + 0, AOM_BITS_8))); +INSTANTIATE_TEST_CASE_P( + SSE2, Trans16x16HT, + ::testing::Values( + 
make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c, 0, AOM_BITS_8), + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c, 1, AOM_BITS_8), + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c, 2, AOM_BITS_8), + make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c, 3, + AOM_BITS_8))); +// TODO(luoyi): +// For this test case, we should test function: aom_highbd_fdct16x16_1_sse2. +// However this function is not available yet. if we mistakely test +// aom_fdct16x16_1_sse2, it could only pass AOM_BITS_8/AOM_BITS_10 but not +// AOM_BITS_12. +INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test, + ::testing::Values(make_tuple(&aom_fdct16x16_1_sse2, + AOM_BITS_8))); +#endif // HAVE_SSE2 && CONFIG_HIGHBITDEPTH + +#if HAVE_MSA && !CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P(MSA, Trans16x16DCT, + ::testing::Values(make_tuple(&aom_fdct16x16_msa, + &aom_idct16x16_256_add_msa, + 0, AOM_BITS_8))); +#if !CONFIG_EXT_TX +// TODO(yaowu): re-enable this after msa versions are updated to match C. +INSTANTIATE_TEST_CASE_P( + DISABLED_MSA, Trans16x16HT, + ::testing::Values( + make_tuple(&av1_fht16x16_msa, &av1_iht16x16_256_add_msa, 0, AOM_BITS_8), + make_tuple(&av1_fht16x16_msa, &av1_iht16x16_256_add_msa, 1, AOM_BITS_8), + make_tuple(&av1_fht16x16_msa, &av1_iht16x16_256_add_msa, 2, AOM_BITS_8), + make_tuple(&av1_fht16x16_msa, &av1_iht16x16_256_add_msa, 3, + AOM_BITS_8))); +#endif // !CONFIG_EXT_TX +INSTANTIATE_TEST_CASE_P(MSA, PartialTrans16x16Test, + ::testing::Values(make_tuple(&aom_fdct16x16_1_msa, + AOM_BITS_8))); +#endif // HAVE_MSA && !CONFIG_HIGHBITDEPTH +} // namespace diff --git a/third_party/aom/test/dct32x32_test.cc b/third_party/aom/test/dct32x32_test.cc new file mode 100644 index 000000000..7c1db6501 --- /dev/null +++ b/third_party/aom/test/dct32x32_test.cc @@ -0,0 +1,438 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./av1_rtcd.h" +#include "./aom_config.h" +#include "./aom_dsp_rtcd.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "av1/common/entropy.h" +#include "aom/aom_codec.h" +#include "aom/aom_integer.h" +#include "aom_ports/mem.h" +#include "aom_ports/msvc.h" // for round() + +using libaom_test::ACMRandom; + +namespace { + +const int kNumCoeffs = 1024; +const double kPi = 3.141592653589793238462643383279502884; +void reference_32x32_dct_1d(const double in[32], double out[32]) { + const double kInvSqrt2 = 0.707106781186547524400844362104; + for (int k = 0; k < 32; k++) { + out[k] = 0.0; + for (int n = 0; n < 32; n++) + out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 64.0); + if (k == 0) out[k] = out[k] * kInvSqrt2; + } +} + +void reference_32x32_dct_2d(const int16_t input[kNumCoeffs], + double output[kNumCoeffs]) { + // First transform columns + for (int i = 0; i < 32; ++i) { + double temp_in[32], temp_out[32]; + for (int j = 0; j < 32; ++j) temp_in[j] = input[j * 32 + i]; + reference_32x32_dct_1d(temp_in, temp_out); + for (int j = 0; j < 32; ++j) output[j * 32 + i] = temp_out[j]; + } + // Then transform rows + for (int i = 0; i < 32; ++i) { + double temp_in[32], temp_out[32]; + for (int j = 0; j < 32; ++j) temp_in[j] = output[j + i * 32]; + 
reference_32x32_dct_1d(temp_in, temp_out); + // Scale by some magic number + for (int j = 0; j < 32; ++j) output[j + i * 32] = temp_out[j] / 4; + } +} + +typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride); +typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride); + +typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, aom_bit_depth_t> + Trans32x32Param; + +class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> { + public: + virtual ~Trans32x32Test() {} + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + version_ = GET_PARAM(2); // 0: high precision forward transform + // 1: low precision version for rd loop + bit_depth_ = GET_PARAM(3); + mask_ = (1 << bit_depth_) - 1; + } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + int version_; + aom_bit_depth_t bit_depth_; + int mask_; + FwdTxfmFunc fwd_txfm_; + InvTxfmFunc inv_txfm_; +}; + +TEST_P(Trans32x32Test, AccuracyCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + uint32_t max_error = 0; + int64_t total_error = 0; + const int count_test_block = 10000; + DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]); + DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]); + DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]); +#if CONFIG_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]); + DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]); +#endif + + for (int i = 0; i < count_test_block; ++i) { + // Initialize a test block with input range [-mask_, mask_]. 
+ for (int j = 0; j < kNumCoeffs; ++j) { + if (bit_depth_ == AOM_BITS_8) { + src[j] = rnd.Rand8(); + dst[j] = rnd.Rand8(); + test_input_block[j] = src[j] - dst[j]; +#if CONFIG_HIGHBITDEPTH + } else { + src16[j] = rnd.Rand16() & mask_; + dst16[j] = rnd.Rand16() & mask_; + test_input_block[j] = src16[j] - dst16[j]; +#endif + } + } + + ASM_REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32)); + if (bit_depth_ == AOM_BITS_8) { + ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32)); +#if CONFIG_HIGHBITDEPTH + } else { + ASM_REGISTER_STATE_CHECK( + inv_txfm_(test_temp_block, CONVERT_TO_BYTEPTR(dst16), 32)); +#endif + } + + for (int j = 0; j < kNumCoeffs; ++j) { +#if CONFIG_HIGHBITDEPTH + const int32_t diff = + bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j]; +#else + const int32_t diff = dst[j] - src[j]; +#endif + const uint32_t error = diff * diff; + if (max_error < error) max_error = error; + total_error += error; + } + } + + if (version_ == 1) { + max_error /= 2; + total_error /= 45; + } + + EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error) + << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1"; + + EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error) + << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block"; +} + +TEST_P(Trans32x32Test, CoeffCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 1000; + + DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]); + + for (int i = 0; i < count_test_block; ++i) { + for (int j = 0; j < kNumCoeffs; ++j) + input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_); + + const int stride = 32; + aom_fdct32x32_c(input_block, output_ref_block, stride); + ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride)); + + if (version_ == 0) { + for (int j = 0; j < kNumCoeffs; 
++j) + EXPECT_EQ(output_block[j], output_ref_block[j]) + << "Error: 32x32 FDCT versions have mismatched coefficients"; + } else { + for (int j = 0; j < kNumCoeffs; ++j) + EXPECT_GE(6, abs(output_block[j] - output_ref_block[j])) + << "Error: 32x32 FDCT rd has mismatched coefficients"; + } + } +} + +TEST_P(Trans32x32Test, MemCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 2000; + + DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]); + + for (int i = 0; i < count_test_block; ++i) { + // Initialize a test block with input range [-mask_, mask_]. + for (int j = 0; j < kNumCoeffs; ++j) { + input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_; + } + if (i == 0) { + for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_; + } else if (i == 1) { + for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_; + } + + const int stride = 32; + aom_fdct32x32_c(input_extreme_block, output_ref_block, stride); + ASM_REGISTER_STATE_CHECK( + fwd_txfm_(input_extreme_block, output_block, stride)); + + // The minimum quant value is 4. 
+ for (int j = 0; j < kNumCoeffs; ++j) { + if (version_ == 0) { + EXPECT_EQ(output_block[j], output_ref_block[j]) + << "Error: 32x32 FDCT versions have mismatched coefficients"; + } else { + EXPECT_GE(6, abs(output_block[j] - output_ref_block[j])) + << "Error: 32x32 FDCT rd has mismatched coefficients"; + } + EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_ref_block[j])) + << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE"; + EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j])) + << "Error: 32x32 FDCT has coefficient larger than " + << "4*DCT_MAX_VALUE"; + } + } +} + +TEST_P(Trans32x32Test, InverseAccuracy) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 1000; + DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]); + DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]); + DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]); +#if CONFIG_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]); + DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]); +#endif + + for (int i = 0; i < count_test_block; ++i) { + double out_r[kNumCoeffs]; + + // Initialize a test block with input range [-255, 255] + for (int j = 0; j < kNumCoeffs; ++j) { + if (bit_depth_ == AOM_BITS_8) { + src[j] = rnd.Rand8(); + dst[j] = rnd.Rand8(); + in[j] = src[j] - dst[j]; +#if CONFIG_HIGHBITDEPTH + } else { + src16[j] = rnd.Rand16() & mask_; + dst16[j] = rnd.Rand16() & mask_; + in[j] = src16[j] - dst16[j]; +#endif + } + } + + reference_32x32_dct_2d(in, out_r); + for (int j = 0; j < kNumCoeffs; ++j) + coeff[j] = static_cast<tran_low_t>(round(out_r[j])); + if (bit_depth_ == AOM_BITS_8) { + ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32)); +#if CONFIG_HIGHBITDEPTH + } else { + ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CONVERT_TO_BYTEPTR(dst16), 32)); +#endif + } + for (int j = 0; j < kNumCoeffs; ++j) { +#if CONFIG_HIGHBITDEPTH + const int diff = + bit_depth_ == AOM_BITS_8 ? 
dst[j] - src[j] : dst16[j] - src16[j]; +#else + const int diff = dst[j] - src[j]; +#endif + const int error = diff * diff; + EXPECT_GE(1, error) << "Error: 32x32 IDCT has error " << error + << " at index " << j; + } + } +} + +class PartialTrans32x32Test + : public ::testing::TestWithParam< + std::tr1::tuple<FwdTxfmFunc, aom_bit_depth_t> > { + public: + virtual ~PartialTrans32x32Test() {} + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + bit_depth_ = GET_PARAM(1); + } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + aom_bit_depth_t bit_depth_; + FwdTxfmFunc fwd_txfm_; +}; + +TEST_P(PartialTrans32x32Test, Extremes) { +#if CONFIG_HIGHBITDEPTH + const int16_t maxval = + static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_)); +#else + const int16_t maxval = 255; +#endif + const int minval = -maxval; + DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]); + + for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval; + output[0] = 0; + ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32)); + EXPECT_EQ((maxval * kNumCoeffs) >> 3, output[0]); + + for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval; + output[0] = 0; + ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32)); + EXPECT_EQ((minval * kNumCoeffs) >> 3, output[0]); +} + +TEST_P(PartialTrans32x32Test, Random) { +#if CONFIG_HIGHBITDEPTH + const int16_t maxval = + static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_)); +#else + const int16_t maxval = 255; +#endif + DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]); + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + int sum = 0; + for (int i = 0; i < kNumCoeffs; ++i) { + const int val = (i & 1) ? 
-rnd(maxval + 1) : rnd(maxval + 1); + input[i] = val; + sum += val; + } + output[0] = 0; + ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32)); + EXPECT_EQ(sum >> 3, output[0]); +} + +using std::tr1::make_tuple; + +#if CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + C, Trans32x32Test, + ::testing::Values(make_tuple(&aom_fdct32x32_c, &aom_idct32x32_1024_add_c, 0, + AOM_BITS_8), + make_tuple(&aom_fdct32x32_rd_c, &aom_idct32x32_1024_add_c, + 1, AOM_BITS_8))); +INSTANTIATE_TEST_CASE_P( + C, PartialTrans32x32Test, + ::testing::Values(make_tuple(&aom_highbd_fdct32x32_1_c, AOM_BITS_8), + make_tuple(&aom_highbd_fdct32x32_1_c, AOM_BITS_10), + make_tuple(&aom_highbd_fdct32x32_1_c, AOM_BITS_12))); +#else +INSTANTIATE_TEST_CASE_P( + C, Trans32x32Test, + ::testing::Values(make_tuple(&aom_fdct32x32_c, &aom_idct32x32_1024_add_c, 0, + AOM_BITS_8), + make_tuple(&aom_fdct32x32_rd_c, &aom_idct32x32_1024_add_c, + 1, AOM_BITS_8))); +INSTANTIATE_TEST_CASE_P(C, PartialTrans32x32Test, + ::testing::Values(make_tuple(&aom_fdct32x32_1_c, + AOM_BITS_8))); +#endif // CONFIG_HIGHBITDEPTH + +#if HAVE_NEON && !CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + NEON, Trans32x32Test, + ::testing::Values(make_tuple(&aom_fdct32x32_c, &aom_idct32x32_1024_add_neon, + 0, AOM_BITS_8), + make_tuple(&aom_fdct32x32_rd_c, + &aom_idct32x32_1024_add_neon, 1, AOM_BITS_8))); +#endif // HAVE_NEON && !CONFIG_HIGHBITDEPTH + +#if HAVE_SSE2 && !CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + SSE2, Trans32x32Test, + ::testing::Values(make_tuple(&aom_fdct32x32_sse2, + &aom_idct32x32_1024_add_sse2, 0, AOM_BITS_8), + make_tuple(&aom_fdct32x32_rd_sse2, + &aom_idct32x32_1024_add_sse2, 1, AOM_BITS_8))); +INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test, + ::testing::Values(make_tuple(&aom_fdct32x32_1_sse2, + AOM_BITS_8))); +#endif // HAVE_SSE2 && !CONFIG_HIGHBITDEPTH + +#if HAVE_AVX2 && !CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P(AVX2, PartialTrans32x32Test, + ::testing::Values(make_tuple(&aom_fdct32x32_1_avx2, + 
AOM_BITS_8))); +#endif // HAVE_AVX2 && !CONFIG_HIGHBITDEPTH + +#if HAVE_SSE2 && CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + SSE2, Trans32x32Test, + ::testing::Values(make_tuple(&aom_fdct32x32_sse2, &aom_idct32x32_1024_add_c, + 0, AOM_BITS_8), + make_tuple(&aom_fdct32x32_rd_sse2, + &aom_idct32x32_1024_add_c, 1, AOM_BITS_8))); +INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test, + ::testing::Values(make_tuple(&aom_fdct32x32_1_sse2, + AOM_BITS_8))); +#endif // HAVE_SSE2 && CONFIG_HIGHBITDEPTH + +#if HAVE_AVX2 && !CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + AVX2, Trans32x32Test, + ::testing::Values(make_tuple(&aom_fdct32x32_avx2, + &aom_idct32x32_1024_add_sse2, 0, AOM_BITS_8), + make_tuple(&aom_fdct32x32_rd_avx2, + &aom_idct32x32_1024_add_sse2, 1, AOM_BITS_8))); +#endif // HAVE_AVX2 && !CONFIG_HIGHBITDEPTH + +#if HAVE_AVX2 && CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + AVX2, Trans32x32Test, + ::testing::Values(make_tuple(&aom_fdct32x32_avx2, + &aom_idct32x32_1024_add_sse2, 0, AOM_BITS_8), + make_tuple(&aom_fdct32x32_rd_avx2, + &aom_idct32x32_1024_add_sse2, 1, AOM_BITS_8))); +#endif // HAVE_AVX2 && CONFIG_HIGHBITDEPTH + +#if HAVE_MSA && !CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + MSA, Trans32x32Test, + ::testing::Values(make_tuple(&aom_fdct32x32_msa, + &aom_idct32x32_1024_add_msa, 0, AOM_BITS_8), + make_tuple(&aom_fdct32x32_rd_msa, + &aom_idct32x32_1024_add_msa, 1, AOM_BITS_8))); +INSTANTIATE_TEST_CASE_P(MSA, PartialTrans32x32Test, + ::testing::Values(make_tuple(&aom_fdct32x32_1_msa, + AOM_BITS_8))); +#endif // HAVE_MSA && !CONFIG_HIGHBITDEPTH +} // namespace diff --git a/third_party/aom/test/decode_api_test.cc b/third_party/aom/test/decode_api_test.cc new file mode 100644 index 000000000..6bd72a45d --- /dev/null +++ b/third_party/aom/test/decode_api_test.cc @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_config.h" +#include "test/ivf_video_source.h" +#include "aom/aomdx.h" +#include "aom/aom_decoder.h" + +namespace { + +#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0])) + +TEST(DecodeAPI, InvalidParams) { + static const aom_codec_iface_t *kCodecs[] = { +#if CONFIG_AV1_DECODER + &aom_codec_av1_dx_algo, +#endif + }; + uint8_t buf[1] = { 0 }; + aom_codec_ctx_t dec; + + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_dec_init(NULL, NULL, NULL, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_dec_init(&dec, NULL, NULL, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_decode(NULL, NULL, 0, NULL, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_decode(NULL, buf, 0, NULL, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_decode(NULL, buf, NELEMENTS(buf), NULL, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_decode(NULL, NULL, NELEMENTS(buf), NULL, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_destroy(NULL)); + EXPECT_TRUE(aom_codec_error(NULL) != NULL); + + for (int i = 0; i < NELEMENTS(kCodecs); ++i) { + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_dec_init(NULL, kCodecs[i], NULL, 0)); + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_dec_init(&dec, kCodecs[i], NULL, 0)); + EXPECT_EQ(AOM_CODEC_UNSUP_BITSTREAM, + aom_codec_decode(&dec, buf, NELEMENTS(buf), NULL, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_decode(&dec, NULL, NELEMENTS(buf), NULL, 0)); + 
EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_decode(&dec, buf, 0, NULL, 0)); + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&dec)); + } +} + +} // namespace diff --git a/third_party/aom/test/decode_perf_test.cc b/third_party/aom/test/decode_perf_test.cc new file mode 100644 index 000000000..ede4f8849 --- /dev/null +++ b/third_party/aom/test/decode_perf_test.cc @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include <string> +#include "test/codec_factory.h" +#include "test/decode_test_driver.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/ivf_video_source.h" +#include "test/md5_helper.h" +#include "test/util.h" +#include "test/webm_video_source.h" +#include "aom_ports/aom_timer.h" +#include "./ivfenc.h" +#include "./aom_version.h" + +using std::tr1::make_tuple; + +namespace { + +#define VIDEO_NAME 0 +#define THREADS 1 + +const int kMaxPsnr = 100; +const double kUsecsInSec = 1000000.0; +const char kNewEncodeOutputFile[] = "new_encode.ivf"; + +/* + DecodePerfTest takes a tuple of filename + number of threads to decode with + */ +typedef std::tr1::tuple<const char *, unsigned> DecodePerfParam; + +// TODO(jimbankoski): Add actual test vectors here when available. +// const DecodePerfParam kAV1DecodePerfVectors[] = {}; + +/* + In order to reflect real world performance as much as possible, Perf tests + *DO NOT* do any correctness checks. Please run them alongside correctness + tests to ensure proper codec integrity. 
Furthermore, in this test we + deliberately limit the amount of system calls we make to avoid OS + preemption. + + TODO(joshualitt) create a more detailed perf measurement test to collect + power/temp/min max frame decode times/etc + */ + +class DecodePerfTest : public ::testing::TestWithParam<DecodePerfParam> {}; + +TEST_P(DecodePerfTest, PerfTest) { + const char *const video_name = GET_PARAM(VIDEO_NAME); + const unsigned threads = GET_PARAM(THREADS); + + libaom_test::WebMVideoSource video(video_name); + video.Init(); + + aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t(); + cfg.threads = threads; + libaom_test::AV1Decoder decoder(cfg, 0); + + aom_usec_timer t; + aom_usec_timer_start(&t); + + for (video.Begin(); video.cxdata() != NULL; video.Next()) { + decoder.DecodeFrame(video.cxdata(), video.frame_size()); + } + + aom_usec_timer_mark(&t); + const double elapsed_secs = double(aom_usec_timer_elapsed(&t)) / kUsecsInSec; + const unsigned frames = video.frame_number(); + const double fps = double(frames) / elapsed_secs; + + printf("{\n"); + printf("\t\"type\" : \"decode_perf_test\",\n"); + printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP); + printf("\t\"videoName\" : \"%s\",\n", video_name); + printf("\t\"threadCount\" : %u,\n", threads); + printf("\t\"decodeTimeSecs\" : %f,\n", elapsed_secs); + printf("\t\"totalFrames\" : %u,\n", frames); + printf("\t\"framesPerSecond\" : %f\n", fps); + printf("}\n"); +} + +// TODO(jimbankoski): Enabled when we have actual AV1 Decode vectors. 
+// INSTANTIATE_TEST_CASE_P(AV1, DecodePerfTest, +// ::testing::ValuesIn(kAV1DecodePerfVectors)); + +class AV1NewEncodeDecodePerfTest + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWithParam<libaom_test::TestMode> { + protected: + AV1NewEncodeDecodePerfTest() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), speed_(0), + outfile_(0), out_frames_(0) {} + + virtual ~AV1NewEncodeDecodePerfTest() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(encoding_mode_); + + cfg_.g_lag_in_frames = 25; + cfg_.rc_min_quantizer = 2; + cfg_.rc_max_quantizer = 56; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_undershoot_pct = 50; + cfg_.rc_overshoot_pct = 50; + cfg_.rc_buf_sz = 1000; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 600; + cfg_.rc_resize_allowed = 0; + cfg_.rc_end_usage = AOM_VBR; + } + + virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) { + if (video->frame() == 1) { + encoder->Control(AOME_SET_CPUUSED, speed_); + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1); + encoder->Control(AV1E_SET_TILE_COLUMNS, 2); + } + } + + virtual void BeginPassHook(unsigned int /*pass*/) { + const std::string data_path = getenv("LIBAOM_TEST_DATA_PATH"); + const std::string path_to_source = data_path + "/" + kNewEncodeOutputFile; + outfile_ = fopen(path_to_source.c_str(), "wb"); + ASSERT_TRUE(outfile_ != NULL); + } + + virtual void EndPassHook() { + if (outfile_ != NULL) { + if (!fseek(outfile_, 0, SEEK_SET)) + ivf_write_file_header(outfile_, &cfg_, AV1_FOURCC, out_frames_); + fclose(outfile_); + outfile_ = NULL; + } + } + + virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) { + ++out_frames_; + + // Write initial file header if first frame. + if (pkt->data.frame.pts == 0) + ivf_write_file_header(outfile_, &cfg_, AV1_FOURCC, out_frames_); + + // Write frame header and data. 
+ ivf_write_frame_header(outfile_, out_frames_, pkt->data.frame.sz); + ASSERT_EQ(fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_), + pkt->data.frame.sz); + } + + virtual bool DoDecode() { return false; } + + void set_speed(unsigned int speed) { speed_ = speed; } + + private: + libaom_test::TestMode encoding_mode_; + uint32_t speed_; + FILE *outfile_; + uint32_t out_frames_; +}; + +struct EncodePerfTestVideo { + EncodePerfTestVideo(const char *name_, uint32_t width_, uint32_t height_, + uint32_t bitrate_, int frames_) + : name(name_), width(width_), height(height_), bitrate(bitrate_), + frames(frames_) {} + const char *name; + uint32_t width; + uint32_t height; + uint32_t bitrate; + int frames; +}; + +const EncodePerfTestVideo kAV1EncodePerfTestVectors[] = { + EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470), +}; + +TEST_P(AV1NewEncodeDecodePerfTest, PerfTest) { + SetUp(); + + // TODO(JBB): Make this work by going through the set of given files. + const int i = 0; + const aom_rational timebase = { 33333333, 1000000000 }; + cfg_.g_timebase = timebase; + cfg_.rc_target_bitrate = kAV1EncodePerfTestVectors[i].bitrate; + + init_flags_ = AOM_CODEC_USE_PSNR; + + const char *video_name = kAV1EncodePerfTestVectors[i].name; + libaom_test::I420VideoSource video( + video_name, kAV1EncodePerfTestVectors[i].width, + kAV1EncodePerfTestVectors[i].height, timebase.den, timebase.num, 0, + kAV1EncodePerfTestVectors[i].frames); + set_speed(2); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + const uint32_t threads = 4; + + libaom_test::IVFVideoSource decode_video(kNewEncodeOutputFile); + decode_video.Init(); + + aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t(); + cfg.threads = threads; + libaom_test::AV1Decoder decoder(cfg, 0); + + aom_usec_timer t; + aom_usec_timer_start(&t); + + for (decode_video.Begin(); decode_video.cxdata() != NULL; + decode_video.Next()) { + decoder.DecodeFrame(decode_video.cxdata(), decode_video.frame_size()); + } + + 
aom_usec_timer_mark(&t); + const double elapsed_secs = + static_cast<double>(aom_usec_timer_elapsed(&t)) / kUsecsInSec; + const unsigned decode_frames = decode_video.frame_number(); + const double fps = static_cast<double>(decode_frames) / elapsed_secs; + + printf("{\n"); + printf("\t\"type\" : \"decode_perf_test\",\n"); + printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP); + printf("\t\"videoName\" : \"%s\",\n", kNewEncodeOutputFile); + printf("\t\"threadCount\" : %u,\n", threads); + printf("\t\"decodeTimeSecs\" : %f,\n", elapsed_secs); + printf("\t\"totalFrames\" : %u,\n", decode_frames); + printf("\t\"framesPerSecond\" : %f\n", fps); + printf("}\n"); +} + +AV1_INSTANTIATE_TEST_CASE(AV1NewEncodeDecodePerfTest, + ::testing::Values(::libaom_test::kTwoPassGood)); +} // namespace diff --git a/third_party/aom/test/decode_test_driver.cc b/third_party/aom/test/decode_test_driver.cc new file mode 100644 index 000000000..35c28eafd --- /dev/null +++ b/third_party/aom/test/decode_test_driver.cc @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+*/ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/codec_factory.h" +#include "test/decode_test_driver.h" +#include "test/register_state_check.h" +#include "test/video_source.h" + +namespace libaom_test { + +const char kVP8Name[] = "WebM Project VP8"; +const char kAV1Name[] = "AOMedia Project AV1 Decoder"; + +aom_codec_err_t Decoder::PeekStream(const uint8_t *cxdata, size_t size, + aom_codec_stream_info_t *stream_info) { + return aom_codec_peek_stream_info( + CodecInterface(), cxdata, static_cast<unsigned int>(size), stream_info); +} + +aom_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size) { + return DecodeFrame(cxdata, size, NULL); +} + +aom_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size, + void *user_priv) { + aom_codec_err_t res_dec; + InitOnce(); + API_REGISTER_STATE_CHECK( + res_dec = aom_codec_decode( + &decoder_, cxdata, static_cast<unsigned int>(size), user_priv, 0)); + return res_dec; +} + +bool Decoder::IsVP8() const { + const char *codec_name = GetDecoderName(); + return strncmp(kVP8Name, codec_name, sizeof(kVP8Name) - 1) == 0; +} + +bool Decoder::IsAV1() const { + const char *codec_name = GetDecoderName(); + return strncmp(kAV1Name, codec_name, sizeof(kAV1Name) - 1) == 0; +} + +void DecoderTest::HandlePeekResult(Decoder *const decoder, + CompressedVideoSource *video, + const aom_codec_err_t res_peek) { + const bool is_vp8 = decoder->IsVP8(); + if (is_vp8) { + /* Vp8's implementation of PeekStream returns an error if the frame you + * pass it is not a keyframe, so we only expect AOM_CODEC_OK on the first + * frame, which must be a keyframe. */ + if (video->frame_number() == 0) + ASSERT_EQ(AOM_CODEC_OK, res_peek) << "Peek return failed: " + << aom_codec_err_to_string(res_peek); + } else { + /* The Av1 implementation of PeekStream returns an error only if the + * data passed to it isn't a valid Av1 chunk. 
*/ + ASSERT_EQ(AOM_CODEC_OK, res_peek) << "Peek return failed: " + << aom_codec_err_to_string(res_peek); + } +} + +void DecoderTest::RunLoop(CompressedVideoSource *video, + const aom_codec_dec_cfg_t &dec_cfg) { + Decoder *const decoder = codec_->CreateDecoder(dec_cfg, flags_); + ASSERT_TRUE(decoder != NULL); + bool end_of_file = false; + + // Decode frames. + for (video->Begin(); !::testing::Test::HasFailure() && !end_of_file; + video->Next()) { + PreDecodeFrameHook(*video, decoder); + + aom_codec_stream_info_t stream_info; + stream_info.sz = sizeof(stream_info); + + if (video->cxdata() != NULL) { + const aom_codec_err_t res_peek = decoder->PeekStream( + video->cxdata(), video->frame_size(), &stream_info); + HandlePeekResult(decoder, video, res_peek); + ASSERT_FALSE(::testing::Test::HasFailure()); + + aom_codec_err_t res_dec = + decoder->DecodeFrame(video->cxdata(), video->frame_size()); + if (!HandleDecodeResult(res_dec, decoder)) break; + } else { + // Signal end of the file to the decoder. 
+ const aom_codec_err_t res_dec = decoder->DecodeFrame(NULL, 0); + ASSERT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError(); + end_of_file = true; + } + + DxDataIterator dec_iter = decoder->GetDxData(); + const aom_image_t *img = NULL; + + // Get decompressed data + while ((img = dec_iter.Next())) + DecompressedFrameHook(*img, video->frame_number()); + } + delete decoder; +} + +void DecoderTest::RunLoop(CompressedVideoSource *video) { + aom_codec_dec_cfg_t dec_cfg = aom_codec_dec_cfg_t(); + RunLoop(video, dec_cfg); +} + +void DecoderTest::set_cfg(const aom_codec_dec_cfg_t &dec_cfg) { + memcpy(&cfg_, &dec_cfg, sizeof(cfg_)); +} + +void DecoderTest::set_flags(const aom_codec_flags_t flags) { flags_ = flags; } + +} // namespace libaom_test diff --git a/third_party/aom/test/decode_test_driver.h b/third_party/aom/test/decode_test_driver.h new file mode 100644 index 000000000..e7deb389c --- /dev/null +++ b/third_party/aom/test/decode_test_driver.h @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#ifndef TEST_DECODE_TEST_DRIVER_H_ +#define TEST_DECODE_TEST_DRIVER_H_ +#include <cstring> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "./aom_config.h" +#include "aom/aom_decoder.h" + +namespace libaom_test { + +class CodecFactory; +class CompressedVideoSource; + +// Provides an object to handle decoding output +class DxDataIterator { + public: + explicit DxDataIterator(aom_codec_ctx_t *decoder) + : decoder_(decoder), iter_(NULL) {} + + const aom_image_t *Next() { return aom_codec_get_frame(decoder_, &iter_); } + + private: + aom_codec_ctx_t *decoder_; + aom_codec_iter_t iter_; +}; + +// Provides a simplified interface to manage one video decoding. +// Similar to Encoder class, the exact services should be added +// as more tests are added. +class Decoder { + public: + explicit Decoder(aom_codec_dec_cfg_t cfg) + : cfg_(cfg), flags_(0), init_done_(false) { + memset(&decoder_, 0, sizeof(decoder_)); + } + + Decoder(aom_codec_dec_cfg_t cfg, const aom_codec_flags_t flag) + : cfg_(cfg), flags_(flag), init_done_(false) { + memset(&decoder_, 0, sizeof(decoder_)); + } + + virtual ~Decoder() { aom_codec_destroy(&decoder_); } + + aom_codec_err_t PeekStream(const uint8_t *cxdata, size_t size, + aom_codec_stream_info_t *stream_info); + + aom_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size); + + aom_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size, + void *user_priv); + + DxDataIterator GetDxData() { return DxDataIterator(&decoder_); } + + void Control(int ctrl_id, int arg) { Control(ctrl_id, arg, AOM_CODEC_OK); } + + void Control(int ctrl_id, const void *arg) { + InitOnce(); + const aom_codec_err_t res = aom_codec_control_(&decoder_, ctrl_id, arg); + ASSERT_EQ(AOM_CODEC_OK, res) << DecodeError(); + } + + void Control(int ctrl_id, int arg, aom_codec_err_t expected_value) { + InitOnce(); + const aom_codec_err_t res = aom_codec_control_(&decoder_, ctrl_id, arg); + ASSERT_EQ(expected_value, res) << DecodeError(); + 
} + + const char *DecodeError() { + const char *detail = aom_codec_error_detail(&decoder_); + return detail ? detail : aom_codec_error(&decoder_); + } + + // Passes the external frame buffer information to libaom. + aom_codec_err_t SetFrameBufferFunctions( + aom_get_frame_buffer_cb_fn_t cb_get, + aom_release_frame_buffer_cb_fn_t cb_release, void *user_priv) { + InitOnce(); + return aom_codec_set_frame_buffer_functions(&decoder_, cb_get, cb_release, + user_priv); + } + + const char *GetDecoderName() const { + return aom_codec_iface_name(CodecInterface()); + } + + bool IsVP8() const; + + bool IsAV1() const; + + aom_codec_ctx_t *GetDecoder() { return &decoder_; } + + protected: + virtual aom_codec_iface_t *CodecInterface() const = 0; + + void InitOnce() { + if (!init_done_) { + const aom_codec_err_t res = + aom_codec_dec_init(&decoder_, CodecInterface(), &cfg_, flags_); + ASSERT_EQ(AOM_CODEC_OK, res) << DecodeError(); + init_done_ = true; + } + } + + aom_codec_ctx_t decoder_; + aom_codec_dec_cfg_t cfg_; + aom_codec_flags_t flags_; + bool init_done_; +}; + +// Common test functionality for all Decoder tests. +class DecoderTest { + public: + // Main decoding loop + virtual void RunLoop(CompressedVideoSource *video); + virtual void RunLoop(CompressedVideoSource *video, + const aom_codec_dec_cfg_t &dec_cfg); + + virtual void set_cfg(const aom_codec_dec_cfg_t &dec_cfg); + virtual void set_flags(const aom_codec_flags_t flags); + + // Hook to be called before decompressing every frame. + virtual void PreDecodeFrameHook(const CompressedVideoSource & /*video*/, + Decoder * /*decoder*/) {} + + // Hook to be called to handle decode result. Return true to continue. + virtual bool HandleDecodeResult(const aom_codec_err_t res_dec, + Decoder *decoder) { + EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError(); + return AOM_CODEC_OK == res_dec; + } + + // Hook to be called on every decompressed frame. 
+ virtual void DecompressedFrameHook(const aom_image_t & /*img*/, + const unsigned int /*frame_number*/) {} + + // Hook to be called on peek result + virtual void HandlePeekResult(Decoder *const decoder, + CompressedVideoSource *video, + const aom_codec_err_t res_peek); + + protected: + explicit DecoderTest(const CodecFactory *codec) + : codec_(codec), cfg_(), flags_(0) {} + + virtual ~DecoderTest() {} + + const CodecFactory *codec_; + aom_codec_dec_cfg_t cfg_; + aom_codec_flags_t flags_; +}; + +} // namespace libaom_test + +#endif // TEST_DECODE_TEST_DRIVER_H_ diff --git a/third_party/aom/test/decode_to_md5.sh b/third_party/aom/test/decode_to_md5.sh new file mode 100755 index 000000000..44c9f5f05 --- /dev/null +++ b/third_party/aom/test/decode_to_md5.sh @@ -0,0 +1,67 @@ +#!/bin/sh +## Copyright (c) 2016, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## +## This file tests the libaom decode_to_md5 example. To add new tests to this +## file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to decode_to_md5_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +# Environment check: Make sure input is available: +# $AOM_IVF_FILE and $AV1_IVF_FILE are required. +decode_to_md5_verify_environment() { + if [ "$(av1_encode_available)" != "yes" ] && [ ! -e "${AV1_IVF_FILE}" ]; then + return 1 + fi +} + +# Runs decode_to_md5 on $1 and captures the md5 sum for the final frame. $2 is +# interpreted as codec name and used solely to name the output file. 
$3 is the +# expected md5 sum: It must match that of the final frame. +decode_to_md5() { + local decoder="${LIBAOM_BIN_PATH}/decode_to_md5${AOM_TEST_EXE_SUFFIX}" + local input_file="$1" + local codec="$2" + local expected_md5="$3" + local output_file="${AOM_TEST_OUTPUT_DIR}/decode_to_md5_${codec}" + + if [ ! -x "${decoder}" ]; then + elog "${decoder} does not exist or is not executable." + return 1 + fi + + eval "${AOM_TEST_PREFIX}" "${decoder}" "${input_file}" "${output_file}" \ + ${devnull} + + [ -e "${output_file}" ] || return 1 + + local md5_last_frame="$(tail -n1 "${output_file}" | awk '{print $1}')" + local actual_md5="$(echo "${md5_last_frame}" | awk '{print $1}')" + [ "${actual_md5}" = "${expected_md5}" ] || return 1 +} + +decode_to_md5_av1() { + # expected MD5 sum for the last frame. + local expected_md5="26d3ef1d60754a1f6acb603c3763efbe" + local file="${AV1_IVF_FILE}" + + if [ "$(av1_decode_available)" = "yes" ]; then + if [ ! -e "${AV1_IVF_FILE}" ]; then + file="${AOM_TEST_OUTPUT_DIR}/test_encode.ivf" + encode_yuv_raw_input_av1 "${file}" --ivf + fi + decode_to_md5 "${file}" "av1" "${expected_md5}" + fi +} + +decode_to_md5_tests="decode_to_md5_av1" + +run_tests decode_to_md5_verify_environment "${decode_to_md5_tests}" diff --git a/third_party/aom/test/decode_with_drops.sh b/third_party/aom/test/decode_with_drops.sh new file mode 100755 index 000000000..5978312f2 --- /dev/null +++ b/third_party/aom/test/decode_with_drops.sh @@ -0,0 +1,67 @@ +#!/bin/sh +## Copyright (c) 2016, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. 
If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## +## This file tests the libaom decode_with_drops example. To add new tests to +## this file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to decode_with_drops_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +# Environment check: Make sure input is available: +# $AOM_IVF_FILE and $AV1_IVF_FILE are required. +decode_with_drops_verify_environment() { + if [ "$(av1_encode_available)" != "yes" ] && [ ! -e "${AV1_IVF_FILE}" ]; then + return 1 + fi +} + +# Runs decode_with_drops on $1, $2 is interpreted as codec name and used solely +# to name the output file. $3 is the drop mode, and is passed directly to +# decode_with_drops. +decode_with_drops() { + local decoder="${LIBAOM_BIN_PATH}/decode_with_drops${AOM_TEST_EXE_SUFFIX}" + local input_file="$1" + local codec="$2" + local output_file="${AOM_TEST_OUTPUT_DIR}/decode_with_drops_${codec}" + local drop_mode="$3" + + if [ ! -x "${decoder}" ]; then + elog "${decoder} does not exist or is not executable." + return 1 + fi + + eval "${AOM_TEST_PREFIX}" "${decoder}" "${input_file}" "${output_file}" \ + "${drop_mode}" ${devnull} + + [ -e "${output_file}" ] || return 1 +} + + +# Decodes $AV1_IVF_FILE while dropping frames, twice: once in sequence mode, +# and once in pattern mode. +decode_with_drops_av1() { + if [ "$(av1_decode_available)" = "yes" ]; then + local file="${AV1_IVF_FILE}" + if [ ! -e "${AV1_IVF_FILE}" ]; then + file="${AOM_TEST_OUTPUT_DIR}/test_encode.ivf" + encode_yuv_raw_input_av1 "${file}" --ivf + fi + # Drop frames 2 and 3. + decode_with_drops "${file}" "av1" "2-3" + + # Test pattern mode: Drop 3 of every 4 frames. 
+ decode_with_drops "${file}" "av1" "3/4" + fi +} + +decode_with_drops_tests="decode_with_drops_av1" + +run_tests decode_with_drops_verify_environment "${decode_with_drops_tests}" diff --git a/third_party/aom/test/dering_test.cc b/third_party/aom/test/dering_test.cc new file mode 100644 index 000000000..195a60ff8 --- /dev/null +++ b/third_party/aom/test/dering_test.cc @@ -0,0 +1,388 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include <cstdlib> +#include <string> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_config.h" +#include "./av1_rtcd.h" +#include "aom_ports/aom_timer.h" +#include "av1/common/od_dering.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/util.h" + +using libaom_test::ACMRandom; + +namespace { + +typedef std::tr1::tuple<od_filter_dering_direction_func, + od_filter_dering_direction_func, int> + dering_dir_param_t; + +class CDEFDeringDirTest : public ::testing::TestWithParam<dering_dir_param_t> { + public: + virtual ~CDEFDeringDirTest() {} + virtual void SetUp() { + dering = GET_PARAM(0); + ref_dering = GET_PARAM(1); + bsize = GET_PARAM(2); + } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + int bsize; + od_filter_dering_direction_func dering; + od_filter_dering_direction_func ref_dering; +}; + +typedef CDEFDeringDirTest CDEFDeringSpeedTest; + +void test_dering(int bsize, int iterations, + 
od_filter_dering_direction_func dering, + od_filter_dering_direction_func ref_dering) { + const int size = 8; + const int ysize = size + 2 * OD_FILT_VBORDER; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, uint16_t, s[ysize * OD_FILT_BSTRIDE]); + DECLARE_ALIGNED(16, static uint16_t, d[size * size]); + DECLARE_ALIGNED(16, static uint16_t, ref_d[size * size]); + memset(ref_d, 0, sizeof(ref_d)); + memset(d, 0, sizeof(d)); + + int error = 0, threshold = 0, dir; + int boundary, damping, depth, bits, level, count, + errdepth = 0, errthreshold = 0, errboundary = 0, errdamping = 0; + unsigned int pos = 0; + + for (boundary = 0; boundary < 16; boundary++) { + for (depth = 8; depth <= 12; depth += 2) { + for (damping = 5 + depth - 8; damping < 7 + depth - 8; damping++) { + for (count = 0; count < iterations; count++) { + for (level = 0; level < (1 << depth) && !error; + level += (1 + 4 * !!boundary) << (depth - 8)) { + for (bits = 1; bits <= depth && !error; bits++) { + for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++) + s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0, + (1 << depth) - 1); + if (boundary) { + if (boundary & 1) { // Left + for (int i = 0; i < ysize; i++) + for (int j = 0; j < OD_FILT_HBORDER; j++) + s[i * OD_FILT_BSTRIDE + j] = OD_DERING_VERY_LARGE; + } + if (boundary & 2) { // Right + for (int i = 0; i < ysize; i++) + for (int j = OD_FILT_HBORDER + size; j < OD_FILT_BSTRIDE; + j++) + s[i * OD_FILT_BSTRIDE + j] = OD_DERING_VERY_LARGE; + } + if (boundary & 4) { // Above + for (int i = 0; i < OD_FILT_VBORDER; i++) + for (int j = 0; j < OD_FILT_BSTRIDE; j++) + s[i * OD_FILT_BSTRIDE + j] = OD_DERING_VERY_LARGE; + } + if (boundary & 8) { // Below + for (int i = OD_FILT_VBORDER + size; i < ysize; i++) + for (int j = 0; j < OD_FILT_BSTRIDE; j++) + s[i * OD_FILT_BSTRIDE + j] = OD_DERING_VERY_LARGE; + } + } + for (dir = 0; dir < 8; dir++) { + for (threshold = 0; threshold < 64 << (depth - 8) && !error; + threshold += (1 + 4 * 
!!boundary) << (depth - 8)) { + ref_dering(ref_d, size, s + OD_FILT_HBORDER + + OD_FILT_VBORDER * OD_FILT_BSTRIDE, + threshold, dir, damping); + // If dering and ref_dering are the same, we're just testing + // speed + if (dering != ref_dering) + ASM_REGISTER_STATE_CHECK(dering( + d, size, + s + OD_FILT_HBORDER + OD_FILT_VBORDER * OD_FILT_BSTRIDE, + threshold, dir, damping)); + if (ref_dering != dering) { + for (pos = 0; pos < sizeof(d) / sizeof(*d) && !error; + pos++) { + error = ref_d[pos] != d[pos]; + errdepth = depth; + errthreshold = threshold; + errboundary = boundary; + errdamping = damping; + } + } + } + } + } + } + } + } + } + } + + pos--; + EXPECT_EQ(0, error) << "Error: CDEFDeringDirTest, SIMD and C mismatch." + << std::endl + << "First error at " << pos % size << "," << pos / size + << " (" << (int16_t)ref_d[pos] << " : " << (int16_t)d[pos] + << ") " << std::endl + << "threshold: " << errthreshold << std::endl + << "damping: " << errdamping << std::endl + << "depth: " << errdepth << std::endl + << "size: " << bsize << std::endl + << "boundary: " << errboundary << std::endl + << std::endl; +} + +void test_dering_speed(int bsize, int iterations, + od_filter_dering_direction_func dering, + od_filter_dering_direction_func ref_dering) { + aom_usec_timer ref_timer; + aom_usec_timer timer; + + aom_usec_timer_start(&ref_timer); + test_dering(bsize, iterations, ref_dering, ref_dering); + aom_usec_timer_mark(&ref_timer); + int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer); + + aom_usec_timer_start(&timer); + test_dering(bsize, iterations, dering, dering); + aom_usec_timer_mark(&timer); + int elapsed_time = (int)aom_usec_timer_elapsed(&timer); + +#if 0 + std::cout << "[ ] C time = " << ref_elapsed_time / 1000 + << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl; +#endif + + EXPECT_GT(ref_elapsed_time, elapsed_time) + << "Error: CDEFDeringSpeedTest, SIMD slower than C." 
<< std::endl + << "C time: " << ref_elapsed_time << " us" << std::endl + << "SIMD time: " << elapsed_time << " us" << std::endl; +} + +typedef int (*find_dir_t)(const od_dering_in *img, int stride, int32_t *var, + int coeff_shift); + +typedef std::tr1::tuple<find_dir_t, find_dir_t> find_dir_param_t; + +class CDEFDeringFindDirTest + : public ::testing::TestWithParam<find_dir_param_t> { + public: + virtual ~CDEFDeringFindDirTest() {} + virtual void SetUp() { + finddir = GET_PARAM(0); + ref_finddir = GET_PARAM(1); + } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + find_dir_t finddir; + find_dir_t ref_finddir; +}; + +typedef CDEFDeringFindDirTest CDEFDeringFindDirSpeedTest; + +void test_finddir(int (*finddir)(const od_dering_in *img, int stride, + int32_t *var, int coeff_shift), + int (*ref_finddir)(const od_dering_in *img, int stride, + int32_t *var, int coeff_shift)) { + const int size = 8; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, uint16_t, s[size * size]); + + int error = 0; + int depth, bits, level, count, errdepth = 0; + int ref_res = 0, res = 0; + int32_t ref_var = 0, var = 0; + + for (depth = 8; depth <= 12 && !error; depth += 2) { + for (count = 0; count < 512 && !error; count++) { + for (level = 0; level < (1 << depth) && !error; + level += 1 << (depth - 8)) { + for (bits = 1; bits <= depth && !error; bits++) { + for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++) + s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0, + (1 << depth) - 1); + for (int c = 0; c < 1 + 9 * (finddir == ref_finddir); c++) + ref_res = ref_finddir(s, size, &ref_var, depth - 8); + if (finddir != ref_finddir) + ASM_REGISTER_STATE_CHECK(res = finddir(s, size, &var, depth - 8)); + if (ref_finddir != finddir) { + if (res != ref_res || var != ref_var) error = 1; + errdepth = depth; + } + } + } + } + } + + EXPECT_EQ(0, error) << "Error: CDEFDeringFindDirTest, SIMD and C mismatch." 
+ << std::endl + << "return: " << res << " : " << ref_res << std::endl + << "var: " << var << " : " << ref_var << std::endl + << "depth: " << errdepth << std::endl + << std::endl; +} + +void test_finddir_speed(int (*finddir)(const od_dering_in *img, int stride, + int32_t *var, int coeff_shift), + int (*ref_finddir)(const od_dering_in *img, int stride, + int32_t *var, int coeff_shift)) { + aom_usec_timer ref_timer; + aom_usec_timer timer; + + aom_usec_timer_start(&ref_timer); + test_finddir(ref_finddir, ref_finddir); + aom_usec_timer_mark(&ref_timer); + int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer); + + aom_usec_timer_start(&timer); + test_finddir(finddir, finddir); + aom_usec_timer_mark(&timer); + int elapsed_time = (int)aom_usec_timer_elapsed(&timer); + +#if 0 + std::cout << "[ ] C time = " << ref_elapsed_time / 1000 + << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl; +#endif + + EXPECT_GT(ref_elapsed_time, elapsed_time) + << "Error: CDEFDeringFindDirSpeedTest, SIMD slower than C." << std::endl + << "C time: " << ref_elapsed_time << " us" << std::endl + << "SIMD time: " << elapsed_time << " us" << std::endl; +} + +TEST_P(CDEFDeringDirTest, TestSIMDNoMismatch) { + test_dering(bsize, 1, dering, ref_dering); +} + +TEST_P(CDEFDeringSpeedTest, DISABLED_TestSpeed) { + test_dering_speed(bsize, 4, dering, ref_dering); +} + +TEST_P(CDEFDeringFindDirTest, TestSIMDNoMismatch) { + test_finddir(finddir, ref_finddir); +} + +TEST_P(CDEFDeringFindDirSpeedTest, DISABLED_TestSpeed) { + test_finddir_speed(finddir, ref_finddir); +} + +using std::tr1::make_tuple; + +// VS compiling for 32 bit targets does not support vector types in +// structs as arguments, which makes the v256 type of the intrinsics +// hard to support, so optimizations for this target are disabled. 
+#if defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__) +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P( + SSE2, CDEFDeringDirTest, + ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_sse2, + &od_filter_dering_direction_4x4_c, 4), + make_tuple(&od_filter_dering_direction_8x8_sse2, + &od_filter_dering_direction_8x8_c, 8))); +INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringFindDirTest, + ::testing::Values(make_tuple(&od_dir_find8_sse2, + &od_dir_find8_c))); +#endif +#if HAVE_SSSE3 +INSTANTIATE_TEST_CASE_P( + SSSE3, CDEFDeringDirTest, + ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_ssse3, + &od_filter_dering_direction_4x4_c, 4), + make_tuple(&od_filter_dering_direction_8x8_ssse3, + &od_filter_dering_direction_8x8_c, 8))); +INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringFindDirTest, + ::testing::Values(make_tuple(&od_dir_find8_ssse3, + &od_dir_find8_c))); +#endif + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_CASE_P( + SSE4_1, CDEFDeringDirTest, + ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_sse4_1, + &od_filter_dering_direction_4x4_c, 4), + make_tuple(&od_filter_dering_direction_8x8_sse4_1, + &od_filter_dering_direction_8x8_c, 8))); +INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringFindDirTest, + ::testing::Values(make_tuple(&od_dir_find8_sse4_1, + &od_dir_find8_c))); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_CASE_P( + NEON, CDEFDeringDirTest, + ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_neon, + &od_filter_dering_direction_4x4_c, 4), + make_tuple(&od_filter_dering_direction_8x8_neon, + &od_filter_dering_direction_8x8_c, 8))); +INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringFindDirTest, + ::testing::Values(make_tuple(&od_dir_find8_neon, + &od_dir_find8_c))); +#endif + +// Test speed for all supported architectures +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P( + SSE2, CDEFDeringSpeedTest, + ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_sse2, + &od_filter_dering_direction_4x4_c, 4), + make_tuple(&od_filter_dering_direction_8x8_sse2, + 
&od_filter_dering_direction_8x8_c, 8))); +INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringFindDirSpeedTest, + ::testing::Values(make_tuple(&od_dir_find8_sse2, + &od_dir_find8_c))); +#endif + +#if HAVE_SSSE3 +INSTANTIATE_TEST_CASE_P( + SSSE3, CDEFDeringSpeedTest, + ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_ssse3, + &od_filter_dering_direction_4x4_c, 4), + make_tuple(&od_filter_dering_direction_8x8_ssse3, + &od_filter_dering_direction_8x8_c, 8))); +INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringFindDirSpeedTest, + ::testing::Values(make_tuple(&od_dir_find8_ssse3, + &od_dir_find8_c))); +#endif + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_CASE_P( + SSE4_1, CDEFDeringSpeedTest, + ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_sse4_1, + &od_filter_dering_direction_4x4_c, 4), + make_tuple(&od_filter_dering_direction_8x8_sse4_1, + &od_filter_dering_direction_8x8_c, 8))); +INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringFindDirSpeedTest, + ::testing::Values(make_tuple(&od_dir_find8_sse4_1, + &od_dir_find8_c))); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_CASE_P( + NEON, CDEFDeringSpeedTest, + ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_neon, + &od_filter_dering_direction_4x4_c, 4), + make_tuple(&od_filter_dering_direction_8x8_neon, + &od_filter_dering_direction_8x8_c, 8))); +INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringFindDirSpeedTest, + ::testing::Values(make_tuple(&od_dir_find8_neon, + &od_dir_find8_c))); +#endif + +#endif // defined(_WIN64) || !defined(_MSC_VER) +} // namespace diff --git a/third_party/aom/test/divu_small_test.cc b/third_party/aom/test/divu_small_test.cc new file mode 100644 index 000000000..064f8ee45 --- /dev/null +++ b/third_party/aom/test/divu_small_test.cc @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <stdlib.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/acm_random.h" +#include "av1/common/odintrin.h" + +using libaom_test::ACMRandom; + +TEST(Daala, TestDIVUuptoMAX) { + for (int d = 1; d <= OD_DIVU_DMAX; d++) { + for (uint32_t x = 1; x <= 1000000; x++) { + GTEST_ASSERT_EQ(x / d, OD_DIVU_SMALL(x, d)) + << "x=" << x << " d=" << d << " x/d=" << (x / d) + << " != " << OD_DIVU_SMALL(x, d); + } + } +} + +TEST(Daala, TestDIVUrandI31) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + for (int d = 1; d < OD_DIVU_DMAX; d++) { + for (int i = 0; i < 1000000; i++) { + uint32_t x = rnd.Rand31(); + GTEST_ASSERT_EQ(x / d, OD_DIVU_SMALL(x, d)) + << "x=" << x << " d=" << d << " x/d=" << (x / d) + << " != " << OD_DIVU_SMALL(x, d); + } + } +} diff --git a/third_party/aom/test/encode_api_test.cc b/third_party/aom/test/encode_api_test.cc new file mode 100644 index 000000000..14e43c847 --- /dev/null +++ b/third_party/aom/test/encode_api_test.cc @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+*/ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_config.h" +#include "aom/aomcx.h" +#include "aom/aom_encoder.h" + +namespace { + +#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0])) + +TEST(EncodeAPI, InvalidParams) { + static const aom_codec_iface_t *kCodecs[] = { +#if CONFIG_AV1_ENCODER + &aom_codec_av1_cx_algo, +#endif + }; + uint8_t buf[1] = { 0 }; + aom_image_t img; + aom_codec_ctx_t enc; + aom_codec_enc_cfg_t cfg; + + EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, 1, 1, 1, buf)); + + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(NULL, NULL, NULL, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, NULL, NULL, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_encode(NULL, NULL, 0, 0, 0, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_encode(NULL, &img, 0, 0, 0, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_destroy(NULL)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_enc_config_default(NULL, NULL, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_enc_config_default(NULL, &cfg, 0)); + EXPECT_TRUE(aom_codec_error(NULL) != NULL); + + for (int i = 0; i < NELEMENTS(kCodecs); ++i) { + SCOPED_TRACE(aom_codec_iface_name(kCodecs[i])); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_enc_init(NULL, kCodecs[i], NULL, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_enc_init(&enc, kCodecs[i], NULL, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_enc_config_default(kCodecs[i], &cfg, 1)); + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(kCodecs[i], &cfg, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, kCodecs[i], &cfg, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, NULL, 0, 0, 0, 0)); + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); + } +} + +} // namespace diff --git a/third_party/aom/test/encode_perf_test.cc b/third_party/aom/test/encode_perf_test.cc new file mode 100644 index 000000000..e2a4f2b71 --- /dev/null +++ 
b/third_party/aom/test/encode_perf_test.cc @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include <string> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "./aom_config.h" +#include "./aom_version.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "aom_ports/aom_timer.h" + +namespace { + +const int kMaxPsnr = 100; +const double kUsecsInSec = 1000000.0; + +struct EncodePerfTestVideo { + EncodePerfTestVideo(const char *name_, uint32_t width_, uint32_t height_, + uint32_t bitrate_, int frames_) + : name(name_), width(width_), height(height_), bitrate(bitrate_), + frames(frames_) {} + const char *name; + uint32_t width; + uint32_t height; + uint32_t bitrate; + int frames; +}; + +const EncodePerfTestVideo kAV1EncodePerfTestVectors[] = { + EncodePerfTestVideo("desktop_640_360_30.yuv", 640, 360, 200, 2484), + EncodePerfTestVideo("kirland_640_480_30.yuv", 640, 480, 200, 300), + EncodePerfTestVideo("macmarcomoving_640_480_30.yuv", 640, 480, 200, 987), + EncodePerfTestVideo("macmarcostationary_640_480_30.yuv", 640, 480, 200, 718), + EncodePerfTestVideo("niklas_640_480_30.yuv", 640, 480, 200, 471), + EncodePerfTestVideo("tacomanarrows_640_480_30.yuv", 640, 480, 200, 300), + EncodePerfTestVideo("tacomasmallcameramovement_640_480_30.yuv", 640, 480, 200, + 300), + 
EncodePerfTestVideo("thaloundeskmtg_640_480_30.yuv", 640, 480, 200, 300), + EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470), +}; + +const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 8 }; +const int kEncodePerfTestThreads[] = { 1, 2, 4 }; + +#define NELEMENTS(x) (sizeof((x)) / sizeof((x)[0])) + +class AV1EncodePerfTest + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWithParam<libaom_test::TestMode> { + protected: + AV1EncodePerfTest() + : EncoderTest(GET_PARAM(0)), min_psnr_(kMaxPsnr), nframes_(0), + encoding_mode_(GET_PARAM(1)), speed_(0), threads_(1) {} + + virtual ~AV1EncodePerfTest() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(encoding_mode_); + + cfg_.g_lag_in_frames = 0; + cfg_.rc_min_quantizer = 2; + cfg_.rc_max_quantizer = 56; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_undershoot_pct = 50; + cfg_.rc_overshoot_pct = 50; + cfg_.rc_buf_sz = 1000; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 600; + cfg_.rc_resize_allowed = 0; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_error_resilient = 1; + cfg_.g_threads = threads_; + } + + virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) { + if (video->frame() == 0) { + const int log2_tile_columns = 3; + encoder->Control(AOME_SET_CPUUSED, speed_); + encoder->Control(AV1E_SET_TILE_COLUMNS, log2_tile_columns); + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0); + } + } + + virtual void BeginPassHook(unsigned int /*pass*/) { + min_psnr_ = kMaxPsnr; + nframes_ = 0; + } + + virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) { + if (pkt->data.psnr.psnr[0] < min_psnr_) { + min_psnr_ = pkt->data.psnr.psnr[0]; + } + } + + // For performance reasons don't decode. The const qualifier is required so + // that this overrides EncoderTest::DoDecode() const instead of hiding it. + virtual bool DoDecode() const { return 0; } + + double min_psnr() const { return min_psnr_; } + + void set_speed(unsigned int speed) { speed_ = speed; } + + void set_threads(unsigned int threads) { 
threads_ = threads; } + + private: + double min_psnr_; + unsigned int nframes_; + libaom_test::TestMode encoding_mode_; + unsigned speed_; + unsigned int threads_; +}; + +TEST_P(AV1EncodePerfTest, PerfTest) { + for (size_t i = 0; i < NELEMENTS(kAV1EncodePerfTestVectors); ++i) { + for (size_t j = 0; j < NELEMENTS(kEncodePerfTestSpeeds); ++j) { + for (size_t k = 0; k < NELEMENTS(kEncodePerfTestThreads); ++k) { + if (kAV1EncodePerfTestVectors[i].width < 512 && + kEncodePerfTestThreads[k] > 1) + continue; + else if (kAV1EncodePerfTestVectors[i].width < 1024 && + kEncodePerfTestThreads[k] > 2) + continue; + + set_threads(kEncodePerfTestThreads[k]); + SetUp(); + + const aom_rational timebase = { 33333333, 1000000000 }; + cfg_.g_timebase = timebase; + cfg_.rc_target_bitrate = kAV1EncodePerfTestVectors[i].bitrate; + + init_flags_ = AOM_CODEC_USE_PSNR; + + const unsigned frames = kAV1EncodePerfTestVectors[i].frames; + const char *video_name = kAV1EncodePerfTestVectors[i].name; + libaom_test::I420VideoSource video( + video_name, kAV1EncodePerfTestVectors[i].width, + kAV1EncodePerfTestVectors[i].height, timebase.den, timebase.num, 0, + kAV1EncodePerfTestVectors[i].frames); + set_speed(kEncodePerfTestSpeeds[j]); + + aom_usec_timer t; + aom_usec_timer_start(&t); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + aom_usec_timer_mark(&t); + const double elapsed_secs = aom_usec_timer_elapsed(&t) / kUsecsInSec; + const double fps = frames / elapsed_secs; + const double minimum_psnr = min_psnr(); + std::string display_name(video_name); + if (kEncodePerfTestThreads[k] > 1) { + char thread_count[32]; + snprintf(thread_count, sizeof(thread_count), "_t-%d", + kEncodePerfTestThreads[k]); + display_name += thread_count; + } + + printf("{\n"); + printf("\t\"type\" : \"encode_perf_test\",\n"); + printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP); + printf("\t\"videoName\" : \"%s\",\n", display_name.c_str()); + printf("\t\"encodeTimeSecs\" : %f,\n", elapsed_secs); + 
printf("\t\"totalFrames\" : %u,\n", frames); + printf("\t\"framesPerSecond\" : %f,\n", fps); + printf("\t\"minPsnr\" : %f,\n", minimum_psnr); + printf("\t\"speed\" : %d,\n", kEncodePerfTestSpeeds[j]); + printf("\t\"threads\" : %d\n", kEncodePerfTestThreads[k]); + printf("}\n"); + } + } + } +} + +AV1_INSTANTIATE_TEST_CASE(AV1EncodePerfTest, + ::testing::Values(::libaom_test::kRealTime)); +} // namespace diff --git a/third_party/aom/test/encode_test_driver.cc b/third_party/aom/test/encode_test_driver.cc new file mode 100644 index 000000000..80f155ab2 --- /dev/null +++ b/third_party/aom/test/encode_test_driver.cc @@ -0,0 +1,323 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+*/ + +#include <string> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_config.h" +#include "aom_ports/mem.h" +#include "test/codec_factory.h" +#include "test/decode_test_driver.h" +#include "test/encode_test_driver.h" +#include "test/register_state_check.h" +#include "test/video_source.h" + +namespace libaom_test { +void Encoder::InitEncoder(VideoSource *video) { + aom_codec_err_t res; + const aom_image_t *img = video->img(); + + if (video->img() && !encoder_.priv) { + cfg_.g_w = img->d_w; + cfg_.g_h = img->d_h; + cfg_.g_timebase = video->timebase(); + cfg_.rc_twopass_stats_in = stats_->buf(); + + res = aom_codec_enc_init(&encoder_, CodecInterface(), &cfg_, init_flags_); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); + +#if CONFIG_AV1_ENCODER + if (CodecInterface() == &aom_codec_av1_cx_algo) { +// Default to 1 tile column for AV1. With CONFIG_EXT_TILE, the +// default is already the largest possible tile size +#if !CONFIG_EXT_TILE + const int log2_tile_columns = 0; + res = aom_codec_control_(&encoder_, AV1E_SET_TILE_COLUMNS, + log2_tile_columns); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); +#endif // !CONFIG_EXT_TILE + } else +#endif + { + } + } +} + +void Encoder::EncodeFrame(VideoSource *video, const unsigned long frame_flags) { + if (video->img()) + EncodeFrameInternal(*video, frame_flags); + else + Flush(); + + // Handle twopass stats + CxDataIterator iter = GetCxData(); + + while (const aom_codec_cx_pkt_t *pkt = iter.Next()) { + if (pkt->kind != AOM_CODEC_STATS_PKT) continue; + + stats_->Append(*pkt); + } +} + +void Encoder::EncodeFrameInternal(const VideoSource &video, + const unsigned long frame_flags) { + aom_codec_err_t res; + const aom_image_t *img = video.img(); + + // Handle frame resizing + if (cfg_.g_w != img->d_w || cfg_.g_h != img->d_h) { + cfg_.g_w = img->d_w; + cfg_.g_h = img->d_h; + res = aom_codec_enc_config_set(&encoder_, &cfg_); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); + } + + // 
Encode the frame + API_REGISTER_STATE_CHECK(res = aom_codec_encode(&encoder_, img, video.pts(), + video.duration(), frame_flags, + deadline_)); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); +} + +void Encoder::Flush() { + const aom_codec_err_t res = + aom_codec_encode(&encoder_, NULL, 0, 0, 0, deadline_); + if (!encoder_.priv) + ASSERT_EQ(AOM_CODEC_ERROR, res) << EncoderError(); + else + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); +} + +void EncoderTest::InitializeConfig() { + const aom_codec_err_t res = codec_->DefaultEncoderConfig(&cfg_, 0); + dec_cfg_ = aom_codec_dec_cfg_t(); + ASSERT_EQ(AOM_CODEC_OK, res); +} + +void EncoderTest::SetMode(TestMode mode) { + switch (mode) { + case kOnePassGood: + case kTwoPassGood: deadline_ = AOM_DL_GOOD_QUALITY; break; + case kRealTime: + deadline_ = AOM_DL_GOOD_QUALITY; + cfg_.g_lag_in_frames = 0; + break; + default: ASSERT_TRUE(false) << "Unexpected mode " << mode; + } + mode_ = mode; + if (mode == kTwoPassGood) + passes_ = 2; + else + passes_ = 1; +} + +static bool compare_plane(const uint8_t *const buf1, int stride1, + const uint8_t *const buf2, int stride2, int w, int h, + int *const mismatch_row, int *const mismatch_col, + int *const mismatch_pix1, int *const mismatch_pix2) { + int r, c; + + for (r = 0; r < h; ++r) { + for (c = 0; c < w; ++c) { + const int pix1 = buf1[r * stride1 + c]; + const int pix2 = buf2[r * stride2 + c]; + + if (pix1 != pix2) { + if (mismatch_row != NULL) *mismatch_row = r; + if (mismatch_col != NULL) *mismatch_col = c; + if (mismatch_pix1 != NULL) *mismatch_pix1 = pix1; + if (mismatch_pix2 != NULL) *mismatch_pix2 = pix2; + return false; + } + } + } + + return true; +} + +// The function should return "true" most of the time, therefore no early +// break-out is implemented within the match checking process. 
+static bool compare_img(const aom_image_t *img1, const aom_image_t *img2, + int *const mismatch_row, int *const mismatch_col, + int *const mismatch_plane, int *const mismatch_pix1, + int *const mismatch_pix2) { + const unsigned int w_y = img1->d_w; + const unsigned int h_y = img1->d_h; + const unsigned int w_uv = ROUND_POWER_OF_TWO(w_y, img1->x_chroma_shift); + const unsigned int h_uv = ROUND_POWER_OF_TWO(h_y, img1->y_chroma_shift); + + if (img1->fmt != img2->fmt || img1->cs != img2->cs || + img1->d_w != img2->d_w || img1->d_h != img2->d_h) { + if (mismatch_row != NULL) *mismatch_row = -1; + if (mismatch_col != NULL) *mismatch_col = -1; + return false; + } + + if (!compare_plane(img1->planes[AOM_PLANE_Y], img1->stride[AOM_PLANE_Y], + img2->planes[AOM_PLANE_Y], img2->stride[AOM_PLANE_Y], w_y, + h_y, mismatch_row, mismatch_col, mismatch_pix1, + mismatch_pix2)) { + if (mismatch_plane != NULL) *mismatch_plane = AOM_PLANE_Y; + return false; + } + + if (!compare_plane(img1->planes[AOM_PLANE_U], img1->stride[AOM_PLANE_U], + img2->planes[AOM_PLANE_U], img2->stride[AOM_PLANE_U], w_uv, + h_uv, mismatch_row, mismatch_col, mismatch_pix1, + mismatch_pix2)) { + if (mismatch_plane != NULL) *mismatch_plane = AOM_PLANE_U; + return false; + } + + if (!compare_plane(img1->planes[AOM_PLANE_V], img1->stride[AOM_PLANE_V], + img2->planes[AOM_PLANE_V], img2->stride[AOM_PLANE_V], w_uv, + h_uv, mismatch_row, mismatch_col, mismatch_pix1, + mismatch_pix2)) { + if (mismatch_plane != NULL) *mismatch_plane = AOM_PLANE_V; + return false; + } + + return true; +} + +void EncoderTest::MismatchHook(const aom_image_t *img_enc, + const aom_image_t *img_dec) { + int mismatch_row = 0; + int mismatch_col = 0; + int mismatch_plane = 0; + int mismatch_pix_enc = 0; + int mismatch_pix_dec = 0; + + ASSERT_FALSE(compare_img(img_enc, img_dec, &mismatch_row, &mismatch_col, + &mismatch_plane, &mismatch_pix_enc, + &mismatch_pix_dec)); + + GTEST_FAIL() << "Encode/Decode mismatch found:" << std::endl + << " pixel 
value enc/dec: " << mismatch_pix_enc << "/" + << mismatch_pix_dec << std::endl + << " plane: " << mismatch_plane << std::endl + << " row/col: " << mismatch_row << "/" + << mismatch_col << std::endl; +} + +void EncoderTest::RunLoop(VideoSource *video) { + aom_codec_dec_cfg_t dec_cfg = aom_codec_dec_cfg_t(); + + stats_.Reset(); + + ASSERT_TRUE(passes_ == 1 || passes_ == 2); + for (unsigned int pass = 0; pass < passes_; pass++) { + last_pts_ = 0; + + if (passes_ == 1) + cfg_.g_pass = AOM_RC_ONE_PASS; + else if (pass == 0) + cfg_.g_pass = AOM_RC_FIRST_PASS; + else + cfg_.g_pass = AOM_RC_LAST_PASS; + + BeginPassHook(pass); + testing::internal::scoped_ptr<Encoder> encoder( + codec_->CreateEncoder(cfg_, deadline_, init_flags_, &stats_)); + ASSERT_TRUE(encoder.get() != NULL); + + ASSERT_NO_FATAL_FAILURE(video->Begin()); + encoder->InitEncoder(video); + + if (mode_ == kRealTime) { + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0); + } + + ASSERT_FALSE(::testing::Test::HasFatalFailure()); + + unsigned long dec_init_flags = 0; // NOLINT + // Use fragment decoder if encoder outputs partitions. + // NOTE: fragment decoder and partition encoder are only supported by VP8. + if (init_flags_ & AOM_CODEC_USE_OUTPUT_PARTITION) + dec_init_flags |= AOM_CODEC_USE_INPUT_FRAGMENTS; + testing::internal::scoped_ptr<Decoder> decoder( + codec_->CreateDecoder(dec_cfg, dec_init_flags)); +#if CONFIG_AV1 && CONFIG_EXT_TILE + if (decoder.get() != NULL && decoder->IsAV1()) { + // Set dec_cfg.tile_row = -1 and dec_cfg.tile_col = -1 so that the whole + // frame is decoded. 
+ decoder->Control(AV1_SET_DECODE_TILE_ROW, -1); + decoder->Control(AV1_SET_DECODE_TILE_COL, -1); + } +#endif + + bool again; + for (again = true; again; video->Next()) { + again = (video->img() != NULL); + + PreEncodeFrameHook(video); + PreEncodeFrameHook(video, encoder.get()); + encoder->EncodeFrame(video, frame_flags_); + + CxDataIterator iter = encoder->GetCxData(); + + bool has_cxdata = false; + bool has_dxdata = false; + while (const aom_codec_cx_pkt_t *pkt = iter.Next()) { + pkt = MutateEncoderOutputHook(pkt); + again = true; + switch (pkt->kind) { + case AOM_CODEC_CX_FRAME_PKT: + has_cxdata = true; + if (decoder.get() != NULL && DoDecode()) { + aom_codec_err_t res_dec = decoder->DecodeFrame( + (const uint8_t *)pkt->data.frame.buf, pkt->data.frame.sz); + + if (!HandleDecodeResult(res_dec, decoder.get())) break; + + has_dxdata = true; + } + ASSERT_GE(pkt->data.frame.pts, last_pts_); + last_pts_ = pkt->data.frame.pts; + FramePktHook(pkt); + break; + + case AOM_CODEC_PSNR_PKT: PSNRPktHook(pkt); break; + + default: break; + } + } + + // Flush the decoder when there are no more fragments. 
+ if ((init_flags_ & AOM_CODEC_USE_OUTPUT_PARTITION) && has_dxdata) { + const aom_codec_err_t res_dec = decoder->DecodeFrame(NULL, 0); + if (!HandleDecodeResult(res_dec, decoder.get())) break; + } + + if (has_dxdata && has_cxdata) { + const aom_image_t *img_enc = encoder->GetPreviewFrame(); + DxDataIterator dec_iter = decoder->GetDxData(); + const aom_image_t *img_dec = dec_iter.Next(); + if (img_enc && img_dec) { + const bool res = + compare_img(img_enc, img_dec, NULL, NULL, NULL, NULL, NULL); + if (!res) { // Mismatch + MismatchHook(img_enc, img_dec); + } + } + if (img_dec) DecompressedFrameHook(*img_dec, video->pts()); + } + if (!Continue()) break; + } + + EndPassHook(); + + if (!Continue()) break; + } +} + +} // namespace libaom_test diff --git a/third_party/aom/test/encode_test_driver.h b/third_party/aom/test/encode_test_driver.h new file mode 100644 index 000000000..91027b4f6 --- /dev/null +++ b/third_party/aom/test/encode_test_driver.h @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ +#ifndef TEST_ENCODE_TEST_DRIVER_H_ +#define TEST_ENCODE_TEST_DRIVER_H_ + +#include <string> +#include <vector> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_config.h" +#if CONFIG_AV1_ENCODER +#include "aom/aomcx.h" +#endif +#include "aom/aom_encoder.h" + +namespace libaom_test { + +class CodecFactory; +class VideoSource; + +enum TestMode { kRealTime, kOnePassGood, kTwoPassGood }; +#define ALL_TEST_MODES \ + ::testing::Values(::libaom_test::kRealTime, ::libaom_test::kOnePassGood, \ + ::libaom_test::kTwoPassGood) + +#define ONE_PASS_TEST_MODES \ + ::testing::Values(::libaom_test::kRealTime, ::libaom_test::kOnePassGood) + +#define TWO_PASS_TEST_MODES ::testing::Values(::libaom_test::kTwoPassGood) + +// Provides an object to handle the libaom get_cx_data() iteration pattern +class CxDataIterator { + public: + explicit CxDataIterator(aom_codec_ctx_t *encoder) + : encoder_(encoder), iter_(NULL) {} + + const aom_codec_cx_pkt_t *Next() { + return aom_codec_get_cx_data(encoder_, &iter_); + } + + private: + aom_codec_ctx_t *encoder_; + aom_codec_iter_t iter_; +}; + +// Implements an in-memory store for libaom twopass statistics +class TwopassStatsStore { + public: + void Append(const aom_codec_cx_pkt_t &pkt) { + buffer_.append(reinterpret_cast<char *>(pkt.data.twopass_stats.buf), + pkt.data.twopass_stats.sz); + } + + aom_fixed_buf_t buf() { + const aom_fixed_buf_t buf = { &buffer_[0], buffer_.size() }; + return buf; + } + + void Reset() { buffer_.clear(); } + + protected: + std::string buffer_; +}; + +// Provides a simplified interface to manage one video encoding pass, given +// a configuration and video source. +// +// TODO(jkoleszar): The exact services it provides and the appropriate +// level of abstraction will be fleshed out as more tests are written. 
+class Encoder { + public: + Encoder(aom_codec_enc_cfg_t cfg, unsigned long deadline, + const unsigned long init_flags, TwopassStatsStore *stats) + : cfg_(cfg), deadline_(deadline), init_flags_(init_flags), stats_(stats) { + memset(&encoder_, 0, sizeof(encoder_)); + } + + virtual ~Encoder() { aom_codec_destroy(&encoder_); } + + CxDataIterator GetCxData() { return CxDataIterator(&encoder_); } + + void InitEncoder(VideoSource *video); + + const aom_image_t *GetPreviewFrame() { + return aom_codec_get_preview_frame(&encoder_); + } + // This is a thin wrapper around aom_codec_encode(), so refer to + // aom_encoder.h for its semantics. + void EncodeFrame(VideoSource *video, const unsigned long frame_flags); + + // Convenience wrapper for EncodeFrame() + void EncodeFrame(VideoSource *video) { EncodeFrame(video, 0); } + + void Control(int ctrl_id, int arg) { + const aom_codec_err_t res = aom_codec_control_(&encoder_, ctrl_id, arg); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); + } + + void Control(int ctrl_id, int *arg) { + const aom_codec_err_t res = aom_codec_control_(&encoder_, ctrl_id, arg); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); + } + + void Control(int ctrl_id, struct aom_scaling_mode *arg) { + const aom_codec_err_t res = aom_codec_control_(&encoder_, ctrl_id, arg); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); + } + +#if CONFIG_AV1_ENCODER + void Control(int ctrl_id, aom_active_map_t *arg) { + const aom_codec_err_t res = aom_codec_control_(&encoder_, ctrl_id, arg); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); + } +#endif + + void Config(const aom_codec_enc_cfg_t *cfg) { + const aom_codec_err_t res = aom_codec_enc_config_set(&encoder_, cfg); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); + cfg_ = *cfg; + } + + void set_deadline(unsigned long deadline) { deadline_ = deadline; } + + protected: + virtual aom_codec_iface_t *CodecInterface() const = 0; + + const char *EncoderError() { + const char *detail = aom_codec_error_detail(&encoder_); + 
return detail ? detail : aom_codec_error(&encoder_); + } + + // Encode an image + void EncodeFrameInternal(const VideoSource &video, + const unsigned long frame_flags); + + // Flush the encoder on EOS + void Flush(); + + aom_codec_ctx_t encoder_; + aom_codec_enc_cfg_t cfg_; + unsigned long deadline_; + unsigned long init_flags_; + TwopassStatsStore *stats_; +}; + +// Common test functionality for all Encoder tests. +// +// This class is a mixin which provides the main loop common to all +// encoder tests. It provides hooks which can be overridden by subclasses +// to implement each test's specific behavior, while centralizing the bulk +// of the boilerplate. Note that it doesn't inherit the gtest testing +// classes directly, so that tests can be parameterized differently. +class EncoderTest { + protected: + explicit EncoderTest(const CodecFactory *codec) + : codec_(codec), abort_(false), init_flags_(0), frame_flags_(0), + last_pts_(0), mode_(kRealTime) { + // Default to 1 thread. + cfg_.g_threads = 1; + } + + virtual ~EncoderTest() {} + + // Initialize the cfg_ member with the default configuration. + void InitializeConfig(); + + // Map the TestMode enum to the deadline_ and passes_ variables. + void SetMode(TestMode mode); + + // Set encoder flag. + void set_init_flags(unsigned long flag) { // NOLINT(runtime/int) + init_flags_ = flag; + } + + // Main loop + virtual void RunLoop(VideoSource *video); + + // Hook to be called at the beginning of a pass. + virtual void BeginPassHook(unsigned int /*pass*/) {} + + // Hook to be called at the end of a pass. + virtual void EndPassHook() {} + + // Hook to be called before encoding a frame. + virtual void PreEncodeFrameHook(VideoSource * /*video*/) {} + virtual void PreEncodeFrameHook(VideoSource * /*video*/, + Encoder * /*encoder*/) {} + + // Hook to be called on every compressed data packet. + virtual void FramePktHook(const aom_codec_cx_pkt_t * /*pkt*/) {} + + // Hook to be called on every PSNR packet. 
+ virtual void PSNRPktHook(const aom_codec_cx_pkt_t * /*pkt*/) {} + + // Hook to determine whether the encode loop should continue. + virtual bool Continue() const { + return !(::testing::Test::HasFatalFailure() || abort_); + } + + const CodecFactory *codec_; + // Hook to determine whether to decode frame after encoding + virtual bool DoDecode() const { return 1; } + + // Hook to handle encode/decode mismatch + virtual void MismatchHook(const aom_image_t *img1, const aom_image_t *img2); + + // Hook to be called on every decompressed frame. + virtual void DecompressedFrameHook(const aom_image_t & /*img*/, + aom_codec_pts_t /*pts*/) {} + + // Hook to be called to handle decode result. Return true to continue. + virtual bool HandleDecodeResult(const aom_codec_err_t res_dec, + Decoder *decoder) { + EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError(); + return AOM_CODEC_OK == res_dec; + } + + // Hook that can modify the encoder's output data + virtual const aom_codec_cx_pkt_t *MutateEncoderOutputHook( + const aom_codec_cx_pkt_t *pkt) { + return pkt; + } + + bool abort_; + aom_codec_enc_cfg_t cfg_; + aom_codec_dec_cfg_t dec_cfg_; + unsigned int passes_; + unsigned long deadline_; + TwopassStatsStore stats_; + unsigned long init_flags_; + unsigned long frame_flags_; + aom_codec_pts_t last_pts_; + TestMode mode_; +}; + +} // namespace libaom_test + +#endif // TEST_ENCODE_TEST_DRIVER_H_ diff --git a/third_party/aom/test/encoder_parms_get_to_decoder.cc b/third_party/aom/test/encoder_parms_get_to_decoder.cc new file mode 100644 index 000000000..ca6a24ebe --- /dev/null +++ b/third_party/aom/test/encoder_parms_get_to_decoder.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "av1/av1_dx_iface.c" + +namespace { + +const int kCpuUsed = 2; + +struct EncodePerfTestVideo { + const char *name; + uint32_t width; + uint32_t height; + uint32_t bitrate; + int frames; +}; + +const EncodePerfTestVideo kAV1EncodePerfTestVectors[] = { + { "niklas_1280_720_30.y4m", 1280, 720, 600, 10 }, +}; + +struct EncodeParameters { + int32_t tile_rows; + int32_t tile_cols; + int32_t lossless; + int32_t error_resilient; + int32_t frame_parallel; + aom_color_range_t color_range; + aom_color_space_t cs; + int render_size[2]; + // TODO(JBB): quantizers / bitrate +}; + +const EncodeParameters kAV1EncodeParameterSet[] = { + { 0, 0, 0, 1, 0, AOM_CR_STUDIO_RANGE, AOM_CS_BT_601, { 0, 0 } }, + { 0, 0, 0, 0, 0, AOM_CR_FULL_RANGE, AOM_CS_BT_709, { 0, 0 } }, + { 0, 0, 1, 0, 0, AOM_CR_FULL_RANGE, AOM_CS_BT_2020, { 0, 0 } }, + { 0, 2, 0, 0, 1, AOM_CR_STUDIO_RANGE, AOM_CS_UNKNOWN, { 640, 480 } }, + // TODO(JBB): Test profiles (requires more work). 
+}; + +class AvxEncoderParmsGetToDecoder + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWith2Params<EncodeParameters, + EncodePerfTestVideo> { + protected: + AvxEncoderParmsGetToDecoder() + : EncoderTest(GET_PARAM(0)), encode_parms(GET_PARAM(1)) {} + + virtual ~AvxEncoderParmsGetToDecoder() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(::libaom_test::kTwoPassGood); + cfg_.g_lag_in_frames = 25; + cfg_.g_error_resilient = encode_parms.error_resilient; + dec_cfg_.threads = 4; + test_video_ = GET_PARAM(2); + cfg_.rc_target_bitrate = test_video_.bitrate; + } + + virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) { + if (video->frame() == 1) { + encoder->Control(AV1E_SET_COLOR_SPACE, encode_parms.cs); + encoder->Control(AV1E_SET_COLOR_RANGE, encode_parms.color_range); + encoder->Control(AV1E_SET_LOSSLESS, encode_parms.lossless); + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, + encode_parms.frame_parallel); + encoder->Control(AV1E_SET_TILE_ROWS, encode_parms.tile_rows); + encoder->Control(AV1E_SET_TILE_COLUMNS, encode_parms.tile_cols); + encoder->Control(AOME_SET_CPUUSED, kCpuUsed); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0) + encoder->Control(AV1E_SET_RENDER_SIZE, encode_parms.render_size); + } + } + + virtual bool HandleDecodeResult(const aom_codec_err_t res_dec, + libaom_test::Decoder *decoder) { + aom_codec_ctx_t *const av1_decoder = decoder->GetDecoder(); + aom_codec_alg_priv_t *const priv = + reinterpret_cast<aom_codec_alg_priv_t *>(av1_decoder->priv); + FrameWorkerData *const worker_data = + reinterpret_cast<FrameWorkerData *>(priv->frame_workers[0].data1); + AV1_COMMON *const common = &worker_data->pbi->common; + + if (encode_parms.lossless) { + EXPECT_EQ(0, common->base_qindex); + EXPECT_EQ(0, 
common->y_dc_delta_q); + EXPECT_EQ(0, common->uv_dc_delta_q); + EXPECT_EQ(0, common->uv_ac_delta_q); + EXPECT_EQ(ONLY_4X4, common->tx_mode); + } + EXPECT_EQ(encode_parms.error_resilient, common->error_resilient_mode); + if (encode_parms.error_resilient) { + EXPECT_EQ(0, common->use_prev_frame_mvs); + } + EXPECT_EQ(encode_parms.color_range, common->color_range); + EXPECT_EQ(encode_parms.cs, common->color_space); + if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0) { + EXPECT_EQ(encode_parms.render_size[0], common->render_width); + EXPECT_EQ(encode_parms.render_size[1], common->render_height); + } + EXPECT_EQ(encode_parms.tile_cols, common->log2_tile_cols); + EXPECT_EQ(encode_parms.tile_rows, common->log2_tile_rows); + + EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError(); + return AOM_CODEC_OK == res_dec; + } + + EncodePerfTestVideo test_video_; + + private: + EncodeParameters encode_parms; +}; + +TEST_P(AvxEncoderParmsGetToDecoder, BitstreamParms) { + init_flags_ = AOM_CODEC_USE_PSNR; + + testing::internal::scoped_ptr<libaom_test::VideoSource> video( + new libaom_test::Y4mVideoSource(test_video_.name, 0, test_video_.frames)); + ASSERT_TRUE(video.get() != NULL); + + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); +} + +AV1_INSTANTIATE_TEST_CASE(AvxEncoderParmsGetToDecoder, + ::testing::ValuesIn(kAV1EncodeParameterSet), + ::testing::ValuesIn(kAV1EncodePerfTestVectors)); +} // namespace diff --git a/third_party/aom/test/end_to_end_test.cc b/third_party/aom/test/end_to_end_test.cc new file mode 100644 index 000000000..0c8cbe274 --- /dev/null +++ b/third_party/aom/test/end_to_end_test.cc @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "test/yuv_video_source.h" + +namespace { + +const unsigned int kWidth = 160; +const unsigned int kHeight = 90; +const unsigned int kFramerate = 50; +const unsigned int kFrames = 10; +const int kBitrate = 500; +// List of psnr thresholds for speed settings 0-7 and 5 encoding modes +const double kPsnrThreshold[][5] = { +// Note: +// AV1 HBD average PSNR is slightly lower than AV1. +// We make two cases here to enable the testing and +// guard picture quality. 
+#if CONFIG_AV1_ENCODER && CONFIG_HIGHBITDEPTH + { 36.0, 37.0, 37.0, 37.0, 37.0 }, { 31.0, 36.0, 36.0, 36.0, 36.0 }, + { 31.0, 35.0, 35.0, 35.0, 35.0 }, { 31.0, 34.0, 34.0, 34.0, 34.0 }, + { 31.0, 33.0, 33.0, 33.0, 33.0 }, { 31.0, 32.0, 32.0, 32.0, 32.0 }, + { 30.0, 31.0, 31.0, 31.0, 31.0 }, { 29.0, 30.0, 30.0, 30.0, 30.0 }, +#else + { 36.0, 37.0, 37.0, 37.0, 37.0 }, { 35.0, 36.0, 36.0, 36.0, 36.0 }, + { 34.0, 35.0, 35.0, 35.0, 35.0 }, { 33.0, 34.0, 34.0, 34.0, 34.0 }, + { 32.0, 33.0, 33.0, 33.0, 33.0 }, { 31.0, 32.0, 32.0, 32.0, 32.0 }, + { 30.0, 31.0, 31.0, 31.0, 31.0 }, { 29.0, 30.0, 30.0, 30.0, 30.0 }, +#endif // CONFIG_HIGHBITDEPTH && CONFIG_AV1_ENCODER +}; + +typedef struct { + const char *filename; + unsigned int input_bit_depth; + aom_img_fmt fmt; + aom_bit_depth_t bit_depth; + unsigned int profile; +} TestVideoParam; + +const TestVideoParam kTestVectors[] = { + { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 }, + { "park_joy_90p_8_422.y4m", 8, AOM_IMG_FMT_I422, AOM_BITS_8, 1 }, + { "park_joy_90p_8_444.y4m", 8, AOM_IMG_FMT_I444, AOM_BITS_8, 1 }, + { "park_joy_90p_8_440.yuv", 8, AOM_IMG_FMT_I440, AOM_BITS_8, 1 }, +#if CONFIG_HIGHBITDEPTH + { "park_joy_90p_10_420.y4m", 10, AOM_IMG_FMT_I42016, AOM_BITS_10, 2 }, + { "park_joy_90p_10_422.y4m", 10, AOM_IMG_FMT_I42216, AOM_BITS_10, 3 }, + { "park_joy_90p_10_444.y4m", 10, AOM_IMG_FMT_I44416, AOM_BITS_10, 3 }, + { "park_joy_90p_10_440.yuv", 10, AOM_IMG_FMT_I44016, AOM_BITS_10, 3 }, + { "park_joy_90p_12_420.y4m", 12, AOM_IMG_FMT_I42016, AOM_BITS_12, 2 }, + { "park_joy_90p_12_422.y4m", 12, AOM_IMG_FMT_I42216, AOM_BITS_12, 3 }, + { "park_joy_90p_12_444.y4m", 12, AOM_IMG_FMT_I44416, AOM_BITS_12, 3 }, + { "park_joy_90p_12_440.yuv", 12, AOM_IMG_FMT_I44016, AOM_BITS_12, 3 }, +#endif // CONFIG_HIGHBITDEPTH +}; + +// Encoding modes tested +const libaom_test::TestMode kEncodingModeVectors[] = { + ::libaom_test::kTwoPassGood, ::libaom_test::kOnePassGood, + ::libaom_test::kRealTime, +}; + +// Speed settings 
tested +const int kCpuUsedVectors[] = { 1, 2, 3, 5, 6 }; + +int is_extension_y4m(const char *filename) { + const char *dot = strrchr(filename, '.'); + if (!dot || dot == filename) + return 0; + else + return !strcmp(dot, ".y4m"); +} + +class EndToEndTest + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode, + TestVideoParam, int> { + protected: + EndToEndTest() + : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(2)), + cpu_used_(GET_PARAM(3)), psnr_(0.0), nframes_(0), + encoding_mode_(GET_PARAM(1)) {} + + virtual ~EndToEndTest() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(encoding_mode_); + if (encoding_mode_ != ::libaom_test::kRealTime) { + cfg_.g_lag_in_frames = 5; + cfg_.rc_end_usage = AOM_VBR; + } else { + cfg_.g_lag_in_frames = 0; + cfg_.rc_end_usage = AOM_CBR; + cfg_.rc_buf_sz = 1000; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 600; + } + dec_cfg_.threads = 4; + } + + virtual void BeginPassHook(unsigned int) { + psnr_ = 0.0; + nframes_ = 0; + } + + virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) { + psnr_ += pkt->data.psnr.psnr[0]; + nframes_++; + } + + virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) { + if (video->frame() == 1) { + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1); + encoder->Control(AV1E_SET_TILE_COLUMNS, 4); + encoder->Control(AOME_SET_CPUUSED, cpu_used_); +#if CONFIG_PALETTE + // Test screen coding tools at cpu_used = 1 && encoding mode is two-pass. 
+ if (cpu_used_ == 1 && encoding_mode_ == ::libaom_test::kTwoPassGood) + encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN); + else + encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT); +#endif // CONFIG_PALETTE + if (encoding_mode_ != ::libaom_test::kRealTime) { + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + } + } + } + + double GetAveragePsnr() const { + if (nframes_) return psnr_ / nframes_; + return 0.0; + } + + double GetPsnrThreshold() { + return kPsnrThreshold[cpu_used_][encoding_mode_]; + } + + TestVideoParam test_video_param_; + int cpu_used_; + + private: + double psnr_; + unsigned int nframes_; + libaom_test::TestMode encoding_mode_; +}; + +class EndToEndTestLarge : public EndToEndTest {}; + +TEST_P(EndToEndTestLarge, EndtoEndPSNRTest) { + cfg_.rc_target_bitrate = kBitrate; + cfg_.g_error_resilient = 0; + cfg_.g_profile = test_video_param_.profile; + cfg_.g_input_bit_depth = test_video_param_.input_bit_depth; + cfg_.g_bit_depth = test_video_param_.bit_depth; + init_flags_ = AOM_CODEC_USE_PSNR; + if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH; + + testing::internal::scoped_ptr<libaom_test::VideoSource> video; + if (is_extension_y4m(test_video_param_.filename)) { + video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0, + kFrames)); + } else { + video.reset(new libaom_test::YUVVideoSource( + test_video_param_.filename, test_video_param_.fmt, kWidth, kHeight, + kFramerate, 1, 0, kFrames)); + } + ASSERT_TRUE(video.get() != NULL); + + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); + const double psnr = GetAveragePsnr(); + EXPECT_GT(psnr, GetPsnrThreshold()); +} + +TEST_P(EndToEndTest, EndtoEndPSNRTest) { + cfg_.rc_target_bitrate = kBitrate; + cfg_.g_error_resilient = 0; + cfg_.g_profile = test_video_param_.profile; + cfg_.g_input_bit_depth = test_video_param_.input_bit_depth; + cfg_.g_bit_depth = 
test_video_param_.bit_depth; + init_flags_ = AOM_CODEC_USE_PSNR; + if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH; + + testing::internal::scoped_ptr<libaom_test::VideoSource> video; + if (is_extension_y4m(test_video_param_.filename)) { + video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0, + kFrames)); + } else { + video.reset(new libaom_test::YUVVideoSource( + test_video_param_.filename, test_video_param_.fmt, kWidth, kHeight, + kFramerate, 1, 0, kFrames)); + } + ASSERT_TRUE(video.get() != NULL); + + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); + const double psnr = GetAveragePsnr(); + EXPECT_GT(psnr, GetPsnrThreshold()); +} + +AV1_INSTANTIATE_TEST_CASE(EndToEndTestLarge, + ::testing::ValuesIn(kEncodingModeVectors), + ::testing::ValuesIn(kTestVectors), + ::testing::ValuesIn(kCpuUsedVectors)); + +AV1_INSTANTIATE_TEST_CASE(EndToEndTest, + ::testing::Values(kEncodingModeVectors[0]), + ::testing::Values(kTestVectors[2]), // 444 + ::testing::Values(kCpuUsedVectors[2])); +} // namespace diff --git a/third_party/aom/test/error_block_test.cc b/third_party/aom/test/error_block_test.cc new file mode 100644 index 000000000..227065fa9 --- /dev/null +++ b/third_party/aom/test/error_block_test.cc @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+*/ + +#include <cmath> +#include <cstdlib> +#include <string> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_config.h" +#include "./av1_rtcd.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "av1/common/entropy.h" +#include "aom/aom_codec.h" +#include "aom/aom_integer.h" + +using libaom_test::ACMRandom; + +namespace { +#if CONFIG_HIGHBITDEPTH +const int kNumIterations = 1000; + +typedef int64_t (*ErrorBlockFunc)(const tran_low_t *coeff, + const tran_low_t *dqcoeff, + intptr_t block_size, int64_t *ssz, int bps); + +typedef std::tr1::tuple<ErrorBlockFunc, ErrorBlockFunc, aom_bit_depth_t> + ErrorBlockParam; + +class ErrorBlockTest : public ::testing::TestWithParam<ErrorBlockParam> { + public: + virtual ~ErrorBlockTest() {} + virtual void SetUp() { + error_block_op_ = GET_PARAM(0); + ref_error_block_op_ = GET_PARAM(1); + bit_depth_ = GET_PARAM(2); + } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + aom_bit_depth_t bit_depth_; + ErrorBlockFunc error_block_op_; + ErrorBlockFunc ref_error_block_op_; +}; + +TEST_P(ErrorBlockTest, OperationCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, tran_low_t, coeff[4096]); + DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]); + int err_count_total = 0; + int first_failure = -1; + intptr_t block_size; + int64_t ssz; + int64_t ret; + int64_t ref_ssz; + int64_t ref_ret; + const int msb = bit_depth_ + 8 - 1; + for (int i = 0; i < kNumIterations; ++i) { + int err_count = 0; + block_size = 16 << (i % 9); // All block sizes from 4x4, 8x4 ..64x64 + for (int j = 0; j < block_size; j++) { + // coeff and dqcoeff will always have at least the same sign, and this + // can be used for optimization, so generate test input precisely. 
+ if (rnd(2)) { + // Positive number + coeff[j] = rnd(1 << msb); + dqcoeff[j] = rnd(1 << msb); + } else { + // Negative number + coeff[j] = -rnd(1 << msb); + dqcoeff[j] = -rnd(1 << msb); + } + } + ref_ret = + ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz, bit_depth_); + ASM_REGISTER_STATE_CHECK( + ret = error_block_op_(coeff, dqcoeff, block_size, &ssz, bit_depth_)); + err_count += (ref_ret != ret) | (ref_ssz != ssz); + if (err_count && !err_count_total) { + first_failure = i; + } + err_count_total += err_count; + } + EXPECT_EQ(0, err_count_total) + << "Error: Error Block Test, C output doesn't match optimized output. " + << "First failed at test case " << first_failure; +} + +TEST_P(ErrorBlockTest, ExtremeValues) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, tran_low_t, coeff[4096]); + DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]); + int err_count_total = 0; + int first_failure = -1; + intptr_t block_size; + int64_t ssz; + int64_t ret; + int64_t ref_ssz; + int64_t ref_ret; + const int msb = bit_depth_ + 8 - 1; + int max_val = ((1 << msb) - 1); + for (int i = 0; i < kNumIterations; ++i) { + int err_count = 0; + int k = (i / 9) % 9; + + // Change the maximum coeff value, to test different bit boundaries + if (k == 8 && (i % 9) == 0) { + max_val >>= 1; + } + block_size = 16 << (i % 9); // All block sizes from 4x4, 8x4 ..64x64 + for (int j = 0; j < block_size; j++) { + if (k < 4) { + // Test at positive maximum values + coeff[j] = k % 2 ? max_val : 0; + dqcoeff[j] = (k >> 1) % 2 ? max_val : 0; + } else if (k < 8) { + // Test at negative maximum values + coeff[j] = k % 2 ? -max_val : 0; + dqcoeff[j] = (k >> 1) % 2 ? 
-max_val : 0; + } else { + if (rnd(2)) { + // Positive number + coeff[j] = rnd(1 << 14); + dqcoeff[j] = rnd(1 << 14); + } else { + // Negative number + coeff[j] = -rnd(1 << 14); + dqcoeff[j] = -rnd(1 << 14); + } + } + } + ref_ret = + ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz, bit_depth_); + ASM_REGISTER_STATE_CHECK( + ret = error_block_op_(coeff, dqcoeff, block_size, &ssz, bit_depth_)); + err_count += (ref_ret != ret) | (ref_ssz != ssz); + if (err_count && !err_count_total) { + first_failure = i; + } + err_count_total += err_count; + } + EXPECT_EQ(0, err_count_total) + << "Error: Error Block Test, C output doesn't match optimized output. " + << "First failed at test case " << first_failure; +} + +#if HAVE_SSE2 || HAVE_AVX +using std::tr1::make_tuple; + +INSTANTIATE_TEST_CASE_P( + SSE2, ErrorBlockTest, + ::testing::Values(make_tuple(&av1_highbd_block_error_sse2, + &av1_highbd_block_error_c, AOM_BITS_10), + make_tuple(&av1_highbd_block_error_sse2, + &av1_highbd_block_error_c, AOM_BITS_12), + make_tuple(&av1_highbd_block_error_sse2, + &av1_highbd_block_error_c, AOM_BITS_8))); +#endif // HAVE_SSE2 + +#endif // CONFIG_HIGHBITDEPTH +} // namespace diff --git a/third_party/aom/test/error_resilience_test.cc b/third_party/aom/test/error_resilience_test.cc new file mode 100644 index 000000000..63f10012f --- /dev/null +++ b/third_party/aom/test/error_resilience_test.cc @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+*/ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" + +namespace { + +const int kMaxErrorFrames = 12; +const int kMaxDroppableFrames = 12; + +class ErrorResilienceTestLarge + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWithParam<libaom_test::TestMode> { + protected: + ErrorResilienceTestLarge() + : EncoderTest(GET_PARAM(0)), psnr_(0.0), nframes_(0), mismatch_psnr_(0.0), + mismatch_nframes_(0), encoding_mode_(GET_PARAM(1)) { + Reset(); + } + + virtual ~ErrorResilienceTestLarge() {} + + void Reset() { + error_nframes_ = 0; + droppable_nframes_ = 0; + pattern_switch_ = 0; + } + + virtual void SetUp() { + InitializeConfig(); + SetMode(encoding_mode_); + } + + virtual void BeginPassHook(unsigned int /*pass*/) { + psnr_ = 0.0; + nframes_ = 0; + mismatch_psnr_ = 0.0; + mismatch_nframes_ = 0; + } + + virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) { + psnr_ += pkt->data.psnr.psnr[0]; + nframes_++; + } + + virtual void PreEncodeFrameHook(libaom_test::VideoSource *video) { + frame_flags_ &= + ~(AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF); + if (droppable_nframes_ > 0 && + (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) { + for (unsigned int i = 0; i < droppable_nframes_; ++i) { + if (droppable_frames_[i] == video->frame()) { + std::cout << "Encoding droppable frame: " << droppable_frames_[i] + << "\n"; + frame_flags_ |= (AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | + AOM_EFLAG_NO_UPD_ARF); + return; + } + } + } + } + + double GetAveragePsnr() const { + if (nframes_) return psnr_ / nframes_; + return 0.0; + } + + double GetAverageMismatchPsnr() const { + if (mismatch_nframes_) return mismatch_psnr_ / mismatch_nframes_; + return 0.0; + } + + virtual bool DoDecode() const { + if (error_nframes_ > 0 && + (cfg_.g_pass == AOM_RC_LAST_PASS || 
cfg_.g_pass == AOM_RC_ONE_PASS)) { + for (unsigned int i = 0; i < error_nframes_; ++i) { + if (error_frames_[i] == nframes_ - 1) { + std::cout << " Skipping decoding frame: " + << error_frames_[i] << "\n"; + return 0; + } + } + } + return 1; + } + + virtual void MismatchHook(const aom_image_t *img1, const aom_image_t *img2) { + double mismatch_psnr = compute_psnr(img1, img2); + mismatch_psnr_ += mismatch_psnr; + ++mismatch_nframes_; + // std::cout << "Mismatch frame psnr: " << mismatch_psnr << "\n"; + ::libaom_test::EncoderTest::MismatchHook(img1, img2); + } + + void SetErrorFrames(int num, unsigned int *list) { + if (num > kMaxErrorFrames) + num = kMaxErrorFrames; + else if (num < 0) + num = 0; + error_nframes_ = num; + for (unsigned int i = 0; i < error_nframes_; ++i) + error_frames_[i] = list[i]; + } + + void SetDroppableFrames(int num, unsigned int *list) { + if (num > kMaxDroppableFrames) + num = kMaxDroppableFrames; + else if (num < 0) + num = 0; + droppable_nframes_ = num; + for (unsigned int i = 0; i < droppable_nframes_; ++i) + droppable_frames_[i] = list[i]; + } + + unsigned int GetMismatchFrames() { return mismatch_nframes_; } + + void SetPatternSwitch(int frame_switch) { pattern_switch_ = frame_switch; } + + private: + double psnr_; + unsigned int nframes_; + unsigned int error_nframes_; + unsigned int droppable_nframes_; + unsigned int pattern_switch_; + double mismatch_psnr_; + unsigned int mismatch_nframes_; + unsigned int error_frames_[kMaxErrorFrames]; + unsigned int droppable_frames_[kMaxDroppableFrames]; + libaom_test::TestMode encoding_mode_; +}; + +TEST_P(ErrorResilienceTestLarge, OnVersusOff) { + const aom_rational timebase = { 33333333, 1000000000 }; + cfg_.g_timebase = timebase; + cfg_.rc_target_bitrate = 2000; + cfg_.g_lag_in_frames = 10; + + init_flags_ = AOM_CODEC_USE_PSNR; + + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + timebase.den, timebase.num, 0, 30); + + // Error resilient mode OFF. 
+ cfg_.g_error_resilient = 0; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr_resilience_off = GetAveragePsnr(); + EXPECT_GT(psnr_resilience_off, 25.0); + + // Error resilient mode ON. + cfg_.g_error_resilient = 1; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr_resilience_on = GetAveragePsnr(); + EXPECT_GT(psnr_resilience_on, 25.0); + + // Test that turning on error resilient mode hurts by 10% at most. + if (psnr_resilience_off > 0.0) { + const double psnr_ratio = psnr_resilience_on / psnr_resilience_off; + EXPECT_GE(psnr_ratio, 0.9); + EXPECT_LE(psnr_ratio, 1.1); + } +} + +// Check for successful decoding and no encoder/decoder mismatch +// if we lose (i.e., drop before decoding) a set of droppable +// frames (i.e., frames that don't update any reference buffers). +// Check both isolated and consecutive loss. +TEST_P(ErrorResilienceTestLarge, DropFramesWithoutRecovery) { + const aom_rational timebase = { 33333333, 1000000000 }; + cfg_.g_timebase = timebase; + cfg_.rc_target_bitrate = 500; + // FIXME(debargha): Fix this to work for any lag. + // Currently this test only works for lag = 0 + cfg_.g_lag_in_frames = 0; + + init_flags_ = AOM_CODEC_USE_PSNR; + + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + timebase.den, timebase.num, 0, 40); + + // Error resilient mode ON. + cfg_.g_error_resilient = 1; + cfg_.kf_mode = AOM_KF_DISABLED; + + // Set an arbitrary set of error frames same as droppable frames. + // In addition to isolated loss/drop, add a long consecutive series + // (of size 9) of dropped frames. 
+ unsigned int num_droppable_frames = 11; + unsigned int droppable_frame_list[] = { 5, 16, 22, 23, 24, 25, + 26, 27, 28, 29, 30 }; + SetDroppableFrames(num_droppable_frames, droppable_frame_list); + SetErrorFrames(num_droppable_frames, droppable_frame_list); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + // Test that no mismatches have been found + std::cout << " Mismatch frames: " << GetMismatchFrames() << "\n"; + EXPECT_EQ(GetMismatchFrames(), (unsigned int)0); + + // Reset previously set of error/droppable frames. + Reset(); + +#if 0 + // TODO(jkoleszar): This test is disabled for the time being as too + // sensitive. It's not clear how to set a reasonable threshold for + // this behavior. + + // Now set an arbitrary set of error frames that are non-droppable + unsigned int num_error_frames = 3; + unsigned int error_frame_list[] = {3, 10, 20}; + SetErrorFrames(num_error_frames, error_frame_list); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + // Test that dropping an arbitrary set of inter frames does not hurt too much + // Note the Average Mismatch PSNR is the average of the PSNR between + // decoded frame and encoder's version of the same frame for all frames + // with mismatch. + const double psnr_resilience_mismatch = GetAverageMismatchPsnr(); + std::cout << " Mismatch PSNR: " + << psnr_resilience_mismatch << "\n"; + EXPECT_GT(psnr_resilience_mismatch, 20.0); +#endif +} + +AV1_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES); +} // namespace diff --git a/third_party/aom/test/ethread_test.cc b/third_party/aom/test/ethread_test.cc new file mode 100644 index 000000000..5b519f8fe --- /dev/null +++ b/third_party/aom/test/ethread_test.cc @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include <string> +#include <vector> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/md5_helper.h" +#include "test/util.h" +#include "test/y4m_video_source.h" + +namespace { +class AVxEncoderThreadTest + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int> { + protected: + AVxEncoderThreadTest() + : EncoderTest(GET_PARAM(0)), encoder_initialized_(false), + encoding_mode_(GET_PARAM(1)), set_cpu_used_(GET_PARAM(2)) { + init_flags_ = AOM_CODEC_USE_PSNR; + aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t(); + cfg.w = 1280; + cfg.h = 720; + decoder_ = codec_->CreateDecoder(cfg, 0); +#if CONFIG_AV1 && CONFIG_EXT_TILE + if (decoder_->IsAV1()) { + decoder_->Control(AV1_SET_DECODE_TILE_ROW, -1); + decoder_->Control(AV1_SET_DECODE_TILE_COL, -1); + } +#endif + + size_enc_.clear(); + md5_dec_.clear(); + md5_enc_.clear(); + } + virtual ~AVxEncoderThreadTest() { delete decoder_; } + + virtual void SetUp() { + InitializeConfig(); + SetMode(encoding_mode_); + + if (encoding_mode_ != ::libaom_test::kRealTime) { + cfg_.g_lag_in_frames = 3; + cfg_.rc_end_usage = AOM_VBR; + cfg_.rc_2pass_vbr_minsection_pct = 5; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; + } else { + cfg_.g_lag_in_frames = 0; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_error_resilient = 1; + } + cfg_.rc_max_quantizer = 56; + cfg_.rc_min_quantizer = 0; + } + + virtual void BeginPassHook(unsigned int /*pass*/) { + encoder_initialized_ = false; + } + + virtual void PreEncodeFrameHook(::libaom_test::VideoSource * /*video*/, + 
::libaom_test::Encoder *encoder) { + if (!encoder_initialized_) { +#if CONFIG_AV1 && CONFIG_EXT_TILE + encoder->Control(AV1E_SET_TILE_COLUMNS, 1); + if (codec_ == &libaom_test::kAV1) { + // TODO(geza): Start using multiple tile rows when the multi-threaded + // encoder can handle them + encoder->Control(AV1E_SET_TILE_ROWS, 32); + } else { + encoder->Control(AV1E_SET_TILE_ROWS, 0); + } +#else + // Encode 4 tile columns. + encoder->Control(AV1E_SET_TILE_COLUMNS, 2); + encoder->Control(AV1E_SET_TILE_ROWS, 0); +#endif // CONFIG_AV1 && CONFIG_EXT_TILE +#if CONFIG_LOOPFILTERING_ACROSS_TILES + encoder->Control(AV1E_SET_TILE_LOOPFILTER, 0); +#endif // CONFIG_LOOPFILTERING_ACROSS_TILES + encoder->Control(AOME_SET_CPUUSED, set_cpu_used_); + if (encoding_mode_ != ::libaom_test::kRealTime) { + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 0); + } else { + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0); + encoder->Control(AV1E_SET_AQ_MODE, 3); + } + encoder_initialized_ = true; + } + } + + virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) { + size_enc_.push_back(pkt->data.frame.sz); + + ::libaom_test::MD5 md5_enc; + md5_enc.Add(reinterpret_cast<uint8_t *>(pkt->data.frame.buf), + pkt->data.frame.sz); + md5_enc_.push_back(md5_enc.Get()); + + const aom_codec_err_t res = decoder_->DecodeFrame( + reinterpret_cast<uint8_t *>(pkt->data.frame.buf), pkt->data.frame.sz); + if (res != AOM_CODEC_OK) { + abort_ = true; + ASSERT_EQ(AOM_CODEC_OK, res); + } + const aom_image_t *img = decoder_->GetDxData().Next(); + + if (img) { + ::libaom_test::MD5 md5_res; + md5_res.Add(img); + md5_dec_.push_back(md5_res.Get()); + } + } + + void DoTest() { + ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 15, 18); + cfg_.rc_target_bitrate = 1000; + + // Encode using single thread. 
+ cfg_.g_threads = 1; + init_flags_ = AOM_CODEC_USE_PSNR; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + std::vector<size_t> single_thr_size_enc; + std::vector<std::string> single_thr_md5_enc; + std::vector<std::string> single_thr_md5_dec; + single_thr_size_enc = size_enc_; + single_thr_md5_enc = md5_enc_; + single_thr_md5_dec = md5_dec_; + size_enc_.clear(); + md5_enc_.clear(); + md5_dec_.clear(); + + // Encode using multiple threads. + cfg_.g_threads = 4; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + std::vector<size_t> multi_thr_size_enc; + std::vector<std::string> multi_thr_md5_enc; + std::vector<std::string> multi_thr_md5_dec; + multi_thr_size_enc = size_enc_; + multi_thr_md5_enc = md5_enc_; + multi_thr_md5_dec = md5_dec_; + size_enc_.clear(); + md5_enc_.clear(); + md5_dec_.clear(); + + // Check that the vectors are equal. + ASSERT_EQ(single_thr_size_enc, multi_thr_size_enc); + ASSERT_EQ(single_thr_md5_enc, multi_thr_md5_enc); + ASSERT_EQ(single_thr_md5_dec, multi_thr_md5_dec); + } + + bool encoder_initialized_; + ::libaom_test::TestMode encoding_mode_; + int set_cpu_used_; + ::libaom_test::Decoder *decoder_; + std::vector<size_t> size_enc_; + std::vector<std::string> md5_enc_; + std::vector<std::string> md5_dec_; +}; + +TEST_P(AVxEncoderThreadTest, EncoderResultTest) { DoTest(); } + +class AVxEncoderThreadTestLarge : public AVxEncoderThreadTest {}; + +TEST_P(AVxEncoderThreadTestLarge, EncoderResultTest) { DoTest(); } + +// For AV1, only test speed 0 to 3. 
+AV1_INSTANTIATE_TEST_CASE(AVxEncoderThreadTest, + ::testing::Values(::libaom_test::kTwoPassGood, + ::libaom_test::kOnePassGood), + ::testing::Range(2, 4)); + +AV1_INSTANTIATE_TEST_CASE(AVxEncoderThreadTestLarge, + ::testing::Values(::libaom_test::kTwoPassGood, + ::libaom_test::kOnePassGood), + ::testing::Range(0, 2)); +} // namespace diff --git a/third_party/aom/test/examples.sh b/third_party/aom/test/examples.sh new file mode 100755 index 000000000..d3152be7d --- /dev/null +++ b/third_party/aom/test/examples.sh @@ -0,0 +1,29 @@ +#!/bin/sh +## Copyright (c) 2016, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## +## This file runs all of the tests for the libaom examples. +## +. $(dirname $0)/tools_common.sh + +example_tests=$(ls $(dirname $0)/*.sh) + +# List of script names to exclude. +exclude_list="examples tools_common decode_to_md5" + +# Filter out the scripts in $exclude_list. +for word in ${exclude_list}; do + example_tests=$(filter_strings "${example_tests}" "${word}" exclude) +done + +for test in ${example_tests}; do + # Source each test script so that exporting variables can be avoided. + AOM_TEST_NAME="$(basename ${test%.*})" + . "${test}" +done diff --git a/third_party/aom/test/fdct4x4_test.cc b/third_party/aom/test/fdct4x4_test.cc new file mode 100644 index 000000000..ed265e84f --- /dev/null +++ b/third_party/aom/test/fdct4x4_test.cc @@ -0,0 +1,344 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./av1_rtcd.h" +#include "./aom_dsp_rtcd.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/transform_test_base.h" +#include "test/util.h" +#include "av1/common/entropy.h" +#include "aom/aom_codec.h" +#include "aom/aom_integer.h" +#include "aom_ports/mem.h" + +using libaom_test::ACMRandom; + +namespace { +typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride); +typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride); +typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride, + int tx_type); +using libaom_test::FhtFunc; + +typedef std::tr1::tuple<FdctFunc, IdctFunc, int, aom_bit_depth_t, int> + Dct4x4Param; +typedef std::tr1::tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht4x4Param; + +void fdct4x4_ref(const int16_t *in, tran_low_t *out, int stride, + int /*tx_type*/) { + aom_fdct4x4_c(in, out, stride); +} + +void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) { + av1_fht4x4_c(in, out, stride, tx_type); +} + +void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride, + int /*tx_type*/) { + av1_fwht4x4_c(in, out, stride); +} + +#if CONFIG_HIGHBITDEPTH +void idct4x4_10(const tran_low_t *in, uint8_t *out, int stride) { + aom_highbd_idct4x4_16_add_c(in, out, stride, 10); +} + +void 
idct4x4_12(const tran_low_t *in, uint8_t *out, int stride) { + aom_highbd_idct4x4_16_add_c(in, out, stride, 12); +} + +void iht4x4_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { + av1_highbd_iht4x4_16_add_c(in, out, stride, tx_type, 10); +} + +void iht4x4_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { + av1_highbd_iht4x4_16_add_c(in, out, stride, tx_type, 12); +} + +void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) { + aom_highbd_iwht4x4_16_add_c(in, out, stride, 10); +} + +void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) { + aom_highbd_iwht4x4_16_add_c(in, out, stride, 12); +} + +#if HAVE_SSE2 +void idct4x4_10_sse2(const tran_low_t *in, uint8_t *out, int stride) { + aom_highbd_idct4x4_16_add_sse2(in, out, stride, 10); +} + +void idct4x4_12_sse2(const tran_low_t *in, uint8_t *out, int stride) { + aom_highbd_idct4x4_16_add_sse2(in, out, stride, 12); +} +#endif // HAVE_SSE2 +#endif // CONFIG_HIGHBITDEPTH + +class Trans4x4DCT : public libaom_test::TransformTestBase, + public ::testing::TestWithParam<Dct4x4Param> { + public: + virtual ~Trans4x4DCT() {} + + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + tx_type_ = GET_PARAM(2); + pitch_ = 4; + height_ = 4; + fwd_txfm_ref = fdct4x4_ref; + bit_depth_ = GET_PARAM(3); + mask_ = (1 << bit_depth_) - 1; + num_coeffs_ = GET_PARAM(4); + } + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) { + fwd_txfm_(in, out, stride); + } + void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) { + inv_txfm_(out, dst, stride); + } + + FdctFunc fwd_txfm_; + IdctFunc inv_txfm_; +}; + +TEST_P(Trans4x4DCT, AccuracyCheck) { RunAccuracyCheck(0, 0.00001); } + +TEST_P(Trans4x4DCT, CoeffCheck) { RunCoeffCheck(); } + +TEST_P(Trans4x4DCT, MemCheck) { RunMemCheck(); } + +TEST_P(Trans4x4DCT, InvAccuracyCheck) { RunInvAccuracyCheck(1); } + +class Trans4x4HT 
: public libaom_test::TransformTestBase, + public ::testing::TestWithParam<Ht4x4Param> { + public: + virtual ~Trans4x4HT() {} + + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + tx_type_ = GET_PARAM(2); + pitch_ = 4; + height_ = 4; + fwd_txfm_ref = fht4x4_ref; + bit_depth_ = GET_PARAM(3); + mask_ = (1 << bit_depth_) - 1; + num_coeffs_ = GET_PARAM(4); + } + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) { + fwd_txfm_(in, out, stride, tx_type_); + } + + void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) { + inv_txfm_(out, dst, stride, tx_type_); + } + + FhtFunc fwd_txfm_; + IhtFunc inv_txfm_; +}; + +TEST_P(Trans4x4HT, AccuracyCheck) { RunAccuracyCheck(1, 0.005); } + +TEST_P(Trans4x4HT, CoeffCheck) { RunCoeffCheck(); } + +TEST_P(Trans4x4HT, MemCheck) { RunMemCheck(); } + +TEST_P(Trans4x4HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); } + +class Trans4x4WHT : public libaom_test::TransformTestBase, + public ::testing::TestWithParam<Dct4x4Param> { + public: + virtual ~Trans4x4WHT() {} + + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + tx_type_ = GET_PARAM(2); + pitch_ = 4; + height_ = 4; + fwd_txfm_ref = fwht4x4_ref; + bit_depth_ = GET_PARAM(3); + mask_ = (1 << bit_depth_) - 1; + num_coeffs_ = GET_PARAM(4); + } + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) { + fwd_txfm_(in, out, stride); + } + void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) { + inv_txfm_(out, dst, stride); + } + + FdctFunc fwd_txfm_; + IdctFunc inv_txfm_; +}; + +TEST_P(Trans4x4WHT, AccuracyCheck) { RunAccuracyCheck(0, 0.00001); } + +TEST_P(Trans4x4WHT, CoeffCheck) { RunCoeffCheck(); } + +TEST_P(Trans4x4WHT, MemCheck) { RunMemCheck(); } + +TEST_P(Trans4x4WHT, InvAccuracyCheck) { RunInvAccuracyCheck(0); } +using 
std::tr1::make_tuple; + +#if CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + C, Trans4x4DCT, + ::testing::Values( + make_tuple(&aom_highbd_fdct4x4_c, &idct4x4_10, 0, AOM_BITS_10, 16), + make_tuple(&aom_highbd_fdct4x4_c, &idct4x4_12, 0, AOM_BITS_12, 16), + make_tuple(&aom_fdct4x4_c, &aom_idct4x4_16_add_c, 0, AOM_BITS_8, 16))); +#else +INSTANTIATE_TEST_CASE_P(C, Trans4x4DCT, + ::testing::Values(make_tuple(&aom_fdct4x4_c, + &aom_idct4x4_16_add_c, 0, + AOM_BITS_8, 16))); +#endif // CONFIG_HIGHBITDEPTH + +#if CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + C, Trans4x4HT, + ::testing::Values( + make_tuple(&av1_highbd_fht4x4_c, &iht4x4_10, 0, AOM_BITS_10, 16), + make_tuple(&av1_highbd_fht4x4_c, &iht4x4_10, 1, AOM_BITS_10, 16), + make_tuple(&av1_highbd_fht4x4_c, &iht4x4_10, 2, AOM_BITS_10, 16), + make_tuple(&av1_highbd_fht4x4_c, &iht4x4_10, 3, AOM_BITS_10, 16), + make_tuple(&av1_highbd_fht4x4_c, &iht4x4_12, 0, AOM_BITS_12, 16), + make_tuple(&av1_highbd_fht4x4_c, &iht4x4_12, 1, AOM_BITS_12, 16), + make_tuple(&av1_highbd_fht4x4_c, &iht4x4_12, 2, AOM_BITS_12, 16), + make_tuple(&av1_highbd_fht4x4_c, &iht4x4_12, 3, AOM_BITS_12, 16), + make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 0, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 1, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 2, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 3, AOM_BITS_8, 16))); +#else +INSTANTIATE_TEST_CASE_P( + C, Trans4x4HT, + ::testing::Values( + make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 0, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 1, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 2, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 3, AOM_BITS_8, 16))); +#endif // CONFIG_HIGHBITDEPTH + +#if CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + C, Trans4x4WHT, + ::testing::Values( + make_tuple(&av1_highbd_fwht4x4_c, &iwht4x4_10, 0, AOM_BITS_10, 16), + 
make_tuple(&av1_highbd_fwht4x4_c, &iwht4x4_12, 0, AOM_BITS_12, 16), + make_tuple(&av1_fwht4x4_c, &aom_iwht4x4_16_add_c, 0, AOM_BITS_8, 16))); +#else +INSTANTIATE_TEST_CASE_P(C, Trans4x4WHT, + ::testing::Values(make_tuple(&av1_fwht4x4_c, + &aom_iwht4x4_16_add_c, 0, + AOM_BITS_8, 16))); +#endif // CONFIG_HIGHBITDEPTH + +#if HAVE_NEON_ASM && !CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P(NEON, Trans4x4DCT, + ::testing::Values(make_tuple(&aom_fdct4x4_c, + &aom_idct4x4_16_add_neon, + 0, AOM_BITS_8, 16))); +#endif // HAVE_NEON_ASM && !CONFIG_HIGHBITDEPTH + +#if HAVE_NEON && !CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + NEON, Trans4x4HT, + ::testing::Values( + make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_neon, 0, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_neon, 1, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_neon, 2, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_neon, 3, AOM_BITS_8, 16))); +#endif // HAVE_NEON && !CONFIG_HIGHBITDEPTH + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P( + SSE2, Trans4x4WHT, + ::testing::Values(make_tuple(&av1_fwht4x4_c, &aom_iwht4x4_16_add_c, 0, + AOM_BITS_8, 16), + make_tuple(&av1_fwht4x4_c, &aom_iwht4x4_16_add_sse2, 0, + AOM_BITS_8, 16))); +#endif + +#if HAVE_SSE2 && !CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P(SSE2, Trans4x4DCT, + ::testing::Values(make_tuple(&aom_fdct4x4_sse2, + &aom_idct4x4_16_add_sse2, + 0, AOM_BITS_8, 16))); +INSTANTIATE_TEST_CASE_P( + SSE2, Trans4x4HT, + ::testing::Values(make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 0, + AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 1, + AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 2, + AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 3, + AOM_BITS_8, 16))); +#endif // HAVE_SSE2 && !CONFIG_HIGHBITDEPTH + +#if HAVE_SSE2 && CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + SSE2, Trans4x4DCT, + ::testing::Values( + make_tuple(&aom_highbd_fdct4x4_c, 
&idct4x4_10_sse2, 0, AOM_BITS_10, 16), + make_tuple(&aom_highbd_fdct4x4_sse2, &idct4x4_10_sse2, 0, AOM_BITS_10, + 16), + make_tuple(&aom_highbd_fdct4x4_c, &idct4x4_12_sse2, 0, AOM_BITS_12, 16), + make_tuple(&aom_highbd_fdct4x4_sse2, &idct4x4_12_sse2, 0, AOM_BITS_12, + 16), + make_tuple(&aom_fdct4x4_sse2, &aom_idct4x4_16_add_c, 0, AOM_BITS_8, + 16))); + +INSTANTIATE_TEST_CASE_P( + SSE2, Trans4x4HT, + ::testing::Values( + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_c, 0, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_c, 1, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_c, 2, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_c, 3, AOM_BITS_8, 16))); +#endif // HAVE_SSE2 && CONFIG_HIGHBITDEPTH + +#if HAVE_MSA && !CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P(MSA, Trans4x4DCT, + ::testing::Values(make_tuple(&aom_fdct4x4_msa, + &aom_idct4x4_16_add_msa, 0, + AOM_BITS_8, 16))); +#if !CONFIG_EXT_TX +INSTANTIATE_TEST_CASE_P( + MSA, Trans4x4HT, + ::testing::Values( + make_tuple(&av1_fht4x4_msa, &av1_iht4x4_16_add_msa, 0, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_msa, &av1_iht4x4_16_add_msa, 1, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_msa, &av1_iht4x4_16_add_msa, 2, AOM_BITS_8, 16), + make_tuple(&av1_fht4x4_msa, &av1_iht4x4_16_add_msa, 3, AOM_BITS_8, + 16))); +#endif // !CONFIG_EXT_TX +#endif // HAVE_MSA && !CONFIG_HIGHBITDEPTH +} // namespace diff --git a/third_party/aom/test/fdct8x8_test.cc b/third_party/aom/test/fdct8x8_test.cc new file mode 100644 index 000000000..0e86c70aa --- /dev/null +++ b/third_party/aom/test/fdct8x8_test.cc @@ -0,0 +1,699 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./av1_rtcd.h" +#include "./aom_dsp_rtcd.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "av1/common/entropy.h" +#include "av1/common/scan.h" +#include "aom/aom_codec.h" +#include "aom/aom_integer.h" +#include "aom_ports/mem.h" + +using libaom_test::ACMRandom; + +namespace { + +const int kNumCoeffs = 64; +const double kPi = 3.141592653589793238462643383279502884; + +const int kSignBiasMaxDiff255 = 1500; +const int kSignBiasMaxDiff15 = 10000; + +typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride); +typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride); +typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride, + int tx_type); +typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride, + int tx_type); + +typedef std::tr1::tuple<FdctFunc, IdctFunc, int, aom_bit_depth_t> Dct8x8Param; +typedef std::tr1::tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t> Ht8x8Param; +typedef std::tr1::tuple<IdctFunc, IdctFunc, int, aom_bit_depth_t> Idct8x8Param; + +void reference_8x8_dct_1d(const double in[8], double out[8]) { + const double kInvSqrt2 = 0.707106781186547524400844362104; + for (int k = 0; k < 8; k++) { + out[k] = 0.0; + for (int n = 0; n < 8; n++) + out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 16.0); + if (k == 0) out[k] = out[k] * kInvSqrt2; + } +} + +void reference_8x8_dct_2d(const int16_t input[kNumCoeffs], + double output[kNumCoeffs]) { + // First transform columns + for (int i = 0; i < 8; ++i) { + double temp_in[8], temp_out[8]; + for (int j = 0; j < 8; ++j) temp_in[j] = input[j * 8 + i]; 
+ reference_8x8_dct_1d(temp_in, temp_out); + for (int j = 0; j < 8; ++j) output[j * 8 + i] = temp_out[j]; + } + // Then transform rows + for (int i = 0; i < 8; ++i) { + double temp_in[8], temp_out[8]; + for (int j = 0; j < 8; ++j) temp_in[j] = output[j + i * 8]; + reference_8x8_dct_1d(temp_in, temp_out); + // Scale by some magic number + for (int j = 0; j < 8; ++j) output[j + i * 8] = temp_out[j] * 2; + } +} + +void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride, + int /*tx_type*/) { + aom_fdct8x8_c(in, out, stride); +} + +void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) { + av1_fht8x8_c(in, out, stride, tx_type); +} + +#if CONFIG_HIGHBITDEPTH +void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { + av1_highbd_iht8x8_64_add_c(in, out, stride, tx_type, 10); +} + +void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { + av1_highbd_iht8x8_64_add_c(in, out, stride, tx_type, 12); +} + +#endif // CONFIG_HIGHBITDEPTH + +class FwdTrans8x8TestBase { + public: + virtual ~FwdTrans8x8TestBase() {} + + protected: + virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0; + virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0; + + void RunSignBiasCheck() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, int16_t, test_input_block[64]); + DECLARE_ALIGNED(16, tran_low_t, test_output_block[64]); + int count_sign_block[64][2]; + const int count_test_block = 100000; + + memset(count_sign_block, 0, sizeof(count_sign_block)); + + for (int i = 0; i < count_test_block; ++i) { + // Initialize a test block with input range [-255, 255]. 
+ for (int j = 0; j < 64; ++j) + test_input_block[j] = ((rnd.Rand16() >> (16 - bit_depth_)) & mask_) - + ((rnd.Rand16() >> (16 - bit_depth_)) & mask_); + ASM_REGISTER_STATE_CHECK( + RunFwdTxfm(test_input_block, test_output_block, pitch_)); + + for (int j = 0; j < 64; ++j) { + if (test_output_block[j] < 0) + ++count_sign_block[j][0]; + else if (test_output_block[j] > 0) + ++count_sign_block[j][1]; + } + } + + for (int j = 0; j < 64; ++j) { + const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]); + const int max_diff = kSignBiasMaxDiff255; + EXPECT_LT(diff, max_diff << (bit_depth_ - 8)) + << "Error: 8x8 FDCT/FHT has a sign bias > " + << 1. * max_diff / count_test_block * 100 << "%" + << " for input range [-255, 255] at index " << j + << " count0: " << count_sign_block[j][0] + << " count1: " << count_sign_block[j][1] << " diff: " << diff; + } + + memset(count_sign_block, 0, sizeof(count_sign_block)); + + for (int i = 0; i < count_test_block; ++i) { + // Initialize a test block with input range [-mask_ / 16, mask_ / 16]. + for (int j = 0; j < 64; ++j) + test_input_block[j] = + ((rnd.Rand16() & mask_) >> 4) - ((rnd.Rand16() & mask_) >> 4); + ASM_REGISTER_STATE_CHECK( + RunFwdTxfm(test_input_block, test_output_block, pitch_)); + + for (int j = 0; j < 64; ++j) { + if (test_output_block[j] < 0) + ++count_sign_block[j][0]; + else if (test_output_block[j] > 0) + ++count_sign_block[j][1]; + } + } + + for (int j = 0; j < 64; ++j) { + const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]); + const int max_diff = kSignBiasMaxDiff15; + EXPECT_LT(diff, max_diff << (bit_depth_ - 8)) + << "Error: 8x8 FDCT/FHT has a sign bias > " + << 1. 
* max_diff / count_test_block * 100 << "%" + << " for input range [-15, 15] at index " << j + << " count0: " << count_sign_block[j][0] + << " count1: " << count_sign_block[j][1] << " diff: " << diff; + } + } + + void RunRoundTripErrorCheck() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + int max_error = 0; + int total_error = 0; + const int count_test_block = 100000; + DECLARE_ALIGNED(16, int16_t, test_input_block[64]); + DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]); + DECLARE_ALIGNED(16, uint8_t, dst[64]); + DECLARE_ALIGNED(16, uint8_t, src[64]); +#if CONFIG_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint16_t, dst16[64]); + DECLARE_ALIGNED(16, uint16_t, src16[64]); +#endif + + for (int i = 0; i < count_test_block; ++i) { + // Initialize a test block with input range [-mask_, mask_]. + for (int j = 0; j < 64; ++j) { + if (bit_depth_ == AOM_BITS_8) { + src[j] = rnd.Rand8(); + dst[j] = rnd.Rand8(); + test_input_block[j] = src[j] - dst[j]; +#if CONFIG_HIGHBITDEPTH + } else { + src16[j] = rnd.Rand16() & mask_; + dst16[j] = rnd.Rand16() & mask_; + test_input_block[j] = src16[j] - dst16[j]; +#endif + } + } + + ASM_REGISTER_STATE_CHECK( + RunFwdTxfm(test_input_block, test_temp_block, pitch_)); + for (int j = 0; j < 64; ++j) { + if (test_temp_block[j] > 0) { + test_temp_block[j] += 2; + test_temp_block[j] /= 4; + test_temp_block[j] *= 4; + } else { + test_temp_block[j] -= 2; + test_temp_block[j] /= 4; + test_temp_block[j] *= 4; + } + } + if (bit_depth_ == AOM_BITS_8) { + ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_)); +#if CONFIG_HIGHBITDEPTH + } else { + ASM_REGISTER_STATE_CHECK( + RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_)); +#endif + } + + for (int j = 0; j < 64; ++j) { +#if CONFIG_HIGHBITDEPTH + const int diff = + bit_depth_ == AOM_BITS_8 ? 
dst[j] - src[j] : dst16[j] - src16[j]; +#else + const int diff = dst[j] - src[j]; +#endif + const int error = diff * diff; + if (max_error < error) max_error = error; + total_error += error; + } + } + + EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error) + << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual" + << " roundtrip error > 1"; + + EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error) + << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip " + << "error > 1/5 per block"; + } + + void RunExtremalCheck() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + int max_error = 0; + int total_error = 0; + int total_coeff_error = 0; + const int count_test_block = 100000; + DECLARE_ALIGNED(16, int16_t, test_input_block[64]); + DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]); + DECLARE_ALIGNED(16, tran_low_t, ref_temp_block[64]); + DECLARE_ALIGNED(16, uint8_t, dst[64]); + DECLARE_ALIGNED(16, uint8_t, src[64]); +#if CONFIG_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint16_t, dst16[64]); + DECLARE_ALIGNED(16, uint16_t, src16[64]); +#endif + + for (int i = 0; i < count_test_block; ++i) { + // Initialize a test block with input range [-mask_, mask_]. + for (int j = 0; j < 64; ++j) { + if (bit_depth_ == AOM_BITS_8) { + if (i == 0) { + src[j] = 255; + dst[j] = 0; + } else if (i == 1) { + src[j] = 0; + dst[j] = 255; + } else { + src[j] = rnd.Rand8() % 2 ? 255 : 0; + dst[j] = rnd.Rand8() % 2 ? 255 : 0; + } + test_input_block[j] = src[j] - dst[j]; +#if CONFIG_HIGHBITDEPTH + } else { + if (i == 0) { + src16[j] = mask_; + dst16[j] = 0; + } else if (i == 1) { + src16[j] = 0; + dst16[j] = mask_; + } else { + src16[j] = rnd.Rand8() % 2 ? mask_ : 0; + dst16[j] = rnd.Rand8() % 2 ? 
mask_ : 0; + } + test_input_block[j] = src16[j] - dst16[j]; +#endif + } + } + + ASM_REGISTER_STATE_CHECK( + RunFwdTxfm(test_input_block, test_temp_block, pitch_)); + ASM_REGISTER_STATE_CHECK( + fwd_txfm_ref(test_input_block, ref_temp_block, pitch_, tx_type_)); + if (bit_depth_ == AOM_BITS_8) { + ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_)); +#if CONFIG_HIGHBITDEPTH + } else { + ASM_REGISTER_STATE_CHECK( + RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_)); +#endif + } + + for (int j = 0; j < 64; ++j) { +#if CONFIG_HIGHBITDEPTH + const int diff = + bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j]; +#else + const int diff = dst[j] - src[j]; +#endif + const int error = diff * diff; + if (max_error < error) max_error = error; + total_error += error; + + const int coeff_diff = test_temp_block[j] - ref_temp_block[j]; + total_coeff_error += abs(coeff_diff); + } + + EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error) + << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has" + << "an individual roundtrip error > 1"; + + EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error) + << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average" + << " roundtrip error > 1/5 per block"; + + EXPECT_EQ(0, total_coeff_error) + << "Error: Extremal 8x8 FDCT/FHT has" + << "overflow issues in the intermediate steps > 1"; + } + } + + void RunInvAccuracyCheck() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 1000; + DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]); + DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]); + DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]); +#if CONFIG_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]); + DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]); +#endif + + for (int i = 0; i < count_test_block; ++i) { + double out_r[kNumCoeffs]; + + // Initialize a test block with input range [-255, 255]. 
+ for (int j = 0; j < kNumCoeffs; ++j) { + if (bit_depth_ == AOM_BITS_8) { + src[j] = rnd.Rand8() % 2 ? 255 : 0; + dst[j] = src[j] > 0 ? 0 : 255; + in[j] = src[j] - dst[j]; +#if CONFIG_HIGHBITDEPTH + } else { + src16[j] = rnd.Rand8() % 2 ? mask_ : 0; + dst16[j] = src16[j] > 0 ? 0 : mask_; + in[j] = src16[j] - dst16[j]; +#endif + } + } + + reference_8x8_dct_2d(in, out_r); + for (int j = 0; j < kNumCoeffs; ++j) + coeff[j] = static_cast<tran_low_t>(round(out_r[j])); + + if (bit_depth_ == AOM_BITS_8) { + ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_)); +#if CONFIG_HIGHBITDEPTH + } else { + ASM_REGISTER_STATE_CHECK( + RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_)); +#endif + } + + for (int j = 0; j < kNumCoeffs; ++j) { +#if CONFIG_HIGHBITDEPTH + const int diff = + bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j]; +#else + const int diff = dst[j] - src[j]; +#endif + const uint32_t error = diff * diff; + EXPECT_GE(1u << 2 * (bit_depth_ - 8), error) + << "Error: 8x8 IDCT has error " << error << " at index " << j; + } + } + } + + void RunFwdAccuracyCheck() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 1000; + DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, coeff_r[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]); + + for (int i = 0; i < count_test_block; ++i) { + double out_r[kNumCoeffs]; + + // Initialize a test block with input range [-mask_, mask_]. + for (int j = 0; j < kNumCoeffs; ++j) + in[j] = rnd.Rand8() % 2 == 0 ? 
mask_ : -mask_; + + RunFwdTxfm(in, coeff, pitch_); + reference_8x8_dct_2d(in, out_r); + for (int j = 0; j < kNumCoeffs; ++j) + coeff_r[j] = static_cast<tran_low_t>(round(out_r[j])); + + for (int j = 0; j < kNumCoeffs; ++j) { + const int32_t diff = coeff[j] - coeff_r[j]; + const uint32_t error = diff * diff; + EXPECT_GE(9u << 2 * (bit_depth_ - 8), error) + << "Error: 8x8 DCT has error " << error << " at index " << j; + } + } + } + + void CompareInvReference(IdctFunc ref_txfm, int thresh) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 10000; + const int eob = 12; + DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]); + DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]); + DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]); +#if CONFIG_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]); + DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]); +#endif + const int16_t *scan = av1_default_scan_orders[TX_8X8].scan; + + for (int i = 0; i < count_test_block; ++i) { + for (int j = 0; j < kNumCoeffs; ++j) { + if (j < eob) { + // Random values less than the threshold, either positive or negative + coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2)); + } else { + coeff[scan[j]] = 0; + } + if (bit_depth_ == AOM_BITS_8) { + dst[j] = 0; + ref[j] = 0; +#if CONFIG_HIGHBITDEPTH + } else { + dst16[j] = 0; + ref16[j] = 0; +#endif + } + } + if (bit_depth_ == AOM_BITS_8) { + ref_txfm(coeff, ref, pitch_); + ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_)); +#if CONFIG_HIGHBITDEPTH + } else { + ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_); + ASM_REGISTER_STATE_CHECK( + RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_)); +#endif + } + + for (int j = 0; j < kNumCoeffs; ++j) { +#if CONFIG_HIGHBITDEPTH + const int diff = + bit_depth_ == AOM_BITS_8 ? 
dst[j] - ref[j] : dst16[j] - ref16[j]; +#else + const int diff = dst[j] - ref[j]; +#endif + const uint32_t error = diff * diff; + EXPECT_EQ(0u, error) << "Error: 8x8 IDCT has error " << error + << " at index " << j; + } + } + } + int pitch_; + int tx_type_; + FhtFunc fwd_txfm_ref; + aom_bit_depth_t bit_depth_; + int mask_; +}; + +class FwdTrans8x8DCT : public FwdTrans8x8TestBase, + public ::testing::TestWithParam<Dct8x8Param> { + public: + virtual ~FwdTrans8x8DCT() {} + + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + tx_type_ = GET_PARAM(2); + pitch_ = 8; + fwd_txfm_ref = fdct8x8_ref; + bit_depth_ = GET_PARAM(3); + mask_ = (1 << bit_depth_) - 1; + } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) { + fwd_txfm_(in, out, stride); + } + void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) { + inv_txfm_(out, dst, stride); + } + + FdctFunc fwd_txfm_; + IdctFunc inv_txfm_; +}; + +TEST_P(FwdTrans8x8DCT, SignBiasCheck) { RunSignBiasCheck(); } + +TEST_P(FwdTrans8x8DCT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); } + +TEST_P(FwdTrans8x8DCT, ExtremalCheck) { RunExtremalCheck(); } + +TEST_P(FwdTrans8x8DCT, FwdAccuracyCheck) { RunFwdAccuracyCheck(); } + +TEST_P(FwdTrans8x8DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); } + +class FwdTrans8x8HT : public FwdTrans8x8TestBase, + public ::testing::TestWithParam<Ht8x8Param> { + public: + virtual ~FwdTrans8x8HT() {} + + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + tx_type_ = GET_PARAM(2); + pitch_ = 8; + fwd_txfm_ref = fht8x8_ref; + bit_depth_ = GET_PARAM(3); + mask_ = (1 << bit_depth_) - 1; + } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) { + fwd_txfm_(in, out, stride, tx_type_); + } + void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) { + inv_txfm_(out, dst, 
stride, tx_type_); + } + + FhtFunc fwd_txfm_; + IhtFunc inv_txfm_; +}; + +TEST_P(FwdTrans8x8HT, SignBiasCheck) { RunSignBiasCheck(); } + +TEST_P(FwdTrans8x8HT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); } + +TEST_P(FwdTrans8x8HT, ExtremalCheck) { RunExtremalCheck(); } + +class InvTrans8x8DCT : public FwdTrans8x8TestBase, + public ::testing::TestWithParam<Idct8x8Param> { + public: + virtual ~InvTrans8x8DCT() {} + + virtual void SetUp() { + ref_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + thresh_ = GET_PARAM(2); + pitch_ = 8; + bit_depth_ = GET_PARAM(3); + mask_ = (1 << bit_depth_) - 1; + } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) { + inv_txfm_(out, dst, stride); + } + void RunFwdTxfm(int16_t * /*out*/, tran_low_t * /*dst*/, int /*stride*/) {} + + IdctFunc ref_txfm_; + IdctFunc inv_txfm_; + int thresh_; +}; + +TEST_P(InvTrans8x8DCT, CompareReference) { + CompareInvReference(ref_txfm_, thresh_); +} + +using std::tr1::make_tuple; + +#if CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P(C, FwdTrans8x8DCT, + ::testing::Values(make_tuple(&aom_fdct8x8_c, + &aom_idct8x8_64_add_c, 0, + AOM_BITS_8))); +#else +INSTANTIATE_TEST_CASE_P(C, FwdTrans8x8DCT, + ::testing::Values(make_tuple(&aom_fdct8x8_c, + &aom_idct8x8_64_add_c, 0, + AOM_BITS_8))); +#endif // CONFIG_HIGHBITDEPTH + +#if CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + C, FwdTrans8x8HT, + ::testing::Values( + make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 0, AOM_BITS_8), + make_tuple(&av1_highbd_fht8x8_c, &iht8x8_10, 0, AOM_BITS_10), + make_tuple(&av1_highbd_fht8x8_c, &iht8x8_10, 1, AOM_BITS_10), + make_tuple(&av1_highbd_fht8x8_c, &iht8x8_10, 2, AOM_BITS_10), + make_tuple(&av1_highbd_fht8x8_c, &iht8x8_10, 3, AOM_BITS_10), + make_tuple(&av1_highbd_fht8x8_c, &iht8x8_12, 0, AOM_BITS_12), + make_tuple(&av1_highbd_fht8x8_c, &iht8x8_12, 1, AOM_BITS_12), + make_tuple(&av1_highbd_fht8x8_c, &iht8x8_12, 2, AOM_BITS_12), + 
make_tuple(&av1_highbd_fht8x8_c, &iht8x8_12, 3, AOM_BITS_12), + make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 1, AOM_BITS_8), + make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 2, AOM_BITS_8), + make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 3, AOM_BITS_8))); +#else +INSTANTIATE_TEST_CASE_P( + C, FwdTrans8x8HT, + ::testing::Values( + make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 0, AOM_BITS_8), + make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 1, AOM_BITS_8), + make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 2, AOM_BITS_8), + make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 3, AOM_BITS_8))); +#endif // CONFIG_HIGHBITDEPTH + +#if HAVE_NEON_ASM && !CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P(NEON, FwdTrans8x8DCT, + ::testing::Values(make_tuple(&aom_fdct8x8_neon, + &aom_idct8x8_64_add_neon, + 0, AOM_BITS_8))); +#endif // HAVE_NEON_ASM && !CONFIG_HIGHBITDEPTH + +#if HAVE_NEON && !CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + NEON, FwdTrans8x8HT, + ::testing::Values( + make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_neon, 0, AOM_BITS_8), + make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_neon, 1, AOM_BITS_8), + make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_neon, 2, AOM_BITS_8), + make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_neon, 3, AOM_BITS_8))); +#endif // HAVE_NEON && !CONFIG_HIGHBITDEPTH + +#if HAVE_SSE2 && !CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P(SSE2, FwdTrans8x8DCT, + ::testing::Values(make_tuple(&aom_fdct8x8_sse2, + &aom_idct8x8_64_add_sse2, + 0, AOM_BITS_8))); +INSTANTIATE_TEST_CASE_P( + SSE2, FwdTrans8x8HT, + ::testing::Values( + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 0, AOM_BITS_8), + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 1, AOM_BITS_8), + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 2, AOM_BITS_8), + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 3, AOM_BITS_8))); +#endif // HAVE_SSE2 && !CONFIG_HIGHBITDEPTH + +#if HAVE_SSE2 && CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P(SSE2, FwdTrans8x8DCT, + 
::testing::Values(make_tuple(&aom_fdct8x8_sse2, + &aom_idct8x8_64_add_c, 0, + AOM_BITS_8))); + +INSTANTIATE_TEST_CASE_P( + SSE2, FwdTrans8x8HT, + ::testing::Values( + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c, 0, AOM_BITS_8), + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c, 1, AOM_BITS_8), + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c, 2, AOM_BITS_8), + make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c, 3, AOM_BITS_8))); + +#endif // HAVE_SSE2 && CONFIG_HIGHBITDEPTH + +#if HAVE_SSSE3 && ARCH_X86_64 +INSTANTIATE_TEST_CASE_P(SSSE3, FwdTrans8x8DCT, + ::testing::Values(make_tuple(&aom_fdct8x8_ssse3, + &aom_idct8x8_64_add_ssse3, + 0, AOM_BITS_8))); +#endif + +#if HAVE_MSA && !CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P(MSA, FwdTrans8x8DCT, + ::testing::Values(make_tuple(&aom_fdct8x8_msa, + &aom_idct8x8_64_add_msa, 0, + AOM_BITS_8))); +#if !CONFIG_EXT_TX +INSTANTIATE_TEST_CASE_P( + MSA, FwdTrans8x8HT, + ::testing::Values( + make_tuple(&av1_fht8x8_msa, &av1_iht8x8_64_add_msa, 0, AOM_BITS_8), + make_tuple(&av1_fht8x8_msa, &av1_iht8x8_64_add_msa, 1, AOM_BITS_8), + make_tuple(&av1_fht8x8_msa, &av1_iht8x8_64_add_msa, 2, AOM_BITS_8), + make_tuple(&av1_fht8x8_msa, &av1_iht8x8_64_add_msa, 3, AOM_BITS_8))); +#endif // !CONFIG_EXT_TX +#endif // HAVE_MSA && !CONFIG_HIGHBITDEPTH +} // namespace diff --git a/third_party/aom/test/fht32x32_test.cc b/third_party/aom/test/fht32x32_test.cc new file mode 100644 index 000000000..56ac597c0 --- /dev/null +++ b/third_party/aom/test/fht32x32_test.cc @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./av1_rtcd.h" +#include "./aom_dsp_rtcd.h" + +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/transform_test_base.h" +#include "test/util.h" +#include "aom_ports/mem.h" + +using libaom_test::ACMRandom; + +namespace { +typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride, + int tx_type); +using std::tr1::tuple; +using libaom_test::FhtFunc; +typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht32x32Param; + +void fht32x32_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) { + av1_fht32x32_c(in, out, stride, tx_type); +} + +#if CONFIG_HIGHBITDEPTH +typedef void (*IHbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride, + int tx_type, int bd); +typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride, + int tx_type, int bd); + +// Target optimized function, tx_type, bit depth +typedef tuple<HbdHtFunc, int, int> HighbdHt32x32Param; + +void highbd_fht32x32_ref(const int16_t *in, int32_t *out, int stride, + int tx_type, int bd) { + av1_fwd_txfm2d_32x32_c(in, out, stride, tx_type, bd); +} +#endif // CONFIG_HIGHBITDEPTH + +#if HAVE_AVX2 +void dummy_inv_txfm(const tran_low_t *in, uint8_t *out, int stride, + int tx_type) { + (void)in; + (void)out; + (void)stride; + (void)tx_type; +} +#endif + +class AV1Trans32x32HT : public libaom_test::TransformTestBase, + public ::testing::TestWithParam<Ht32x32Param> { + public: + virtual ~AV1Trans32x32HT() {} + + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + tx_type_ = GET_PARAM(2); + pitch_ = 32; + height_ = 32; + fwd_txfm_ref = fht32x32_ref; + bit_depth_ = GET_PARAM(3); + mask_ = (1 << bit_depth_) - 1; + 
num_coeffs_ = GET_PARAM(4); + } + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) { + fwd_txfm_(in, out, stride, tx_type_); + } + + void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) { + inv_txfm_(out, dst, stride, tx_type_); + } + + FhtFunc fwd_txfm_; + IhtFunc inv_txfm_; +}; + +TEST_P(AV1Trans32x32HT, CoeffCheck) { RunCoeffCheck(); } +TEST_P(AV1Trans32x32HT, MemCheck) { RunMemCheck(); } + +#if CONFIG_HIGHBITDEPTH +class AV1HighbdTrans32x32HT + : public ::testing::TestWithParam<HighbdHt32x32Param> { + public: + virtual ~AV1HighbdTrans32x32HT() {} + + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + fwd_txfm_ref_ = highbd_fht32x32_ref; + tx_type_ = GET_PARAM(1); + bit_depth_ = GET_PARAM(2); + mask_ = (1 << bit_depth_) - 1; + num_coeffs_ = 1024; + + input_ = reinterpret_cast<int16_t *>( + aom_memalign(32, sizeof(int16_t) * num_coeffs_)); + output_ = reinterpret_cast<int32_t *>( + aom_memalign(32, sizeof(int32_t) * num_coeffs_)); + output_ref_ = reinterpret_cast<int32_t *>( + aom_memalign(32, sizeof(int32_t) * num_coeffs_)); + } + + virtual void TearDown() { + aom_free(input_); + aom_free(output_); + aom_free(output_ref_); + libaom_test::ClearSystemState(); + } + + protected: + void RunBitexactCheck(); + + private: + HbdHtFunc fwd_txfm_; + HbdHtFunc fwd_txfm_ref_; + int tx_type_; + int bit_depth_; + int mask_; + int num_coeffs_; + int16_t *input_; + int32_t *output_; + int32_t *output_ref_; +}; + +void AV1HighbdTrans32x32HT::RunBitexactCheck() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + int i, j; + const int stride = 32; + const int num_tests = 1000; + + for (i = 0; i < num_tests; ++i) { + for (j = 0; j < num_coeffs_; ++j) { + input_[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_); + } + + fwd_txfm_ref_(input_, output_ref_, stride, tx_type_, bit_depth_); + ASM_REGISTER_STATE_CHECK( + fwd_txfm_(input_, output_, stride, tx_type_, bit_depth_)); + 
+ for (j = 0; j < num_coeffs_; ++j) { + EXPECT_EQ(output_ref_[j], output_[j]) + << "Not bit-exact result at index: " << j << " at test block: " << i; + } + } +} + +TEST_P(AV1HighbdTrans32x32HT, HighbdCoeffCheck) { RunBitexactCheck(); } +#endif // CONFIG_HIGHBITDEPTH + +using std::tr1::make_tuple; + +#if HAVE_SSE2 +const Ht32x32Param kArrayHt32x32Param_sse2[] = { + make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 0, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 1, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 2, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 3, AOM_BITS_8, 1024), +#if CONFIG_EXT_TX + make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 4, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 5, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 6, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 7, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 8, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 9, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 10, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 11, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 12, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 13, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 14, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 15, AOM_BITS_8, 1024) +#endif // CONFIG_EXT_TX +}; +INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans32x32HT, + ::testing::ValuesIn(kArrayHt32x32Param_sse2)); +#endif // HAVE_SSE2 + +#if HAVE_AVX2 +const Ht32x32Param kArrayHt32x32Param_avx2[] = { + make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 0, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 1, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 2, AOM_BITS_8, 1024), + 
make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 3, AOM_BITS_8, 1024), +#if CONFIG_EXT_TX + make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 4, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 5, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 6, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 7, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 8, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 9, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 10, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 11, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 12, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 13, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 14, AOM_BITS_8, 1024), + make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 15, AOM_BITS_8, 1024) +#endif // CONFIG_EXT_TX +}; +INSTANTIATE_TEST_CASE_P(AVX2, AV1Trans32x32HT, + ::testing::ValuesIn(kArrayHt32x32Param_avx2)); +#endif // HAVE_AVX2 +} // namespace diff --git a/third_party/aom/test/filterintra_predictors_test.cc b/third_party/aom/test/filterintra_predictors_test.cc new file mode 100644 index 000000000..5c6b56d14 --- /dev/null +++ b/third_party/aom/test/filterintra_predictors_test.cc @@ -0,0 +1,331 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./av1_rtcd.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "av1/common/enums.h" + +namespace { + +using std::tr1::tuple; +using libaom_test::ACMRandom; + +typedef void (*Predictor)(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left); + +// Note: +// Test parameter list: +// Reference predictor, optimized predictor, prediction mode, block size +// +typedef tuple<Predictor, Predictor, int> PredFuncMode; +typedef tuple<PredFuncMode, int> PredParams; + +#if CONFIG_HIGHBITDEPTH +typedef void (*HbdPredictor)(uint16_t *dst, ptrdiff_t stride, int bs, + const uint16_t *above, const uint16_t *left, + int bd); + +// Note: +// Test parameter list: +// Reference predictor, optimized predictor, prediction mode, block size, +// bit depth +// +typedef tuple<HbdPredictor, HbdPredictor, int> HbdPredFuncMode; +typedef tuple<HbdPredFuncMode, int, int> HbdPredParams; +#endif + +const int MaxBlkSize = 32; + +// By default, disable speed test +#define PREDICTORS_SPEED_TEST (0) + +#if PREDICTORS_SPEED_TEST +const int MaxTestNum = 100000; +#else +const int MaxTestNum = 100; +#endif + +class AV1FilterIntraPredOptimzTest + : public ::testing::TestWithParam<PredParams> { + public: + virtual ~AV1FilterIntraPredOptimzTest() {} + virtual void SetUp() { + PredFuncMode funcMode = GET_PARAM(0); + predFuncRef_ = std::tr1::get<0>(funcMode); + predFunc_ = std::tr1::get<1>(funcMode); + mode_ = std::tr1::get<2>(funcMode); + blockSize_ = GET_PARAM(1); + + alloc_ = new uint8_t[3 * MaxBlkSize + 2]; + predRef_ = new uint8_t[MaxBlkSize * MaxBlkSize]; + pred_ = new uint8_t[MaxBlkSize * MaxBlkSize]; + } + + virtual void TearDown() { + delete[] alloc_; + delete[] predRef_; + delete[] pred_; + libaom_test::ClearSystemState(); + } + + protected: + void RunTest() const { + int tstIndex = 0; + int 
stride = blockSize_; + uint8_t *left = alloc_; + uint8_t *above = alloc_ + MaxBlkSize + 1; + while (tstIndex < MaxTestNum) { + PrepareBuffer(); + predFuncRef_(predRef_, stride, blockSize_, &above[1], left); + ASM_REGISTER_STATE_CHECK( + predFunc_(pred_, stride, blockSize_, &above[1], left)); + DiffPred(tstIndex); + tstIndex += 1; + } + } + + void RunSpeedTestC() const { + int tstIndex = 0; + int stride = blockSize_; + uint8_t *left = alloc_; + uint8_t *above = alloc_ + MaxBlkSize + 1; + PrepareBuffer(); + while (tstIndex < MaxTestNum) { + predFuncRef_(predRef_, stride, blockSize_, &above[1], left); + tstIndex += 1; + } + } + + void RunSpeedTestSSE() const { + int tstIndex = 0; + int stride = blockSize_; + uint8_t *left = alloc_; + uint8_t *above = alloc_ + MaxBlkSize + 1; + PrepareBuffer(); + while (tstIndex < MaxTestNum) { + predFunc_(predRef_, stride, blockSize_, &above[1], left); + tstIndex += 1; + } + } + + private: + void PrepareBuffer() const { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + int i = 0; + while (i < (3 * MaxBlkSize + 2)) { + alloc_[i] = rnd.Rand8(); + i += 1; + } + } + + void DiffPred(int testNum) const { + int i = 0; + while (i < blockSize_ * blockSize_) { + EXPECT_EQ(predRef_[i], pred_[i]) << "Error at position: " << i << " " + << "Block size: " << blockSize_ << " " + << "Test number: " << testNum; + i += 1; + } + } + + Predictor predFunc_; + Predictor predFuncRef_; + int mode_; + int blockSize_; + uint8_t *alloc_; + uint8_t *pred_; + uint8_t *predRef_; +}; + +#if CONFIG_HIGHBITDEPTH +class AV1HbdFilterIntraPredOptimzTest + : public ::testing::TestWithParam<HbdPredParams> { + public: + virtual ~AV1HbdFilterIntraPredOptimzTest() {} + virtual void SetUp() { + HbdPredFuncMode funcMode = GET_PARAM(0); + predFuncRef_ = std::tr1::get<0>(funcMode); + predFunc_ = std::tr1::get<1>(funcMode); + mode_ = std::tr1::get<2>(funcMode); + blockSize_ = GET_PARAM(1); + bd_ = GET_PARAM(2); + + alloc_ = new uint16_t[3 * MaxBlkSize + 2]; + predRef_ = new 
uint16_t[MaxBlkSize * MaxBlkSize]; + pred_ = new uint16_t[MaxBlkSize * MaxBlkSize]; + } + + virtual void TearDown() { + delete[] alloc_; + delete[] predRef_; + delete[] pred_; + libaom_test::ClearSystemState(); + } + + protected: + void RunTest() const { + int tstIndex = 0; + int stride = blockSize_; + uint16_t *left = alloc_; + uint16_t *above = alloc_ + MaxBlkSize + 1; + while (tstIndex < MaxTestNum) { + PrepareBuffer(); + predFuncRef_(predRef_, stride, blockSize_, &above[1], left, bd_); + ASM_REGISTER_STATE_CHECK( + predFunc_(pred_, stride, blockSize_, &above[1], left, bd_)); + DiffPred(tstIndex); + tstIndex += 1; + } + } + + void RunSpeedTestC() const { + int tstIndex = 0; + int stride = blockSize_; + uint16_t *left = alloc_; + uint16_t *above = alloc_ + MaxBlkSize + 1; + PrepareBuffer(); + while (tstIndex < MaxTestNum) { + predFuncRef_(predRef_, stride, blockSize_, &above[1], left, bd_); + tstIndex += 1; + } + } + + void RunSpeedTestSSE() const { + int tstIndex = 0; + int stride = blockSize_; + uint16_t *left = alloc_; + uint16_t *above = alloc_ + MaxBlkSize + 1; + PrepareBuffer(); + while (tstIndex < MaxTestNum) { + predFunc_(predRef_, stride, blockSize_, &above[1], left, bd_); + tstIndex += 1; + } + } + + private: + void PrepareBuffer() const { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + int i = 0; + while (i < (3 * MaxBlkSize + 2)) { + alloc_[i] = rnd.Rand16() & ((1 << bd_) - 1); + i += 1; + } + } + + void DiffPred(int testNum) const { + int i = 0; + while (i < blockSize_ * blockSize_) { + EXPECT_EQ(predRef_[i], pred_[i]) << "Error at position: " << i << " " + << "Block size: " << blockSize_ << " " + << "Bit depth: " << bd_ << " " + << "Test number: " << testNum; + i += 1; + } + } + + HbdPredictor predFunc_; + HbdPredictor predFuncRef_; + int mode_; + int blockSize_; + int bd_; + uint16_t *alloc_; + uint16_t *pred_; + uint16_t *predRef_; +}; +#endif // CONFIG_HIGHBITDEPTH + +TEST_P(AV1FilterIntraPredOptimzTest, BitExactCheck) { RunTest(); } + +#if 
PREDICTORS_SPEED_TEST +TEST_P(AV1FilterIntraPredOptimzTest, SpeedCheckC) { RunSpeedTestC(); } + +TEST_P(AV1FilterIntraPredOptimzTest, SpeedCheckSSE) { RunSpeedTestSSE(); } +#endif + +#if CONFIG_HIGHBITDEPTH +TEST_P(AV1HbdFilterIntraPredOptimzTest, BitExactCheck) { RunTest(); } + +#if PREDICTORS_SPEED_TEST +TEST_P(AV1HbdFilterIntraPredOptimzTest, SpeedCheckC) { RunSpeedTestC(); } + +TEST_P(AV1HbdFilterIntraPredOptimzTest, SpeedCheckSSE) { RunSpeedTestSSE(); } +#endif // PREDICTORS_SPEED_TEST +#endif // CONFIG_HIGHBITDEPTH + +using std::tr1::make_tuple; + +const PredFuncMode kPredFuncMdArray[] = { + make_tuple(av1_dc_filter_predictor_c, av1_dc_filter_predictor_sse4_1, + DC_PRED), + make_tuple(av1_v_filter_predictor_c, av1_v_filter_predictor_sse4_1, V_PRED), + make_tuple(av1_h_filter_predictor_c, av1_h_filter_predictor_sse4_1, H_PRED), + make_tuple(av1_d45_filter_predictor_c, av1_d45_filter_predictor_sse4_1, + D45_PRED), + make_tuple(av1_d135_filter_predictor_c, av1_d135_filter_predictor_sse4_1, + D135_PRED), + make_tuple(av1_d117_filter_predictor_c, av1_d117_filter_predictor_sse4_1, + D117_PRED), + make_tuple(av1_d153_filter_predictor_c, av1_d153_filter_predictor_sse4_1, + D153_PRED), + make_tuple(av1_d207_filter_predictor_c, av1_d207_filter_predictor_sse4_1, + D207_PRED), + make_tuple(av1_d63_filter_predictor_c, av1_d63_filter_predictor_sse4_1, + D63_PRED), + make_tuple(av1_tm_filter_predictor_c, av1_tm_filter_predictor_sse4_1, + TM_PRED), +}; + +const int kBlkSize[] = { 4, 8, 16, 32 }; + +INSTANTIATE_TEST_CASE_P( + SSE4_1, AV1FilterIntraPredOptimzTest, + ::testing::Combine(::testing::ValuesIn(kPredFuncMdArray), + ::testing::ValuesIn(kBlkSize))); + +#if CONFIG_HIGHBITDEPTH +const HbdPredFuncMode kHbdPredFuncMdArray[] = { + make_tuple(av1_highbd_dc_filter_predictor_c, + av1_highbd_dc_filter_predictor_sse4_1, DC_PRED), + make_tuple(av1_highbd_v_filter_predictor_c, + av1_highbd_v_filter_predictor_sse4_1, V_PRED), + make_tuple(av1_highbd_h_filter_predictor_c, + 
av1_highbd_h_filter_predictor_sse4_1, H_PRED), + make_tuple(av1_highbd_d45_filter_predictor_c, + av1_highbd_d45_filter_predictor_sse4_1, D45_PRED), + make_tuple(av1_highbd_d135_filter_predictor_c, + av1_highbd_d135_filter_predictor_sse4_1, D135_PRED), + make_tuple(av1_highbd_d117_filter_predictor_c, + av1_highbd_d117_filter_predictor_sse4_1, D117_PRED), + make_tuple(av1_highbd_d153_filter_predictor_c, + av1_highbd_d153_filter_predictor_sse4_1, D153_PRED), + make_tuple(av1_highbd_d207_filter_predictor_c, + av1_highbd_d207_filter_predictor_sse4_1, D207_PRED), + make_tuple(av1_highbd_d63_filter_predictor_c, + av1_highbd_d63_filter_predictor_sse4_1, D63_PRED), + make_tuple(av1_highbd_tm_filter_predictor_c, + av1_highbd_tm_filter_predictor_sse4_1, TM_PRED), +}; + +const int kBd[] = { 10, 12 }; + +INSTANTIATE_TEST_CASE_P( + SSE4_1, AV1HbdFilterIntraPredOptimzTest, + ::testing::Combine(::testing::ValuesIn(kHbdPredFuncMdArray), + ::testing::ValuesIn(kBlkSize), + ::testing::ValuesIn(kBd))); +#endif // CONFIG_HIGHBITDEPTH + +} // namespace diff --git a/third_party/aom/test/frame_size_tests.cc b/third_party/aom/test/frame_size_tests.cc new file mode 100644 index 000000000..73cc9c075 --- /dev/null +++ b/third_party/aom/test/frame_size_tests.cc @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+*/ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/video_source.h" + +namespace { + +class AV1FrameSizeTests : public ::libaom_test::EncoderTest, + public ::testing::Test { + protected: + AV1FrameSizeTests() + : EncoderTest(&::libaom_test::kAV1), expected_res_(AOM_CODEC_OK) {} + virtual ~AV1FrameSizeTests() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(::libaom_test::kRealTime); + } + + virtual bool HandleDecodeResult(const aom_codec_err_t res_dec, + libaom_test::Decoder *decoder) { + EXPECT_EQ(expected_res_, res_dec) << decoder->DecodeError(); + return !::testing::Test::HasFailure(); + } + + virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) { + if (video->frame() == 1) { + encoder->Control(AOME_SET_CPUUSED, 7); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + } + } + + int expected_res_; +}; + +TEST_F(AV1FrameSizeTests, TestInvalidSizes) { + ::libaom_test::RandomVideoSource video; + +#if CONFIG_SIZE_LIMIT + video.SetSize(DECODE_WIDTH_LIMIT + 16, DECODE_HEIGHT_LIMIT + 16); + video.set_limit(2); + expected_res_ = AOM_CODEC_CORRUPT_FRAME; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +#endif +} + +TEST_F(AV1FrameSizeTests, LargeValidSizes) { + ::libaom_test::RandomVideoSource video; + +#if CONFIG_SIZE_LIMIT + video.SetSize(DECODE_WIDTH_LIMIT, DECODE_HEIGHT_LIMIT); + video.set_limit(2); + expected_res_ = AOM_CODEC_OK; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +#else + // This test produces a pretty large single frame allocation, (roughly + // 25 megabits). The encoder allocates a good number of these frames + // one for each lag in frames (for 2 pass), and then one for each possible + // reference buffer (8) - we can end up with up to 30 buffers of roughly this + // size or almost 1 gig of memory. 
+ // In total the allocations will exceed 2GiB which may cause a failure with + // non-64 bit platforms, use a smaller size in that case. + if (sizeof(void *) < 8) + video.SetSize(2560, 1440); + else + video.SetSize(4096, 4096); + + video.set_limit(2); + expected_res_ = AOM_CODEC_OK; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +#endif +} + +TEST_F(AV1FrameSizeTests, OneByOneVideo) { + ::libaom_test::RandomVideoSource video; + + video.SetSize(1, 1); + video.set_limit(2); + expected_res_ = AOM_CODEC_OK; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} +#undef ONE_BY_ONE_VIDEO_NAME +} // namespace diff --git a/third_party/aom/test/function_equivalence_test.h b/third_party/aom/test/function_equivalence_test.h new file mode 100644 index 000000000..4b22c74a2 --- /dev/null +++ b/third_party/aom/test/function_equivalence_test.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#ifndef TEST_FUNCTION_EQUIVALENCE_TEST_H_ +#define TEST_FUNCTION_EQUIVALENCE_TEST_H_ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/util.h" + +using libaom_test::ACMRandom; + +namespace libaom_test { +// Base class for tests that compare 2 implementations of the same function +// for equivalence. The template parameter should be pointer to a function +// that is being tested. 
+// +// The test takes a 3-parameters encapsulating struct 'FuncParam', containing: +// - Pointer to reference function +// - Pointer to tested function +// - Integer bit depth (default to 0). +// +// These values are then accessible in the tests as member of params_: +// params_.ref_func, params_.tst_func, and params_.bit_depth. +// + +template <typename T> +struct FuncParam { + FuncParam(T ref = NULL, T tst = NULL, int bit_depth = 0) + : ref_func(ref), tst_func(tst), bit_depth(bit_depth) {} + T ref_func; + T tst_func; + int bit_depth; +}; + +template <typename T> +std::ostream &operator<<(std::ostream &os, const FuncParam<T> &p) { + return os << "bit_depth:" << p.bit_depth + << " function:" << reinterpret_cast<const void *>(p.ref_func) + << " function:" << reinterpret_cast<const void *>(p.tst_func); +} + +template <typename T> +class FunctionEquivalenceTest : public ::testing::TestWithParam<FuncParam<T> > { + public: + FunctionEquivalenceTest() : rng_(ACMRandom::DeterministicSeed()) {} + + virtual ~FunctionEquivalenceTest() {} + + virtual void SetUp() { params_ = this->GetParam(); } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + ACMRandom rng_; + FuncParam<T> params_; +}; + +} // namespace libaom_test +#endif // TEST_FUNCTION_EQUIVALENCE_TEST_H_ diff --git a/third_party/aom/test/hadamard_test.cc b/third_party/aom/test/hadamard_test.cc new file mode 100644 index 000000000..db5cb7474 --- /dev/null +++ b/third_party/aom/test/hadamard_test.cc @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <algorithm> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_dsp_rtcd.h" + +#include "test/acm_random.h" +#include "test/register_state_check.h" + +namespace { + +using ::libaom_test::ACMRandom; + +typedef void (*HadamardFunc)(const int16_t *a, int a_stride, int16_t *b); + +void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) { + int16_t b[8]; + for (int i = 0; i < 8; i += 2) { + b[i + 0] = a[i * a_stride] + a[(i + 1) * a_stride]; + b[i + 1] = a[i * a_stride] - a[(i + 1) * a_stride]; + } + int16_t c[8]; + for (int i = 0; i < 8; i += 4) { + c[i + 0] = b[i + 0] + b[i + 2]; + c[i + 1] = b[i + 1] + b[i + 3]; + c[i + 2] = b[i + 0] - b[i + 2]; + c[i + 3] = b[i + 1] - b[i + 3]; + } + out[0] = c[0] + c[4]; + out[7] = c[1] + c[5]; + out[3] = c[2] + c[6]; + out[4] = c[3] + c[7]; + out[2] = c[0] - c[4]; + out[6] = c[1] - c[5]; + out[1] = c[2] - c[6]; + out[5] = c[3] - c[7]; +} + +void reference_hadamard8x8(const int16_t *a, int a_stride, int16_t *b) { + int16_t buf[64]; + for (int i = 0; i < 8; ++i) { + hadamard_loop(a + i, a_stride, buf + i * 8); + } + + for (int i = 0; i < 8; ++i) { + hadamard_loop(buf + i, 8, b + i * 8); + } +} + +void reference_hadamard16x16(const int16_t *a, int a_stride, int16_t *b) { + /* The source is a 16x16 block. The destination is rearranged to 8x32. + * Input is 9 bit. */ + reference_hadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0); + reference_hadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64); + reference_hadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128); + reference_hadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192); + + /* Overlay the 8x8 blocks and combine. */ + for (int i = 0; i < 64; ++i) { + /* 8x8 steps the range up to 15 bits. 
*/ + const int16_t a0 = b[0]; + const int16_t a1 = b[64]; + const int16_t a2 = b[128]; + const int16_t a3 = b[192]; + + /* Prevent the result from escaping int16_t. */ + const int16_t b0 = (a0 + a1) >> 1; + const int16_t b1 = (a0 - a1) >> 1; + const int16_t b2 = (a2 + a3) >> 1; + const int16_t b3 = (a2 - a3) >> 1; + + /* Store a 16 bit value. */ + b[0] = b0 + b2; + b[64] = b1 + b3; + b[128] = b0 - b2; + b[192] = b1 - b3; + + ++b; + } +} + +class HadamardTestBase : public ::testing::TestWithParam<HadamardFunc> { + public: + virtual void SetUp() { + h_func_ = GetParam(); + rnd_.Reset(ACMRandom::DeterministicSeed()); + } + + protected: + HadamardFunc h_func_; + ACMRandom rnd_; +}; + +class Hadamard8x8Test : public HadamardTestBase {}; + +TEST_P(Hadamard8x8Test, CompareReferenceRandom) { + DECLARE_ALIGNED(16, int16_t, a[64]); + DECLARE_ALIGNED(16, int16_t, b[64]); + int16_t b_ref[64]; + for (int i = 0; i < 64; ++i) { + a[i] = rnd_.Rand9Signed(); + } + memset(b, 0, sizeof(b)); + memset(b_ref, 0, sizeof(b_ref)); + + reference_hadamard8x8(a, 8, b_ref); + ASM_REGISTER_STATE_CHECK(h_func_(a, 8, b)); + + // The order of the output is not important. Sort before checking. + std::sort(b, b + 64); + std::sort(b_ref, b_ref + 64); + EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); +} + +TEST_P(Hadamard8x8Test, VaryStride) { + DECLARE_ALIGNED(16, int16_t, a[64 * 8]); + DECLARE_ALIGNED(16, int16_t, b[64]); + int16_t b_ref[64]; + for (int i = 0; i < 64 * 8; ++i) { + a[i] = rnd_.Rand9Signed(); + } + + for (int i = 8; i < 64; i += 8) { + memset(b, 0, sizeof(b)); + memset(b_ref, 0, sizeof(b_ref)); + + reference_hadamard8x8(a, i, b_ref); + ASM_REGISTER_STATE_CHECK(h_func_(a, i, b)); + + // The order of the output is not important. Sort before checking. 
+ std::sort(b, b + 64); + std::sort(b_ref, b_ref + 64); + EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); + } +} + +INSTANTIATE_TEST_CASE_P(C, Hadamard8x8Test, + ::testing::Values(&aom_hadamard_8x8_c)); + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P(SSE2, Hadamard8x8Test, + ::testing::Values(&aom_hadamard_8x8_sse2)); +#endif // HAVE_SSE2 + +#if HAVE_SSSE3 && ARCH_X86_64 +INSTANTIATE_TEST_CASE_P(SSSE3, Hadamard8x8Test, + ::testing::Values(&aom_hadamard_8x8_ssse3)); +#endif // HAVE_SSSE3 && ARCH_X86_64 + +#if HAVE_NEON +INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test, + ::testing::Values(&aom_hadamard_8x8_neon)); +#endif // HAVE_NEON + +class Hadamard16x16Test : public HadamardTestBase {}; + +TEST_P(Hadamard16x16Test, CompareReferenceRandom) { + DECLARE_ALIGNED(16, int16_t, a[16 * 16]); + DECLARE_ALIGNED(16, int16_t, b[16 * 16]); + int16_t b_ref[16 * 16]; + for (int i = 0; i < 16 * 16; ++i) { + a[i] = rnd_.Rand9Signed(); + } + memset(b, 0, sizeof(b)); + memset(b_ref, 0, sizeof(b_ref)); + + reference_hadamard16x16(a, 16, b_ref); + ASM_REGISTER_STATE_CHECK(h_func_(a, 16, b)); + + // The order of the output is not important. Sort before checking. + std::sort(b, b + 16 * 16); + std::sort(b_ref, b_ref + 16 * 16); + EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); +} + +TEST_P(Hadamard16x16Test, VaryStride) { + DECLARE_ALIGNED(16, int16_t, a[16 * 16 * 8]); + DECLARE_ALIGNED(16, int16_t, b[16 * 16]); + int16_t b_ref[16 * 16]; + for (int i = 0; i < 16 * 16 * 8; ++i) { + a[i] = rnd_.Rand9Signed(); + } + + for (int i = 8; i < 64; i += 8) { + memset(b, 0, sizeof(b)); + memset(b_ref, 0, sizeof(b_ref)); + + reference_hadamard16x16(a, i, b_ref); + ASM_REGISTER_STATE_CHECK(h_func_(a, i, b)); + + // The order of the output is not important. Sort before checking. 
+ std::sort(b, b + 16 * 16); + std::sort(b_ref, b_ref + 16 * 16); + EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); + } +} + +INSTANTIATE_TEST_CASE_P(C, Hadamard16x16Test, + ::testing::Values(&aom_hadamard_16x16_c)); + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P(SSE2, Hadamard16x16Test, + ::testing::Values(&aom_hadamard_16x16_sse2)); +#endif // HAVE_SSE2 + +#if HAVE_NEON +INSTANTIATE_TEST_CASE_P(NEON, Hadamard16x16Test, + ::testing::Values(&aom_hadamard_16x16_neon)); +#endif // HAVE_NEON +} // namespace diff --git a/third_party/aom/test/hbd_metrics_test.cc b/third_party/aom/test/hbd_metrics_test.cc new file mode 100644 index 000000000..4def53b21 --- /dev/null +++ b/third_party/aom/test/hbd_metrics_test.cc @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+*/ + +#include <math.h> +#include <stdlib.h> +#include <new> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/acm_random.h" +#include "test/util.h" +#include "./aom_config.h" +#include "aom_dsp/psnr.h" +#include "aom_dsp/ssim.h" +#include "aom_ports/mem.h" +#include "aom_ports/msvc.h" +#include "aom_scale/yv12config.h" + +using libaom_test::ACMRandom; + +namespace { + +typedef double (*LBDMetricFunc)(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *dest); +typedef double (*HBDMetricFunc)(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *dest, uint32_t in_bd, + uint32_t bd); + +double compute_hbd_psnr(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *dest, uint32_t in_bd, + uint32_t bd) { + PSNR_STATS psnr; + aom_calc_highbd_psnr(source, dest, &psnr, bd, in_bd); + return psnr.psnr[0]; +} + +double compute_psnr(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *dest) { + PSNR_STATS psnr; + aom_calc_psnr(source, dest, &psnr); + return psnr.psnr[0]; +} + +double compute_hbd_psnrhvs(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *dest, uint32_t in_bd, + uint32_t bd) { + double tempy, tempu, tempv; + return aom_psnrhvs(source, dest, &tempy, &tempu, &tempv, bd, in_bd); +} + +double compute_psnrhvs(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *dest) { + double tempy, tempu, tempv; + return aom_psnrhvs(source, dest, &tempy, &tempu, &tempv, 8, 8); +} + +double compute_hbd_fastssim(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *dest, uint32_t in_bd, + uint32_t bd) { + double tempy, tempu, tempv; + return aom_calc_fastssim(source, dest, &tempy, &tempu, &tempv, bd, in_bd); +} + +double compute_fastssim(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *dest) { + double tempy, tempu, tempv; + return aom_calc_fastssim(source, dest, &tempy, &tempu, &tempv, 8, 8); +} + +double compute_hbd_aomssim(const YV12_BUFFER_CONFIG *source, + const 
YV12_BUFFER_CONFIG *dest, uint32_t in_bd, + uint32_t bd) { + double ssim, weight; + ssim = aom_highbd_calc_ssim(source, dest, &weight, bd, in_bd); + return 100 * pow(ssim / weight, 8.0); +} + +double compute_aomssim(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *dest) { + double ssim, weight; + ssim = aom_calc_ssim(source, dest, &weight); + return 100 * pow(ssim / weight, 8.0); +} + +class HBDMetricsTestBase { + public: + virtual ~HBDMetricsTestBase() {} + + protected: + void RunAccuracyCheck() { + const int width = 1920; + const int height = 1080; + size_t i = 0; + const uint8_t kPixFiller = 128; + YV12_BUFFER_CONFIG lbd_src, lbd_dst; + YV12_BUFFER_CONFIG hbd_src, hbd_dst; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + double lbd_db, hbd_db; + + memset(&lbd_src, 0, sizeof(lbd_src)); + memset(&lbd_dst, 0, sizeof(lbd_dst)); + memset(&hbd_src, 0, sizeof(hbd_src)); + memset(&hbd_dst, 0, sizeof(hbd_dst)); + + aom_alloc_frame_buffer(&lbd_src, width, height, 1, 1, 0, 32, 16); + aom_alloc_frame_buffer(&lbd_dst, width, height, 1, 1, 0, 32, 16); + aom_alloc_frame_buffer(&hbd_src, width, height, 1, 1, 1, 32, 16); + aom_alloc_frame_buffer(&hbd_dst, width, height, 1, 1, 1, 32, 16); + + memset(lbd_src.buffer_alloc, kPixFiller, lbd_src.buffer_alloc_sz); + while (i < lbd_src.buffer_alloc_sz) { + uint16_t spel, dpel; + spel = lbd_src.buffer_alloc[i]; + // Create some distortion for dst buffer. + dpel = rnd.Rand8(); + lbd_dst.buffer_alloc[i] = (uint8_t)dpel; + ((uint16_t *)(hbd_src.buffer_alloc))[i] = spel << (bit_depth_ - 8); + ((uint16_t *)(hbd_dst.buffer_alloc))[i] = dpel << (bit_depth_ - 8); + i++; + } + + lbd_db = lbd_metric_(&lbd_src, &lbd_dst); + hbd_db = hbd_metric_(&hbd_src, &hbd_dst, input_bit_depth_, bit_depth_); + EXPECT_LE(fabs(lbd_db - hbd_db), threshold_); + + i = 0; + while (i < lbd_src.buffer_alloc_sz) { + uint16_t dpel; + // Create some small distortion for dst buffer. 
+ dpel = 120 + (rnd.Rand8() >> 4); + lbd_dst.buffer_alloc[i] = (uint8_t)dpel; + ((uint16_t *)(hbd_dst.buffer_alloc))[i] = dpel << (bit_depth_ - 8); + i++; + } + + lbd_db = lbd_metric_(&lbd_src, &lbd_dst); + hbd_db = hbd_metric_(&hbd_src, &hbd_dst, input_bit_depth_, bit_depth_); + EXPECT_LE(fabs(lbd_db - hbd_db), threshold_); + + i = 0; + while (i < lbd_src.buffer_alloc_sz) { + uint16_t dpel; + // Create some small distortion for dst buffer. + dpel = 126 + (rnd.Rand8() >> 6); + lbd_dst.buffer_alloc[i] = (uint8_t)dpel; + ((uint16_t *)(hbd_dst.buffer_alloc))[i] = dpel << (bit_depth_ - 8); + i++; + } + + lbd_db = lbd_metric_(&lbd_src, &lbd_dst); + hbd_db = hbd_metric_(&hbd_src, &hbd_dst, input_bit_depth_, bit_depth_); + EXPECT_LE(fabs(lbd_db - hbd_db), threshold_); + + aom_free_frame_buffer(&lbd_src); + aom_free_frame_buffer(&lbd_dst); + aom_free_frame_buffer(&hbd_src); + aom_free_frame_buffer(&hbd_dst); + } + + int input_bit_depth_; + int bit_depth_; + double threshold_; + LBDMetricFunc lbd_metric_; + HBDMetricFunc hbd_metric_; +}; + +typedef std::tr1::tuple<LBDMetricFunc, HBDMetricFunc, int, int, double> + MetricTestTParam; +class HBDMetricsTest : public HBDMetricsTestBase, + public ::testing::TestWithParam<MetricTestTParam> { + public: + virtual void SetUp() { + lbd_metric_ = GET_PARAM(0); + hbd_metric_ = GET_PARAM(1); + input_bit_depth_ = GET_PARAM(2); + bit_depth_ = GET_PARAM(3); + threshold_ = GET_PARAM(4); + } + virtual void TearDown() {} +}; + +TEST_P(HBDMetricsTest, RunAccuracyCheck) { RunAccuracyCheck(); } + +// Allow small variation due to floating point operations. +static const double kSsim_thresh = 0.001; +// Allow some additional errors accumulated in floating point operations. +static const double kFSsim_thresh = 0.03; +// Allow some extra variation due to rounding error accumulated in dct. 
+static const double kPhvs_thresh = 0.3; + +INSTANTIATE_TEST_CASE_P( + AOMSSIM, HBDMetricsTest, + ::testing::Values(MetricTestTParam(&compute_aomssim, &compute_hbd_aomssim, + 8, 10, kSsim_thresh), + MetricTestTParam(&compute_aomssim, &compute_hbd_aomssim, + 10, 10, kPhvs_thresh), + MetricTestTParam(&compute_aomssim, &compute_hbd_aomssim, + 8, 12, kSsim_thresh), + MetricTestTParam(&compute_aomssim, &compute_hbd_aomssim, + 12, 12, kPhvs_thresh))); +INSTANTIATE_TEST_CASE_P( + FASTSSIM, HBDMetricsTest, + ::testing::Values(MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim, + 8, 10, kFSsim_thresh), + MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim, + 10, 10, kFSsim_thresh), + MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim, + 8, 12, kFSsim_thresh), + MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim, + 12, 12, kFSsim_thresh))); +INSTANTIATE_TEST_CASE_P( + PSNRHVS, HBDMetricsTest, + ::testing::Values(MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs, + 8, 10, kPhvs_thresh), + MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs, + 10, 10, kPhvs_thresh), + MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs, + 8, 12, kPhvs_thresh), + MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs, + 12, 12, kPhvs_thresh))); +INSTANTIATE_TEST_CASE_P( + PSNR, HBDMetricsTest, + ::testing::Values( + MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 8, 10, kPhvs_thresh), + MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 10, 10, + kPhvs_thresh), + MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 8, 12, kPhvs_thresh), + MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 12, 12, + kPhvs_thresh))); +} // namespace diff --git a/third_party/aom/test/i420_video_source.h b/third_party/aom/test/i420_video_source.h new file mode 100644 index 000000000..0825296d7 --- /dev/null +++ b/third_party/aom/test/i420_video_source.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ +#ifndef TEST_I420_VIDEO_SOURCE_H_ +#define TEST_I420_VIDEO_SOURCE_H_ +#include <cstdio> +#include <cstdlib> +#include <string> + +#include "test/yuv_video_source.h" + +namespace libaom_test { + +// This class extends YUVVideoSource to allow parsing of raw I420 files +// so that we can do actual file encodes. +class I420VideoSource : public YUVVideoSource { + public: + I420VideoSource(const std::string &file_name, unsigned int width, + unsigned int height, int rate_numerator, int rate_denominator, + unsigned int start, int limit) + : YUVVideoSource(file_name, AOM_IMG_FMT_I420, width, height, + rate_numerator, rate_denominator, start, limit) {} +}; + +} // namespace libaom_test + +#endif // TEST_I420_VIDEO_SOURCE_H_ diff --git a/third_party/aom/test/idct8x8_test.cc b/third_party/aom/test/idct8x8_test.cc new file mode 100644 index 000000000..f99a4075f --- /dev/null +++ b/third_party/aom/test/idct8x8_test.cc @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+*/ + +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_dsp_rtcd.h" +#include "test/acm_random.h" +#include "aom/aom_integer.h" +#include "aom_ports/msvc.h" // for round() + +using libaom_test::ACMRandom; + +namespace { + +void reference_dct_1d(double input[8], double output[8]) { + const double kPi = 3.141592653589793238462643383279502884; + const double kInvSqrt2 = 0.707106781186547524400844362104; + for (int k = 0; k < 8; k++) { + output[k] = 0.0; + for (int n = 0; n < 8; n++) + output[k] += input[n] * cos(kPi * (2 * n + 1) * k / 16.0); + if (k == 0) output[k] = output[k] * kInvSqrt2; + } +} + +void reference_dct_2d(int16_t input[64], double output[64]) { + // First transform columns + for (int i = 0; i < 8; ++i) { + double temp_in[8], temp_out[8]; + for (int j = 0; j < 8; ++j) temp_in[j] = input[j * 8 + i]; + reference_dct_1d(temp_in, temp_out); + for (int j = 0; j < 8; ++j) output[j * 8 + i] = temp_out[j]; + } + // Then transform rows + for (int i = 0; i < 8; ++i) { + double temp_in[8], temp_out[8]; + for (int j = 0; j < 8; ++j) temp_in[j] = output[j + i * 8]; + reference_dct_1d(temp_in, temp_out); + for (int j = 0; j < 8; ++j) output[j + i * 8] = temp_out[j]; + } + // Scale by some magic number + for (int i = 0; i < 64; ++i) output[i] *= 2; +} + +TEST(AV1Idct8x8Test, AccuracyCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 10000; + for (int i = 0; i < count_test_block; ++i) { + int16_t input[64]; + tran_low_t coeff[64]; + double output_r[64]; + uint8_t dst[64], src[64]; + + for (int j = 0; j < 64; ++j) { + src[j] = rnd.Rand8(); + dst[j] = rnd.Rand8(); + } + // Initialize a test block with input range [-255, 255]. 
+ for (int j = 0; j < 64; ++j) input[j] = src[j] - dst[j]; + + reference_dct_2d(input, output_r); + for (int j = 0; j < 64; ++j) + coeff[j] = static_cast<tran_low_t>(round(output_r[j])); + aom_idct8x8_64_add_c(coeff, dst, 8); + for (int j = 0; j < 64; ++j) { + const int diff = dst[j] - src[j]; + const int error = diff * diff; + EXPECT_GE(1, error) << "Error: 8x8 FDCT/IDCT has error " << error + << " at index " << j; + } + } +} + +} // namespace diff --git a/third_party/aom/test/idct_test.cc b/third_party/aom/test/idct_test.cc new file mode 100644 index 000000000..a880a9182 --- /dev/null +++ b/third_party/aom/test/idct_test.cc @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include "./aom_config.h" +#include "./aom_rtcd.h" + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "aom/aom_integer.h" + +typedef void (*IdctFunc)(int16_t *input, unsigned char *pred_ptr, + int pred_stride, unsigned char *dst_ptr, + int dst_stride); +namespace { +class IDCTTest : public ::testing::TestWithParam<IdctFunc> { + protected: + virtual void SetUp() { + int i; + + UUT = GetParam(); + memset(input, 0, sizeof(input)); + /* Set up guard blocks */ + for (i = 0; i < 256; i++) output[i] = ((i & 0xF) < 4 && (i < 64)) ? 
0 : -1; + } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + IdctFunc UUT; + int16_t input[16]; + unsigned char output[256]; + unsigned char predict[256]; +}; + +TEST_P(IDCTTest, TestGuardBlocks) { + int i; + + for (i = 0; i < 256; i++) + if ((i & 0xF) < 4 && i < 64) + EXPECT_EQ(0, output[i]) << i; + else + EXPECT_EQ(255, output[i]); +} + +TEST_P(IDCTTest, TestAllZeros) { + int i; + + ASM_REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16)); + + for (i = 0; i < 256; i++) + if ((i & 0xF) < 4 && i < 64) + EXPECT_EQ(0, output[i]) << "i==" << i; + else + EXPECT_EQ(255, output[i]) << "i==" << i; +} + +TEST_P(IDCTTest, TestAllOnes) { + int i; + + input[0] = 4; + ASM_REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16)); + + for (i = 0; i < 256; i++) + if ((i & 0xF) < 4 && i < 64) + EXPECT_EQ(1, output[i]) << "i==" << i; + else + EXPECT_EQ(255, output[i]) << "i==" << i; +} + +TEST_P(IDCTTest, TestAddOne) { + int i; + + for (i = 0; i < 256; i++) predict[i] = i; + input[0] = 4; + ASM_REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16)); + + for (i = 0; i < 256; i++) + if ((i & 0xF) < 4 && i < 64) + EXPECT_EQ(i + 1, output[i]) << "i==" << i; + else + EXPECT_EQ(255, output[i]) << "i==" << i; +} + +TEST_P(IDCTTest, TestWithData) { + int i; + + for (i = 0; i < 16; i++) input[i] = i; + + ASM_REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16)); + + for (i = 0; i < 256; i++) + if ((i & 0xF) > 3 || i > 63) + EXPECT_EQ(255, output[i]) << "i==" << i; + else if (i == 0) + EXPECT_EQ(11, output[i]) << "i==" << i; + else if (i == 34) + EXPECT_EQ(1, output[i]) << "i==" << i; + else if (i == 2 || i == 17 || i == 32) + EXPECT_EQ(3, output[i]) << "i==" << i; + else + EXPECT_EQ(0, output[i]) << "i==" << i; +} + +INSTANTIATE_TEST_CASE_P(C, IDCTTest, ::testing::Values(aom_short_idct4x4llm_c)); +#if HAVE_MMX +INSTANTIATE_TEST_CASE_P(MMX, IDCTTest, + ::testing::Values(aom_short_idct4x4llm_mmx)); +#endif +#if HAVE_MSA +INSTANTIATE_TEST_CASE_P(MSA, IDCTTest, + 
::testing::Values(aom_short_idct4x4llm_msa)); +#endif +} diff --git a/third_party/aom/test/intrabc_test.cc b/third_party/aom/test/intrabc_test.cc new file mode 100644 index 000000000..84cfa5c48 --- /dev/null +++ b/third_party/aom/test/intrabc_test.cc @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_config.h" +#include "av1/common/enums.h" +#include "av1/common/mv.h" +#include "av1/common/mvref_common.h" +#include "av1/common/tile_common.h" + +namespace { +TEST(IntrabcTest, DvValidation) { + struct DvTestCase { + MV dv; + int mi_row_offset; + int mi_col_offset; + BLOCK_SIZE bsize; + bool valid; + }; + const int kSubPelScale = 8; + const int kTileMaxMibWidth = 8; + const DvTestCase kDvCases[] = { +#if CONFIG_EXT_PARTITION + { { 0, 0 }, 0, 0, BLOCK_128X128, false }, +#endif + { { 0, 0 }, 0, 0, BLOCK_64X64, false }, + { { 0, 0 }, 0, 0, BLOCK_32X32, false }, + { { 0, 0 }, 0, 0, BLOCK_16X16, false }, + { { 0, 0 }, 0, 0, BLOCK_8X8, false }, + { { 0, 0 }, 0, 0, BLOCK_4X4, false }, + { { -MAX_SB_SIZE * kSubPelScale, -MAX_SB_SIZE * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_16X16, + true }, + { { 0, -MAX_SB_SIZE * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_16X16, + true }, + { { -MAX_SB_SIZE * kSubPelScale, 0 }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_16X16, + true }, + { { MAX_SB_SIZE * kSubPelScale, 0 }, 
+ MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_16X16, + false }, + { { 0, MAX_SB_SIZE * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_16X16, + false }, + { { -32 * kSubPelScale, -32 * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_32X32, + true }, + { { -32 * kSubPelScale, -32 * kSubPelScale }, + 32 / MI_SIZE, + 32 / MI_SIZE, + BLOCK_32X32, + false }, + { { -32 * kSubPelScale - kSubPelScale / 2, -32 * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_32X32, + false }, + { { -33 * kSubPelScale, -32 * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_32X32, + true }, + { { -32 * kSubPelScale, -32 * kSubPelScale - kSubPelScale / 2 }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_32X32, + false }, + { { -32 * kSubPelScale, -33 * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_32X32, + true }, + { { -MAX_SB_SIZE * kSubPelScale, -MAX_SB_SIZE * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_LARGEST, + true }, + { { -(MAX_SB_SIZE + 1) * kSubPelScale, -MAX_SB_SIZE * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_LARGEST, + false }, + { { -MAX_SB_SIZE * kSubPelScale, -(MAX_SB_SIZE + 1) * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_LARGEST, + false }, + { { -(MAX_SB_SIZE - 1) * kSubPelScale, -MAX_SB_SIZE * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_LARGEST, + true }, + { { -MAX_SB_SIZE * kSubPelScale, -(MAX_SB_SIZE - 1) * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_LARGEST, + true }, + { { -(MAX_SB_SIZE - 1) * kSubPelScale, -(MAX_SB_SIZE - 1) * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_LARGEST, + false }, + { { -MAX_SB_SIZE * kSubPelScale, MAX_SB_SIZE * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_LARGEST, + true }, + { { 
-MAX_SB_SIZE * kSubPelScale, + (kTileMaxMibWidth - 2) * MAX_SB_SIZE * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_LARGEST, + true }, + { { -MAX_SB_SIZE * kSubPelScale, + ((kTileMaxMibWidth - 2) * MAX_SB_SIZE + 1) * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_LARGEST, + false }, + }; + TileInfo tile; + tile.mi_row_start = 8 * MAX_MIB_SIZE; + tile.mi_row_end = 16 * MAX_MIB_SIZE; + tile.mi_col_start = 24 * MAX_MIB_SIZE; + tile.mi_col_end = tile.mi_col_start + kTileMaxMibWidth * MAX_MIB_SIZE; + for (int i = 0; i < static_cast<int>(GTEST_ARRAY_SIZE_(kDvCases)); ++i) { + EXPECT_EQ(kDvCases[i].valid, + is_dv_valid(kDvCases[i].dv, &tile, + tile.mi_row_start + kDvCases[i].mi_row_offset, + tile.mi_col_start + kDvCases[i].mi_col_offset, + kDvCases[i].bsize)) + << "DvCases[" << i << "]"; + } +} +} // namespace diff --git a/third_party/aom/test/intrapred_test.cc b/third_party/aom/test/intrapred_test.cc new file mode 100644 index 000000000..4efed57b6 --- /dev/null +++ b/third_party/aom/test/intrapred_test.cc @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+*/ + +#include <string> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_config.h" +#include "./aom_dsp_rtcd.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "av1/common/blockd.h" +#include "av1/common/pred_common.h" +#include "aom_mem/aom_mem.h" + +namespace { + +using libaom_test::ACMRandom; + +const int count_test_block = 100000; + +typedef void (*IntraPred)(uint16_t *dst, ptrdiff_t stride, + const uint16_t *above, const uint16_t *left, int bps); + +struct IntraPredFunc { + IntraPredFunc(IntraPred pred = NULL, IntraPred ref = NULL, + int block_size_value = 0, int bit_depth_value = 0) + : pred_fn(pred), ref_fn(ref), block_size(block_size_value), + bit_depth(bit_depth_value) {} + + IntraPred pred_fn; + IntraPred ref_fn; + int block_size; + int bit_depth; +}; + +class AV1IntraPredTest : public ::testing::TestWithParam<IntraPredFunc> { + public: + void RunTest(uint16_t *left_col, uint16_t *above_data, uint16_t *dst, + uint16_t *ref_dst) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int block_size = params_.block_size; + above_row_ = above_data + 16; + left_col_ = left_col; + dst_ = dst; + ref_dst_ = ref_dst; + int error_count = 0; + for (int i = 0; i < count_test_block; ++i) { + // Fill edges with random data, try first with saturated values. 
+ for (int x = -1; x <= block_size * 2; x++) { + if (i == 0) { + above_row_[x] = mask_; + } else { + above_row_[x] = rnd.Rand16() & mask_; + } + } + for (int y = 0; y < block_size; y++) { + if (i == 0) { + left_col_[y] = mask_; + } else { + left_col_[y] = rnd.Rand16() & mask_; + } + } + Predict(); + CheckPrediction(i, &error_count); + } + ASSERT_EQ(0, error_count); + } + + protected: + virtual void SetUp() { + params_ = GetParam(); + stride_ = params_.block_size * 3; + mask_ = (1 << params_.bit_depth) - 1; + } + + void Predict() { + const int bit_depth = params_.bit_depth; + params_.ref_fn(ref_dst_, stride_, above_row_, left_col_, bit_depth); + ASM_REGISTER_STATE_CHECK( + params_.pred_fn(dst_, stride_, above_row_, left_col_, bit_depth)); + } + + void CheckPrediction(int test_case_number, int *error_count) const { + // For each pixel ensure that the calculated value is the same as reference. + const int block_size = params_.block_size; + for (int y = 0; y < block_size; y++) { + for (int x = 0; x < block_size; x++) { + *error_count += ref_dst_[x + y * stride_] != dst_[x + y * stride_]; + if (*error_count == 1) { + ASSERT_EQ(ref_dst_[x + y * stride_], dst_[x + y * stride_]) + << " Failed on Test Case Number " << test_case_number; + } + } + } + } + + uint16_t *above_row_; + uint16_t *left_col_; + uint16_t *dst_; + uint16_t *ref_dst_; + ptrdiff_t stride_; + int mask_; + + IntraPredFunc params_; +}; + +TEST_P(AV1IntraPredTest, IntraPredTests) { + // max block size is 32 + DECLARE_ALIGNED(16, uint16_t, left_col[2 * 32]); + DECLARE_ALIGNED(16, uint16_t, above_data[2 * 32 + 32]); + DECLARE_ALIGNED(16, uint16_t, dst[3 * 32 * 32]); + DECLARE_ALIGNED(16, uint16_t, ref_dst[3 * 32 * 32]); + RunTest(left_col, above_data, dst, ref_dst); +} + +#if HAVE_SSE2 +#if CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + SSE2_TO_C_8, AV1IntraPredTest, + ::testing::Values(IntraPredFunc(&aom_highbd_dc_predictor_32x32_sse2, + &aom_highbd_dc_predictor_32x32_c, 32, 8), +#if !CONFIG_ALT_INTRA + 
IntraPredFunc(&aom_highbd_tm_predictor_16x16_sse2, + &aom_highbd_tm_predictor_16x16_c, 16, 8), + IntraPredFunc(&aom_highbd_tm_predictor_32x32_sse2, + &aom_highbd_tm_predictor_32x32_c, 32, 8), +#endif // !CONFIG_ALT_INTRA + + IntraPredFunc(&aom_highbd_dc_predictor_4x4_sse2, + &aom_highbd_dc_predictor_4x4_c, 4, 8), + IntraPredFunc(&aom_highbd_dc_predictor_8x8_sse2, + &aom_highbd_dc_predictor_8x8_c, 8, 8), + IntraPredFunc(&aom_highbd_dc_predictor_16x16_sse2, + &aom_highbd_dc_predictor_16x16_c, 16, 8), + IntraPredFunc(&aom_highbd_v_predictor_4x4_sse2, + &aom_highbd_v_predictor_4x4_c, 4, 8), + IntraPredFunc(&aom_highbd_v_predictor_8x8_sse2, + &aom_highbd_v_predictor_8x8_c, 8, 8), + IntraPredFunc(&aom_highbd_v_predictor_16x16_sse2, + &aom_highbd_v_predictor_16x16_c, 16, 8), + IntraPredFunc(&aom_highbd_v_predictor_32x32_sse2, + &aom_highbd_v_predictor_32x32_c, 32, 8) +#if !CONFIG_ALT_INTRA + , + IntraPredFunc(&aom_highbd_tm_predictor_4x4_sse2, + &aom_highbd_tm_predictor_4x4_c, 4, 8), + IntraPredFunc(&aom_highbd_tm_predictor_8x8_sse2, + &aom_highbd_tm_predictor_8x8_c, 8, 8) +#endif // !CONFIG_ALT_INTRA + )); + +INSTANTIATE_TEST_CASE_P( + SSE2_TO_C_10, AV1IntraPredTest, + ::testing::Values(IntraPredFunc(&aom_highbd_dc_predictor_32x32_sse2, + &aom_highbd_dc_predictor_32x32_c, 32, 10), +#if !CONFIG_ALT_INTRA + IntraPredFunc(&aom_highbd_tm_predictor_16x16_sse2, + &aom_highbd_tm_predictor_16x16_c, 16, 10), + IntraPredFunc(&aom_highbd_tm_predictor_32x32_sse2, + &aom_highbd_tm_predictor_32x32_c, 32, 10), +#endif // !CONFIG_ALT_INTRA + IntraPredFunc(&aom_highbd_dc_predictor_4x4_sse2, + &aom_highbd_dc_predictor_4x4_c, 4, 10), + IntraPredFunc(&aom_highbd_dc_predictor_8x8_sse2, + &aom_highbd_dc_predictor_8x8_c, 8, 10), + IntraPredFunc(&aom_highbd_dc_predictor_16x16_sse2, + &aom_highbd_dc_predictor_16x16_c, 16, 10), + IntraPredFunc(&aom_highbd_v_predictor_4x4_sse2, + &aom_highbd_v_predictor_4x4_c, 4, 10), + IntraPredFunc(&aom_highbd_v_predictor_8x8_sse2, + 
&aom_highbd_v_predictor_8x8_c, 8, 10), + IntraPredFunc(&aom_highbd_v_predictor_16x16_sse2, + &aom_highbd_v_predictor_16x16_c, 16, 10), + IntraPredFunc(&aom_highbd_v_predictor_32x32_sse2, + &aom_highbd_v_predictor_32x32_c, 32, 10) +#if !CONFIG_ALT_INTRA + , + IntraPredFunc(&aom_highbd_tm_predictor_4x4_sse2, + &aom_highbd_tm_predictor_4x4_c, 4, 10), + IntraPredFunc(&aom_highbd_tm_predictor_8x8_sse2, + &aom_highbd_tm_predictor_8x8_c, 8, 10) +#endif // !CONFIG_ALT_INTRA + )); + +INSTANTIATE_TEST_CASE_P( + SSE2_TO_C_12, AV1IntraPredTest, + ::testing::Values(IntraPredFunc(&aom_highbd_dc_predictor_32x32_sse2, + &aom_highbd_dc_predictor_32x32_c, 32, 12), +#if !CONFIG_ALT_INTRA + IntraPredFunc(&aom_highbd_tm_predictor_16x16_sse2, + &aom_highbd_tm_predictor_16x16_c, 16, 12), + IntraPredFunc(&aom_highbd_tm_predictor_32x32_sse2, + &aom_highbd_tm_predictor_32x32_c, 32, 12), +#endif // !CONFIG_ALT_INTRA + IntraPredFunc(&aom_highbd_dc_predictor_4x4_sse2, + &aom_highbd_dc_predictor_4x4_c, 4, 12), + IntraPredFunc(&aom_highbd_dc_predictor_8x8_sse2, + &aom_highbd_dc_predictor_8x8_c, 8, 12), + IntraPredFunc(&aom_highbd_dc_predictor_16x16_sse2, + &aom_highbd_dc_predictor_16x16_c, 16, 12), + IntraPredFunc(&aom_highbd_v_predictor_4x4_sse2, + &aom_highbd_v_predictor_4x4_c, 4, 12), + IntraPredFunc(&aom_highbd_v_predictor_8x8_sse2, + &aom_highbd_v_predictor_8x8_c, 8, 12), + IntraPredFunc(&aom_highbd_v_predictor_16x16_sse2, + &aom_highbd_v_predictor_16x16_c, 16, 12), + IntraPredFunc(&aom_highbd_v_predictor_32x32_sse2, + &aom_highbd_v_predictor_32x32_c, 32, 12) +#if !CONFIG_ALT_INTRA + , + IntraPredFunc(&aom_highbd_tm_predictor_4x4_sse2, + &aom_highbd_tm_predictor_4x4_c, 4, 12), + IntraPredFunc(&aom_highbd_tm_predictor_8x8_sse2, + &aom_highbd_tm_predictor_8x8_c, 8, 12) +#endif // !CONFIG_ALT_INTRA + )); + +#endif // CONFIG_HIGHBITDEPTH +#endif // HAVE_SSE2 +} // namespace diff --git a/third_party/aom/test/ivf_video_source.h b/third_party/aom/test/ivf_video_source.h new file mode 100644 index 
000000000..0d3e9f9cb --- /dev/null +++ b/third_party/aom/test/ivf_video_source.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ +#ifndef TEST_IVF_VIDEO_SOURCE_H_ +#define TEST_IVF_VIDEO_SOURCE_H_ +#include <cstdio> +#include <cstdlib> +#include <new> +#include <string> +#include "test/video_source.h" + +namespace libaom_test { +const unsigned int kCodeBufferSize = 256 * 1024; +const unsigned int kIvfFileHdrSize = 32; +const unsigned int kIvfFrameHdrSize = 12; + +static unsigned int MemGetLe32(const uint8_t *mem) { + return (mem[3] << 24) | (mem[2] << 16) | (mem[1] << 8) | (mem[0]); +} + +// This class extends VideoSource to allow parsing of ivf files, +// so that we can do actual file decodes. +class IVFVideoSource : public CompressedVideoSource { + public: + explicit IVFVideoSource(const std::string &file_name) + : file_name_(file_name), input_file_(NULL), compressed_frame_buf_(NULL), + frame_sz_(0), frame_(0), end_of_file_(false) {} + + virtual ~IVFVideoSource() { + delete[] compressed_frame_buf_; + + if (input_file_) fclose(input_file_); + } + + virtual void Init() { + // Allocate a buffer to read the compressed video frame into. + compressed_frame_buf_ = new uint8_t[libaom_test::kCodeBufferSize]; + ASSERT_TRUE(compressed_frame_buf_ != NULL) + << "Allocate frame buffer failed"; + } + + virtual void Begin() { + input_file_ = OpenTestDataFile(file_name_); + ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. 
Filename: " + << file_name_; + + // Read file header + uint8_t file_hdr[kIvfFileHdrSize]; + ASSERT_EQ(kIvfFileHdrSize, fread(file_hdr, 1, kIvfFileHdrSize, input_file_)) + << "File header read failed."; + // Check file header + ASSERT_TRUE(file_hdr[0] == 'D' && file_hdr[1] == 'K' && + file_hdr[2] == 'I' && file_hdr[3] == 'F') + << "Input is not an IVF file."; + + FillFrame(); + } + + virtual void Next() { + ++frame_; + FillFrame(); + } + + void FillFrame() { + ASSERT_TRUE(input_file_ != NULL); + uint8_t frame_hdr[kIvfFrameHdrSize]; + // Check frame header and read a frame from input_file. + if (fread(frame_hdr, 1, kIvfFrameHdrSize, input_file_) != + kIvfFrameHdrSize) { + end_of_file_ = true; + } else { + end_of_file_ = false; + + frame_sz_ = MemGetLe32(frame_hdr); + ASSERT_LE(frame_sz_, kCodeBufferSize) + << "Frame is too big for allocated code buffer"; + ASSERT_EQ(frame_sz_, + fread(compressed_frame_buf_, 1, frame_sz_, input_file_)) + << "Failed to read complete frame"; + } + } + + virtual const uint8_t *cxdata() const { + return end_of_file_ ? NULL : compressed_frame_buf_; + } + virtual size_t frame_size() const { return frame_sz_; } + virtual unsigned int frame_number() const { return frame_; } + + protected: + std::string file_name_; + FILE *input_file_; + uint8_t *compressed_frame_buf_; + size_t frame_sz_; + unsigned int frame_; + bool end_of_file_; +}; + +} // namespace libaom_test + +#endif // TEST_IVF_VIDEO_SOURCE_H_ diff --git a/third_party/aom/test/level_test.cc b/third_party/aom/test/level_test.cc new file mode 100644 index 000000000..1049d4901 --- /dev/null +++ b/third_party/aom/test/level_test.cc @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" + +namespace { +class LevelTest + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int> { + protected: + LevelTest() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + cpu_used_(GET_PARAM(2)), min_gf_internal_(24), target_level_(0), + level_(0) {} + virtual ~LevelTest() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(encoding_mode_); + if (encoding_mode_ != ::libaom_test::kRealTime) { + cfg_.g_lag_in_frames = 25; + cfg_.rc_end_usage = AOM_VBR; + } else { + cfg_.g_lag_in_frames = 0; + cfg_.rc_end_usage = AOM_CBR; + } + cfg_.rc_2pass_vbr_minsection_pct = 5; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; + cfg_.rc_target_bitrate = 400; + cfg_.rc_max_quantizer = 63; + cfg_.rc_min_quantizer = 0; + } + + virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + encoder->Control(AV1E_SET_TARGET_LEVEL, target_level_); + encoder->Control(AV1E_SET_MIN_GF_INTERVAL, min_gf_internal_); + if (encoding_mode_ != ::libaom_test::kRealTime) { + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + } + } + encoder->Control(AV1E_GET_LEVEL, &level_); + ASSERT_LE(level_, 51); + ASSERT_GE(level_, 0); + } + + ::libaom_test::TestMode encoding_mode_; + int cpu_used_; + int 
min_gf_internal_; + int target_level_; + int level_; +}; + +// Test for keeping level stats only +TEST_P(LevelTest, TestTargetLevel0) { + ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, + 40); + target_level_ = 0; + min_gf_internal_ = 4; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_EQ(11, level_); + + cfg_.rc_target_bitrate = 1600; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_EQ(20, level_); +} + +// Test for level control being turned off +TEST_P(LevelTest, TestTargetLevel255) { + ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, + 30); + target_level_ = 255; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +TEST_P(LevelTest, TestTargetLevelApi) { + ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, 1); + static const aom_codec_iface_t *codec = &aom_codec_av1_cx_algo; + aom_codec_ctx_t enc; + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(codec, &cfg, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, codec, &cfg, 0)); + for (int level = 0; level <= 256; ++level) { + if (level == 10 || level == 11 || level == 20 || level == 21 || + level == 30 || level == 31 || level == 40 || level == 41 || + level == 50 || level == 51 || level == 52 || level == 60 || + level == 61 || level == 62 || level == 0 || level == 255) + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AV1E_SET_TARGET_LEVEL, level)); + else + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_control(&enc, AV1E_SET_TARGET_LEVEL, level)); + } + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +AV1_INSTANTIATE_TEST_CASE(LevelTest, + ::testing::Values(::libaom_test::kTwoPassGood, + ::libaom_test::kOnePassGood), + ::testing::Range(0, 9)); +} // namespace diff --git a/third_party/aom/test/lossless_test.cc b/third_party/aom/test/lossless_test.cc new file mode 100644 index 000000000..5c5b32d93 --- /dev/null +++ b/third_party/aom/test/lossless_test.cc @@ -0,0 +1,125 @@ +/* + * 
Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_config.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +#include "test/y4m_video_source.h" + +namespace { + +const int kMaxPsnr = 100; + +class LosslessTestLarge + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWithParam<libaom_test::TestMode> { + protected: + LosslessTestLarge() + : EncoderTest(GET_PARAM(0)), psnr_(kMaxPsnr), nframes_(0), + encoding_mode_(GET_PARAM(1)) {} + + virtual ~LosslessTestLarge() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(encoding_mode_); + } + + virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) { + if (video->frame() == 1) { + // Only call Control if quantizer > 0 to verify that using quantizer + // alone will activate lossless + if (cfg_.rc_max_quantizer > 0 || cfg_.rc_min_quantizer > 0) { + encoder->Control(AV1E_SET_LOSSLESS, 1); + } + } + } + + virtual void BeginPassHook(unsigned int /*pass*/) { + psnr_ = kMaxPsnr; + nframes_ = 0; + } + + virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) { + if (pkt->data.psnr.psnr[0] < psnr_) psnr_ = pkt->data.psnr.psnr[0]; + } + + double GetMinPsnr() const { return psnr_; } + + private: + double psnr_; + unsigned int nframes_; + libaom_test::TestMode encoding_mode_; +}; + +TEST_P(LosslessTestLarge, 
TestLossLessEncoding) { + const aom_rational timebase = { 33333333, 1000000000 }; + cfg_.g_timebase = timebase; + cfg_.rc_target_bitrate = 2000; + cfg_.g_lag_in_frames = 25; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 0; + + init_flags_ = AOM_CODEC_USE_PSNR; + + // intentionally changed the dimension for better testing coverage + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + timebase.den, timebase.num, 0, 5); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr_lossless = GetMinPsnr(); + EXPECT_GE(psnr_lossless, kMaxPsnr); +} + +TEST_P(LosslessTestLarge, TestLossLessEncoding444) { + libaom_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 5); + + cfg_.g_profile = 1; + cfg_.g_timebase = video.timebase(); + cfg_.rc_target_bitrate = 2000; + cfg_.g_lag_in_frames = 25; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 0; + + init_flags_ = AOM_CODEC_USE_PSNR; + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr_lossless = GetMinPsnr(); + EXPECT_GE(psnr_lossless, kMaxPsnr); +} + +TEST_P(LosslessTestLarge, TestLossLessEncodingCtrl) { + const aom_rational timebase = { 33333333, 1000000000 }; + cfg_.g_timebase = timebase; + cfg_.rc_target_bitrate = 2000; + cfg_.g_lag_in_frames = 25; + // Intentionally set Q > 0, to make sure control can be used to activate + // lossless + cfg_.rc_min_quantizer = 10; + cfg_.rc_max_quantizer = 20; + + init_flags_ = AOM_CODEC_USE_PSNR; + + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + timebase.den, timebase.num, 0, 5); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr_lossless = GetMinPsnr(); + EXPECT_GE(psnr_lossless, kMaxPsnr); +} + +AV1_INSTANTIATE_TEST_CASE(LosslessTestLarge, + ::testing::Values(::libaom_test::kOnePassGood, + ::libaom_test::kTwoPassGood)); +} // namespace diff --git a/third_party/aom/test/lpf_8_test.cc b/third_party/aom/test/lpf_8_test.cc new file mode 100644 index 000000000..cee0d3b81 --- /dev/null +++ 
b/third_party/aom/test/lpf_8_test.cc @@ -0,0 +1,624 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include <cmath> +#include <cstdlib> +#include <string> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_config.h" +#include "./aom_dsp_rtcd.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "av1/common/av1_loopfilter.h" +#include "av1/common/entropy.h" +#include "aom/aom_integer.h" + +using libaom_test::ACMRandom; + +namespace { +// Horizontally and Vertically need 32x32: 8 Coeffs preceeding filtered section +// 16 Coefs within filtered section +// 8 Coeffs following filtered section +const int kNumCoeffs = 1024; + +const int number_of_iterations = 10000; + +#if CONFIG_HIGHBITDEPTH +typedef void (*loop_op_t)(uint16_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh, int bd); +typedef void (*dual_loop_op_t)(uint16_t *s, int p, const uint8_t *blimit0, + const uint8_t *limit0, const uint8_t *thresh0, + const uint8_t *blimit1, const uint8_t *limit1, + const uint8_t *thresh1, int bd); +#else +typedef void (*loop_op_t)(uint8_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh); +typedef void (*dual_loop_op_t)(uint8_t *s, int p, const uint8_t *blimit0, + const uint8_t *limit0, const uint8_t *thresh0, + const uint8_t *blimit1, const uint8_t *limit1, + const uint8_t *thresh1); +#endif // 
CONFIG_HIGHBITDEPTH + +typedef std::tr1::tuple<loop_op_t, loop_op_t, int> loop8_param_t; +typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t; + +class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> { + public: + virtual ~Loop8Test6Param() {} + virtual void SetUp() { + loopfilter_op_ = GET_PARAM(0); + ref_loopfilter_op_ = GET_PARAM(1); + bit_depth_ = GET_PARAM(2); + mask_ = (1 << bit_depth_) - 1; + } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + int bit_depth_; + int mask_; + loop_op_t loopfilter_op_; + loop_op_t ref_loopfilter_op_; +}; + +class Loop8Test9Param : public ::testing::TestWithParam<dualloop8_param_t> { + public: + virtual ~Loop8Test9Param() {} + virtual void SetUp() { + loopfilter_op_ = GET_PARAM(0); + ref_loopfilter_op_ = GET_PARAM(1); + bit_depth_ = GET_PARAM(2); + mask_ = (1 << bit_depth_) - 1; + } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + int bit_depth_; + int mask_; + dual_loop_op_t loopfilter_op_; + dual_loop_op_t ref_loopfilter_op_; +}; + +TEST_P(Loop8Test6Param, OperationCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = number_of_iterations; +#if CONFIG_HIGHBITDEPTH + int32_t bd = bit_depth_; + DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]); + DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]); +#else + DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]); + DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]); +#endif // CONFIG_HIGHBITDEPTH + int err_count_total = 0; + int first_failure = -1; + for (int i = 0; i < count_test_block; ++i) { + int err_count = 0; + uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4)); + DECLARE_ALIGNED(16, const uint8_t, + blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; + tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER)); + DECLARE_ALIGNED(16, const uint8_t, + limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, 
+ tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; + tmp = rnd.Rand8(); + DECLARE_ALIGNED(16, const uint8_t, + thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; + int32_t p = kNumCoeffs / 32; + + uint16_t tmp_s[kNumCoeffs]; + int j = 0; + while (j < kNumCoeffs) { + uint8_t val = rnd.Rand8(); + if (val & 0x80) { // 50% chance to choose a new value. + tmp_s[j] = rnd.Rand16(); + j++; + } else { // 50% chance to repeat previous value in row X times + int k = 0; + while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) { + if (j < 1) { + tmp_s[j] = rnd.Rand16(); + } else if (val & 0x20) { // Increment by an value within the limit + tmp_s[j] = (tmp_s[j - 1] + (*limit - 1)); + } else { // Decrement by an value within the limit + tmp_s[j] = (tmp_s[j - 1] - (*limit - 1)); + } + j++; + } + } + } + for (j = 0; j < kNumCoeffs; j++) { + if (i % 2) { + s[j] = tmp_s[j] & mask_; + } else { + s[j] = tmp_s[p * (j % p) + j / p] & mask_; + } + ref_s[j] = s[j]; + } +#if CONFIG_HIGHBITDEPTH + ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd); + ASM_REGISTER_STATE_CHECK( + loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd)); +#else + ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh); + ASM_REGISTER_STATE_CHECK( + loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh)); +#endif // CONFIG_HIGHBITDEPTH + + for (j = 0; j < kNumCoeffs; ++j) { + err_count += ref_s[j] != s[j]; + } + if (err_count && !err_count_total) { + first_failure = i; + } + err_count_total += err_count; + } + EXPECT_EQ(0, err_count_total) + << "Error: Loop8Test6Param, C output doesn't match SSE2 " + "loopfilter output. 
" + << "First failed at test case " << first_failure; +} + +TEST_P(Loop8Test6Param, ValueCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = number_of_iterations; +#if CONFIG_HIGHBITDEPTH + const int32_t bd = bit_depth_; + DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]); + DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]); +#else + DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]); + DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]); +#endif // CONFIG_HIGHBITDEPTH + int err_count_total = 0; + int first_failure = -1; + + // NOTE: The code in av1_loopfilter.c:update_sharpness computes mblim as a + // function of sharpness_lvl and the loopfilter lvl as: + // block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4)); + // ... + // memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit), + // SIMD_WIDTH); + // This means that the largest value for mblim will occur when sharpness_lvl + // is equal to 0, and lvl is equal to its greatest value (MAX_LOOP_FILTER). 
+ // In this case block_inside_limit will be equal to MAX_LOOP_FILTER and + // therefore mblim will be equal to (2 * (lvl + 2) + block_inside_limit) = + // 2 * (MAX_LOOP_FILTER + 2) + MAX_LOOP_FILTER = 3 * MAX_LOOP_FILTER + 4 + + for (int i = 0; i < count_test_block; ++i) { + int err_count = 0; + uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4)); + DECLARE_ALIGNED(16, const uint8_t, + blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; + tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER)); + DECLARE_ALIGNED(16, const uint8_t, + limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; + tmp = rnd.Rand8(); + DECLARE_ALIGNED(16, const uint8_t, + thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; + int32_t p = kNumCoeffs / 32; + for (int j = 0; j < kNumCoeffs; ++j) { + s[j] = rnd.Rand16() & mask_; + ref_s[j] = s[j]; + } +#if CONFIG_HIGHBITDEPTH + ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd); + ASM_REGISTER_STATE_CHECK( + loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd)); +#else + ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh); + ASM_REGISTER_STATE_CHECK( + loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh)); +#endif // CONFIG_HIGHBITDEPTH + for (int j = 0; j < kNumCoeffs; ++j) { + err_count += ref_s[j] != s[j]; + } + if (err_count && !err_count_total) { + first_failure = i; + } + err_count_total += err_count; + } + EXPECT_EQ(0, err_count_total) + << "Error: Loop8Test6Param, C output doesn't match SSE2 " + "loopfilter output. 
" + << "First failed at test case " << first_failure; +} + +TEST_P(Loop8Test9Param, OperationCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = number_of_iterations; +#if CONFIG_HIGHBITDEPTH + const int32_t bd = bit_depth_; + DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]); + DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]); +#else + DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]); + DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]); +#endif // CONFIG_HIGHBITDEPTH + int err_count_total = 0; + int first_failure = -1; + for (int i = 0; i < count_test_block; ++i) { + int err_count = 0; + uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4)); + DECLARE_ALIGNED(16, const uint8_t, + blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; + tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER)); + DECLARE_ALIGNED(16, const uint8_t, + limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; + tmp = rnd.Rand8(); + DECLARE_ALIGNED(16, const uint8_t, + thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; + tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4)); + DECLARE_ALIGNED(16, const uint8_t, + blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; + tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER)); + DECLARE_ALIGNED(16, const uint8_t, + limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; + tmp = rnd.Rand8(); + DECLARE_ALIGNED(16, const uint8_t, + thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; + int32_t p = kNumCoeffs / 32; + uint16_t tmp_s[kNumCoeffs]; + int j = 0; + const uint8_t limit = *limit0 < *limit1 ? *limit0 : *limit1; + while (j < kNumCoeffs) { + uint8_t val = rnd.Rand8(); + if (val & 0x80) { // 50% chance to choose a new value. 
+ tmp_s[j] = rnd.Rand16(); + j++; + } else { // 50% chance to repeat previous value in row X times. + int k = 0; + while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) { + if (j < 1) { + tmp_s[j] = rnd.Rand16(); + } else if (val & 0x20) { // Increment by a value within the limit. + tmp_s[j] = (tmp_s[j - 1] + (limit - 1)); + } else { // Decrement by an value within the limit. + tmp_s[j] = (tmp_s[j - 1] - (limit - 1)); + } + j++; + } + } + } + for (j = 0; j < kNumCoeffs; j++) { + if (i % 2) { + s[j] = tmp_s[j] & mask_; + } else { + s[j] = tmp_s[p * (j % p) + j / p] & mask_; + } + ref_s[j] = s[j]; + } +#if CONFIG_HIGHBITDEPTH + ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, + limit1, thresh1, bd); + ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0, + thresh0, blimit1, limit1, thresh1, + bd)); +#else + ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, + limit1, thresh1); + ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0, + thresh0, blimit1, limit1, thresh1)); +#endif // CONFIG_HIGHBITDEPTH + for (j = 0; j < kNumCoeffs; ++j) { + err_count += ref_s[j] != s[j]; + } + if (err_count && !err_count_total) { + first_failure = i; + } + err_count_total += err_count; + } + EXPECT_EQ(0, err_count_total) + << "Error: Loop8Test9Param, C output doesn't match SSE2 " + "loopfilter output. 
" + << "First failed at test case " << first_failure; +} + +TEST_P(Loop8Test9Param, ValueCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = number_of_iterations; +#if CONFIG_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]); + DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]); +#else + DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]); + DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]); +#endif // CONFIG_HIGHBITDEPTH + int err_count_total = 0; + int first_failure = -1; + for (int i = 0; i < count_test_block; ++i) { + int err_count = 0; + uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4)); + DECLARE_ALIGNED(16, const uint8_t, + blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; + tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER)); + DECLARE_ALIGNED(16, const uint8_t, + limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; + tmp = rnd.Rand8(); + DECLARE_ALIGNED(16, const uint8_t, + thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; + tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4)); + DECLARE_ALIGNED(16, const uint8_t, + blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; + tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER)); + DECLARE_ALIGNED(16, const uint8_t, + limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; + tmp = rnd.Rand8(); + DECLARE_ALIGNED(16, const uint8_t, + thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; + int32_t p = kNumCoeffs / 32; // TODO(pdlf) can we have non-square here? 
+ for (int j = 0; j < kNumCoeffs; ++j) { + s[j] = rnd.Rand16() & mask_; + ref_s[j] = s[j]; + } +#if CONFIG_HIGHBITDEPTH + const int32_t bd = bit_depth_; + ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, + limit1, thresh1, bd); + ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0, + thresh0, blimit1, limit1, thresh1, + bd)); +#else + ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, + limit1, thresh1); + ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0, + thresh0, blimit1, limit1, thresh1)); +#endif // CONFIG_HIGHBITDEPTH + for (int j = 0; j < kNumCoeffs; ++j) { + err_count += ref_s[j] != s[j]; + } + if (err_count && !err_count_total) { + first_failure = i; + } + err_count_total += err_count; + } + EXPECT_EQ(0, err_count_total) + << "Error: Loop8Test9Param, C output doesn't match SSE2" + "loopfilter output. " + << "First failed at test case " << first_failure; +} + +using std::tr1::make_tuple; + +#if HAVE_SSE2 +#if CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + SSE2, Loop8Test6Param, + ::testing::Values(make_tuple(&aom_highbd_lpf_horizontal_4_sse2, + &aom_highbd_lpf_horizontal_4_c, 8), + make_tuple(&aom_highbd_lpf_vertical_4_sse2, + &aom_highbd_lpf_vertical_4_c, 8), + make_tuple(&aom_highbd_lpf_horizontal_8_sse2, + &aom_highbd_lpf_horizontal_8_c, 8), + make_tuple(&aom_highbd_lpf_horizontal_edge_8_sse2, + &aom_highbd_lpf_horizontal_edge_8_c, 8), + make_tuple(&aom_highbd_lpf_horizontal_edge_16_sse2, + &aom_highbd_lpf_horizontal_edge_16_c, 8), + make_tuple(&aom_highbd_lpf_vertical_8_sse2, + &aom_highbd_lpf_vertical_8_c, 8), + make_tuple(&aom_highbd_lpf_vertical_16_sse2, + &aom_highbd_lpf_vertical_16_c, 8), + make_tuple(&aom_highbd_lpf_horizontal_4_sse2, + &aom_highbd_lpf_horizontal_4_c, 10), + make_tuple(&aom_highbd_lpf_vertical_4_sse2, + &aom_highbd_lpf_vertical_4_c, 10), + make_tuple(&aom_highbd_lpf_horizontal_8_sse2, + &aom_highbd_lpf_horizontal_8_c, 10), + 
make_tuple(&aom_highbd_lpf_horizontal_edge_8_sse2, + &aom_highbd_lpf_horizontal_edge_8_c, 10), + make_tuple(&aom_highbd_lpf_horizontal_edge_16_sse2, + &aom_highbd_lpf_horizontal_edge_16_c, 10), + make_tuple(&aom_highbd_lpf_vertical_8_sse2, + &aom_highbd_lpf_vertical_8_c, 10), + make_tuple(&aom_highbd_lpf_vertical_16_sse2, + &aom_highbd_lpf_vertical_16_c, 10), + make_tuple(&aom_highbd_lpf_horizontal_4_sse2, + &aom_highbd_lpf_horizontal_4_c, 12), + make_tuple(&aom_highbd_lpf_vertical_4_sse2, + &aom_highbd_lpf_vertical_4_c, 12), + make_tuple(&aom_highbd_lpf_horizontal_8_sse2, + &aom_highbd_lpf_horizontal_8_c, 12), + make_tuple(&aom_highbd_lpf_horizontal_edge_8_sse2, + &aom_highbd_lpf_horizontal_edge_8_c, 12), + make_tuple(&aom_highbd_lpf_horizontal_edge_16_sse2, + &aom_highbd_lpf_horizontal_edge_16_c, 12), + make_tuple(&aom_highbd_lpf_vertical_8_sse2, + &aom_highbd_lpf_vertical_8_c, 12), + make_tuple(&aom_highbd_lpf_vertical_16_sse2, + &aom_highbd_lpf_vertical_16_c, 12), + make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2, + &aom_highbd_lpf_vertical_16_dual_c, 8), + make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2, + &aom_highbd_lpf_vertical_16_dual_c, 10), + make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2, + &aom_highbd_lpf_vertical_16_dual_c, 12))); +#else +INSTANTIATE_TEST_CASE_P( + SSE2, Loop8Test6Param, + ::testing::Values( + make_tuple(&aom_lpf_horizontal_4_sse2, &aom_lpf_horizontal_4_c, 8), + make_tuple(&aom_lpf_horizontal_8_sse2, &aom_lpf_horizontal_8_c, 8), + make_tuple(&aom_lpf_horizontal_edge_8_sse2, + &aom_lpf_horizontal_edge_8_c, 8), + make_tuple(&aom_lpf_horizontal_edge_16_sse2, + &aom_lpf_horizontal_edge_16_c, 8), + make_tuple(&aom_lpf_vertical_4_sse2, &aom_lpf_vertical_4_c, 8), + make_tuple(&aom_lpf_vertical_8_sse2, &aom_lpf_vertical_8_c, 8), + make_tuple(&aom_lpf_vertical_16_sse2, &aom_lpf_vertical_16_c, 8), + make_tuple(&aom_lpf_vertical_16_dual_sse2, &aom_lpf_vertical_16_dual_c, + 8))); +#endif // CONFIG_HIGHBITDEPTH +#endif + +#if HAVE_AVX2 && 
(!CONFIG_HIGHBITDEPTH) +INSTANTIATE_TEST_CASE_P( + AVX2, Loop8Test6Param, + ::testing::Values(make_tuple(&aom_lpf_horizontal_edge_8_avx2, + &aom_lpf_horizontal_edge_8_c, 8), + make_tuple(&aom_lpf_horizontal_edge_16_avx2, + &aom_lpf_horizontal_edge_16_c, 8))); +#endif + +#if HAVE_SSE2 +#if CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + SSE2, Loop8Test9Param, + ::testing::Values(make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2, + &aom_highbd_lpf_horizontal_4_dual_c, 8), + make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2, + &aom_highbd_lpf_horizontal_8_dual_c, 8), + make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2, + &aom_highbd_lpf_vertical_4_dual_c, 8), + make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2, + &aom_highbd_lpf_vertical_8_dual_c, 8), + make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2, + &aom_highbd_lpf_horizontal_4_dual_c, 10), + make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2, + &aom_highbd_lpf_horizontal_8_dual_c, 10), + make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2, + &aom_highbd_lpf_vertical_4_dual_c, 10), + make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2, + &aom_highbd_lpf_vertical_8_dual_c, 10), + make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2, + &aom_highbd_lpf_horizontal_4_dual_c, 12), + make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2, + &aom_highbd_lpf_horizontal_8_dual_c, 12), + make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2, + &aom_highbd_lpf_vertical_4_dual_c, 12), + make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2, + &aom_highbd_lpf_vertical_8_dual_c, 12))); +#else +INSTANTIATE_TEST_CASE_P( + SSE2, Loop8Test9Param, + ::testing::Values(make_tuple(&aom_lpf_horizontal_4_dual_sse2, + &aom_lpf_horizontal_4_dual_c, 8), + make_tuple(&aom_lpf_horizontal_8_dual_sse2, + &aom_lpf_horizontal_8_dual_c, 8), + make_tuple(&aom_lpf_vertical_4_dual_sse2, + &aom_lpf_vertical_4_dual_c, 8), + make_tuple(&aom_lpf_vertical_8_dual_sse2, + &aom_lpf_vertical_8_dual_c, 8))); +#endif // CONFIG_HIGHBITDEPTH +#endif + +#if HAVE_NEON +#if CONFIG_HIGHBITDEPTH +// 
No neon high bitdepth functions. +#else +INSTANTIATE_TEST_CASE_P( + NEON, Loop8Test6Param, + ::testing::Values( +#if HAVE_NEON_ASM + // Using #if inside the macro is unsupported on MSVS but the tests are + // not + // currently built for MSVS with ARM and NEON. + make_tuple(&aom_lpf_horizontal_edge_8_neon, + &aom_lpf_horizontal_edge_8_c, 8), + make_tuple(&aom_lpf_horizontal_edge_16_neon, + &aom_lpf_horizontal_edge_16_c, 8), + make_tuple(&aom_lpf_vertical_16_neon, &aom_lpf_vertical_16_c, 8), + make_tuple(&aom_lpf_vertical_16_dual_neon, &aom_lpf_vertical_16_dual_c, + 8), +#endif // HAVE_NEON_ASM + make_tuple(&aom_lpf_horizontal_8_neon, &aom_lpf_horizontal_8_c, 8), + make_tuple(&aom_lpf_vertical_8_neon, &aom_lpf_vertical_8_c, 8), + make_tuple(&aom_lpf_horizontal_4_neon, &aom_lpf_horizontal_4_c, 8), + make_tuple(&aom_lpf_vertical_4_neon, &aom_lpf_vertical_4_c, 8))); +INSTANTIATE_TEST_CASE_P(NEON, Loop8Test9Param, + ::testing::Values( +#if HAVE_NEON_ASM + make_tuple(&aom_lpf_horizontal_8_dual_neon, + &aom_lpf_horizontal_8_dual_c, 8), + make_tuple(&aom_lpf_vertical_8_dual_neon, + &aom_lpf_vertical_8_dual_c, 8), +#endif // HAVE_NEON_ASM + make_tuple(&aom_lpf_horizontal_4_dual_neon, + &aom_lpf_horizontal_4_dual_c, 8), + make_tuple(&aom_lpf_vertical_4_dual_neon, + &aom_lpf_vertical_4_dual_c, 8))); +#endif // CONFIG_HIGHBITDEPTH +#endif // HAVE_NEON + +#if HAVE_DSPR2 && !CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + DSPR2, Loop8Test6Param, + ::testing::Values( + make_tuple(&aom_lpf_horizontal_4_dspr2, &aom_lpf_horizontal_4_c, 8), + make_tuple(&aom_lpf_horizontal_8_dspr2, &aom_lpf_horizontal_8_c, 8), + make_tuple(&aom_lpf_horizontal_edge_8, &aom_lpf_horizontal_edge_8_c, 8), + make_tuple(&aom_lpf_horizontal_edge_16, &aom_lpf_horizontal_edge_16_c, 8), + make_tuple(&aom_lpf_vertical_4_dspr2, &aom_lpf_vertical_4_c, 8), + make_tuple(&aom_lpf_vertical_8_dspr2, &aom_lpf_vertical_8_c, 8), + make_tuple(&aom_lpf_vertical_16_dspr2, &aom_lpf_vertical_16_c, 8), +
make_tuple(&aom_lpf_vertical_16_dual_dspr2, &aom_lpf_vertical_16_dual_c, + 8))); + +INSTANTIATE_TEST_CASE_P( + DSPR2, Loop8Test9Param, + ::testing::Values(make_tuple(&aom_lpf_horizontal_4_dual_dspr2, + &aom_lpf_horizontal_4_dual_c, 8), + make_tuple(&aom_lpf_horizontal_8_dual_dspr2, + &aom_lpf_horizontal_8_dual_c, 8), + make_tuple(&aom_lpf_vertical_4_dual_dspr2, + &aom_lpf_vertical_4_dual_c, 8), + make_tuple(&aom_lpf_vertical_8_dual_dspr2, + &aom_lpf_vertical_8_dual_c, 8))); +#endif // HAVE_DSPR2 && !CONFIG_HIGHBITDEPTH + +#if HAVE_MSA && (!CONFIG_HIGHBITDEPTH) +INSTANTIATE_TEST_CASE_P( + MSA, Loop8Test6Param, + ::testing::Values( + make_tuple(&aom_lpf_horizontal_4_msa, &aom_lpf_horizontal_4_c, 8), + make_tuple(&aom_lpf_horizontal_8_msa, &aom_lpf_horizontal_8_c, 8), + make_tuple(&aom_lpf_horizontal_edge_8_msa, &aom_lpf_horizontal_edge_8_c, + 8), + make_tuple(&aom_lpf_horizontal_edge_16_msa, + &aom_lpf_horizontal_edge_16_c, 8), + make_tuple(&aom_lpf_vertical_4_msa, &aom_lpf_vertical_4_c, 8), + make_tuple(&aom_lpf_vertical_8_msa, &aom_lpf_vertical_8_c, 8), + make_tuple(&aom_lpf_vertical_16_msa, &aom_lpf_vertical_16_c, 8))); + +INSTANTIATE_TEST_CASE_P( + MSA, Loop8Test9Param, + ::testing::Values(make_tuple(&aom_lpf_horizontal_4_dual_msa, + &aom_lpf_horizontal_4_dual_c, 8), + make_tuple(&aom_lpf_horizontal_8_dual_msa, + &aom_lpf_horizontal_8_dual_c, 8), + make_tuple(&aom_lpf_vertical_4_dual_msa, + &aom_lpf_vertical_4_dual_c, 8), + make_tuple(&aom_lpf_vertical_8_dual_msa, + &aom_lpf_vertical_8_dual_c, 8))); +#endif // HAVE_MSA && (!CONFIG_HIGHBITDEPTH) + +} // namespace diff --git a/third_party/aom/test/masked_sad_test.cc b/third_party/aom/test/masked_sad_test.cc new file mode 100644 index 000000000..53f85eef7 --- /dev/null +++ b/third_party/aom/test/masked_sad_test.cc @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/util.h" + +#include "./aom_config.h" +#include "./aom_dsp_rtcd.h" +#include "aom/aom_integer.h" + +using libaom_test::ACMRandom; + +namespace { +const int number_of_iterations = 500; + +typedef unsigned int (*MaskedSADFunc)(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + const uint8_t *m, int m_stride); +typedef std::tr1::tuple<MaskedSADFunc, MaskedSADFunc> MaskedSADParam; + +class MaskedSADTest : public ::testing::TestWithParam<MaskedSADParam> { + public: + virtual ~MaskedSADTest() {} + virtual void SetUp() { + maskedSAD_op_ = GET_PARAM(0); + ref_maskedSAD_op_ = GET_PARAM(1); + } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + MaskedSADFunc maskedSAD_op_; + MaskedSADFunc ref_maskedSAD_op_; +}; + +TEST_P(MaskedSADTest, OperationCheck) { + unsigned int ref_ret, ret; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + int err_count = 0; + int first_failure = -1; + int src_stride = MAX_SB_SIZE; + int ref_stride = MAX_SB_SIZE; + int msk_stride = MAX_SB_SIZE; + for (int i = 0; i < 
number_of_iterations; ++i) { + for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) { + src_ptr[j] = rnd.Rand8(); + ref_ptr[j] = rnd.Rand8(); + msk_ptr[j] = ((rnd.Rand8() & 0x7f) > 64) ? rnd.Rand8() & 0x3f : 64; + assert(msk_ptr[j] <= 64); + } + + ref_ret = ref_maskedSAD_op_(src_ptr, src_stride, ref_ptr, ref_stride, + msk_ptr, msk_stride); + ASM_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src_ptr, src_stride, ref_ptr, + ref_stride, msk_ptr, + msk_stride)); + if (ret != ref_ret) { + err_count++; + if (first_failure == -1) first_failure = i; + } + } + EXPECT_EQ(0, err_count) + << "Error: Masked SAD Test, C output doesn't match SSSE3 output. " + << "First failed at test case " << first_failure; +} + +#if CONFIG_HIGHBITDEPTH +typedef unsigned int (*HighbdMaskedSADFunc)(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + const uint8_t *m, int m_stride); +typedef std::tr1::tuple<HighbdMaskedSADFunc, HighbdMaskedSADFunc> + HighbdMaskedSADParam; + +class HighbdMaskedSADTest + : public ::testing::TestWithParam<HighbdMaskedSADParam> { + public: + virtual ~HighbdMaskedSADTest() {} + virtual void SetUp() { + maskedSAD_op_ = GET_PARAM(0); + ref_maskedSAD_op_ = GET_PARAM(1); + } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + HighbdMaskedSADFunc maskedSAD_op_; + HighbdMaskedSADFunc ref_maskedSAD_op_; +}; + +TEST_P(HighbdMaskedSADTest, OperationCheck) { + unsigned int ref_ret, ret; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); + uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); + int err_count = 0; + int first_failure = -1; + int src_stride = MAX_SB_SIZE; + int ref_stride = MAX_SB_SIZE; + int msk_stride = MAX_SB_SIZE; + for (int i = 0; i < number_of_iterations; ++i) { + for (int j 
= 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) { + src_ptr[j] = rnd.Rand16() & 0xfff; + ref_ptr[j] = rnd.Rand16() & 0xfff; + msk_ptr[j] = ((rnd.Rand8() & 0x7f) > 64) ? rnd.Rand8() & 0x3f : 64; + } + + ref_ret = ref_maskedSAD_op_(src8_ptr, src_stride, ref8_ptr, ref_stride, + msk_ptr, msk_stride); + ASM_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src8_ptr, src_stride, ref8_ptr, + ref_stride, msk_ptr, + msk_stride)); + if (ret != ref_ret) { + err_count++; + if (first_failure == -1) first_failure = i; + } + } + EXPECT_EQ(0, err_count) + << "Error: High BD Masked SAD Test, C output doesn't match SSSE3 output. " + << "First failed at test case " << first_failure; +} +#endif // CONFIG_HIGHBITDEPTH + +using std::tr1::make_tuple; + +#if HAVE_SSSE3 +INSTANTIATE_TEST_CASE_P( + SSSE3_C_COMPARE, MaskedSADTest, + ::testing::Values( +#if CONFIG_EXT_PARTITION + make_tuple(&aom_masked_sad128x128_ssse3, &aom_masked_sad128x128_c), + make_tuple(&aom_masked_sad128x64_ssse3, &aom_masked_sad128x64_c), + make_tuple(&aom_masked_sad64x128_ssse3, &aom_masked_sad64x128_c), +#endif // CONFIG_EXT_PARTITION + make_tuple(&aom_masked_sad64x64_ssse3, &aom_masked_sad64x64_c), + make_tuple(&aom_masked_sad64x32_ssse3, &aom_masked_sad64x32_c), + make_tuple(&aom_masked_sad32x64_ssse3, &aom_masked_sad32x64_c), + make_tuple(&aom_masked_sad32x32_ssse3, &aom_masked_sad32x32_c), + make_tuple(&aom_masked_sad32x16_ssse3, &aom_masked_sad32x16_c), + make_tuple(&aom_masked_sad16x32_ssse3, &aom_masked_sad16x32_c), + make_tuple(&aom_masked_sad16x16_ssse3, &aom_masked_sad16x16_c), + make_tuple(&aom_masked_sad16x8_ssse3, &aom_masked_sad16x8_c), + make_tuple(&aom_masked_sad8x16_ssse3, &aom_masked_sad8x16_c), + make_tuple(&aom_masked_sad8x8_ssse3, &aom_masked_sad8x8_c), + make_tuple(&aom_masked_sad8x4_ssse3, &aom_masked_sad8x4_c), + make_tuple(&aom_masked_sad4x8_ssse3, &aom_masked_sad4x8_c), + make_tuple(&aom_masked_sad4x4_ssse3, &aom_masked_sad4x4_c))); +#if CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P(SSSE3_C_COMPARE, 
HighbdMaskedSADTest, + ::testing::Values( +#if CONFIG_EXT_PARTITION + make_tuple(&aom_highbd_masked_sad128x128_ssse3, + &aom_highbd_masked_sad128x128_c), + make_tuple(&aom_highbd_masked_sad128x64_ssse3, + &aom_highbd_masked_sad128x64_c), + make_tuple(&aom_highbd_masked_sad64x128_ssse3, + &aom_highbd_masked_sad64x128_c), +#endif // CONFIG_EXT_PARTITION + make_tuple(&aom_highbd_masked_sad64x64_ssse3, + &aom_highbd_masked_sad64x64_c), + make_tuple(&aom_highbd_masked_sad64x32_ssse3, + &aom_highbd_masked_sad64x32_c), + make_tuple(&aom_highbd_masked_sad32x64_ssse3, + &aom_highbd_masked_sad32x64_c), + make_tuple(&aom_highbd_masked_sad32x32_ssse3, + &aom_highbd_masked_sad32x32_c), + make_tuple(&aom_highbd_masked_sad32x16_ssse3, + &aom_highbd_masked_sad32x16_c), + make_tuple(&aom_highbd_masked_sad16x32_ssse3, + &aom_highbd_masked_sad16x32_c), + make_tuple(&aom_highbd_masked_sad16x16_ssse3, + &aom_highbd_masked_sad16x16_c), + make_tuple(&aom_highbd_masked_sad16x8_ssse3, + &aom_highbd_masked_sad16x8_c), + make_tuple(&aom_highbd_masked_sad8x16_ssse3, + &aom_highbd_masked_sad8x16_c), + make_tuple(&aom_highbd_masked_sad8x8_ssse3, + &aom_highbd_masked_sad8x8_c), + make_tuple(&aom_highbd_masked_sad8x4_ssse3, + &aom_highbd_masked_sad8x4_c), + make_tuple(&aom_highbd_masked_sad4x8_ssse3, + &aom_highbd_masked_sad4x8_c), + make_tuple(&aom_highbd_masked_sad4x4_ssse3, + &aom_highbd_masked_sad4x4_c))); +#endif // CONFIG_HIGHBITDEPTH +#endif // HAVE_SSSE3 +} // namespace diff --git a/third_party/aom/test/masked_variance_test.cc b/third_party/aom/test/masked_variance_test.cc new file mode 100644 index 000000000..65e852aea --- /dev/null +++ b/third_party/aom/test/masked_variance_test.cc @@ -0,0 +1,790 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/util.h" + +#include "./aom_config.h" +#include "./aom_dsp_rtcd.h" +#include "aom/aom_codec.h" +#include "aom/aom_integer.h" +#include "aom_dsp/aom_filter.h" +#include "aom_mem/aom_mem.h" + +using libaom_test::ACMRandom; + +namespace { +const int number_of_iterations = 500; + +typedef unsigned int (*MaskedVarianceFunc)(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + const uint8_t *m, int m_stride, + unsigned int *sse); + +typedef std::tr1::tuple<MaskedVarianceFunc, MaskedVarianceFunc> + MaskedVarianceParam; + +class MaskedVarianceTest + : public ::testing::TestWithParam<MaskedVarianceParam> { + public: + virtual ~MaskedVarianceTest() {} + virtual void SetUp() { + opt_func_ = GET_PARAM(0); + ref_func_ = GET_PARAM(1); + } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + MaskedVarianceFunc opt_func_; + MaskedVarianceFunc ref_func_; +}; + +TEST_P(MaskedVarianceTest, OperationCheck) { + unsigned int ref_ret, opt_ret; + unsigned int ref_sse, opt_sse; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + int err_count = 0; + int first_failure = -1; + int src_stride = MAX_SB_SIZE; + int ref_stride = MAX_SB_SIZE; + int msk_stride = 
MAX_SB_SIZE; + + for (int i = 0; i < number_of_iterations; ++i) { + for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) { + src_ptr[j] = rnd.Rand8(); + ref_ptr[j] = rnd.Rand8(); + msk_ptr[j] = rnd(65); + } + + ref_ret = ref_func_(src_ptr, src_stride, ref_ptr, ref_stride, msk_ptr, + msk_stride, &ref_sse); + ASM_REGISTER_STATE_CHECK(opt_ret = opt_func_(src_ptr, src_stride, ref_ptr, + ref_stride, msk_ptr, + msk_stride, &opt_sse)); + + if (opt_ret != ref_ret || opt_sse != ref_sse) { + err_count++; + if (first_failure == -1) first_failure = i; + } + } + + EXPECT_EQ(0, err_count) << "Error: Masked Variance Test OperationCheck," + << "C output doesn't match SSSE3 output. " + << "First failed at test case " << first_failure; +} + +TEST_P(MaskedVarianceTest, ExtremeValues) { + unsigned int ref_ret, opt_ret; + unsigned int ref_sse, opt_sse; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + int err_count = 0; + int first_failure = -1; + int src_stride = MAX_SB_SIZE; + int ref_stride = MAX_SB_SIZE; + int msk_stride = MAX_SB_SIZE; + + for (int i = 0; i < 8; ++i) { + memset(src_ptr, (i & 0x1) ? 255 : 0, MAX_SB_SIZE * MAX_SB_SIZE); + memset(ref_ptr, (i & 0x2) ? 255 : 0, MAX_SB_SIZE * MAX_SB_SIZE); + memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_SB_SIZE * MAX_SB_SIZE); + + ref_ret = ref_func_(src_ptr, src_stride, ref_ptr, ref_stride, msk_ptr, + msk_stride, &ref_sse); + ASM_REGISTER_STATE_CHECK(opt_ret = opt_func_(src_ptr, src_stride, ref_ptr, + ref_stride, msk_ptr, + msk_stride, &opt_sse)); + + if (opt_ret != ref_ret || opt_sse != ref_sse) { + err_count++; + if (first_failure == -1) first_failure = i; + } + } + + EXPECT_EQ(0, err_count) << "Error: Masked Variance Test ExtremeValues," + << "C output doesn't match SSSE3 output. 
" + << "First failed at test case " << first_failure; +} + +typedef unsigned int (*MaskedSubPixelVarianceFunc)( + const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b, + int b_stride, const uint8_t *m, int m_stride, unsigned int *sse); + +typedef std::tr1::tuple<MaskedSubPixelVarianceFunc, MaskedSubPixelVarianceFunc> + MaskedSubPixelVarianceParam; + +class MaskedSubPixelVarianceTest + : public ::testing::TestWithParam<MaskedSubPixelVarianceParam> { + public: + virtual ~MaskedSubPixelVarianceTest() {} + virtual void SetUp() { + opt_func_ = GET_PARAM(0); + ref_func_ = GET_PARAM(1); + } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + MaskedSubPixelVarianceFunc opt_func_; + MaskedSubPixelVarianceFunc ref_func_; +}; + +TEST_P(MaskedSubPixelVarianceTest, OperationCheck) { + unsigned int ref_ret, opt_ret; + unsigned int ref_sse, opt_sse; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]); + DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]); + int err_count = 0; + int first_failure = -1; + int src_stride = (MAX_SB_SIZE + 1); + int ref_stride = (MAX_SB_SIZE + 1); + int msk_stride = (MAX_SB_SIZE + 1); + int xoffset; + int yoffset; + + for (int i = 0; i < number_of_iterations; ++i) { + int xoffsets[] = { 0, 4, rnd(BIL_SUBPEL_SHIFTS) }; + int yoffsets[] = { 0, 4, rnd(BIL_SUBPEL_SHIFTS) }; + for (int j = 0; j < (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1); j++) { + src_ptr[j] = rnd.Rand8(); + ref_ptr[j] = rnd.Rand8(); + msk_ptr[j] = rnd(65); + } + for (int k = 0; k < 3; k++) { + xoffset = xoffsets[k]; + for (int l = 0; l < 3; l++) { + xoffset = xoffsets[k]; + yoffset = yoffsets[l]; + + ref_ret = ref_func_(src_ptr, src_stride, xoffset, yoffset, ref_ptr, + ref_stride, msk_ptr, msk_stride, &ref_sse); + ASM_REGISTER_STATE_CHECK( + opt_ret = 
opt_func_(src_ptr, src_stride, xoffset, yoffset, ref_ptr, + ref_stride, msk_ptr, msk_stride, &opt_sse)); + + if (opt_ret != ref_ret || opt_sse != ref_sse) { + err_count++; + if (first_failure == -1) first_failure = i; + } + } + } + } + + EXPECT_EQ(0, err_count) + << "Error: Masked Sub Pixel Variance Test OperationCheck," + << "C output doesn't match SSSE3 output. " + << "First failed at test case " << first_failure; +} + +TEST_P(MaskedSubPixelVarianceTest, ExtremeValues) { + unsigned int ref_ret, opt_ret; + unsigned int ref_sse, opt_sse; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]); + DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]); + int first_failure_x = -1; + int first_failure_y = -1; + int err_count = 0; + int first_failure = -1; + int src_stride = (MAX_SB_SIZE + 1); + int ref_stride = (MAX_SB_SIZE + 1); + int msk_stride = (MAX_SB_SIZE + 1); + + for (int xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) { + for (int yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) { + for (int i = 0; i < 8; ++i) { + memset(src_ptr, (i & 0x1) ? 255 : 0, + (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)); + memset(ref_ptr, (i & 0x2) ? 255 : 0, + (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)); + memset(msk_ptr, (i & 0x4) ? 
64 : 0, + (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)); + + ref_ret = ref_func_(src_ptr, src_stride, xoffset, yoffset, ref_ptr, + ref_stride, msk_ptr, msk_stride, &ref_sse); + ASM_REGISTER_STATE_CHECK( + opt_ret = opt_func_(src_ptr, src_stride, xoffset, yoffset, ref_ptr, + ref_stride, msk_ptr, msk_stride, &opt_sse)); + + if (opt_ret != ref_ret || opt_sse != ref_sse) { + err_count++; + if (first_failure == -1) { + first_failure = i; + first_failure_x = xoffset; + first_failure_y = yoffset; + } + } + } + } + } + + EXPECT_EQ(0, err_count) << "Error: Masked Variance Test ExtremeValues," + << "C output doesn't match SSSE3 output. " + << "First failed at test case " << first_failure + << " x_offset = " << first_failure_x + << " y_offset = " << first_failure_y; +} + +#if CONFIG_HIGHBITDEPTH +typedef std::tr1::tuple<MaskedVarianceFunc, MaskedVarianceFunc, aom_bit_depth_t> + HighbdMaskedVarianceParam; + +class HighbdMaskedVarianceTest + : public ::testing::TestWithParam<HighbdMaskedVarianceParam> { + public: + virtual ~HighbdMaskedVarianceTest() {} + virtual void SetUp() { + opt_func_ = GET_PARAM(0); + ref_func_ = GET_PARAM(1); + bit_depth_ = GET_PARAM(2); + } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + MaskedVarianceFunc opt_func_; + MaskedVarianceFunc ref_func_; + aom_bit_depth_t bit_depth_; +}; + +TEST_P(HighbdMaskedVarianceTest, OperationCheck) { + unsigned int ref_ret, opt_ret; + unsigned int ref_sse, opt_sse; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); + uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); + int err_count = 0; + int first_failure = -1; + int src_stride = MAX_SB_SIZE; + int ref_stride = MAX_SB_SIZE; + int msk_stride = MAX_SB_SIZE; + + for (int i = 0; i < 
number_of_iterations; ++i) { + for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) { + src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1); + ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1); + msk_ptr[j] = rnd(65); + } + + ref_ret = ref_func_(src8_ptr, src_stride, ref8_ptr, ref_stride, msk_ptr, + msk_stride, &ref_sse); + ASM_REGISTER_STATE_CHECK(opt_ret = opt_func_(src8_ptr, src_stride, ref8_ptr, + ref_stride, msk_ptr, + msk_stride, &opt_sse)); + + if (opt_ret != ref_ret || opt_sse != ref_sse) { + err_count++; + if (first_failure == -1) first_failure = i; + } + } + + EXPECT_EQ(0, err_count) << "Error: Masked Variance Test OperationCheck," + << "C output doesn't match SSSE3 output. " + << "First failed at test case " << first_failure; +} + +TEST_P(HighbdMaskedVarianceTest, ExtremeValues) { + unsigned int ref_ret, opt_ret; + unsigned int ref_sse, opt_sse; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); + uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); + int err_count = 0; + int first_failure = -1; + int src_stride = MAX_SB_SIZE; + int ref_stride = MAX_SB_SIZE; + int msk_stride = MAX_SB_SIZE; + + for (int i = 0; i < 8; ++i) { + aom_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0, + MAX_SB_SIZE * MAX_SB_SIZE); + aom_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0, + MAX_SB_SIZE * MAX_SB_SIZE); + memset(msk_ptr, (i & 0x4) ? 
64 : 0, MAX_SB_SIZE * MAX_SB_SIZE); + + ref_ret = ref_func_(src8_ptr, src_stride, ref8_ptr, ref_stride, msk_ptr, + msk_stride, &ref_sse); + ASM_REGISTER_STATE_CHECK(opt_ret = opt_func_(src8_ptr, src_stride, ref8_ptr, + ref_stride, msk_ptr, + msk_stride, &opt_sse)); + + if (opt_ret != ref_ret || opt_sse != ref_sse) { + err_count++; + if (first_failure == -1) first_failure = i; + } + } + + EXPECT_EQ(0, err_count) << "Error: Masked Variance Test ExtremeValues," + << "C output doesn't match SSSE3 output. " + << "First failed at test case " << first_failure; +} + +typedef std::tr1::tuple<MaskedSubPixelVarianceFunc, MaskedSubPixelVarianceFunc, + aom_bit_depth_t> + HighbdMaskedSubPixelVarianceParam; + +class HighbdMaskedSubPixelVarianceTest + : public ::testing::TestWithParam<HighbdMaskedSubPixelVarianceParam> { + public: + virtual ~HighbdMaskedSubPixelVarianceTest() {} + virtual void SetUp() { + opt_func_ = GET_PARAM(0); + ref_func_ = GET_PARAM(1); + bit_depth_ = GET_PARAM(2); + } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + MaskedSubPixelVarianceFunc opt_func_; + MaskedSubPixelVarianceFunc ref_func_; + aom_bit_depth_t bit_depth_; +}; + +TEST_P(HighbdMaskedSubPixelVarianceTest, OperationCheck) { + unsigned int ref_ret, opt_ret; + unsigned int ref_sse, opt_sse; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]); + DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]); + uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); + uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); + int err_count = 0; + int first_failure = -1; + int first_failure_x = -1; + int first_failure_y = -1; + int src_stride = (MAX_SB_SIZE + 1); + int ref_stride = (MAX_SB_SIZE + 1); + int msk_stride = (MAX_SB_SIZE + 1); + int xoffset, yoffset; + + for (int i = 0; i < number_of_iterations; ++i) { 
+ for (xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) { + for (yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) { + for (int j = 0; j < (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1); j++) { + src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1); + ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1); + msk_ptr[j] = rnd(65); + } + + ref_ret = ref_func_(src8_ptr, src_stride, xoffset, yoffset, ref8_ptr, + ref_stride, msk_ptr, msk_stride, &ref_sse); + ASM_REGISTER_STATE_CHECK(opt_ret = + opt_func_(src8_ptr, src_stride, xoffset, + yoffset, ref8_ptr, ref_stride, + msk_ptr, msk_stride, &opt_sse)); + + if (opt_ret != ref_ret || opt_sse != ref_sse) { + err_count++; + if (first_failure == -1) { + first_failure = i; + first_failure_x = xoffset; + first_failure_y = yoffset; + } + } + } + } + } + + EXPECT_EQ(0, err_count) + << "Error: Masked Sub Pixel Variance Test OperationCheck," + << "C output doesn't match SSSE3 output. " + << "First failed at test case " << first_failure + << " x_offset = " << first_failure_x << " y_offset = " << first_failure_y; +} + +TEST_P(HighbdMaskedSubPixelVarianceTest, ExtremeValues) { + unsigned int ref_ret, opt_ret; + unsigned int ref_sse, opt_sse; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]); + DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]); + uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); + uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); + int first_failure_x = -1; + int first_failure_y = -1; + int err_count = 0; + int first_failure = -1; + int src_stride = (MAX_SB_SIZE + 1); + int ref_stride = (MAX_SB_SIZE + 1); + int msk_stride = (MAX_SB_SIZE + 1); + + for (int xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) { + for (int yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) { + for (int i = 0; i < 8; ++i) { + aom_memset16(src_ptr, (i & 0x1) ? 
((1 << bit_depth_) - 1) : 0, + (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)); + aom_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0, + (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)); + memset(msk_ptr, (i & 0x4) ? 64 : 0, + (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)); + + ref_ret = ref_func_(src8_ptr, src_stride, xoffset, yoffset, ref8_ptr, + ref_stride, msk_ptr, msk_stride, &ref_sse); + ASM_REGISTER_STATE_CHECK(opt_ret = + opt_func_(src8_ptr, src_stride, xoffset, + yoffset, ref8_ptr, ref_stride, + msk_ptr, msk_stride, &opt_sse)); + + if (opt_ret != ref_ret || opt_sse != ref_sse) { + err_count++; + if (first_failure == -1) { + first_failure = i; + first_failure_x = xoffset; + first_failure_y = yoffset; + } + } + } + } + } + + EXPECT_EQ(0, err_count) << "Error: Masked Variance Test ExtremeValues," + << "C output doesn't match SSSE3 output. " + << "First failed at test case " << first_failure + << " x_offset = " << first_failure_x + << " y_offset = " << first_failure_y; +} +#endif // CONFIG_HIGHBITDEPTH + +using std::tr1::make_tuple; + +#if HAVE_SSSE3 +INSTANTIATE_TEST_CASE_P( + SSSE3_C_COMPARE, MaskedVarianceTest, + ::testing::Values( +#if CONFIG_EXT_PARTITION + make_tuple(&aom_masked_variance128x128_ssse3, + &aom_masked_variance128x128_c), + make_tuple(&aom_masked_variance128x64_ssse3, + &aom_masked_variance128x64_c), + make_tuple(&aom_masked_variance64x128_ssse3, + &aom_masked_variance64x128_c), +#endif // CONFIG_EXT_PARTITION + make_tuple(&aom_masked_variance64x64_ssse3, + &aom_masked_variance64x64_c), + make_tuple(&aom_masked_variance64x32_ssse3, + &aom_masked_variance64x32_c), + make_tuple(&aom_masked_variance32x64_ssse3, + &aom_masked_variance32x64_c), + make_tuple(&aom_masked_variance32x32_ssse3, + &aom_masked_variance32x32_c), + make_tuple(&aom_masked_variance32x16_ssse3, + &aom_masked_variance32x16_c), + make_tuple(&aom_masked_variance16x32_ssse3, + &aom_masked_variance16x32_c), + make_tuple(&aom_masked_variance16x16_ssse3, + &aom_masked_variance16x16_c), + 
make_tuple(&aom_masked_variance16x8_ssse3, &aom_masked_variance16x8_c), + make_tuple(&aom_masked_variance8x16_ssse3, &aom_masked_variance8x16_c), + make_tuple(&aom_masked_variance8x8_ssse3, &aom_masked_variance8x8_c), + make_tuple(&aom_masked_variance8x4_ssse3, &aom_masked_variance8x4_c), + make_tuple(&aom_masked_variance4x8_ssse3, &aom_masked_variance4x8_c), + make_tuple(&aom_masked_variance4x4_ssse3, &aom_masked_variance4x4_c))); + +INSTANTIATE_TEST_CASE_P( + SSSE3_C_COMPARE, MaskedSubPixelVarianceTest, + ::testing::Values( +#if CONFIG_EXT_PARTITION + make_tuple(&aom_masked_sub_pixel_variance128x128_ssse3, + &aom_masked_sub_pixel_variance128x128_c), + make_tuple(&aom_masked_sub_pixel_variance128x64_ssse3, + &aom_masked_sub_pixel_variance128x64_c), + make_tuple(&aom_masked_sub_pixel_variance64x128_ssse3, + &aom_masked_sub_pixel_variance64x128_c), +#endif // CONFIG_EXT_PARTITION + make_tuple(&aom_masked_sub_pixel_variance64x64_ssse3, + &aom_masked_sub_pixel_variance64x64_c), + make_tuple(&aom_masked_sub_pixel_variance64x32_ssse3, + &aom_masked_sub_pixel_variance64x32_c), + make_tuple(&aom_masked_sub_pixel_variance32x64_ssse3, + &aom_masked_sub_pixel_variance32x64_c), + make_tuple(&aom_masked_sub_pixel_variance32x32_ssse3, + &aom_masked_sub_pixel_variance32x32_c), + make_tuple(&aom_masked_sub_pixel_variance32x16_ssse3, + &aom_masked_sub_pixel_variance32x16_c), + make_tuple(&aom_masked_sub_pixel_variance16x32_ssse3, + &aom_masked_sub_pixel_variance16x32_c), + make_tuple(&aom_masked_sub_pixel_variance16x16_ssse3, + &aom_masked_sub_pixel_variance16x16_c), + make_tuple(&aom_masked_sub_pixel_variance16x8_ssse3, + &aom_masked_sub_pixel_variance16x8_c), + make_tuple(&aom_masked_sub_pixel_variance8x16_ssse3, + &aom_masked_sub_pixel_variance8x16_c), + make_tuple(&aom_masked_sub_pixel_variance8x8_ssse3, + &aom_masked_sub_pixel_variance8x8_c), + make_tuple(&aom_masked_sub_pixel_variance8x4_ssse3, + &aom_masked_sub_pixel_variance8x4_c), + 
make_tuple(&aom_masked_sub_pixel_variance4x8_ssse3, + &aom_masked_sub_pixel_variance4x8_c), + make_tuple(&aom_masked_sub_pixel_variance4x4_ssse3, + &aom_masked_sub_pixel_variance4x4_c))); + +#if CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + SSSE3_C_COMPARE, HighbdMaskedVarianceTest, + ::testing::Values( +#if CONFIG_EXT_PARTITION + make_tuple(&aom_highbd_masked_variance128x128_ssse3, + &aom_highbd_masked_variance128x128_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_variance128x64_ssse3, + &aom_highbd_masked_variance128x64_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_variance64x128_ssse3, + &aom_highbd_masked_variance64x128_c, AOM_BITS_8), +#endif // CONFIG_EXT_PARTITION + make_tuple(&aom_highbd_masked_variance64x64_ssse3, + &aom_highbd_masked_variance64x64_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_variance64x32_ssse3, + &aom_highbd_masked_variance64x32_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_variance32x64_ssse3, + &aom_highbd_masked_variance32x64_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_variance32x32_ssse3, + &aom_highbd_masked_variance32x32_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_variance32x16_ssse3, + &aom_highbd_masked_variance32x16_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_variance16x32_ssse3, + &aom_highbd_masked_variance16x32_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_variance16x16_ssse3, + &aom_highbd_masked_variance16x16_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_variance16x8_ssse3, + &aom_highbd_masked_variance16x8_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_variance8x16_ssse3, + &aom_highbd_masked_variance8x16_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_variance8x8_ssse3, + &aom_highbd_masked_variance8x8_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_variance8x4_ssse3, + &aom_highbd_masked_variance8x4_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_variance4x8_ssse3, + &aom_highbd_masked_variance4x8_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_variance4x4_ssse3, + 
&aom_highbd_masked_variance4x4_c, AOM_BITS_8), +#if CONFIG_EXT_PARTITION + make_tuple(&aom_highbd_10_masked_variance128x128_ssse3, + &aom_highbd_10_masked_variance128x128_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_variance128x64_ssse3, + &aom_highbd_10_masked_variance128x64_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_variance64x128_ssse3, + &aom_highbd_10_masked_variance64x128_c, AOM_BITS_10), +#endif // CONFIG_EXT_PARTITION + make_tuple(&aom_highbd_10_masked_variance64x64_ssse3, + &aom_highbd_10_masked_variance64x64_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_variance64x32_ssse3, + &aom_highbd_10_masked_variance64x32_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_variance32x64_ssse3, + &aom_highbd_10_masked_variance32x64_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_variance32x32_ssse3, + &aom_highbd_10_masked_variance32x32_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_variance32x16_ssse3, + &aom_highbd_10_masked_variance32x16_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_variance16x32_ssse3, + &aom_highbd_10_masked_variance16x32_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_variance16x16_ssse3, + &aom_highbd_10_masked_variance16x16_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_variance16x8_ssse3, + &aom_highbd_10_masked_variance16x8_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_variance8x16_ssse3, + &aom_highbd_10_masked_variance8x16_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_variance8x8_ssse3, + &aom_highbd_10_masked_variance8x8_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_variance8x4_ssse3, + &aom_highbd_10_masked_variance8x4_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_variance4x8_ssse3, + &aom_highbd_10_masked_variance4x8_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_variance4x4_ssse3, + &aom_highbd_10_masked_variance4x4_c, AOM_BITS_10), +#if CONFIG_EXT_PARTITION + make_tuple(&aom_highbd_12_masked_variance128x128_ssse3, + &aom_highbd_12_masked_variance128x128_c, 
AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_variance128x64_ssse3, + &aom_highbd_12_masked_variance128x64_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_variance64x128_ssse3, + &aom_highbd_12_masked_variance64x128_c, AOM_BITS_12), +#endif // CONFIG_EXT_PARTITION + make_tuple(&aom_highbd_12_masked_variance64x64_ssse3, + &aom_highbd_12_masked_variance64x64_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_variance64x32_ssse3, + &aom_highbd_12_masked_variance64x32_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_variance32x64_ssse3, + &aom_highbd_12_masked_variance32x64_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_variance32x32_ssse3, + &aom_highbd_12_masked_variance32x32_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_variance32x16_ssse3, + &aom_highbd_12_masked_variance32x16_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_variance16x32_ssse3, + &aom_highbd_12_masked_variance16x32_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_variance16x16_ssse3, + &aom_highbd_12_masked_variance16x16_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_variance16x8_ssse3, + &aom_highbd_12_masked_variance16x8_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_variance8x16_ssse3, + &aom_highbd_12_masked_variance8x16_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_variance8x8_ssse3, + &aom_highbd_12_masked_variance8x8_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_variance8x4_ssse3, + &aom_highbd_12_masked_variance8x4_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_variance4x8_ssse3, + &aom_highbd_12_masked_variance4x8_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_variance4x4_ssse3, + &aom_highbd_12_masked_variance4x4_c, AOM_BITS_12))); + +INSTANTIATE_TEST_CASE_P( + SSSE3_C_COMPARE, HighbdMaskedSubPixelVarianceTest, + ::testing::Values( +#if CONFIG_EXT_PARTITION + make_tuple(&aom_highbd_masked_sub_pixel_variance128x128_ssse3, + &aom_highbd_masked_sub_pixel_variance128x128_c, AOM_BITS_8), + 
make_tuple(&aom_highbd_masked_sub_pixel_variance128x64_ssse3, + &aom_highbd_masked_sub_pixel_variance128x64_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_sub_pixel_variance64x128_ssse3, + &aom_highbd_masked_sub_pixel_variance64x128_c, AOM_BITS_8), +#endif // CONFIG_EXT_PARTITION + make_tuple(&aom_highbd_masked_sub_pixel_variance64x64_ssse3, + &aom_highbd_masked_sub_pixel_variance64x64_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_sub_pixel_variance64x32_ssse3, + &aom_highbd_masked_sub_pixel_variance64x32_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_sub_pixel_variance32x64_ssse3, + &aom_highbd_masked_sub_pixel_variance32x64_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_sub_pixel_variance32x32_ssse3, + &aom_highbd_masked_sub_pixel_variance32x32_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_sub_pixel_variance32x16_ssse3, + &aom_highbd_masked_sub_pixel_variance32x16_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_sub_pixel_variance16x32_ssse3, + &aom_highbd_masked_sub_pixel_variance16x32_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_sub_pixel_variance16x16_ssse3, + &aom_highbd_masked_sub_pixel_variance16x16_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_sub_pixel_variance16x8_ssse3, + &aom_highbd_masked_sub_pixel_variance16x8_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_sub_pixel_variance8x16_ssse3, + &aom_highbd_masked_sub_pixel_variance8x16_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_sub_pixel_variance8x8_ssse3, + &aom_highbd_masked_sub_pixel_variance8x8_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_sub_pixel_variance8x4_ssse3, + &aom_highbd_masked_sub_pixel_variance8x4_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_sub_pixel_variance4x8_ssse3, + &aom_highbd_masked_sub_pixel_variance4x8_c, AOM_BITS_8), + make_tuple(&aom_highbd_masked_sub_pixel_variance4x4_ssse3, + &aom_highbd_masked_sub_pixel_variance4x4_c, AOM_BITS_8), +#if CONFIG_EXT_PARTITION + make_tuple(&aom_highbd_10_masked_sub_pixel_variance128x128_ssse3, + 
&aom_highbd_10_masked_sub_pixel_variance128x128_c, + AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance128x64_ssse3, + &aom_highbd_10_masked_sub_pixel_variance128x64_c, + AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x128_ssse3, + &aom_highbd_10_masked_sub_pixel_variance64x128_c, + AOM_BITS_10), +#endif // CONFIG_EXT_PARTITION + make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x64_ssse3, + &aom_highbd_10_masked_sub_pixel_variance64x64_c, + AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x32_ssse3, + &aom_highbd_10_masked_sub_pixel_variance64x32_c, + AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x64_ssse3, + &aom_highbd_10_masked_sub_pixel_variance32x64_c, + AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x32_ssse3, + &aom_highbd_10_masked_sub_pixel_variance32x32_c, + AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x16_ssse3, + &aom_highbd_10_masked_sub_pixel_variance32x16_c, + AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x32_ssse3, + &aom_highbd_10_masked_sub_pixel_variance16x32_c, + AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x16_ssse3, + &aom_highbd_10_masked_sub_pixel_variance16x16_c, + AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x8_ssse3, + &aom_highbd_10_masked_sub_pixel_variance16x8_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x16_ssse3, + &aom_highbd_10_masked_sub_pixel_variance8x16_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x8_ssse3, + &aom_highbd_10_masked_sub_pixel_variance8x8_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x4_ssse3, + &aom_highbd_10_masked_sub_pixel_variance8x4_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance4x8_ssse3, + &aom_highbd_10_masked_sub_pixel_variance4x8_c, AOM_BITS_10), + 
make_tuple(&aom_highbd_10_masked_sub_pixel_variance4x4_ssse3, + &aom_highbd_10_masked_sub_pixel_variance4x4_c, AOM_BITS_10), +#if CONFIG_EXT_PARTITION + make_tuple(&aom_highbd_12_masked_sub_pixel_variance128x128_ssse3, + &aom_highbd_12_masked_sub_pixel_variance128x128_c, + AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance128x64_ssse3, + &aom_highbd_12_masked_sub_pixel_variance128x64_c, + AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x128_ssse3, + &aom_highbd_12_masked_sub_pixel_variance64x128_c, + AOM_BITS_12), +#endif // CONFIG_EXT_PARTITION + make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x64_ssse3, + &aom_highbd_12_masked_sub_pixel_variance64x64_c, + AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x32_ssse3, + &aom_highbd_12_masked_sub_pixel_variance64x32_c, + AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x64_ssse3, + &aom_highbd_12_masked_sub_pixel_variance32x64_c, + AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x32_ssse3, + &aom_highbd_12_masked_sub_pixel_variance32x32_c, + AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x16_ssse3, + &aom_highbd_12_masked_sub_pixel_variance32x16_c, + AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x32_ssse3, + &aom_highbd_12_masked_sub_pixel_variance16x32_c, + AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x16_ssse3, + &aom_highbd_12_masked_sub_pixel_variance16x16_c, + AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x8_ssse3, + &aom_highbd_12_masked_sub_pixel_variance16x8_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x16_ssse3, + &aom_highbd_12_masked_sub_pixel_variance8x16_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x8_ssse3, + &aom_highbd_12_masked_sub_pixel_variance8x8_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x4_ssse3, + 
&aom_highbd_12_masked_sub_pixel_variance8x4_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance4x8_ssse3, + &aom_highbd_12_masked_sub_pixel_variance4x8_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance4x4_ssse3, + &aom_highbd_12_masked_sub_pixel_variance4x4_c, + AOM_BITS_12))); +#endif // CONFIG_HIGHBITDEPTH + +#endif // HAVE_SSSE3 +} // namespace diff --git a/third_party/aom/test/md5_helper.h b/third_party/aom/test/md5_helper.h new file mode 100644 index 000000000..8c9d4f706 --- /dev/null +++ b/third_party/aom/test/md5_helper.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#ifndef TEST_MD5_HELPER_H_ +#define TEST_MD5_HELPER_H_ + +#include "./md5_utils.h" +#include "aom/aom_decoder.h" + +namespace libaom_test { +class MD5 { + public: + MD5() { MD5Init(&md5_); } + + void Add(const aom_image_t *img) { + for (int plane = 0; plane < 3; ++plane) { + const uint8_t *buf = img->planes[plane]; + // Calculate the width and height to do the md5 check. For the chroma + // plane, we never want to round down and thus skip a pixel so if + // we are shifting by 1 (chroma_shift) we add 1 before doing the shift. + // This works only for chroma_shift of 0 and 1. + const int bytes_per_sample = + (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1; + const int h = + plane ? (img->d_h + img->y_chroma_shift) >> img->y_chroma_shift + : img->d_h; + const int w = + (plane ? 
(img->d_w + img->x_chroma_shift) >> img->x_chroma_shift + : img->d_w) * + bytes_per_sample; + + for (int y = 0; y < h; ++y) { + MD5Update(&md5_, buf, w); + buf += img->stride[plane]; + } + } + } + + void Add(const uint8_t *data, size_t size) { + MD5Update(&md5_, data, static_cast<uint32_t>(size)); + } + + const char *Get(void) { + static const char hex[16] = { + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', + }; + uint8_t tmp[16]; + MD5Context ctx_tmp = md5_; + + MD5Final(tmp, &ctx_tmp); + for (int i = 0; i < 16; i++) { + res_[i * 2 + 0] = hex[tmp[i] >> 4]; + res_[i * 2 + 1] = hex[tmp[i] & 0xf]; + } + res_[32] = 0; + + return res_; + } + + protected: + char res_[33]; + MD5Context md5_; +}; + +} // namespace libaom_test + +#endif // TEST_MD5_HELPER_H_ diff --git a/third_party/aom/test/minmax_test.cc b/third_party/aom/test/minmax_test.cc new file mode 100644 index 000000000..f82529192 --- /dev/null +++ b/third_party/aom/test/minmax_test.cc @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <stdlib.h> +#include <string.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_dsp_rtcd.h" +#include "aom/aom_integer.h" + +#include "test/acm_random.h" +#include "test/register_state_check.h" + +namespace { + +using ::libaom_test::ACMRandom; + +typedef void (*MinMaxFunc)(const uint8_t *a, int a_stride, const uint8_t *b, + int b_stride, int *min, int *max); + +class MinMaxTest : public ::testing::TestWithParam<MinMaxFunc> { + public: + virtual void SetUp() { + mm_func_ = GetParam(); + rnd_.Reset(ACMRandom::DeterministicSeed()); + } + + protected: + MinMaxFunc mm_func_; + ACMRandom rnd_; +}; + +void reference_minmax(const uint8_t *a, int a_stride, const uint8_t *b, + int b_stride, int *min_ret, int *max_ret) { + int min = 255; + int max = 0; + for (int i = 0; i < 8; i++) { + for (int j = 0; j < 8; j++) { + const int diff = abs(a[i * a_stride + j] - b[i * b_stride + j]); + if (min > diff) min = diff; + if (max < diff) max = diff; + } + } + + *min_ret = min; + *max_ret = max; +} + +TEST_P(MinMaxTest, MinValue) { + for (int i = 0; i < 64; i++) { + uint8_t a[64], b[64]; + memset(a, 0, sizeof(a)); + memset(b, 255, sizeof(b)); + b[i] = i; // Set a minimum difference of i. + + int min, max; + ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max)); + EXPECT_EQ(255, max); + EXPECT_EQ(i, min); + } +} + +TEST_P(MinMaxTest, MaxValue) { + for (int i = 0; i < 64; i++) { + uint8_t a[64], b[64]; + memset(a, 0, sizeof(a)); + memset(b, 0, sizeof(b)); + b[i] = i; // Set a maximum difference of i. 
+ + int min, max; + ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max)); + EXPECT_EQ(i, max); + EXPECT_EQ(0, min); + } +} + +TEST_P(MinMaxTest, CompareReference) { + uint8_t a[64], b[64]; + for (int j = 0; j < 64; j++) { + a[j] = rnd_.Rand8(); + b[j] = rnd_.Rand8(); + } + + int min_ref, max_ref, min, max; + reference_minmax(a, 8, b, 8, &min_ref, &max_ref); + ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max)); + EXPECT_EQ(max_ref, max); + EXPECT_EQ(min_ref, min); +} + +TEST_P(MinMaxTest, CompareReferenceAndVaryStride) { + uint8_t a[8 * 64], b[8 * 64]; + for (int i = 0; i < 8 * 64; i++) { + a[i] = rnd_.Rand8(); + b[i] = rnd_.Rand8(); + } + for (int a_stride = 8; a_stride <= 64; a_stride += 8) { + for (int b_stride = 8; b_stride <= 64; b_stride += 8) { + int min_ref, max_ref, min, max; + reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref); + ASM_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max)); + EXPECT_EQ(max_ref, max) << "when a_stride = " << a_stride + << " and b_stride = " << b_stride; + EXPECT_EQ(min_ref, min) << "when a_stride = " << a_stride + << " and b_stride = " << b_stride; + } + } +} + +INSTANTIATE_TEST_CASE_P(C, MinMaxTest, ::testing::Values(&aom_minmax_8x8_c)); + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P(SSE2, MinMaxTest, + ::testing::Values(&aom_minmax_8x8_sse2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_CASE_P(NEON, MinMaxTest, + ::testing::Values(&aom_minmax_8x8_neon)); +#endif + +} // namespace diff --git a/third_party/aom/test/motion_vector_test.cc b/third_party/aom/test/motion_vector_test.cc new file mode 100644 index 000000000..403a8f1a7 --- /dev/null +++ b/third_party/aom/test/motion_vector_test.cc @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/util.h" +#include "test/yuv_video_source.h" + +namespace { +#if defined(__has_feature) +#if __has_feature(address_sanitizer) +#define BUILDING_WITH_ASAN +#endif +#endif + +#define MAX_EXTREME_MV 1 +#define MIN_EXTREME_MV 2 + +// Encoding modes +const libaom_test::TestMode kEncodingModeVectors[] = { + ::libaom_test::kTwoPassGood, ::libaom_test::kOnePassGood, +}; + +// Encoding speeds +const int kCpuUsedVectors[] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + +// MV test modes: 1 - always use maximum MV; 2 - always use minimum MV. 
+const int kMVTestModes[] = { MAX_EXTREME_MV, MIN_EXTREME_MV }; + +class MotionVectorTestLarge + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode, int, + int> { + protected: + MotionVectorTestLarge() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + cpu_used_(GET_PARAM(2)), mv_test_mode_(GET_PARAM(3)) {} + + virtual ~MotionVectorTestLarge() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(encoding_mode_); + if (encoding_mode_ != ::libaom_test::kRealTime) { + cfg_.g_lag_in_frames = 3; + cfg_.rc_end_usage = AOM_VBR; + } else { + cfg_.g_lag_in_frames = 0; + cfg_.rc_end_usage = AOM_CBR; + cfg_.rc_buf_sz = 1000; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 600; + } + } + + virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) { + if (video->frame() == 1) { + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + encoder->Control(AV1E_ENABLE_MOTION_VECTOR_UNIT_TEST, mv_test_mode_); + if (encoding_mode_ != ::libaom_test::kRealTime) { + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + } + } + } + + libaom_test::TestMode encoding_mode_; + int cpu_used_; + int mv_test_mode_; +}; + +TEST_P(MotionVectorTestLarge, OverallTest) { + int width = 3840; + int height = 2160; + +#ifdef BUILDING_WITH_ASAN + // On the 32-bit system, if using 4k test clip, an "out of memory" error + // occurs because of the AddressSanitizer instrumentation memory overhead. + // Here, reduce the test clip's resolution while testing on 32-bit system + // and AddressSanitizer is enabled. 
+ if (sizeof(void *) == 4) { + width = 2048; + height = 1080; + } +#endif + + cfg_.rc_target_bitrate = 24000; + cfg_.g_profile = 0; + init_flags_ = AOM_CODEC_USE_PSNR; + + testing::internal::scoped_ptr<libaom_test::VideoSource> video; + video.reset(new libaom_test::YUVVideoSource( + "niklas_640_480_30.yuv", AOM_IMG_FMT_I420, width, height, 30, 1, 0, 5)); + + ASSERT_TRUE(video.get() != NULL); + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); +} + +AV1_INSTANTIATE_TEST_CASE(MotionVectorTestLarge, + ::testing::ValuesIn(kEncodingModeVectors), + ::testing::ValuesIn(kCpuUsedVectors), + ::testing::ValuesIn(kMVTestModes)); +} // namespace diff --git a/third_party/aom/test/obmc_sad_test.cc b/third_party/aom/test/obmc_sad_test.cc new file mode 100644 index 000000000..219c5d810 --- /dev/null +++ b/third_party/aom/test/obmc_sad_test.cc @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/function_equivalence_test.h" +#include "test/register_state_check.h" + +#include "./aom_config.h" +#include "./aom_dsp_rtcd.h" +#include "aom/aom_integer.h" + +#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE) + +using libaom_test::FunctionEquivalenceTest; + +namespace { + +static const int kIterations = 1000; +static const int kMaskMax = 64; + +typedef unsigned int (*ObmcSadF)(const uint8_t *pre, int pre_stride, + const int32_t *wsrc, const int32_t *mask); +typedef libaom_test::FuncParam<ObmcSadF> TestFuncs; + +//////////////////////////////////////////////////////////////////////////////// +// 8 bit +//////////////////////////////////////////////////////////////////////////////// + +class ObmcSadTest : public FunctionEquivalenceTest<ObmcSadF> {}; + +TEST_P(ObmcSadTest, RandomValues) { + DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + const int pre_stride = rng_(MAX_SB_SIZE + 1); + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pre[i] = rng_.Rand8(); + wsrc[i] = rng_.Rand8() * rng_(kMaskMax * kMaskMax + 1); + mask[i] = rng_(kMaskMax * kMaskMax + 1); + } + + const unsigned int ref_res = params_.ref_func(pre, pre_stride, wsrc, mask); + unsigned int tst_res; + ASM_REGISTER_STATE_CHECK(tst_res = + params_.tst_func(pre, pre_stride, wsrc, mask)); + + ASSERT_EQ(ref_res, tst_res); + } +} + +TEST_P(ObmcSadTest, ExtremeValues) { + DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]); + + for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) { + const int pre_stride = iter; + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pre[i] = UINT8_MAX; + wsrc[i] = UINT8_MAX * kMaskMax * kMaskMax; 
+ mask[i] = kMaskMax * kMaskMax; + } + + const unsigned int ref_res = params_.ref_func(pre, pre_stride, wsrc, mask); + unsigned int tst_res; + ASM_REGISTER_STATE_CHECK(tst_res = + params_.tst_func(pre, pre_stride, wsrc, mask)); + + ASSERT_EQ(ref_res, tst_res); + } +} + +#if HAVE_SSE4_1 +#if CONFIG_MOTION_VAR +const ObmcSadTest::ParamType sse4_functions[] = { +#if CONFIG_EXT_PARTITION + TestFuncs(aom_obmc_sad128x128_c, aom_obmc_sad128x128_sse4_1), + TestFuncs(aom_obmc_sad128x64_c, aom_obmc_sad128x64_sse4_1), + TestFuncs(aom_obmc_sad64x128_c, aom_obmc_sad64x128_sse4_1), +#endif // CONFIG_EXT_PARTITION + TestFuncs(aom_obmc_sad64x64_c, aom_obmc_sad64x64_sse4_1), + TestFuncs(aom_obmc_sad64x32_c, aom_obmc_sad64x32_sse4_1), + TestFuncs(aom_obmc_sad32x64_c, aom_obmc_sad32x64_sse4_1), + TestFuncs(aom_obmc_sad32x32_c, aom_obmc_sad32x32_sse4_1), + TestFuncs(aom_obmc_sad32x16_c, aom_obmc_sad32x16_sse4_1), + TestFuncs(aom_obmc_sad16x32_c, aom_obmc_sad16x32_sse4_1), + TestFuncs(aom_obmc_sad16x16_c, aom_obmc_sad16x16_sse4_1), + TestFuncs(aom_obmc_sad16x8_c, aom_obmc_sad16x8_sse4_1), + TestFuncs(aom_obmc_sad8x16_c, aom_obmc_sad8x16_sse4_1), + TestFuncs(aom_obmc_sad8x8_c, aom_obmc_sad8x8_sse4_1), + TestFuncs(aom_obmc_sad8x4_c, aom_obmc_sad8x4_sse4_1), + TestFuncs(aom_obmc_sad4x8_c, aom_obmc_sad4x8_sse4_1), + TestFuncs(aom_obmc_sad4x4_c, aom_obmc_sad4x4_sse4_1) +}; + +INSTANTIATE_TEST_CASE_P(SSE4_1, ObmcSadTest, + ::testing::ValuesIn(sse4_functions)); +#endif // CONFIG_MOTION_VAR +#endif // HAVE_SSE4_1 + +//////////////////////////////////////////////////////////////////////////////// +// High bit-depth +//////////////////////////////////////////////////////////////////////////////// + +#if CONFIG_HIGHBITDEPTH +class ObmcSadHBDTest : public FunctionEquivalenceTest<ObmcSadF> {}; + +TEST_P(ObmcSadHBDTest, RandomValues) { + DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]); + + for 
(int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + const int pre_stride = rng_(MAX_SB_SIZE + 1); + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pre[i] = rng_(1 << 12); + wsrc[i] = rng_(1 << 12) * rng_(kMaskMax * kMaskMax + 1); + mask[i] = rng_(kMaskMax * kMaskMax + 1); + } + + const unsigned int ref_res = + params_.ref_func(CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask); + unsigned int tst_res; + ASM_REGISTER_STATE_CHECK( + tst_res = + params_.tst_func(CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask)); + + ASSERT_EQ(ref_res, tst_res); + } +} + +TEST_P(ObmcSadHBDTest, ExtremeValues) { + DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]); + + for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) { + const int pre_stride = iter; + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pre[i] = (1 << 12) - 1; + wsrc[i] = ((1 << 12) - 1) * kMaskMax * kMaskMax; + mask[i] = kMaskMax * kMaskMax; + } + + const unsigned int ref_res = + params_.ref_func(CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask); + unsigned int tst_res; + ASM_REGISTER_STATE_CHECK( + tst_res = + params_.tst_func(CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask)); + + ASSERT_EQ(ref_res, tst_res); + } +} + +#if HAVE_SSE4_1 +#if CONFIG_MOTION_VAR +ObmcSadHBDTest::ParamType sse4_functions_hbd[] = { +#if CONFIG_EXT_PARTITION + TestFuncs(aom_highbd_obmc_sad128x128_c, aom_highbd_obmc_sad128x128_sse4_1), + TestFuncs(aom_highbd_obmc_sad128x64_c, aom_highbd_obmc_sad128x64_sse4_1), + TestFuncs(aom_highbd_obmc_sad64x128_c, aom_highbd_obmc_sad64x128_sse4_1), +#endif // CONFIG_EXT_PARTITION + TestFuncs(aom_highbd_obmc_sad64x64_c, aom_highbd_obmc_sad64x64_sse4_1), + TestFuncs(aom_highbd_obmc_sad64x32_c, aom_highbd_obmc_sad64x32_sse4_1), + TestFuncs(aom_highbd_obmc_sad32x64_c, aom_highbd_obmc_sad32x64_sse4_1), + TestFuncs(aom_highbd_obmc_sad32x32_c, aom_highbd_obmc_sad32x32_sse4_1), + 
TestFuncs(aom_highbd_obmc_sad32x16_c, aom_highbd_obmc_sad32x16_sse4_1), + TestFuncs(aom_highbd_obmc_sad16x32_c, aom_highbd_obmc_sad16x32_sse4_1), + TestFuncs(aom_highbd_obmc_sad16x16_c, aom_highbd_obmc_sad16x16_sse4_1), + TestFuncs(aom_highbd_obmc_sad16x8_c, aom_highbd_obmc_sad16x8_sse4_1), + TestFuncs(aom_highbd_obmc_sad8x16_c, aom_highbd_obmc_sad8x16_sse4_1), + TestFuncs(aom_highbd_obmc_sad8x8_c, aom_highbd_obmc_sad8x8_sse4_1), + TestFuncs(aom_highbd_obmc_sad8x4_c, aom_highbd_obmc_sad8x4_sse4_1), + TestFuncs(aom_highbd_obmc_sad4x8_c, aom_highbd_obmc_sad4x8_sse4_1), + TestFuncs(aom_highbd_obmc_sad4x4_c, aom_highbd_obmc_sad4x4_sse4_1) +}; + +INSTANTIATE_TEST_CASE_P(SSE4_1, ObmcSadHBDTest, + ::testing::ValuesIn(sse4_functions_hbd)); +#endif // CONFIG_MOTION_VAR +#endif // HAVE_SSE4_1 +#endif // CONFIG_HIGHBITDEPTH +} // namespace diff --git a/third_party/aom/test/obmc_variance_test.cc b/third_party/aom/test/obmc_variance_test.cc new file mode 100644 index 000000000..1b30645a5 --- /dev/null +++ b/third_party/aom/test/obmc_variance_test.cc @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/acm_random.h" + +#include "test/function_equivalence_test.h" +#include "test/register_state_check.h" + +#include "./aom_config.h" +#include "./aom_dsp_rtcd.h" +#include "aom/aom_integer.h" + +#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE) + +using libaom_test::ACMRandom; +using libaom_test::FunctionEquivalenceTest; + +namespace { + +static const int kIterations = 1000; +static const int kMaskMax = 64; + +typedef unsigned int (*ObmcVarF)(const uint8_t *pre, int pre_stride, + const int32_t *wsrc, const int32_t *mask, + unsigned int *sse); +typedef libaom_test::FuncParam<ObmcVarF> TestFuncs; + +//////////////////////////////////////////////////////////////////////////////// +// 8 bit +//////////////////////////////////////////////////////////////////////////////// + +class ObmcVarianceTest : public FunctionEquivalenceTest<ObmcVarF> {}; + +TEST_P(ObmcVarianceTest, RandomValues) { + DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + const int pre_stride = this->rng_(MAX_SB_SIZE + 1); + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pre[i] = this->rng_.Rand8(); + wsrc[i] = this->rng_.Rand8() * this->rng_(kMaskMax * kMaskMax + 1); + mask[i] = this->rng_(kMaskMax * kMaskMax + 1); + } + + unsigned int ref_sse, tst_sse; + const unsigned int ref_res = + params_.ref_func(pre, pre_stride, wsrc, mask, &ref_sse); + unsigned int tst_res; + ASM_REGISTER_STATE_CHECK( + tst_res = params_.tst_func(pre, pre_stride, wsrc, mask, &tst_sse)); + + ASSERT_EQ(ref_res, tst_res); + ASSERT_EQ(ref_sse, tst_sse); + } +} + +TEST_P(ObmcVarianceTest, ExtremeValues) { + DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]); + + 
for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) { + const int pre_stride = iter; + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pre[i] = UINT8_MAX; + wsrc[i] = UINT8_MAX * kMaskMax * kMaskMax; + mask[i] = kMaskMax * kMaskMax; + } + + unsigned int ref_sse, tst_sse; + const unsigned int ref_res = + params_.ref_func(pre, pre_stride, wsrc, mask, &ref_sse); + unsigned int tst_res; + ASM_REGISTER_STATE_CHECK( + tst_res = params_.tst_func(pre, pre_stride, wsrc, mask, &tst_sse)); + + ASSERT_EQ(ref_res, tst_res); + ASSERT_EQ(ref_sse, tst_sse); + } +} + +#if HAVE_SSE4_1 +#if CONFIG_MOTION_VAR +const ObmcVarianceTest::ParamType sse4_functions[] = { +#if CONFIG_EXT_PARTITION + TestFuncs(aom_obmc_variance128x128_c, aom_obmc_variance128x128_sse4_1), + TestFuncs(aom_obmc_variance128x64_c, aom_obmc_variance128x64_sse4_1), + TestFuncs(aom_obmc_variance64x128_c, aom_obmc_variance64x128_sse4_1), +#endif // CONFIG_EXT_PARTITION + TestFuncs(aom_obmc_variance64x64_c, aom_obmc_variance64x64_sse4_1), + TestFuncs(aom_obmc_variance64x32_c, aom_obmc_variance64x32_sse4_1), + TestFuncs(aom_obmc_variance32x64_c, aom_obmc_variance32x64_sse4_1), + TestFuncs(aom_obmc_variance32x32_c, aom_obmc_variance32x32_sse4_1), + TestFuncs(aom_obmc_variance32x16_c, aom_obmc_variance32x16_sse4_1), + TestFuncs(aom_obmc_variance16x32_c, aom_obmc_variance16x32_sse4_1), + TestFuncs(aom_obmc_variance16x16_c, aom_obmc_variance16x16_sse4_1), + TestFuncs(aom_obmc_variance16x8_c, aom_obmc_variance16x8_sse4_1), + TestFuncs(aom_obmc_variance8x16_c, aom_obmc_variance8x16_sse4_1), + TestFuncs(aom_obmc_variance8x8_c, aom_obmc_variance8x8_sse4_1), + TestFuncs(aom_obmc_variance8x4_c, aom_obmc_variance8x4_sse4_1), + TestFuncs(aom_obmc_variance4x8_c, aom_obmc_variance4x8_sse4_1), + TestFuncs(aom_obmc_variance4x4_c, aom_obmc_variance4x4_sse4_1) +}; + +INSTANTIATE_TEST_CASE_P(SSE4_1, ObmcVarianceTest, + ::testing::ValuesIn(sse4_functions)); +#endif // CONFIG_MOTION_VAR +#endif // HAVE_SSE4_1 + 
+//////////////////////////////////////////////////////////////////////////////// +// High bit-depth +//////////////////////////////////////////////////////////////////////////////// + +#if CONFIG_HIGHBITDEPTH +class ObmcVarianceHBDTest : public FunctionEquivalenceTest<ObmcVarF> {}; + +TEST_P(ObmcVarianceHBDTest, RandomValues) { + DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + const int pre_stride = this->rng_(MAX_SB_SIZE + 1); + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pre[i] = this->rng_(1 << params_.bit_depth); + wsrc[i] = this->rng_(1 << params_.bit_depth) * + this->rng_(kMaskMax * kMaskMax + 1); + mask[i] = this->rng_(kMaskMax * kMaskMax + 1); + } + + unsigned int ref_sse, tst_sse; + const unsigned int ref_res = params_.ref_func( + CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask, &ref_sse); + unsigned int tst_res; + ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(CONVERT_TO_BYTEPTR(pre), + pre_stride, wsrc, mask, + &tst_sse)); + + ASSERT_EQ(ref_res, tst_res); + ASSERT_EQ(ref_sse, tst_sse); + } +} + +TEST_P(ObmcVarianceHBDTest, ExtremeValues) { + DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]); + + for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) { + const int pre_stride = iter; + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pre[i] = (1 << params_.bit_depth) - 1; + wsrc[i] = ((1 << params_.bit_depth) - 1) * kMaskMax * kMaskMax; + mask[i] = kMaskMax * kMaskMax; + } + + unsigned int ref_sse, tst_sse; + const unsigned int ref_res = params_.ref_func( + CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask, &ref_sse); + unsigned int tst_res; + ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(CONVERT_TO_BYTEPTR(pre), + pre_stride, wsrc, mask, + 
&tst_sse)); + + ASSERT_EQ(ref_res, tst_res); + ASSERT_EQ(ref_sse, tst_sse); + } +} + +#if HAVE_SSE4_1 +#if CONFIG_MOTION_VAR +ObmcVarianceHBDTest::ParamType sse4_functions_hbd[] = { +#if CONFIG_EXT_PARTITION + TestFuncs(aom_highbd_obmc_variance128x128_c, + aom_highbd_obmc_variance128x128_sse4_1, 8), + TestFuncs(aom_highbd_obmc_variance128x64_c, + aom_highbd_obmc_variance128x64_sse4_1, 8), + TestFuncs(aom_highbd_obmc_variance64x128_c, + aom_highbd_obmc_variance64x128_sse4_1, 8), +#endif // CONFIG_EXT_PARTITION + TestFuncs(aom_highbd_obmc_variance64x64_c, + aom_highbd_obmc_variance64x64_sse4_1, 8), + TestFuncs(aom_highbd_obmc_variance64x32_c, + aom_highbd_obmc_variance64x32_sse4_1, 8), + TestFuncs(aom_highbd_obmc_variance32x64_c, + aom_highbd_obmc_variance32x64_sse4_1, 8), + TestFuncs(aom_highbd_obmc_variance32x32_c, + aom_highbd_obmc_variance32x32_sse4_1, 8), + TestFuncs(aom_highbd_obmc_variance32x16_c, + aom_highbd_obmc_variance32x16_sse4_1, 8), + TestFuncs(aom_highbd_obmc_variance16x32_c, + aom_highbd_obmc_variance16x32_sse4_1, 8), + TestFuncs(aom_highbd_obmc_variance16x16_c, + aom_highbd_obmc_variance16x16_sse4_1, 8), + TestFuncs(aom_highbd_obmc_variance16x8_c, aom_highbd_obmc_variance16x8_sse4_1, + 8), + TestFuncs(aom_highbd_obmc_variance8x16_c, aom_highbd_obmc_variance8x16_sse4_1, + 8), + TestFuncs(aom_highbd_obmc_variance8x8_c, aom_highbd_obmc_variance8x8_sse4_1, + 8), + TestFuncs(aom_highbd_obmc_variance8x4_c, aom_highbd_obmc_variance8x4_sse4_1, + 8), + TestFuncs(aom_highbd_obmc_variance4x8_c, aom_highbd_obmc_variance4x8_sse4_1, + 8), + TestFuncs(aom_highbd_obmc_variance4x4_c, aom_highbd_obmc_variance4x4_sse4_1, + 8), +#if CONFIG_EXT_PARTITION + TestFuncs(aom_highbd_10_obmc_variance128x128_c, + aom_highbd_10_obmc_variance128x128_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance128x64_c, + aom_highbd_10_obmc_variance128x64_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance64x128_c, + aom_highbd_10_obmc_variance64x128_sse4_1, 10), +#endif // 
CONFIG_EXT_PARTITION + TestFuncs(aom_highbd_10_obmc_variance64x64_c, + aom_highbd_10_obmc_variance64x64_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance64x32_c, + aom_highbd_10_obmc_variance64x32_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance32x64_c, + aom_highbd_10_obmc_variance32x64_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance32x32_c, + aom_highbd_10_obmc_variance32x32_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance32x16_c, + aom_highbd_10_obmc_variance32x16_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance16x32_c, + aom_highbd_10_obmc_variance16x32_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance16x16_c, + aom_highbd_10_obmc_variance16x16_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance16x8_c, + aom_highbd_10_obmc_variance16x8_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance8x16_c, + aom_highbd_10_obmc_variance8x16_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance8x8_c, + aom_highbd_10_obmc_variance8x8_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance8x4_c, + aom_highbd_10_obmc_variance8x4_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance4x8_c, + aom_highbd_10_obmc_variance4x8_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance4x4_c, + aom_highbd_10_obmc_variance4x4_sse4_1, 10), +#if CONFIG_EXT_PARTITION + TestFuncs(aom_highbd_12_obmc_variance128x128_c, + aom_highbd_12_obmc_variance128x128_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance128x64_c, + aom_highbd_12_obmc_variance128x64_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance64x128_c, + aom_highbd_12_obmc_variance64x128_sse4_1, 12), +#endif // CONFIG_EXT_PARTITION + TestFuncs(aom_highbd_12_obmc_variance64x64_c, + aom_highbd_12_obmc_variance64x64_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance64x32_c, + aom_highbd_12_obmc_variance64x32_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance32x64_c, + aom_highbd_12_obmc_variance32x64_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance32x32_c, + aom_highbd_12_obmc_variance32x32_sse4_1, 12), + 
TestFuncs(aom_highbd_12_obmc_variance32x16_c, + aom_highbd_12_obmc_variance32x16_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance16x32_c, + aom_highbd_12_obmc_variance16x32_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance16x16_c, + aom_highbd_12_obmc_variance16x16_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance16x8_c, + aom_highbd_12_obmc_variance16x8_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance8x16_c, + aom_highbd_12_obmc_variance8x16_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance8x8_c, + aom_highbd_12_obmc_variance8x8_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance8x4_c, + aom_highbd_12_obmc_variance8x4_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance4x8_c, + aom_highbd_12_obmc_variance4x8_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance4x4_c, + aom_highbd_12_obmc_variance4x4_sse4_1, 12) +}; + +INSTANTIATE_TEST_CASE_P(SSE4_1, ObmcVarianceHBDTest, + ::testing::ValuesIn(sse4_functions_hbd)); +#endif // CONFIG_MOTION_VAR +#endif // HAVE_SSE4_1 +#endif // CONFIG_HIGHBITDEPTH +} // namespace diff --git a/third_party/aom/test/partial_idct_test.cc b/third_party/aom/test/partial_idct_test.cc new file mode 100644 index 000000000..0899b60c3 --- /dev/null +++ b/third_party/aom/test/partial_idct_test.cc @@ -0,0 +1,485 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+*/ + +#include <math.h> +#include <stdlib.h> +#include <string.h> +#include <limits> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./av1_rtcd.h" +#include "./aom_dsp_rtcd.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "av1/common/blockd.h" +#include "av1/common/scan.h" +#include "aom/aom_integer.h" +#include "aom_ports/aom_timer.h" + +using libaom_test::ACMRandom; + +namespace { +typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride); +typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride); +typedef void (*InvTxfmWithBdFunc)(const tran_low_t *in, uint8_t *out, + int stride, int bd); + +template <InvTxfmFunc fn> +void wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) { + (void)bd; + fn(in, out, stride); +} + +#if CONFIG_HIGHBITDEPTH +template <InvTxfmWithBdFunc fn> +void highbd_wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) { + fn(in, CONVERT_TO_BYTEPTR(out), stride, bd); +} +#endif + +typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmWithBdFunc, InvTxfmWithBdFunc, + TX_SIZE, int, int, int> + PartialInvTxfmParam; +const int kMaxNumCoeffs = 1024; +const int kCountTestBlock = 1000; + +class PartialIDctTest : public ::testing::TestWithParam<PartialInvTxfmParam> { + public: + virtual ~PartialIDctTest() {} + virtual void SetUp() { + rnd_.Reset(ACMRandom::DeterministicSeed()); + ftxfm_ = GET_PARAM(0); + full_itxfm_ = GET_PARAM(1); + partial_itxfm_ = GET_PARAM(2); + tx_size_ = GET_PARAM(3); + last_nonzero_ = GET_PARAM(4); + bit_depth_ = GET_PARAM(5); + pixel_size_ = GET_PARAM(6); + mask_ = (1 << bit_depth_) - 1; + + switch (tx_size_) { + case TX_4X4: size_ = 4; break; + case TX_8X8: size_ = 8; break; + case TX_16X16: size_ = 16; break; + case TX_32X32: size_ = 32; break; + default: FAIL() << "Wrong Size!"; break; + } + + // Randomize stride_ to a value less than or 
equal to 1024 + stride_ = rnd_(1024) + 1; + if (stride_ < size_) { + stride_ = size_; + } + // Align stride_ to 16 if it's bigger than 16. + if (stride_ > 16) { + stride_ &= ~15; + } + + input_block_size_ = size_ * size_; + output_block_size_ = size_ * stride_; + + input_block_ = reinterpret_cast<tran_low_t *>( + aom_memalign(16, sizeof(*input_block_) * input_block_size_)); + output_block_ = reinterpret_cast<uint8_t *>( + aom_memalign(16, pixel_size_ * output_block_size_)); + output_block_ref_ = reinterpret_cast<uint8_t *>( + aom_memalign(16, pixel_size_ * output_block_size_)); + } + + virtual void TearDown() { + aom_free(input_block_); + input_block_ = NULL; + aom_free(output_block_); + output_block_ = NULL; + aom_free(output_block_ref_); + output_block_ref_ = NULL; + libaom_test::ClearSystemState(); + } + + void InitMem() { + memset(input_block_, 0, sizeof(*input_block_) * input_block_size_); + if (pixel_size_ == 1) { + for (int j = 0; j < output_block_size_; ++j) { + output_block_[j] = output_block_ref_[j] = rnd_.Rand16() & mask_; + } + } else { + ASSERT_EQ(2, pixel_size_); + uint16_t *const output = reinterpret_cast<uint16_t *>(output_block_); + uint16_t *const output_ref = + reinterpret_cast<uint16_t *>(output_block_ref_); + for (int j = 0; j < output_block_size_; ++j) { + output[j] = output_ref[j] = rnd_.Rand16() & mask_; + } + } + } + + void InitInput() { + const int max_coeff = 32766 / 4; + int max_energy_leftover = max_coeff * max_coeff; + for (int j = 0; j < last_nonzero_; ++j) { + int16_t coeff = static_cast<int16_t>(sqrt(1.0 * max_energy_leftover) * + (rnd_.Rand16() - 32768) / 65536); + max_energy_leftover -= coeff * coeff; + if (max_energy_leftover < 0) { + max_energy_leftover = 0; + coeff = 0; + } + input_block_[av1_default_scan_orders[tx_size_].scan[j]] = coeff; + } + } + + protected: + int last_nonzero_; + TX_SIZE tx_size_; + tran_low_t *input_block_; + uint8_t *output_block_; + uint8_t *output_block_ref_; + int size_; + int stride_; + int 
pixel_size_; + int input_block_size_; + int output_block_size_; + int bit_depth_; + int mask_; + FwdTxfmFunc ftxfm_; + InvTxfmWithBdFunc full_itxfm_; + InvTxfmWithBdFunc partial_itxfm_; + ACMRandom rnd_; +}; + +TEST_P(PartialIDctTest, RunQuantCheck) { + DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]); + + InitMem(); + for (int i = 0; i < kCountTestBlock; ++i) { + // Initialize a test block with input range [-mask_, mask_]. + if (i == 0) { + for (int k = 0; k < input_block_size_; ++k) { + input_extreme_block[k] = mask_; + } + } else if (i == 1) { + for (int k = 0; k < input_block_size_; ++k) { + input_extreme_block[k] = -mask_; + } + } else { + for (int k = 0; k < input_block_size_; ++k) { + input_extreme_block[k] = rnd_.Rand8() % 2 ? mask_ : -mask_; + } + } + + ftxfm_(input_extreme_block, output_ref_block, size_); + + // quantization with minimum allowed step sizes + input_block_[0] = (output_ref_block[0] / 4) * 4; + for (int k = 1; k < last_nonzero_; ++k) { + const int pos = av1_default_scan_orders[tx_size_].scan[k]; + input_block_[pos] = (output_ref_block[pos] / 4) * 4; + } + + ASM_REGISTER_STATE_CHECK( + full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_)); + ASM_REGISTER_STATE_CHECK( + partial_itxfm_(input_block_, output_block_, stride_, bit_depth_)); + ASSERT_EQ(0, memcmp(output_block_ref_, output_block_, + pixel_size_ * output_block_size_)) + << "Error: partial inverse transform produces different results"; + } +} + +TEST_P(PartialIDctTest, ResultsMatch) { + for (int i = 0; i < kCountTestBlock; ++i) { + InitMem(); + InitInput(); + + ASM_REGISTER_STATE_CHECK( + full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_)); + ASM_REGISTER_STATE_CHECK( + partial_itxfm_(input_block_, output_block_, stride_, bit_depth_)); + ASSERT_EQ(0, memcmp(output_block_ref_, output_block_, + pixel_size_ * output_block_size_)) + << "Error: partial inverse transform produces 
different results"; + } +} + +TEST_P(PartialIDctTest, AddOutputBlock) { + for (int i = 0; i < kCountTestBlock; ++i) { + InitMem(); + for (int j = 0; j < last_nonzero_; ++j) { + input_block_[av1_default_scan_orders[tx_size_].scan[j]] = 10; + } + + ASM_REGISTER_STATE_CHECK( + full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_)); + ASM_REGISTER_STATE_CHECK( + partial_itxfm_(input_block_, output_block_, stride_, bit_depth_)); + ASSERT_EQ(0, memcmp(output_block_ref_, output_block_, + pixel_size_ * output_block_size_)) + << "Error: Transform results are not correctly added to output."; + } +} + +TEST_P(PartialIDctTest, SingleExtremeCoeff) { + const int16_t max_coeff = std::numeric_limits<int16_t>::max(); + const int16_t min_coeff = std::numeric_limits<int16_t>::min(); + for (int i = 0; i < last_nonzero_; ++i) { + memset(input_block_, 0, sizeof(*input_block_) * input_block_size_); + // Run once for min and once for max. + for (int j = 0; j < 2; ++j) { + const int coeff = j ? min_coeff : max_coeff; + + memset(output_block_, 0, pixel_size_ * output_block_size_); + memset(output_block_ref_, 0, pixel_size_ * output_block_size_); + input_block_[av1_default_scan_orders[tx_size_].scan[i]] = coeff; + + ASM_REGISTER_STATE_CHECK( + full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_)); + ASM_REGISTER_STATE_CHECK( + partial_itxfm_(input_block_, output_block_, stride_, bit_depth_)); + ASSERT_EQ(0, memcmp(output_block_ref_, output_block_, + pixel_size_ * output_block_size_)) + << "Error: Fails with single coeff of " << coeff << " at " << i + << "."; + } + } +} + +TEST_P(PartialIDctTest, DISABLED_Speed) { + // Keep runtime stable with transform size. 
+ const int kCountSpeedTestBlock = 500000000 / input_block_size_; + InitMem(); + InitInput(); + + for (int i = 0; i < kCountSpeedTestBlock; ++i) { + ASM_REGISTER_STATE_CHECK( + full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_)); + } + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < kCountSpeedTestBlock; ++i) { + partial_itxfm_(input_block_, output_block_, stride_, bit_depth_); + } + libaom_test::ClearSystemState(); + aom_usec_timer_mark(&timer); + const int elapsed_time = + static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000); + printf("idct%dx%d_%d (bitdepth %d) time: %5d ms\n", size_, size_, + last_nonzero_, bit_depth_, elapsed_time); + + ASSERT_EQ(0, memcmp(output_block_ref_, output_block_, + pixel_size_ * output_block_size_)) + << "Error: partial inverse transform produces different results"; +} + +using std::tr1::make_tuple; + +const PartialInvTxfmParam c_partial_idct_tests[] = { +#if CONFIG_HIGHBITDEPTH + make_tuple(&aom_highbd_fdct4x4_c, + &highbd_wrapper<aom_highbd_idct4x4_16_add_c>, + &highbd_wrapper<aom_highbd_idct4x4_16_add_c>, TX_4X4, 16, 8, 2), + make_tuple(&aom_highbd_fdct4x4_c, + &highbd_wrapper<aom_highbd_idct4x4_16_add_c>, + &highbd_wrapper<aom_highbd_idct4x4_16_add_c>, TX_4X4, 16, 10, 2), + make_tuple(&aom_highbd_fdct4x4_c, + &highbd_wrapper<aom_highbd_idct4x4_16_add_c>, + &highbd_wrapper<aom_highbd_idct4x4_16_add_c>, TX_4X4, 16, 12, 2), + make_tuple(&aom_highbd_fdct4x4_c, + &highbd_wrapper<aom_highbd_idct4x4_16_add_c>, + &highbd_wrapper<aom_highbd_idct4x4_1_add_c>, TX_4X4, 1, 8, 2), + make_tuple(&aom_highbd_fdct4x4_c, + &highbd_wrapper<aom_highbd_idct4x4_16_add_c>, + &highbd_wrapper<aom_highbd_idct4x4_1_add_c>, TX_4X4, 1, 10, 2), + make_tuple(&aom_highbd_fdct4x4_c, + &highbd_wrapper<aom_highbd_idct4x4_16_add_c>, + &highbd_wrapper<aom_highbd_idct4x4_1_add_c>, TX_4X4, 1, 12, 2), +#endif // CONFIG_HIGHBITDEPTH + make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>, + 
&wrapper<aom_idct32x32_1024_add_c>, TX_32X32, 1024, 8, 1), + make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>, + &wrapper<aom_idct32x32_135_add_c>, TX_32X32, 135, 8, 1), + make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>, + &wrapper<aom_idct32x32_34_add_c>, TX_32X32, 34, 8, 1), + make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>, + &wrapper<aom_idct32x32_1_add_c>, TX_32X32, 1, 8, 1), + make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>, + &wrapper<aom_idct16x16_256_add_c>, TX_16X16, 256, 8, 1), + make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>, + &wrapper<aom_idct16x16_38_add_c>, TX_16X16, 38, 8, 1), + make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>, + &wrapper<aom_idct16x16_10_add_c>, TX_16X16, 10, 8, 1), + make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>, + &wrapper<aom_idct16x16_1_add_c>, TX_16X16, 1, 8, 1), + make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>, + &wrapper<aom_idct8x8_64_add_c>, TX_8X8, 64, 8, 1), + make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>, + &wrapper<aom_idct8x8_12_add_c>, TX_8X8, 12, 8, 1), + make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>, + &wrapper<aom_idct8x8_1_add_c>, TX_8X8, 1, 8, 1), + make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>, + &wrapper<aom_idct4x4_16_add_c>, TX_4X4, 16, 8, 1), + make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>, + &wrapper<aom_idct4x4_1_add_c>, TX_4X4, 1, 8, 1) +}; + +INSTANTIATE_TEST_CASE_P(C, PartialIDctTest, + ::testing::ValuesIn(c_partial_idct_tests)); + +#if HAVE_NEON && !CONFIG_HIGHBITDEPTH +const PartialInvTxfmParam neon_partial_idct_tests[] = { + make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>, + &wrapper<aom_idct32x32_1_add_neon>, TX_32X32, 1, 8, 1), + make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>, + &wrapper<aom_idct16x16_10_add_neon>, TX_16X16, 10, 8, 1), + make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>, 
+ &wrapper<aom_idct16x16_1_add_neon>, TX_16X16, 1, 8, 1), + make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>, + &wrapper<aom_idct8x8_12_add_neon>, TX_8X8, 12, 8, 1), + make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>, + &wrapper<aom_idct8x8_1_add_neon>, TX_8X8, 1, 8, 1), + make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>, + &wrapper<aom_idct4x4_1_add_neon>, TX_4X4, 1, 8, 1) +}; + +INSTANTIATE_TEST_CASE_P(NEON, PartialIDctTest, + ::testing::ValuesIn(neon_partial_idct_tests)); +#endif // HAVE_NEON && !CONFIG_HIGHBITDEPTH + +#if HAVE_SSE2 +const PartialInvTxfmParam sse2_partial_idct_tests[] = { +#if CONFIG_HIGHBITDEPTH + make_tuple(&aom_highbd_fdct4x4_c, + &highbd_wrapper<aom_highbd_idct4x4_16_add_c>, + &highbd_wrapper<aom_highbd_idct4x4_16_add_sse2>, TX_4X4, 16, 8, 2), + make_tuple( + &aom_highbd_fdct4x4_c, &highbd_wrapper<aom_highbd_idct4x4_16_add_c>, + &highbd_wrapper<aom_highbd_idct4x4_16_add_sse2>, TX_4X4, 16, 10, 2), + make_tuple( + &aom_highbd_fdct4x4_c, &highbd_wrapper<aom_highbd_idct4x4_16_add_c>, + &highbd_wrapper<aom_highbd_idct4x4_16_add_sse2>, TX_4X4, 16, 12, 2), +#endif // CONFIG_HIGHBITDEPTH + make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>, + &wrapper<aom_idct32x32_1024_add_sse2>, TX_32X32, 1024, 8, 1), + make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>, + &wrapper<aom_idct32x32_1024_add_sse2>, TX_32X32, 135, 8, 1), + make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>, + &wrapper<aom_idct32x32_34_add_sse2>, TX_32X32, 34, 8, 1), + make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>, + &wrapper<aom_idct32x32_1_add_sse2>, TX_32X32, 1, 8, 1), + make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>, + &wrapper<aom_idct16x16_256_add_sse2>, TX_16X16, 256, 8, 1), + make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>, + &wrapper<aom_idct16x16_10_add_sse2>, TX_16X16, 10, 8, 1), + make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>, + 
&wrapper<aom_idct16x16_1_add_sse2>, TX_16X16, 1, 8, 1), + make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>, + &wrapper<aom_idct8x8_64_add_sse2>, TX_8X8, 64, 8, 1), + make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>, + &wrapper<aom_idct8x8_12_add_sse2>, TX_8X8, 12, 8, 1), + make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>, + &wrapper<aom_idct8x8_1_add_sse2>, TX_8X8, 1, 8, 1), + make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>, + &wrapper<aom_idct4x4_16_add_sse2>, TX_4X4, 16, 8, 1), + make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>, + &wrapper<aom_idct4x4_1_add_sse2>, TX_4X4, 1, 8, 1) +}; + +INSTANTIATE_TEST_CASE_P(SSE2, PartialIDctTest, + ::testing::ValuesIn(sse2_partial_idct_tests)); + +#endif // HAVE_SSE2 + +#if HAVE_SSSE3 +const PartialInvTxfmParam ssse3_partial_idct_tests[] = { + make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>, + &wrapper<aom_idct32x32_1024_add_ssse3>, TX_32X32, 1024, 8, 1), + make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>, + &wrapper<aom_idct32x32_135_add_ssse3>, TX_32X32, 135, 8, 1), + make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>, + &wrapper<aom_idct32x32_34_add_ssse3>, TX_32X32, 34, 8, 1), + make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>, + &wrapper<aom_idct8x8_64_add_ssse3>, TX_8X8, 64, 8, 1), + make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>, + &wrapper<aom_idct8x8_12_add_ssse3>, TX_8X8, 12, 8, 1) +}; + +INSTANTIATE_TEST_CASE_P(SSSE3, PartialIDctTest, + ::testing::ValuesIn(ssse3_partial_idct_tests)); +#endif // HAVE_SSSE3 + +#if HAVE_DSPR2 && !CONFIG_HIGHBITDEPTH +const PartialInvTxfmParam dspr2_partial_idct_tests[] = { + make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>, + &wrapper<aom_idct32x32_1024_add_dspr2>, TX_32X32, 1024, 8, 1), + make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>, + &wrapper<aom_idct32x32_1024_add_dspr2>, TX_32X32, 135, 8, 1), + make_tuple(&aom_fdct32x32_c, 
&wrapper<aom_idct32x32_1024_add_c>, + &wrapper<aom_idct32x32_34_add_dspr2>, TX_32X32, 34, 8, 1), + make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>, + &wrapper<aom_idct32x32_1_add_dspr2>, TX_32X32, 1, 8, 1), + make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>, + &wrapper<aom_idct16x16_256_add_dspr2>, TX_16X16, 256, 8, 1), + make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>, + &wrapper<aom_idct16x16_10_add_dspr2>, TX_16X16, 10, 8, 1), + make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>, + &wrapper<aom_idct16x16_1_add_dspr2>, TX_16X16, 1, 8, 1), + make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>, + &wrapper<aom_idct8x8_64_add_dspr2>, TX_8X8, 64, 8, 1), + make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>, + &wrapper<aom_idct8x8_12_add_dspr2>, TX_8X8, 12, 8, 1), + make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>, + &wrapper<aom_idct8x8_1_add_dspr2>, TX_8X8, 1, 8, 1), + make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>, + &wrapper<aom_idct4x4_16_add_dspr2>, TX_4X4, 16, 8, 1), + make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>, + &wrapper<aom_idct4x4_1_add_dspr2>, TX_4X4, 1, 8, 1) +}; + +INSTANTIATE_TEST_CASE_P(DSPR2, PartialIDctTest, + ::testing::ValuesIn(dspr2_partial_idct_tests)); +#endif // HAVE_DSPR2 && !CONFIG_HIGHBITDEPTH + +#if HAVE_MSA && !CONFIG_HIGHBITDEPTH +const PartialInvTxfmParam msa_partial_idct_tests[] = { + make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>, + &wrapper<aom_idct32x32_1024_add_msa>, TX_32X32, 1024, 8, 1), + make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>, + &wrapper<aom_idct32x32_1024_add_msa>, TX_32X32, 135, 8, 1), + make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>, + &wrapper<aom_idct32x32_34_add_msa>, TX_32X32, 34, 8, 1), + make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>, + &wrapper<aom_idct32x32_1_add_msa>, TX_32X32, 1, 8, 1), + make_tuple(&aom_fdct16x16_c, 
&wrapper<aom_idct16x16_256_add_c>, + &wrapper<aom_idct16x16_256_add_msa>, TX_16X16, 256, 8, 1), + make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>, + &wrapper<aom_idct16x16_10_add_msa>, TX_16X16, 10, 8, 1), + make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>, + &wrapper<aom_idct16x16_1_add_msa>, TX_16X16, 1, 8, 1), + make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>, + &wrapper<aom_idct8x8_64_add_msa>, TX_8X8, 64, 8, 1), + make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>, + &wrapper<aom_idct8x8_12_add_msa>, TX_8X8, 12, 8, 1), + make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>, + &wrapper<aom_idct8x8_1_add_msa>, TX_8X8, 1, 8, 1), + make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>, + &wrapper<aom_idct4x4_16_add_msa>, TX_4X4, 16, 8, 1), + make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>, + &wrapper<aom_idct4x4_1_add_msa>, TX_4X4, 1, 8, 1) +}; + +INSTANTIATE_TEST_CASE_P(MSA, PartialIDctTest, + ::testing::ValuesIn(msa_partial_idct_tests)); +#endif // HAVE_MSA && !CONFIG_HIGHBITDEPTH + +} // namespace diff --git a/third_party/aom/test/quantize_test.cc b/third_party/aom/test/quantize_test.cc new file mode 100644 index 000000000..4f61484a2 --- /dev/null +++ b/third_party/aom/test/quantize_test.cc @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+*/ + +#include <string.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_config.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "vp8/common/blockd.h" +#include "vp8/common/onyx.h" +#include "vp8/encoder/block.h" +#include "vp8/encoder/onyx_int.h" +#include "vp8/encoder/quantize.h" +#include "aom/aom_integer.h" +#include "aom_mem/aom_mem.h" + +namespace { +#if !CONFIG_AOM_QM + +const int kNumBlocks = 25; +const int kNumBlockEntries = 16; + +typedef void (*VP8Quantize)(BLOCK *b, BLOCKD *d); + +typedef std::tr1::tuple<VP8Quantize, VP8Quantize> VP8QuantizeParam; + +using libaom_test::ACMRandom; +using std::tr1::make_tuple; + +// Create and populate a VP8_COMP instance which has a complete set of +// quantization inputs as well as a second MACROBLOCKD for output. +class QuantizeTestBase { + public: + virtual ~QuantizeTestBase() { + vp8_remove_compressor(&vp8_comp_); + vp8_comp_ = NULL; + aom_free(macroblockd_dst_); + macroblockd_dst_ = NULL; + libaom_test::ClearSystemState(); + } + + protected: + void SetupCompressor() { + rnd_.Reset(ACMRandom::DeterministicSeed()); + + // The full configuration is necessary to generate the quantization tables. + VP8_CONFIG vp8_config; + memset(&vp8_config, 0, sizeof(vp8_config)); + + vp8_comp_ = vp8_create_compressor(&vp8_config); + + // Set the tables based on a quantizer of 0. + vp8_set_quantizer(vp8_comp_, 0); + + // Set up all the block/blockd pointers for the mb in vp8_comp_. + vp8cx_frame_init_quantizer(vp8_comp_); + + // Copy macroblockd from the reference to get pre-set-up dequant values. + macroblockd_dst_ = reinterpret_cast<MACROBLOCKD *>( + aom_memalign(32, sizeof(*macroblockd_dst_))); + memcpy(macroblockd_dst_, &vp8_comp_->mb.e_mbd, sizeof(*macroblockd_dst_)); + // Fix block pointers - currently they point to the blocks in the reference + // structure. 
+ vp8_setup_block_dptrs(macroblockd_dst_); + } + + void UpdateQuantizer(int q) { + vp8_set_quantizer(vp8_comp_, q); + + memcpy(macroblockd_dst_, &vp8_comp_->mb.e_mbd, sizeof(*macroblockd_dst_)); + vp8_setup_block_dptrs(macroblockd_dst_); + } + + void FillCoeffConstant(int16_t c) { + for (int i = 0; i < kNumBlocks * kNumBlockEntries; ++i) { + vp8_comp_->mb.coeff[i] = c; + } + } + + void FillCoeffRandom() { + for (int i = 0; i < kNumBlocks * kNumBlockEntries; ++i) { + vp8_comp_->mb.coeff[i] = rnd_.Rand8(); + } + } + + void CheckOutput() { + EXPECT_EQ(0, memcmp(vp8_comp_->mb.e_mbd.qcoeff, macroblockd_dst_->qcoeff, + sizeof(*macroblockd_dst_->qcoeff) * kNumBlocks * + kNumBlockEntries)) + << "qcoeff mismatch"; + EXPECT_EQ(0, memcmp(vp8_comp_->mb.e_mbd.dqcoeff, macroblockd_dst_->dqcoeff, + sizeof(*macroblockd_dst_->dqcoeff) * kNumBlocks * + kNumBlockEntries)) + << "dqcoeff mismatch"; + EXPECT_EQ(0, memcmp(vp8_comp_->mb.e_mbd.eobs, macroblockd_dst_->eobs, + sizeof(*macroblockd_dst_->eobs) * kNumBlocks)) + << "eobs mismatch"; + } + + VP8_COMP *vp8_comp_; + MACROBLOCKD *macroblockd_dst_; + + private: + ACMRandom rnd_; +}; + +class QuantizeTest : public QuantizeTestBase, + public ::testing::TestWithParam<VP8QuantizeParam> { + protected: + virtual void SetUp() { + SetupCompressor(); + asm_quant_ = GET_PARAM(0); + c_quant_ = GET_PARAM(1); + } + + void RunComparison() { + for (int i = 0; i < kNumBlocks; ++i) { + ASM_REGISTER_STATE_CHECK( + c_quant_(&vp8_comp_->mb.block[i], &vp8_comp_->mb.e_mbd.block[i])); + ASM_REGISTER_STATE_CHECK( + asm_quant_(&vp8_comp_->mb.block[i], ¯oblockd_dst_->block[i])); + } + + CheckOutput(); + } + + private: + VP8Quantize asm_quant_; + VP8Quantize c_quant_; +}; + +TEST_P(QuantizeTest, TestZeroInput) { + FillCoeffConstant(0); + RunComparison(); +} + +TEST_P(QuantizeTest, TestLargeNegativeInput) { + FillCoeffConstant(0); + // Generate a qcoeff which contains 512/-512 (0x0100/0xFE00) to catch issues + // like BUG=883 where the constant being compared 
was incorrectly initialized. + vp8_comp_->mb.coeff[0] = -8191; + RunComparison(); +} + +TEST_P(QuantizeTest, TestRandomInput) { + FillCoeffRandom(); + RunComparison(); +} + +TEST_P(QuantizeTest, TestMultipleQ) { + for (int q = 0; q < QINDEX_RANGE; ++q) { + UpdateQuantizer(q); + FillCoeffRandom(); + RunComparison(); + } +} + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P( + SSE2, QuantizeTest, + ::testing::Values( + make_tuple(&vp8_fast_quantize_b_sse2, &vp8_fast_quantize_b_c), + make_tuple(&vp8_regular_quantize_b_sse2, &vp8_regular_quantize_b_c))); +#endif // HAVE_SSE2 + +#if HAVE_SSSE3 +INSTANTIATE_TEST_CASE_P(SSSE3, QuantizeTest, + ::testing::Values(make_tuple(&vp8_fast_quantize_b_ssse3, + &vp8_fast_quantize_b_c))); +#endif // HAVE_SSSE3 + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_CASE_P( + SSE4_1, QuantizeTest, + ::testing::Values(make_tuple(&vp8_regular_quantize_b_sse4_1, + &vp8_regular_quantize_b_c))); +#endif // HAVE_SSE4_1 + +#if HAVE_NEON +INSTANTIATE_TEST_CASE_P(NEON, QuantizeTest, + ::testing::Values(make_tuple(&vp8_fast_quantize_b_neon, + &vp8_fast_quantize_b_c))); +#endif // HAVE_NEON + +#if HAVE_MSA +INSTANTIATE_TEST_CASE_P( + MSA, QuantizeTest, + ::testing::Values( + make_tuple(&vp8_fast_quantize_b_msa, &vp8_fast_quantize_b_c), + make_tuple(&vp8_regular_quantize_b_msa, &vp8_regular_quantize_b_c))); +#endif // HAVE_MSA +#endif // CONFIG_AOM_QM +} // namespace diff --git a/third_party/aom/test/realtime_test.cc b/third_party/aom/test/realtime_test.cc new file mode 100644 index 000000000..ffe4a3146 --- /dev/null +++ b/third_party/aom/test/realtime_test.cc @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/util.h" +#include "test/video_source.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +const int kVideoSourceWidth = 320; +const int kVideoSourceHeight = 240; +const int kFramesToEncode = 2; + +class RealtimeTest + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWithParam<libaom_test::TestMode> { + protected: + RealtimeTest() : EncoderTest(GET_PARAM(0)), frame_packets_(0) {} + virtual ~RealtimeTest() {} + + virtual void SetUp() { + InitializeConfig(); + cfg_.g_lag_in_frames = 0; + SetMode(::libaom_test::kRealTime); + } + + virtual void BeginPassHook(unsigned int /*pass*/) { + // TODO(tomfinegan): We're changing the pass value here to make sure + // we get frames when real time mode is combined with |g_pass| set to + // AOM_RC_FIRST_PASS. This is necessary because EncoderTest::RunLoop() sets + // the pass value based on the mode passed into EncoderTest::SetMode(), + // which overrides the one specified in SetUp() above. 
+ cfg_.g_pass = AOM_RC_FIRST_PASS; + } + virtual void FramePktHook(const aom_codec_cx_pkt_t * /*pkt*/) { + frame_packets_++; + } + + int frame_packets_; +}; + +TEST_P(RealtimeTest, RealtimeFirstPassProducesFrames) { + ::libaom_test::RandomVideoSource video; + video.SetSize(kVideoSourceWidth, kVideoSourceHeight); + video.set_limit(kFramesToEncode); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + EXPECT_EQ(kFramesToEncode, frame_packets_); +} + +AV1_INSTANTIATE_TEST_CASE(RealtimeTest, + ::testing::Values(::libaom_test::kRealTime)); + +} // namespace diff --git a/third_party/aom/test/register_state_check.h b/third_party/aom/test/register_state_check.h new file mode 100644 index 000000000..330820173 --- /dev/null +++ b/third_party/aom/test/register_state_check.h @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#ifndef TEST_REGISTER_STATE_CHECK_H_ +#define TEST_REGISTER_STATE_CHECK_H_ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "./aom_config.h" +#include "aom/aom_integer.h" + +// ASM_REGISTER_STATE_CHECK(asm_function) +// Minimally validates the environment pre & post function execution. This +// variant should be used with assembly functions which are not expected to +// fully restore the system state. See platform implementations of +// RegisterStateCheck for details. 
//
// API_REGISTER_STATE_CHECK(api_function)
//   Performs all the checks done by ASM_REGISTER_STATE_CHECK() and any
//   additional checks to ensure the environment is in a consistent state pre &
//   post function execution. This variant should be used with API functions.
//   See platform implementations of RegisterStateCheckXXX for details.
//

#if defined(_WIN64)

#undef NOMINMAX
#define NOMINMAX
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <winnt.h>

// Equality for M128A values: both the Low and the High member must match.
// Needed so that EXPECT_EQ can compare saved xmm registers below.
inline bool operator==(const M128A &lhs, const M128A &rhs) {
  return (lhs.Low == rhs.Low && lhs.High == rhs.High);
}

namespace libaom_test {

// Compares the state of xmm[6-15] at construction with their state at
// destruction. These registers should be preserved by the callee on
// Windows x64.
//
// The snapshot is taken in the constructor and verified in the destructor, so
// an instance is meant to be scoped tightly around the call under test (see
// ASM_REGISTER_STATE_CHECK below).
class RegisterStateCheck {
 public:
  RegisterStateCheck() { initialized_ = StoreRegisters(&pre_context_); }
  ~RegisterStateCheck() { EXPECT_TRUE(Check()); }

 private:
  // Captures the floating point context of the current thread into |context|.
  // Returns true when GetThreadContext() reported success; a failure is also
  // recorded as a non-fatal test failure together with GetLastError().
  static bool StoreRegisters(CONTEXT *const context) {
    const HANDLE this_thread = GetCurrentThread();
    EXPECT_TRUE(this_thread != NULL);
    context->ContextFlags = CONTEXT_FLOATING_POINT;
    const bool context_saved = GetThreadContext(this_thread, context) == TRUE;
    EXPECT_TRUE(context_saved) << "GetLastError: " << GetLastError();
    return context_saved;
  }

  // Compares the register state. Returns true if the states match.
  // Returns false without comparing anything when either snapshot could not
  // be taken.
  bool Check() const {
    if (!initialized_) return false;
    CONTEXT post_context;
    if (!StoreRegisters(&post_context)) return false;

    // Walks ten consecutive M128A slots starting at Xmm6.
    // NOTE(review): relies on Xmm6..Xmm15 being laid out contiguously inside
    // CONTEXT - confirm against the winnt.h definition.
    const M128A *xmm_pre = &pre_context_.Xmm6;
    const M128A *xmm_post = &post_context.Xmm6;
    for (int i = 6; i <= 15; ++i) {
      EXPECT_EQ(*xmm_pre, *xmm_post) << "xmm" << i << " has been modified!";
      ++xmm_pre;
      ++xmm_post;
    }
    // The result reflects every non-fatal failure recorded so far in the
    // running test, not only the register comparisons above.
    return !testing::Test::HasNonfatalFailure();
  }

  bool initialized_;     // True when the constructor snapshot succeeded.
  CONTEXT pre_context_;  // Thread context captured at construction.
};

// Runs |statement| with a RegisterStateCheck alive for exactly its duration.
#define ASM_REGISTER_STATE_CHECK(statement)    \
  do {                                         \
    libaom_test::RegisterStateCheck reg_check; \
    statement;                                 \
  } while (false)

}  // namespace libaom_test

#elif defined(CONFIG_SHARED) && defined(HAVE_NEON_ASM) && !CONFIG_SHARED && \
    HAVE_NEON_ASM && CONFIG_AV1

extern "C" {
// Save the d8-d15 registers into store.
void aom_push_neon(int64_t *store);
}

namespace libaom_test {

// Compares the state of d8-d15 at construction with their state at
// destruction. These registers should be preserved by the callee on
// arm platform.
class RegisterStateCheck {
 public:
  RegisterStateCheck() { initialized_ = StoreRegisters(pre_store_); }
  ~RegisterStateCheck() { EXPECT_TRUE(Check()); }

 private:
  // Snapshots the registers through aom_push_neon(). Always returns true; the
  // bool return mirrors the Win64 implementation above.
  static bool StoreRegisters(int64_t store[8]) {
    aom_push_neon(store);
    return true;
  }

  // Compares the register state. Returns true if the states match.
  bool Check() const {
    if (!initialized_) return false;
    int64_t post_store[8];
    aom_push_neon(post_store);
    for (int i = 0; i < 8; ++i) {
      // Slot i is reported as register d(i + 8).
      EXPECT_EQ(pre_store_[i], post_store[i]) << "d" << i + 8
                                              << " has been modified";
    }
    // As on Win64, this reflects every non-fatal failure of the running test.
    return !testing::Test::HasNonfatalFailure();
  }

  bool initialized_;      // Set by the constructor snapshot.
  int64_t pre_store_[8];  // Register values captured at construction.
};

// Runs |statement| with a RegisterStateCheck alive for exactly its duration.
#define ASM_REGISTER_STATE_CHECK(statement)    \
  do {                                         \
    libaom_test::RegisterStateCheck reg_check; \
    statement;                                 \
  } while (false)

}  // namespace libaom_test

#else

namespace libaom_test {

// No register verification on this configuration: the class is an empty
// placeholder and the macro expands to the bare statement.
class RegisterStateCheck {};
#define ASM_REGISTER_STATE_CHECK(statement) statement

}  // namespace libaom_test

#endif  // _WIN64

#if ARCH_X86 || ARCH_X86_64
#if defined(__GNUC__)

namespace libaom_test {

// Checks the FPU tag word pre/post execution to ensure emms has been called.
class RegisterStateCheckMMX {
 public:
  // Stores the x87 environment (fstenv) at construction.
  RegisterStateCheckMMX() {
    __asm__ volatile("fstenv %0" : "=rm"(pre_fpu_env_));
  }
  ~RegisterStateCheckMMX() { EXPECT_TRUE(Check()); }

 private:
  // Checks the FPU tag word pre/post execution, returning false if not cleared
  // to 0xffff.
  bool Check() const {
    // Element 4 of the stored environment is the value compared to 0xffff.
    EXPECT_EQ(0xffff, pre_fpu_env_[4])
        << "FPU was in an inconsistent state prior to call";

    uint16_t post_fpu_env[14];
    __asm__ volatile("fstenv %0" : "=rm"(post_fpu_env));
    EXPECT_EQ(0xffff, post_fpu_env[4])
        << "FPU was left in an inconsistent state after call";
    return !testing::Test::HasNonfatalFailure();
  }

  uint16_t pre_fpu_env_[14];  // FPU environment captured at construction.
};

// Runs |statement| under both the MMX check and ASM_REGISTER_STATE_CHECK.
#define API_REGISTER_STATE_CHECK(statement)       \
  do {                                            \
    libaom_test::RegisterStateCheckMMX reg_check; \
    ASM_REGISTER_STATE_CHECK(statement);          \
  } while (false)

}  // namespace libaom_test

#endif  // __GNUC__
#endif  // ARCH_X86 || ARCH_X86_64

// Without a dedicated API-level implementation, fall back to the ASM check.
#ifndef API_REGISTER_STATE_CHECK
#define API_REGISTER_STATE_CHECK ASM_REGISTER_STATE_CHECK
#endif

#endif  // TEST_REGISTER_STATE_CHECK_H_
diff --git a/third_party/aom/test/resize_test.cc b/third_party/aom/test/resize_test.cc new file mode 100644 index 000000000..994b30117 --- /dev/null +++ b/third_party/aom/test/resize_test.cc @@ -0,0 +1,717 @@
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/

#include <climits>
#include <vector>
#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
#include "test/video_source.h"
#include "test/util.h"

// Enable(1) or Disable(0) writing of the compressed bitstream.
+#define WRITE_COMPRESSED_STREAM 0 + +namespace { + +#if WRITE_COMPRESSED_STREAM +static void mem_put_le16(char *const mem, unsigned int val) { + mem[0] = val; + mem[1] = val >> 8; +} + +static void mem_put_le32(char *const mem, unsigned int val) { + mem[0] = val; + mem[1] = val >> 8; + mem[2] = val >> 16; + mem[3] = val >> 24; +} + +static void write_ivf_file_header(const aom_codec_enc_cfg_t *const cfg, + int frame_cnt, FILE *const outfile) { + char header[32]; + + header[0] = 'D'; + header[1] = 'K'; + header[2] = 'I'; + header[3] = 'F'; + mem_put_le16(header + 4, 0); /* version */ + mem_put_le16(header + 6, 32); /* headersize */ + mem_put_le32(header + 8, 0x30395056); /* fourcc (av1) */ + mem_put_le16(header + 12, cfg->g_w); /* width */ + mem_put_le16(header + 14, cfg->g_h); /* height */ + mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */ + mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */ + mem_put_le32(header + 24, frame_cnt); /* length */ + mem_put_le32(header + 28, 0); /* unused */ + + (void)fwrite(header, 1, 32, outfile); +} + +static void write_ivf_frame_size(FILE *const outfile, const size_t size) { + char header[4]; + mem_put_le32(header, static_cast<unsigned int>(size)); + (void)fwrite(header, 1, 4, outfile); +} + +static void write_ivf_frame_header(const aom_codec_cx_pkt_t *const pkt, + FILE *const outfile) { + char header[12]; + aom_codec_pts_t pts; + + if (pkt->kind != AOM_CODEC_CX_FRAME_PKT) return; + + pts = pkt->data.frame.pts; + mem_put_le32(header, static_cast<unsigned int>(pkt->data.frame.sz)); + mem_put_le32(header + 4, pts & 0xFFFFFFFF); + mem_put_le32(header + 8, pts >> 32); + + (void)fwrite(header, 1, 12, outfile); +} +#endif // WRITE_COMPRESSED_STREAM + +const unsigned int kInitialWidth = 320; +const unsigned int kInitialHeight = 240; + +struct FrameInfo { + FrameInfo(aom_codec_pts_t _pts, unsigned int _w, unsigned int _h) + : pts(_pts), w(_w), h(_h) {} + + aom_codec_pts_t pts; + unsigned int w; + unsigned int h; +}; + 
+void ScaleForFrameNumber(unsigned int frame, unsigned int initial_w, + unsigned int initial_h, unsigned int *w, + unsigned int *h, int flag_codec) { + if (frame < 10) { + *w = initial_w; + *h = initial_h; + return; + } + if (frame < 20) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 30) { + *w = initial_w / 2; + *h = initial_h / 2; + return; + } + if (frame < 40) { + *w = initial_w; + *h = initial_h; + return; + } + if (frame < 50) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 60) { + *w = initial_w / 2; + *h = initial_h / 2; + return; + } + if (frame < 70) { + *w = initial_w; + *h = initial_h; + return; + } + if (frame < 80) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 90) { + *w = initial_w / 2; + *h = initial_h / 2; + return; + } + if (frame < 100) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 110) { + *w = initial_w; + *h = initial_h; + return; + } + if (frame < 120) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 130) { + *w = initial_w / 2; + *h = initial_h / 2; + return; + } + if (frame < 140) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 150) { + *w = initial_w; + *h = initial_h; + return; + } + if (frame < 160) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 170) { + *w = initial_w / 2; + *h = initial_h / 2; + return; + } + if (frame < 180) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 190) { + *w = initial_w; + *h = initial_h; + return; + } + if (frame < 200) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 210) { + *w = initial_w / 2; + *h = initial_h / 2; + return; + } + if (frame < 220) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 230) { + *w = initial_w; + *h = initial_h; + return; + } + if (frame < 
240) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 250) { + *w = initial_w / 2; + *h = initial_h / 2; + return; + } + if (frame < 260) { + *w = initial_w; + *h = initial_h; + return; + } + // Go down very low. + if (frame < 270) { + *w = initial_w / 4; + *h = initial_h / 4; + return; + } + if (flag_codec == 1) { + // Cases that only works for AV1. + // For AV1: Swap width and height of original. + if (frame < 320) { + *w = initial_h; + *h = initial_w; + return; + } + } + *w = initial_w; + *h = initial_h; +} + +class ResizingVideoSource : public ::libaom_test::DummyVideoSource { + public: + ResizingVideoSource() { + SetSize(kInitialWidth, kInitialHeight); + limit_ = 350; + } + int flag_codec_; + virtual ~ResizingVideoSource() {} + + protected: + virtual void Next() { + ++frame_; + unsigned int width; + unsigned int height; + ScaleForFrameNumber(frame_, kInitialWidth, kInitialHeight, &width, &height, + flag_codec_); + SetSize(width, height); + FillFrame(); + } +}; + +class ResizeTest + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWithParam<libaom_test::TestMode> { + protected: + ResizeTest() : EncoderTest(GET_PARAM(0)) {} + + virtual ~ResizeTest() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(GET_PARAM(1)); + } + + virtual void DecompressedFrameHook(const aom_image_t &img, + aom_codec_pts_t pts) { + frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h)); + } + + std::vector<FrameInfo> frame_info_list_; +}; + +TEST_P(ResizeTest, TestExternalResizeWorks) { + ResizingVideoSource video; + video.flag_codec_ = 0; + cfg_.g_lag_in_frames = 0; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin(); + info != frame_info_list_.end(); ++info) { + const unsigned int frame = static_cast<unsigned>(info->pts); + unsigned int expected_w; + unsigned int expected_h; + ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, 
&expected_w, + &expected_h, 0); + EXPECT_EQ(expected_w, info->w) << "Frame " << frame + << " had unexpected width"; + EXPECT_EQ(expected_h, info->h) << "Frame " << frame + << " had unexpected height"; + } +} + +const unsigned int kStepDownFrame = 3; +const unsigned int kStepUpFrame = 6; + +class ResizeInternalTest : public ResizeTest { + protected: +#if WRITE_COMPRESSED_STREAM + ResizeInternalTest() + : ResizeTest(), frame0_psnr_(0.0), outfile_(NULL), out_frames_(0) {} +#else + ResizeInternalTest() : ResizeTest(), frame0_psnr_(0.0) {} +#endif + + virtual ~ResizeInternalTest() {} + + virtual void BeginPassHook(unsigned int /*pass*/) { +#if WRITE_COMPRESSED_STREAM + outfile_ = fopen("av10-2-05-resize.ivf", "wb"); +#endif + } + + virtual void EndPassHook() { +#if WRITE_COMPRESSED_STREAM + if (outfile_) { + if (!fseek(outfile_, 0, SEEK_SET)) + write_ivf_file_header(&cfg_, out_frames_, outfile_); + fclose(outfile_); + outfile_ = NULL; + } +#endif + } + + virtual void PreEncodeFrameHook(libaom_test::VideoSource *video, + libaom_test::Encoder *encoder) { + if (change_config_) { + int new_q = 60; + if (video->frame() == 0) { + struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO }; + encoder->Control(AOME_SET_SCALEMODE, &mode); + } + if (video->frame() == 1) { + struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL }; + encoder->Control(AOME_SET_SCALEMODE, &mode); + cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = new_q; + encoder->Config(&cfg_); + } + } else { + if (video->frame() == kStepDownFrame) { + struct aom_scaling_mode mode = { AOME_FOURFIVE, AOME_THREEFIVE }; + encoder->Control(AOME_SET_SCALEMODE, &mode); + } + if (video->frame() == kStepUpFrame) { + struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL }; + encoder->Control(AOME_SET_SCALEMODE, &mode); + } + } + } + + virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) { + if (frame0_psnr_ == 0.) 
frame0_psnr_ = pkt->data.psnr.psnr[0]; + EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 2.0); + } + +#if WRITE_COMPRESSED_STREAM + virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) { + ++out_frames_; + + // Write initial file header if first frame. + if (pkt->data.frame.pts == 0) write_ivf_file_header(&cfg_, 0, outfile_); + + // Write frame header and data. + write_ivf_frame_header(pkt, outfile_); + (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_); + } +#endif + + double frame0_psnr_; + bool change_config_; +#if WRITE_COMPRESSED_STREAM + FILE *outfile_; + unsigned int out_frames_; +#endif +}; + +TEST_P(ResizeInternalTest, TestInternalResizeWorks) { + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 10); + init_flags_ = AOM_CODEC_USE_PSNR; + change_config_ = false; + + // q picked such that initial keyframe on this clip is ~30dB PSNR + cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48; + + // If the number of frames being encoded is smaller than g_lag_in_frames + // the encoded frame is unavailable using the current API. Comparing + // frames to detect mismatch would then not be possible. Set + // g_lag_in_frames = 0 to get around this. 
+ cfg_.g_lag_in_frames = 0; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin(); + info != frame_info_list_.end(); ++info) { + const aom_codec_pts_t pts = info->pts; + if (pts >= kStepDownFrame && pts < kStepUpFrame) { + ASSERT_EQ(282U, info->w) << "Frame " << pts << " had unexpected width"; + ASSERT_EQ(173U, info->h) << "Frame " << pts << " had unexpected height"; + } else { + EXPECT_EQ(352U, info->w) << "Frame " << pts << " had unexpected width"; + EXPECT_EQ(288U, info->h) << "Frame " << pts << " had unexpected height"; + } + } +} + +TEST_P(ResizeInternalTest, TestInternalResizeChangeConfig) { + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 10); + cfg_.g_w = 352; + cfg_.g_h = 288; + change_config_ = true; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +class ResizeRealtimeTest + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int> { + protected: + ResizeRealtimeTest() : EncoderTest(GET_PARAM(0)) {} + virtual ~ResizeRealtimeTest() {} + + virtual void PreEncodeFrameHook(libaom_test::VideoSource *video, + libaom_test::Encoder *encoder) { + if (video->frame() == 0) { + encoder->Control(AV1E_SET_AQ_MODE, 3); + encoder->Control(AOME_SET_CPUUSED, set_cpu_used_); + } + + if (change_bitrate_ && video->frame() == 120) { + change_bitrate_ = false; + cfg_.rc_target_bitrate = 500; + encoder->Config(&cfg_); + } + } + + virtual void SetUp() { + InitializeConfig(); + SetMode(GET_PARAM(1)); + set_cpu_used_ = GET_PARAM(2); + } + + virtual void DecompressedFrameHook(const aom_image_t &img, + aom_codec_pts_t pts) { + frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h)); + } + + virtual void MismatchHook(const aom_image_t *img1, const aom_image_t *img2) { + double mismatch_psnr = compute_psnr(img1, img2); + mismatch_psnr_ += mismatch_psnr; + ++mismatch_nframes_; + } + + unsigned int GetMismatchFrames() { 
return mismatch_nframes_; } + + void DefaultConfig() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 600; + cfg_.rc_buf_sz = 1000; + cfg_.rc_min_quantizer = 2; + cfg_.rc_max_quantizer = 56; + cfg_.rc_undershoot_pct = 50; + cfg_.rc_overshoot_pct = 50; + cfg_.rc_end_usage = AOM_CBR; + cfg_.kf_mode = AOM_KF_AUTO; + cfg_.g_lag_in_frames = 0; + cfg_.kf_min_dist = cfg_.kf_max_dist = 3000; + // Enable dropped frames. + cfg_.rc_dropframe_thresh = 1; + // Enable error_resilience mode. + cfg_.g_error_resilient = 1; + // Enable dynamic resizing. + cfg_.rc_resize_allowed = 1; + // Run at low bitrate. + cfg_.rc_target_bitrate = 200; + } + + std::vector<FrameInfo> frame_info_list_; + int set_cpu_used_; + bool change_bitrate_; + double mismatch_psnr_; + int mismatch_nframes_; +}; + +TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) { + ResizingVideoSource video; + video.flag_codec_ = 1; + DefaultConfig(); + // Disable internal resize for this test. + cfg_.rc_resize_allowed = 0; + change_bitrate_ = false; + mismatch_psnr_ = 0.0; + mismatch_nframes_ = 0; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin(); + info != frame_info_list_.end(); ++info) { + const unsigned int frame = static_cast<unsigned>(info->pts); + unsigned int expected_w; + unsigned int expected_h; + ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w, + &expected_h, 1); + EXPECT_EQ(expected_w, info->w) << "Frame " << frame + << " had unexpected width"; + EXPECT_EQ(expected_h, info->h) << "Frame " << frame + << " had unexpected height"; + EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); + } +} + +// Verify the dynamic resizer behavior for real time, 1 pass CBR mode. +// Run at low bitrate, with resize_allowed = 1, and verify that we get +// one resize down event. 
+TEST_P(ResizeRealtimeTest, TestInternalResizeDown) { + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 299); + DefaultConfig(); + cfg_.g_w = 352; + cfg_.g_h = 288; + change_bitrate_ = false; + mismatch_psnr_ = 0.0; + mismatch_nframes_ = 0; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + unsigned int last_w = cfg_.g_w; + unsigned int last_h = cfg_.g_h; + int resize_count = 0; + for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin(); + info != frame_info_list_.end(); ++info) { + if (info->w != last_w || info->h != last_h) { + // Verify that resize down occurs. + ASSERT_LT(info->w, last_w); + ASSERT_LT(info->h, last_h); + last_w = info->w; + last_h = info->h; + resize_count++; + } + } + +#if CONFIG_AV1_DECODER + // Verify that we get 1 resize down event in this test. + ASSERT_EQ(1, resize_count) << "Resizing should occur."; + EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); +#else + printf("Warning: AV1 decoder unavailable, unable to check resize count!\n"); +#endif +} + +// Verify the dynamic resizer behavior for real time, 1 pass CBR mode. +// Start at low target bitrate, raise the bitrate in the middle of the clip, +// scaling-up should occur after bitrate changed. +TEST_P(ResizeRealtimeTest, TestInternalResizeDownUpChangeBitRate) { + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 359); + DefaultConfig(); + cfg_.g_w = 352; + cfg_.g_h = 288; + change_bitrate_ = true; + mismatch_psnr_ = 0.0; + mismatch_nframes_ = 0; + // Disable dropped frames. + cfg_.rc_dropframe_thresh = 0; + // Starting bitrate low. 
+ cfg_.rc_target_bitrate = 80; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + unsigned int last_w = cfg_.g_w; + unsigned int last_h = cfg_.g_h; + int resize_count = 0; + for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin(); + info != frame_info_list_.end(); ++info) { + if (info->w != last_w || info->h != last_h) { + resize_count++; + if (resize_count == 1) { + // Verify that resize down occurs. + ASSERT_LT(info->w, last_w); + ASSERT_LT(info->h, last_h); + } else if (resize_count == 2) { + // Verify that resize up occurs. + ASSERT_GT(info->w, last_w); + ASSERT_GT(info->h, last_h); + } + last_w = info->w; + last_h = info->h; + } + } + +#if CONFIG_AV1_DECODER + // Verify that we get 2 resize events in this test. + ASSERT_EQ(resize_count, 2) << "Resizing should occur twice."; + EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); +#else + printf("Warning: AV1 decoder unavailable, unable to check resize count!\n"); +#endif +} + +aom_img_fmt_t CspForFrameNumber(int frame) { + if (frame < 10) return AOM_IMG_FMT_I420; + if (frame < 20) return AOM_IMG_FMT_I444; + return AOM_IMG_FMT_I420; +} + +class ResizeCspTest : public ResizeTest { + protected: +#if WRITE_COMPRESSED_STREAM + ResizeCspTest() + : ResizeTest(), frame0_psnr_(0.0), outfile_(NULL), out_frames_(0) {} +#else + ResizeCspTest() : ResizeTest(), frame0_psnr_(0.0) {} +#endif + + virtual ~ResizeCspTest() {} + + virtual void BeginPassHook(unsigned int /*pass*/) { +#if WRITE_COMPRESSED_STREAM + outfile_ = fopen("av11-2-05-cspchape.ivf", "wb"); +#endif + } + + virtual void EndPassHook() { +#if WRITE_COMPRESSED_STREAM + if (outfile_) { + if (!fseek(outfile_, 0, SEEK_SET)) + write_ivf_file_header(&cfg_, out_frames_, outfile_); + fclose(outfile_); + outfile_ = NULL; + } +#endif + } + + virtual void PreEncodeFrameHook(libaom_test::VideoSource *video, + libaom_test::Encoder *encoder) { + if (CspForFrameNumber(video->frame()) != AOM_IMG_FMT_I420 && + cfg_.g_profile != 1) { + cfg_.g_profile = 1; 
+ encoder->Config(&cfg_); + } + if (CspForFrameNumber(video->frame()) == AOM_IMG_FMT_I420 && + cfg_.g_profile != 0) { + cfg_.g_profile = 0; + encoder->Config(&cfg_); + } + } + + virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) { + if (frame0_psnr_ == 0.) frame0_psnr_ = pkt->data.psnr.psnr[0]; + EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 2.0); + } + +#if WRITE_COMPRESSED_STREAM + virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) { + ++out_frames_; + + // Write initial file header if first frame. + if (pkt->data.frame.pts == 0) write_ivf_file_header(&cfg_, 0, outfile_); + + // Write frame header and data. + write_ivf_frame_header(pkt, outfile_); + (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_); + } +#endif + + double frame0_psnr_; +#if WRITE_COMPRESSED_STREAM + FILE *outfile_; + unsigned int out_frames_; +#endif +}; + +class ResizingCspVideoSource : public ::libaom_test::DummyVideoSource { + public: + ResizingCspVideoSource() { + SetSize(kInitialWidth, kInitialHeight); + limit_ = 30; + } + + virtual ~ResizingCspVideoSource() {} + + protected: + virtual void Next() { + ++frame_; + SetImageFormat(CspForFrameNumber(frame_)); + FillFrame(); + } +}; + +TEST_P(ResizeCspTest, TestResizeCspWorks) { + ResizingCspVideoSource video; + init_flags_ = AOM_CODEC_USE_PSNR; + cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48; + cfg_.g_lag_in_frames = 0; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +AV1_INSTANTIATE_TEST_CASE(ResizeTest, + ::testing::Values(::libaom_test::kRealTime)); +AV1_INSTANTIATE_TEST_CASE(ResizeInternalTest, + ::testing::Values(::libaom_test::kOnePassBest)); +AV1_INSTANTIATE_TEST_CASE(ResizeRealtimeTest, + ::testing::Values(::libaom_test::kRealTime), + ::testing::Range(5, 9)); +AV1_INSTANTIATE_TEST_CASE(ResizeCspTest, + ::testing::Values(::libaom_test::kRealTime)); +} // namespace diff --git a/third_party/aom/test/sad_test.cc b/third_party/aom/test/sad_test.cc new file mode 100644 index 000000000..c3b5dac42 --- 
/dev/null +++ b/third_party/aom/test/sad_test.cc @@ -0,0 +1,1172 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include <string.h> +#include <limits.h> +#include <stdio.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_config.h" +#include "./aom_dsp_rtcd.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "aom/aom_codec.h" +#include "aom_mem/aom_mem.h" +#include "aom_ports/mem.h" + +typedef unsigned int (*SadMxNFunc)(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride); +typedef std::tr1::tuple<int, int, SadMxNFunc, int> SadMxNParam; + +typedef uint32_t (*SadMxNAvgFunc)(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + const uint8_t *second_pred); +typedef std::tr1::tuple<int, int, SadMxNAvgFunc, int> SadMxNAvgParam; + +typedef void (*SadMxNx4Func)(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_ptr[], int ref_stride, + uint32_t *sad_array); +typedef std::tr1::tuple<int, int, SadMxNx4Func, int> SadMxNx4Param; + +using libaom_test::ACMRandom; + +namespace { +class SADTestBase : public ::testing::Test { + public: + SADTestBase(int width, int height, int bit_depth) + : width_(width), height_(height), bd_(bit_depth) {} + + static void SetUpTestCase() { + source_data8_ = reinterpret_cast<uint8_t *>( + aom_memalign(kDataAlignment, kDataBlockSize)); + 
reference_data8_ = reinterpret_cast<uint8_t *>( + aom_memalign(kDataAlignment, kDataBufferSize)); + second_pred8_ = + reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128)); + source_data16_ = reinterpret_cast<uint16_t *>( + aom_memalign(kDataAlignment, kDataBlockSize * sizeof(uint16_t))); + reference_data16_ = reinterpret_cast<uint16_t *>( + aom_memalign(kDataAlignment, kDataBufferSize * sizeof(uint16_t))); + second_pred16_ = reinterpret_cast<uint16_t *>( + aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t))); + } + + static void TearDownTestCase() { + aom_free(source_data8_); + source_data8_ = NULL; + aom_free(reference_data8_); + reference_data8_ = NULL; + aom_free(second_pred8_); + second_pred8_ = NULL; + aom_free(source_data16_); + source_data16_ = NULL; + aom_free(reference_data16_); + reference_data16_ = NULL; + aom_free(second_pred16_); + second_pred16_ = NULL; + } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + // Handle up to 4 128x128 blocks, with stride up to 256 + static const int kDataAlignment = 16; + static const int kDataBlockSize = 128 * 256; + static const int kDataBufferSize = 4 * kDataBlockSize; + + virtual void SetUp() { + if (bd_ == -1) { + use_high_bit_depth_ = false; + bit_depth_ = AOM_BITS_8; + source_data_ = source_data8_; + reference_data_ = reference_data8_; + second_pred_ = second_pred8_; +#if CONFIG_HIGHBITDEPTH + } else { + use_high_bit_depth_ = true; + bit_depth_ = static_cast<aom_bit_depth_t>(bd_); + source_data_ = CONVERT_TO_BYTEPTR(source_data16_); + reference_data_ = CONVERT_TO_BYTEPTR(reference_data16_); + second_pred_ = CONVERT_TO_BYTEPTR(second_pred16_); +#endif // CONFIG_HIGHBITDEPTH + } + mask_ = (1 << bit_depth_) - 1; + source_stride_ = (width_ + 31) & ~31; + reference_stride_ = width_ * 2; + rnd_.Reset(ACMRandom::DeterministicSeed()); + } + + virtual uint8_t *GetReference(int block_idx) { +#if CONFIG_HIGHBITDEPTH + if (use_high_bit_depth_) + return 
CONVERT_TO_BYTEPTR(CONVERT_TO_SHORTPTR(reference_data_) + + block_idx * kDataBlockSize); +#endif // CONFIG_HIGHBITDEPTH + return reference_data_ + block_idx * kDataBlockSize; + } + + // Sum of Absolute Differences. Given two blocks, calculate the absolute + // difference between two pixels in the same relative location; accumulate. + unsigned int ReferenceSAD(int block_idx) { + unsigned int sad = 0; + const uint8_t *const reference8 = GetReference(block_idx); + const uint8_t *const source8 = source_data_; +#if CONFIG_HIGHBITDEPTH + const uint16_t *const reference16 = + CONVERT_TO_SHORTPTR(GetReference(block_idx)); + const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_); +#endif // CONFIG_HIGHBITDEPTH + for (int h = 0; h < height_; ++h) { + for (int w = 0; w < width_; ++w) { + if (!use_high_bit_depth_) { + sad += abs(source8[h * source_stride_ + w] - + reference8[h * reference_stride_ + w]); +#if CONFIG_HIGHBITDEPTH + } else { + sad += abs(source16[h * source_stride_ + w] - + reference16[h * reference_stride_ + w]); +#endif // CONFIG_HIGHBITDEPTH + } + } + } + return sad; + } + + // Sum of Absolute Differences Average. Given two blocks, and a prediction + // calculate the absolute difference between one pixel and average of the + // corresponding and predicted pixels; accumulate. 
+ unsigned int ReferenceSADavg(int block_idx) { + unsigned int sad = 0; + const uint8_t *const reference8 = GetReference(block_idx); + const uint8_t *const source8 = source_data_; + const uint8_t *const second_pred8 = second_pred_; +#if CONFIG_HIGHBITDEPTH + const uint16_t *const reference16 = + CONVERT_TO_SHORTPTR(GetReference(block_idx)); + const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_); + const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_); +#endif // CONFIG_HIGHBITDEPTH + for (int h = 0; h < height_; ++h) { + for (int w = 0; w < width_; ++w) { + if (!use_high_bit_depth_) { + const int tmp = second_pred8[h * width_ + w] + + reference8[h * reference_stride_ + w]; + const uint8_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1); + sad += abs(source8[h * source_stride_ + w] - comp_pred); +#if CONFIG_HIGHBITDEPTH + } else { + const int tmp = second_pred16[h * width_ + w] + + reference16[h * reference_stride_ + w]; + const uint16_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1); + sad += abs(source16[h * source_stride_ + w] - comp_pred); +#endif // CONFIG_HIGHBITDEPTH + } + } + } + return sad; + } + + void FillConstant(uint8_t *data, int stride, uint16_t fill_constant) { + uint8_t *data8 = data; +#if CONFIG_HIGHBITDEPTH + uint16_t *data16 = CONVERT_TO_SHORTPTR(data); +#endif // CONFIG_HIGHBITDEPTH + for (int h = 0; h < height_; ++h) { + for (int w = 0; w < width_; ++w) { + if (!use_high_bit_depth_) { + data8[h * stride + w] = static_cast<uint8_t>(fill_constant); +#if CONFIG_HIGHBITDEPTH + } else { + data16[h * stride + w] = fill_constant; +#endif // CONFIG_HIGHBITDEPTH + } + } + } + } + + void FillRandom(uint8_t *data, int stride) { + uint8_t *data8 = data; +#if CONFIG_HIGHBITDEPTH + uint16_t *data16 = CONVERT_TO_SHORTPTR(data); +#endif // CONFIG_HIGHBITDEPTH + for (int h = 0; h < height_; ++h) { + for (int w = 0; w < width_; ++w) { + if (!use_high_bit_depth_) { + data8[h * stride + w] = rnd_.Rand8(); +#if CONFIG_HIGHBITDEPTH + } else { + 
data16[h * stride + w] = rnd_.Rand16() & mask_; +#endif // CONFIG_HIGHBITDEPTH + } + } + } + } + + int width_, height_, mask_, bd_; + aom_bit_depth_t bit_depth_; + static uint8_t *source_data_; + static uint8_t *reference_data_; + static uint8_t *second_pred_; + int source_stride_; + bool use_high_bit_depth_; + static uint8_t *source_data8_; + static uint8_t *reference_data8_; + static uint8_t *second_pred8_; + static uint16_t *source_data16_; + static uint16_t *reference_data16_; + static uint16_t *second_pred16_; + int reference_stride_; + + ACMRandom rnd_; +}; + +class SADx4Test : public SADTestBase, + public ::testing::WithParamInterface<SadMxNx4Param> { + public: + SADx4Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {} + + protected: + void SADs(unsigned int *results) { + const uint8_t *references[] = { GetReference(0), GetReference(1), + GetReference(2), GetReference(3) }; + + ASM_REGISTER_STATE_CHECK(GET_PARAM(2)( + source_data_, source_stride_, references, reference_stride_, results)); + } + + void CheckSADs() { + unsigned int reference_sad, exp_sad[4]; + + SADs(exp_sad); + for (int block = 0; block < 4; ++block) { + reference_sad = ReferenceSAD(block); + + EXPECT_EQ(reference_sad, exp_sad[block]) << "block " << block; + } + } +}; + +class SADTest : public SADTestBase, + public ::testing::WithParamInterface<SadMxNParam> { + public: + SADTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {} + + protected: + unsigned int SAD(int block_idx) { + unsigned int ret; + const uint8_t *const reference = GetReference(block_idx); + + ASM_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_, + reference, reference_stride_)); + return ret; + } + + void CheckSAD() { + const unsigned int reference_sad = ReferenceSAD(0); + const unsigned int exp_sad = SAD(0); + + ASSERT_EQ(reference_sad, exp_sad); + } + + void SpeedSAD() { + int test_count = 20000000; + while (test_count > 0) { + SAD(0); + test_count -= 1; + } + } +}; + +class 
SADavgTest : public SADTestBase, + public ::testing::WithParamInterface<SadMxNAvgParam> { + public: + SADavgTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {} + + protected: + unsigned int SAD_avg(int block_idx) { + unsigned int ret; + const uint8_t *const reference = GetReference(block_idx); + + ASM_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_, + reference, reference_stride_, + second_pred_)); + return ret; + } + + void CheckSAD() { + const unsigned int reference_sad = ReferenceSADavg(0); + const unsigned int exp_sad = SAD_avg(0); + + ASSERT_EQ(reference_sad, exp_sad); + } +}; + +uint8_t *SADTestBase::source_data_ = NULL; +uint8_t *SADTestBase::reference_data_ = NULL; +uint8_t *SADTestBase::second_pred_ = NULL; +uint8_t *SADTestBase::source_data8_ = NULL; +uint8_t *SADTestBase::reference_data8_ = NULL; +uint8_t *SADTestBase::second_pred8_ = NULL; +uint16_t *SADTestBase::source_data16_ = NULL; +uint16_t *SADTestBase::reference_data16_ = NULL; +uint16_t *SADTestBase::second_pred16_ = NULL; + +TEST_P(SADTest, MaxRef) { + FillConstant(source_data_, source_stride_, 0); + FillConstant(reference_data_, reference_stride_, mask_); + CheckSAD(); +} + +TEST_P(SADTest, MaxSrc) { + FillConstant(source_data_, source_stride_, mask_); + FillConstant(reference_data_, reference_stride_, 0); + CheckSAD(); +} + +TEST_P(SADTest, ShortRef) { + const int tmp_stride = reference_stride_; + reference_stride_ >>= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + CheckSAD(); + reference_stride_ = tmp_stride; +} + +TEST_P(SADTest, UnalignedRef) { + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches. 
+ const int tmp_stride = reference_stride_; + reference_stride_ -= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + CheckSAD(); + reference_stride_ = tmp_stride; +} + +TEST_P(SADTest, ShortSrc) { + const int tmp_stride = source_stride_; + source_stride_ >>= 1; + int test_count = 2000; + while (test_count > 0) { + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + CheckSAD(); + test_count -= 1; + } + source_stride_ = tmp_stride; +} + +#define SPEED_TEST (0) +#if SPEED_TEST +TEST_P(SADTest, Speed) { + const int tmp_stride = source_stride_; + source_stride_ >>= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + SpeedSAD(); + source_stride_ = tmp_stride; +} +#endif + +TEST_P(SADavgTest, MaxRef) { + FillConstant(source_data_, source_stride_, 0); + FillConstant(reference_data_, reference_stride_, mask_); + FillConstant(second_pred_, width_, 0); + CheckSAD(); +} +TEST_P(SADavgTest, MaxSrc) { + FillConstant(source_data_, source_stride_, mask_); + FillConstant(reference_data_, reference_stride_, 0); + FillConstant(second_pred_, width_, 0); + CheckSAD(); +} + +TEST_P(SADavgTest, ShortRef) { + const int tmp_stride = reference_stride_; + reference_stride_ >>= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + FillRandom(second_pred_, width_); + CheckSAD(); + reference_stride_ = tmp_stride; +} + +TEST_P(SADavgTest, UnalignedRef) { + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches. 
+ const int tmp_stride = reference_stride_; + reference_stride_ -= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + FillRandom(second_pred_, width_); + CheckSAD(); + reference_stride_ = tmp_stride; +} + +TEST_P(SADavgTest, ShortSrc) { + const int tmp_stride = source_stride_; + source_stride_ >>= 1; + int test_count = 2000; + while (test_count > 0) { + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + FillRandom(second_pred_, width_); + CheckSAD(); + test_count -= 1; + } + source_stride_ = tmp_stride; +} + +TEST_P(SADx4Test, MaxRef) { + FillConstant(source_data_, source_stride_, 0); + FillConstant(GetReference(0), reference_stride_, mask_); + FillConstant(GetReference(1), reference_stride_, mask_); + FillConstant(GetReference(2), reference_stride_, mask_); + FillConstant(GetReference(3), reference_stride_, mask_); + CheckSADs(); +} + +TEST_P(SADx4Test, MaxSrc) { + FillConstant(source_data_, source_stride_, mask_); + FillConstant(GetReference(0), reference_stride_, 0); + FillConstant(GetReference(1), reference_stride_, 0); + FillConstant(GetReference(2), reference_stride_, 0); + FillConstant(GetReference(3), reference_stride_, 0); + CheckSADs(); +} + +TEST_P(SADx4Test, ShortRef) { + int tmp_stride = reference_stride_; + reference_stride_ >>= 1; + FillRandom(source_data_, source_stride_); + FillRandom(GetReference(0), reference_stride_); + FillRandom(GetReference(1), reference_stride_); + FillRandom(GetReference(2), reference_stride_); + FillRandom(GetReference(3), reference_stride_); + CheckSADs(); + reference_stride_ = tmp_stride; +} + +TEST_P(SADx4Test, UnalignedRef) { + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches. 
+ int tmp_stride = reference_stride_; + reference_stride_ -= 1; + FillRandom(source_data_, source_stride_); + FillRandom(GetReference(0), reference_stride_); + FillRandom(GetReference(1), reference_stride_); + FillRandom(GetReference(2), reference_stride_); + FillRandom(GetReference(3), reference_stride_); + CheckSADs(); + reference_stride_ = tmp_stride; +} + +TEST_P(SADx4Test, ShortSrc) { + int tmp_stride = source_stride_; + source_stride_ >>= 1; + int test_count = 1000; + while (test_count > 0) { + FillRandom(source_data_, source_stride_); + FillRandom(GetReference(0), reference_stride_); + FillRandom(GetReference(1), reference_stride_); + FillRandom(GetReference(2), reference_stride_); + FillRandom(GetReference(3), reference_stride_); + CheckSADs(); + test_count -= 1; + } + source_stride_ = tmp_stride; +} + +TEST_P(SADx4Test, SrcAlignedByWidth) { + uint8_t *tmp_source_data = source_data_; + source_data_ += width_; + FillRandom(source_data_, source_stride_); + FillRandom(GetReference(0), reference_stride_); + FillRandom(GetReference(1), reference_stride_); + FillRandom(GetReference(2), reference_stride_); + FillRandom(GetReference(3), reference_stride_); + CheckSADs(); + source_data_ = tmp_source_data; +} + +using std::tr1::make_tuple; + +//------------------------------------------------------------------------------ +// C functions +const SadMxNParam c_tests[] = { +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(128, 128, &aom_sad128x128_c, -1), + make_tuple(128, 64, &aom_sad128x64_c, -1), + make_tuple(64, 128, &aom_sad64x128_c, -1), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(64, 64, &aom_sad64x64_c, -1), + make_tuple(64, 32, &aom_sad64x32_c, -1), + make_tuple(32, 64, &aom_sad32x64_c, -1), + make_tuple(32, 32, &aom_sad32x32_c, -1), + make_tuple(32, 16, &aom_sad32x16_c, -1), + make_tuple(16, 32, &aom_sad16x32_c, -1), + make_tuple(16, 16, &aom_sad16x16_c, -1), + make_tuple(16, 8, &aom_sad16x8_c, -1), + make_tuple(8, 16, &aom_sad8x16_c, -1), + 
make_tuple(8, 8, &aom_sad8x8_c, -1), + make_tuple(8, 4, &aom_sad8x4_c, -1), + make_tuple(4, 8, &aom_sad4x8_c, -1), + make_tuple(4, 4, &aom_sad4x4_c, -1), +#if CONFIG_HIGHBITDEPTH +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(128, 128, &aom_highbd_sad128x128_c, 8), + make_tuple(128, 64, &aom_highbd_sad128x64_c, 8), + make_tuple(64, 128, &aom_highbd_sad64x128_c, 8), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(64, 64, &aom_highbd_sad64x64_c, 8), + make_tuple(64, 32, &aom_highbd_sad64x32_c, 8), + make_tuple(32, 64, &aom_highbd_sad32x64_c, 8), + make_tuple(32, 32, &aom_highbd_sad32x32_c, 8), + make_tuple(32, 16, &aom_highbd_sad32x16_c, 8), + make_tuple(16, 32, &aom_highbd_sad16x32_c, 8), + make_tuple(16, 16, &aom_highbd_sad16x16_c, 8), + make_tuple(16, 8, &aom_highbd_sad16x8_c, 8), + make_tuple(8, 16, &aom_highbd_sad8x16_c, 8), + make_tuple(8, 8, &aom_highbd_sad8x8_c, 8), + make_tuple(8, 4, &aom_highbd_sad8x4_c, 8), + make_tuple(4, 8, &aom_highbd_sad4x8_c, 8), + make_tuple(4, 4, &aom_highbd_sad4x4_c, 8), +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(128, 128, &aom_highbd_sad128x128_c, 10), + make_tuple(128, 64, &aom_highbd_sad128x64_c, 10), + make_tuple(64, 128, &aom_highbd_sad64x128_c, 10), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(64, 64, &aom_highbd_sad64x64_c, 10), + make_tuple(64, 32, &aom_highbd_sad64x32_c, 10), + make_tuple(32, 64, &aom_highbd_sad32x64_c, 10), + make_tuple(32, 32, &aom_highbd_sad32x32_c, 10), + make_tuple(32, 16, &aom_highbd_sad32x16_c, 10), + make_tuple(16, 32, &aom_highbd_sad16x32_c, 10), + make_tuple(16, 16, &aom_highbd_sad16x16_c, 10), + make_tuple(16, 8, &aom_highbd_sad16x8_c, 10), + make_tuple(8, 16, &aom_highbd_sad8x16_c, 10), + make_tuple(8, 8, &aom_highbd_sad8x8_c, 10), + make_tuple(8, 4, &aom_highbd_sad8x4_c, 10), + make_tuple(4, 8, &aom_highbd_sad4x8_c, 10), + make_tuple(4, 4, &aom_highbd_sad4x4_c, 10), +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(128, 128, &aom_highbd_sad128x128_c, 
12), + make_tuple(128, 64, &aom_highbd_sad128x64_c, 12), + make_tuple(64, 128, &aom_highbd_sad64x128_c, 12), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(64, 64, &aom_highbd_sad64x64_c, 12), + make_tuple(64, 32, &aom_highbd_sad64x32_c, 12), + make_tuple(32, 64, &aom_highbd_sad32x64_c, 12), + make_tuple(32, 32, &aom_highbd_sad32x32_c, 12), + make_tuple(32, 16, &aom_highbd_sad32x16_c, 12), + make_tuple(16, 32, &aom_highbd_sad16x32_c, 12), + make_tuple(16, 16, &aom_highbd_sad16x16_c, 12), + make_tuple(16, 8, &aom_highbd_sad16x8_c, 12), + make_tuple(8, 16, &aom_highbd_sad8x16_c, 12), + make_tuple(8, 8, &aom_highbd_sad8x8_c, 12), + make_tuple(8, 4, &aom_highbd_sad8x4_c, 12), + make_tuple(4, 8, &aom_highbd_sad4x8_c, 12), + make_tuple(4, 4, &aom_highbd_sad4x4_c, 12), +#endif // CONFIG_HIGHBITDEPTH +}; +INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::ValuesIn(c_tests)); + +const SadMxNAvgParam avg_c_tests[] = { +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(128, 128, &aom_sad128x128_avg_c, -1), + make_tuple(128, 64, &aom_sad128x64_avg_c, -1), + make_tuple(64, 128, &aom_sad64x128_avg_c, -1), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(64, 64, &aom_sad64x64_avg_c, -1), + make_tuple(64, 32, &aom_sad64x32_avg_c, -1), + make_tuple(32, 64, &aom_sad32x64_avg_c, -1), + make_tuple(32, 32, &aom_sad32x32_avg_c, -1), + make_tuple(32, 16, &aom_sad32x16_avg_c, -1), + make_tuple(16, 32, &aom_sad16x32_avg_c, -1), + make_tuple(16, 16, &aom_sad16x16_avg_c, -1), + make_tuple(16, 8, &aom_sad16x8_avg_c, -1), + make_tuple(8, 16, &aom_sad8x16_avg_c, -1), + make_tuple(8, 8, &aom_sad8x8_avg_c, -1), + make_tuple(8, 4, &aom_sad8x4_avg_c, -1), + make_tuple(4, 8, &aom_sad4x8_avg_c, -1), + make_tuple(4, 4, &aom_sad4x4_avg_c, -1), +#if CONFIG_HIGHBITDEPTH +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(128, 128, &aom_highbd_sad128x128_avg_c, 8), + make_tuple(128, 64, &aom_highbd_sad128x64_avg_c, 8), + make_tuple(64, 128, &aom_highbd_sad64x128_avg_c, 8), +#endif // 
CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(64, 64, &aom_highbd_sad64x64_avg_c, 8), + make_tuple(64, 32, &aom_highbd_sad64x32_avg_c, 8), + make_tuple(32, 64, &aom_highbd_sad32x64_avg_c, 8), + make_tuple(32, 32, &aom_highbd_sad32x32_avg_c, 8), + make_tuple(32, 16, &aom_highbd_sad32x16_avg_c, 8), + make_tuple(16, 32, &aom_highbd_sad16x32_avg_c, 8), + make_tuple(16, 16, &aom_highbd_sad16x16_avg_c, 8), + make_tuple(16, 8, &aom_highbd_sad16x8_avg_c, 8), + make_tuple(8, 16, &aom_highbd_sad8x16_avg_c, 8), + make_tuple(8, 8, &aom_highbd_sad8x8_avg_c, 8), + make_tuple(8, 4, &aom_highbd_sad8x4_avg_c, 8), + make_tuple(4, 8, &aom_highbd_sad4x8_avg_c, 8), + make_tuple(4, 4, &aom_highbd_sad4x4_avg_c, 8), +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(128, 128, &aom_highbd_sad128x128_avg_c, 10), + make_tuple(128, 64, &aom_highbd_sad128x64_avg_c, 10), + make_tuple(64, 128, &aom_highbd_sad64x128_avg_c, 10), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(64, 64, &aom_highbd_sad64x64_avg_c, 10), + make_tuple(64, 32, &aom_highbd_sad64x32_avg_c, 10), + make_tuple(32, 64, &aom_highbd_sad32x64_avg_c, 10), + make_tuple(32, 32, &aom_highbd_sad32x32_avg_c, 10), + make_tuple(32, 16, &aom_highbd_sad32x16_avg_c, 10), + make_tuple(16, 32, &aom_highbd_sad16x32_avg_c, 10), + make_tuple(16, 16, &aom_highbd_sad16x16_avg_c, 10), + make_tuple(16, 8, &aom_highbd_sad16x8_avg_c, 10), + make_tuple(8, 16, &aom_highbd_sad8x16_avg_c, 10), + make_tuple(8, 8, &aom_highbd_sad8x8_avg_c, 10), + make_tuple(8, 4, &aom_highbd_sad8x4_avg_c, 10), + make_tuple(4, 8, &aom_highbd_sad4x8_avg_c, 10), + make_tuple(4, 4, &aom_highbd_sad4x4_avg_c, 10), +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(128, 128, &aom_highbd_sad128x128_avg_c, 12), + make_tuple(128, 64, &aom_highbd_sad128x64_avg_c, 12), + make_tuple(64, 128, &aom_highbd_sad64x128_avg_c, 12), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(64, 64, &aom_highbd_sad64x64_avg_c, 12), + make_tuple(64, 32, 
&aom_highbd_sad64x32_avg_c, 12), + make_tuple(32, 64, &aom_highbd_sad32x64_avg_c, 12), + make_tuple(32, 32, &aom_highbd_sad32x32_avg_c, 12), + make_tuple(32, 16, &aom_highbd_sad32x16_avg_c, 12), + make_tuple(16, 32, &aom_highbd_sad16x32_avg_c, 12), + make_tuple(16, 16, &aom_highbd_sad16x16_avg_c, 12), + make_tuple(16, 8, &aom_highbd_sad16x8_avg_c, 12), + make_tuple(8, 16, &aom_highbd_sad8x16_avg_c, 12), + make_tuple(8, 8, &aom_highbd_sad8x8_avg_c, 12), + make_tuple(8, 4, &aom_highbd_sad8x4_avg_c, 12), + make_tuple(4, 8, &aom_highbd_sad4x8_avg_c, 12), + make_tuple(4, 4, &aom_highbd_sad4x4_avg_c, 12), +#endif // CONFIG_HIGHBITDEPTH +}; +INSTANTIATE_TEST_CASE_P(C, SADavgTest, ::testing::ValuesIn(avg_c_tests)); + +const SadMxNx4Param x4d_c_tests[] = { +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(128, 128, &aom_sad128x128x4d_c, -1), + make_tuple(128, 64, &aom_sad128x64x4d_c, -1), + make_tuple(64, 128, &aom_sad64x128x4d_c, -1), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(64, 64, &aom_sad64x64x4d_c, -1), + make_tuple(64, 32, &aom_sad64x32x4d_c, -1), + make_tuple(32, 64, &aom_sad32x64x4d_c, -1), + make_tuple(32, 32, &aom_sad32x32x4d_c, -1), + make_tuple(32, 16, &aom_sad32x16x4d_c, -1), + make_tuple(16, 32, &aom_sad16x32x4d_c, -1), + make_tuple(16, 16, &aom_sad16x16x4d_c, -1), + make_tuple(16, 8, &aom_sad16x8x4d_c, -1), + make_tuple(8, 16, &aom_sad8x16x4d_c, -1), + make_tuple(8, 8, &aom_sad8x8x4d_c, -1), + make_tuple(8, 4, &aom_sad8x4x4d_c, -1), + make_tuple(4, 8, &aom_sad4x8x4d_c, -1), + make_tuple(4, 4, &aom_sad4x4x4d_c, -1), +#if CONFIG_HIGHBITDEPTH +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(128, 128, &aom_highbd_sad128x128x4d_c, 8), + make_tuple(128, 64, &aom_highbd_sad128x64x4d_c, 8), + make_tuple(64, 128, &aom_highbd_sad64x128x4d_c, 8), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(64, 64, &aom_highbd_sad64x64x4d_c, 8), + make_tuple(64, 32, &aom_highbd_sad64x32x4d_c, 8), + make_tuple(32, 64, &aom_highbd_sad32x64x4d_c, 8), 
+ make_tuple(32, 32, &aom_highbd_sad32x32x4d_c, 8), + make_tuple(32, 16, &aom_highbd_sad32x16x4d_c, 8), + make_tuple(16, 32, &aom_highbd_sad16x32x4d_c, 8), + make_tuple(16, 16, &aom_highbd_sad16x16x4d_c, 8), + make_tuple(16, 8, &aom_highbd_sad16x8x4d_c, 8), + make_tuple(8, 16, &aom_highbd_sad8x16x4d_c, 8), + make_tuple(8, 8, &aom_highbd_sad8x8x4d_c, 8), + make_tuple(8, 4, &aom_highbd_sad8x4x4d_c, 8), + make_tuple(4, 8, &aom_highbd_sad4x8x4d_c, 8), + make_tuple(4, 4, &aom_highbd_sad4x4x4d_c, 8), +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(128, 128, &aom_highbd_sad128x128x4d_c, 10), + make_tuple(128, 64, &aom_highbd_sad128x64x4d_c, 10), + make_tuple(64, 128, &aom_highbd_sad64x128x4d_c, 10), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(64, 64, &aom_highbd_sad64x64x4d_c, 10), + make_tuple(64, 32, &aom_highbd_sad64x32x4d_c, 10), + make_tuple(32, 64, &aom_highbd_sad32x64x4d_c, 10), + make_tuple(32, 32, &aom_highbd_sad32x32x4d_c, 10), + make_tuple(32, 16, &aom_highbd_sad32x16x4d_c, 10), + make_tuple(16, 32, &aom_highbd_sad16x32x4d_c, 10), + make_tuple(16, 16, &aom_highbd_sad16x16x4d_c, 10), + make_tuple(16, 8, &aom_highbd_sad16x8x4d_c, 10), + make_tuple(8, 16, &aom_highbd_sad8x16x4d_c, 10), + make_tuple(8, 8, &aom_highbd_sad8x8x4d_c, 10), + make_tuple(8, 4, &aom_highbd_sad8x4x4d_c, 10), + make_tuple(4, 8, &aom_highbd_sad4x8x4d_c, 10), + make_tuple(4, 4, &aom_highbd_sad4x4x4d_c, 10), +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(128, 128, &aom_highbd_sad128x128x4d_c, 12), + make_tuple(128, 64, &aom_highbd_sad128x64x4d_c, 12), + make_tuple(64, 128, &aom_highbd_sad64x128x4d_c, 12), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(64, 64, &aom_highbd_sad64x64x4d_c, 12), + make_tuple(64, 32, &aom_highbd_sad64x32x4d_c, 12), + make_tuple(32, 64, &aom_highbd_sad32x64x4d_c, 12), + make_tuple(32, 32, &aom_highbd_sad32x32x4d_c, 12), + make_tuple(32, 16, &aom_highbd_sad32x16x4d_c, 12), + make_tuple(16, 32, &aom_highbd_sad16x32x4d_c, 12), + 
make_tuple(16, 16, &aom_highbd_sad16x16x4d_c, 12), + make_tuple(16, 8, &aom_highbd_sad16x8x4d_c, 12), + make_tuple(8, 16, &aom_highbd_sad8x16x4d_c, 12), + make_tuple(8, 8, &aom_highbd_sad8x8x4d_c, 12), + make_tuple(8, 4, &aom_highbd_sad8x4x4d_c, 12), + make_tuple(4, 8, &aom_highbd_sad4x8x4d_c, 12), + make_tuple(4, 4, &aom_highbd_sad4x4x4d_c, 12), +#endif // CONFIG_HIGHBITDEPTH +}; +INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::ValuesIn(x4d_c_tests)); + +//------------------------------------------------------------------------------ +// ARM functions +#if HAVE_MEDIA +const SadMxNParam media_tests[] = { + make_tuple(16, 16, &aom_sad16x16_media, -1), +}; +INSTANTIATE_TEST_CASE_P(MEDIA, SADTest, ::testing::ValuesIn(media_tests)); +#endif // HAVE_MEDIA + +#if HAVE_NEON +const SadMxNParam neon_tests[] = { + make_tuple(64, 64, &aom_sad64x64_neon, -1), + make_tuple(32, 32, &aom_sad32x32_neon, -1), + make_tuple(16, 16, &aom_sad16x16_neon, -1), + make_tuple(16, 8, &aom_sad16x8_neon, -1), + make_tuple(8, 16, &aom_sad8x16_neon, -1), + make_tuple(8, 8, &aom_sad8x8_neon, -1), + make_tuple(4, 4, &aom_sad4x4_neon, -1), +}; +INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::ValuesIn(neon_tests)); + +const SadMxNx4Param x4d_neon_tests[] = { + make_tuple(64, 64, &aom_sad64x64x4d_neon, -1), + make_tuple(32, 32, &aom_sad32x32x4d_neon, -1), + make_tuple(16, 16, &aom_sad16x16x4d_neon, -1), +}; +INSTANTIATE_TEST_CASE_P(NEON, SADx4Test, ::testing::ValuesIn(x4d_neon_tests)); +#endif // HAVE_NEON + +//------------------------------------------------------------------------------ +// x86 functions +#if HAVE_SSE2 +const SadMxNParam sse2_tests[] = { +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(128, 128, &aom_sad128x128_sse2, -1), + make_tuple(128, 64, &aom_sad128x64_sse2, -1), + make_tuple(64, 128, &aom_sad64x128_sse2, -1), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(64, 64, &aom_sad64x64_sse2, -1), + make_tuple(64, 32, &aom_sad64x32_sse2, -1), + make_tuple(32, 64, 
&aom_sad32x64_sse2, -1), + make_tuple(32, 32, &aom_sad32x32_sse2, -1), + make_tuple(32, 16, &aom_sad32x16_sse2, -1), + make_tuple(16, 32, &aom_sad16x32_sse2, -1), + make_tuple(16, 16, &aom_sad16x16_sse2, -1), + make_tuple(16, 8, &aom_sad16x8_sse2, -1), + make_tuple(8, 16, &aom_sad8x16_sse2, -1), + make_tuple(8, 8, &aom_sad8x8_sse2, -1), + make_tuple(8, 4, &aom_sad8x4_sse2, -1), + make_tuple(4, 8, &aom_sad4x8_sse2, -1), + make_tuple(4, 4, &aom_sad4x4_sse2, -1), +#if CONFIG_HIGHBITDEPTH + make_tuple(64, 64, &aom_highbd_sad64x64_sse2, 8), + make_tuple(64, 32, &aom_highbd_sad64x32_sse2, 8), + make_tuple(32, 64, &aom_highbd_sad32x64_sse2, 8), + make_tuple(32, 32, &aom_highbd_sad32x32_sse2, 8), + make_tuple(32, 16, &aom_highbd_sad32x16_sse2, 8), + make_tuple(16, 32, &aom_highbd_sad16x32_sse2, 8), + make_tuple(16, 16, &aom_highbd_sad16x16_sse2, 8), + make_tuple(16, 8, &aom_highbd_sad16x8_sse2, 8), + make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 8), + make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 8), + make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 8), + make_tuple(64, 64, &aom_highbd_sad64x64_sse2, 10), + make_tuple(64, 32, &aom_highbd_sad64x32_sse2, 10), + make_tuple(32, 64, &aom_highbd_sad32x64_sse2, 10), + make_tuple(32, 32, &aom_highbd_sad32x32_sse2, 10), + make_tuple(32, 16, &aom_highbd_sad32x16_sse2, 10), + make_tuple(16, 32, &aom_highbd_sad16x32_sse2, 10), + make_tuple(16, 16, &aom_highbd_sad16x16_sse2, 10), + make_tuple(16, 8, &aom_highbd_sad16x8_sse2, 10), + make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 10), + make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 10), + make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 10), + make_tuple(64, 64, &aom_highbd_sad64x64_sse2, 12), + make_tuple(64, 32, &aom_highbd_sad64x32_sse2, 12), + make_tuple(32, 64, &aom_highbd_sad32x64_sse2, 12), + make_tuple(32, 32, &aom_highbd_sad32x32_sse2, 12), + make_tuple(32, 16, &aom_highbd_sad32x16_sse2, 12), + make_tuple(16, 32, &aom_highbd_sad16x32_sse2, 12), + make_tuple(16, 16, &aom_highbd_sad16x16_sse2, 12), + 
make_tuple(16, 8, &aom_highbd_sad16x8_sse2, 12), + make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 12), + make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 12), + make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 12), +#endif // CONFIG_HIGHBITDEPTH +}; +INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests)); + +const SadMxNAvgParam avg_sse2_tests[] = { +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(128, 128, &aom_sad128x128_avg_sse2, -1), + make_tuple(128, 64, &aom_sad128x64_avg_sse2, -1), + make_tuple(64, 128, &aom_sad64x128_avg_sse2, -1), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(64, 64, &aom_sad64x64_avg_sse2, -1), + make_tuple(64, 32, &aom_sad64x32_avg_sse2, -1), + make_tuple(32, 64, &aom_sad32x64_avg_sse2, -1), + make_tuple(32, 32, &aom_sad32x32_avg_sse2, -1), + make_tuple(32, 16, &aom_sad32x16_avg_sse2, -1), + make_tuple(16, 32, &aom_sad16x32_avg_sse2, -1), + make_tuple(16, 16, &aom_sad16x16_avg_sse2, -1), + make_tuple(16, 8, &aom_sad16x8_avg_sse2, -1), + make_tuple(8, 16, &aom_sad8x16_avg_sse2, -1), + make_tuple(8, 8, &aom_sad8x8_avg_sse2, -1), + make_tuple(8, 4, &aom_sad8x4_avg_sse2, -1), + make_tuple(4, 8, &aom_sad4x8_avg_sse2, -1), + make_tuple(4, 4, &aom_sad4x4_avg_sse2, -1), +#if CONFIG_HIGHBITDEPTH + make_tuple(64, 64, &aom_highbd_sad64x64_avg_sse2, 8), + make_tuple(64, 32, &aom_highbd_sad64x32_avg_sse2, 8), + make_tuple(32, 64, &aom_highbd_sad32x64_avg_sse2, 8), + make_tuple(32, 32, &aom_highbd_sad32x32_avg_sse2, 8), + make_tuple(32, 16, &aom_highbd_sad32x16_avg_sse2, 8), + make_tuple(16, 32, &aom_highbd_sad16x32_avg_sse2, 8), + make_tuple(16, 16, &aom_highbd_sad16x16_avg_sse2, 8), + make_tuple(16, 8, &aom_highbd_sad16x8_avg_sse2, 8), + make_tuple(8, 16, &aom_highbd_sad8x16_avg_sse2, 8), + make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 8), + make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 8), + make_tuple(64, 64, &aom_highbd_sad64x64_avg_sse2, 10), + make_tuple(64, 32, &aom_highbd_sad64x32_avg_sse2, 10), + make_tuple(32, 64, 
&aom_highbd_sad32x64_avg_sse2, 10), + make_tuple(32, 32, &aom_highbd_sad32x32_avg_sse2, 10), + make_tuple(32, 16, &aom_highbd_sad32x16_avg_sse2, 10), + make_tuple(16, 32, &aom_highbd_sad16x32_avg_sse2, 10), + make_tuple(16, 16, &aom_highbd_sad16x16_avg_sse2, 10), + make_tuple(16, 8, &aom_highbd_sad16x8_avg_sse2, 10), + make_tuple(8, 16, &aom_highbd_sad8x16_avg_sse2, 10), + make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 10), + make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 10), + make_tuple(64, 64, &aom_highbd_sad64x64_avg_sse2, 12), + make_tuple(64, 32, &aom_highbd_sad64x32_avg_sse2, 12), + make_tuple(32, 64, &aom_highbd_sad32x64_avg_sse2, 12), + make_tuple(32, 32, &aom_highbd_sad32x32_avg_sse2, 12), + make_tuple(32, 16, &aom_highbd_sad32x16_avg_sse2, 12), + make_tuple(16, 32, &aom_highbd_sad16x32_avg_sse2, 12), + make_tuple(16, 16, &aom_highbd_sad16x16_avg_sse2, 12), + make_tuple(16, 8, &aom_highbd_sad16x8_avg_sse2, 12), + make_tuple(8, 16, &aom_highbd_sad8x16_avg_sse2, 12), + make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 12), + make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 12), +#endif // CONFIG_HIGHBITDEPTH +}; +INSTANTIATE_TEST_CASE_P(SSE2, SADavgTest, ::testing::ValuesIn(avg_sse2_tests)); + +const SadMxNx4Param x4d_sse2_tests[] = { +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(128, 128, &aom_sad128x128x4d_sse2, -1), + make_tuple(128, 64, &aom_sad128x64x4d_sse2, -1), + make_tuple(64, 128, &aom_sad64x128x4d_sse2, -1), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(64, 64, &aom_sad64x64x4d_sse2, -1), + make_tuple(64, 32, &aom_sad64x32x4d_sse2, -1), + make_tuple(32, 64, &aom_sad32x64x4d_sse2, -1), + make_tuple(32, 32, &aom_sad32x32x4d_sse2, -1), + make_tuple(32, 16, &aom_sad32x16x4d_sse2, -1), + make_tuple(16, 32, &aom_sad16x32x4d_sse2, -1), + make_tuple(16, 16, &aom_sad16x16x4d_sse2, -1), + make_tuple(16, 8, &aom_sad16x8x4d_sse2, -1), + make_tuple(8, 16, &aom_sad8x16x4d_sse2, -1), + make_tuple(8, 8, &aom_sad8x8x4d_sse2, -1), + make_tuple(8, 4, 
&aom_sad8x4x4d_sse2, -1), + make_tuple(4, 8, &aom_sad4x8x4d_sse2, -1), + make_tuple(4, 4, &aom_sad4x4x4d_sse2, -1), +#if CONFIG_HIGHBITDEPTH + make_tuple(64, 64, &aom_highbd_sad64x64x4d_sse2, 8), + make_tuple(64, 32, &aom_highbd_sad64x32x4d_sse2, 8), + make_tuple(32, 64, &aom_highbd_sad32x64x4d_sse2, 8), + make_tuple(32, 32, &aom_highbd_sad32x32x4d_sse2, 8), + make_tuple(32, 16, &aom_highbd_sad32x16x4d_sse2, 8), + make_tuple(16, 32, &aom_highbd_sad16x32x4d_sse2, 8), + make_tuple(16, 16, &aom_highbd_sad16x16x4d_sse2, 8), + make_tuple(16, 8, &aom_highbd_sad16x8x4d_sse2, 8), + make_tuple(8, 16, &aom_highbd_sad8x16x4d_sse2, 8), + make_tuple(8, 8, &aom_highbd_sad8x8x4d_sse2, 8), + make_tuple(8, 4, &aom_highbd_sad8x4x4d_sse2, 8), + make_tuple(4, 8, &aom_highbd_sad4x8x4d_sse2, 8), + make_tuple(4, 4, &aom_highbd_sad4x4x4d_sse2, 8), + make_tuple(64, 64, &aom_highbd_sad64x64x4d_sse2, 10), + make_tuple(64, 32, &aom_highbd_sad64x32x4d_sse2, 10), + make_tuple(32, 64, &aom_highbd_sad32x64x4d_sse2, 10), + make_tuple(32, 32, &aom_highbd_sad32x32x4d_sse2, 10), + make_tuple(32, 16, &aom_highbd_sad32x16x4d_sse2, 10), + make_tuple(16, 32, &aom_highbd_sad16x32x4d_sse2, 10), + make_tuple(16, 16, &aom_highbd_sad16x16x4d_sse2, 10), + make_tuple(16, 8, &aom_highbd_sad16x8x4d_sse2, 10), + make_tuple(8, 16, &aom_highbd_sad8x16x4d_sse2, 10), + make_tuple(8, 8, &aom_highbd_sad8x8x4d_sse2, 10), + make_tuple(8, 4, &aom_highbd_sad8x4x4d_sse2, 10), + make_tuple(4, 8, &aom_highbd_sad4x8x4d_sse2, 10), + make_tuple(4, 4, &aom_highbd_sad4x4x4d_sse2, 10), + make_tuple(64, 64, &aom_highbd_sad64x64x4d_sse2, 12), + make_tuple(64, 32, &aom_highbd_sad64x32x4d_sse2, 12), + make_tuple(32, 64, &aom_highbd_sad32x64x4d_sse2, 12), + make_tuple(32, 32, &aom_highbd_sad32x32x4d_sse2, 12), + make_tuple(32, 16, &aom_highbd_sad32x16x4d_sse2, 12), + make_tuple(16, 32, &aom_highbd_sad16x32x4d_sse2, 12), + make_tuple(16, 16, &aom_highbd_sad16x16x4d_sse2, 12), + make_tuple(16, 8, &aom_highbd_sad16x8x4d_sse2, 12), + 
make_tuple(8, 16, &aom_highbd_sad8x16x4d_sse2, 12), + make_tuple(8, 8, &aom_highbd_sad8x8x4d_sse2, 12), + make_tuple(8, 4, &aom_highbd_sad8x4x4d_sse2, 12), + make_tuple(4, 8, &aom_highbd_sad4x8x4d_sse2, 12), + make_tuple(4, 4, &aom_highbd_sad4x4x4d_sse2, 12), +#endif // CONFIG_HIGHBITDEPTH +}; +INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests)); +#endif // HAVE_SSE2 + +#if HAVE_SSE3 +// Only functions are x3, which do not have tests. +#endif // HAVE_SSE3 + +#if HAVE_SSSE3 +// Only functions are x3, which do not have tests. +#endif // HAVE_SSSE3 + +#if HAVE_SSE4_1 +// Only functions are x8, which do not have tests. +#endif // HAVE_SSE4_1 + +#if HAVE_AVX2 +const SadMxNParam avx2_tests[] = { +#if CONFIG_EXT_PARTITION + make_tuple(64, 128, &aom_sad64x128_avx2, -1), + make_tuple(128, 64, &aom_sad128x64_avx2, -1), + make_tuple(128, 128, &aom_sad128x128_avx2, -1), +#endif + make_tuple(64, 64, &aom_sad64x64_avx2, -1), + make_tuple(64, 32, &aom_sad64x32_avx2, -1), + make_tuple(32, 64, &aom_sad32x64_avx2, -1), + make_tuple(32, 32, &aom_sad32x32_avx2, -1), + make_tuple(32, 16, &aom_sad32x16_avx2, -1), +#if CONFIG_HIGHBITDEPTH +#if CONFIG_EXT_PARTITION + make_tuple(128, 128, &aom_highbd_sad128x128_avx2, 8), + make_tuple(128, 128, &aom_highbd_sad128x128_avx2, 10), + make_tuple(128, 128, &aom_highbd_sad128x128_avx2, 12), + make_tuple(128, 64, &aom_highbd_sad128x64_avx2, 8), + make_tuple(128, 64, &aom_highbd_sad128x64_avx2, 10), + make_tuple(128, 64, &aom_highbd_sad128x64_avx2, 12), + make_tuple(64, 128, &aom_highbd_sad64x128_avx2, 8), + make_tuple(64, 128, &aom_highbd_sad64x128_avx2, 10), + make_tuple(64, 128, &aom_highbd_sad64x128_avx2, 12), +#endif + make_tuple(64, 64, &aom_highbd_sad64x64_avx2, 8), + make_tuple(64, 64, &aom_highbd_sad64x64_avx2, 10), + make_tuple(64, 64, &aom_highbd_sad64x64_avx2, 12), + make_tuple(64, 32, &aom_highbd_sad64x32_avx2, 8), + make_tuple(64, 32, &aom_highbd_sad64x32_avx2, 10), + make_tuple(64, 32, 
&aom_highbd_sad64x32_avx2, 12), + make_tuple(32, 64, &aom_highbd_sad32x64_avx2, 8), + make_tuple(32, 64, &aom_highbd_sad32x64_avx2, 10), + make_tuple(32, 64, &aom_highbd_sad32x64_avx2, 12), + make_tuple(32, 32, &aom_highbd_sad32x32_avx2, 8), + make_tuple(32, 32, &aom_highbd_sad32x32_avx2, 10), + make_tuple(32, 32, &aom_highbd_sad32x32_avx2, 12), + make_tuple(32, 16, &aom_highbd_sad32x16_avx2, 8), + make_tuple(32, 16, &aom_highbd_sad32x16_avx2, 10), + make_tuple(32, 16, &aom_highbd_sad32x16_avx2, 12), + make_tuple(16, 32, &aom_highbd_sad16x32_avx2, 8), + make_tuple(16, 32, &aom_highbd_sad16x32_avx2, 10), + make_tuple(16, 32, &aom_highbd_sad16x32_avx2, 12), + make_tuple(16, 16, &aom_highbd_sad16x16_avx2, 8), + make_tuple(16, 16, &aom_highbd_sad16x16_avx2, 10), + make_tuple(16, 16, &aom_highbd_sad16x16_avx2, 12), + make_tuple(16, 8, &aom_highbd_sad16x8_avx2, 8), + make_tuple(16, 8, &aom_highbd_sad16x8_avx2, 10), + make_tuple(16, 8, &aom_highbd_sad16x8_avx2, 12), +#endif +}; +INSTANTIATE_TEST_CASE_P(AVX2, SADTest, ::testing::ValuesIn(avx2_tests)); + +const SadMxNAvgParam avg_avx2_tests[] = { +#if CONFIG_EXT_PARTITION + make_tuple(64, 128, &aom_sad64x128_avg_avx2, -1), + make_tuple(128, 64, &aom_sad128x64_avg_avx2, -1), + make_tuple(128, 128, &aom_sad128x128_avg_avx2, -1), +#endif + make_tuple(64, 64, &aom_sad64x64_avg_avx2, -1), + make_tuple(64, 32, &aom_sad64x32_avg_avx2, -1), + make_tuple(32, 64, &aom_sad32x64_avg_avx2, -1), + make_tuple(32, 32, &aom_sad32x32_avg_avx2, -1), + make_tuple(32, 16, &aom_sad32x16_avg_avx2, -1), +#if CONFIG_HIGHBITDEPTH +#if CONFIG_EXT_PARTITION + make_tuple(128, 128, &aom_highbd_sad128x128_avg_avx2, 8), + make_tuple(128, 128, &aom_highbd_sad128x128_avg_avx2, 10), + make_tuple(128, 128, &aom_highbd_sad128x128_avg_avx2, 12), + make_tuple(128, 64, &aom_highbd_sad128x64_avg_avx2, 8), + make_tuple(128, 64, &aom_highbd_sad128x64_avg_avx2, 10), + make_tuple(128, 64, &aom_highbd_sad128x64_avg_avx2, 12), + make_tuple(64, 128, 
&aom_highbd_sad64x128_avg_avx2, 8), + make_tuple(64, 128, &aom_highbd_sad64x128_avg_avx2, 10), + make_tuple(64, 128, &aom_highbd_sad64x128_avg_avx2, 12), +#endif + make_tuple(64, 64, &aom_highbd_sad64x64_avg_avx2, 8), + make_tuple(64, 64, &aom_highbd_sad64x64_avg_avx2, 10), + make_tuple(64, 64, &aom_highbd_sad64x64_avg_avx2, 12), + make_tuple(64, 32, &aom_highbd_sad64x32_avg_avx2, 8), + make_tuple(64, 32, &aom_highbd_sad64x32_avg_avx2, 10), + make_tuple(64, 32, &aom_highbd_sad64x32_avg_avx2, 12), + make_tuple(32, 64, &aom_highbd_sad32x64_avg_avx2, 8), + make_tuple(32, 64, &aom_highbd_sad32x64_avg_avx2, 10), + make_tuple(32, 64, &aom_highbd_sad32x64_avg_avx2, 12), + make_tuple(32, 32, &aom_highbd_sad32x32_avg_avx2, 8), + make_tuple(32, 32, &aom_highbd_sad32x32_avg_avx2, 10), + make_tuple(32, 32, &aom_highbd_sad32x32_avg_avx2, 12), + make_tuple(32, 16, &aom_highbd_sad32x16_avg_avx2, 8), + make_tuple(32, 16, &aom_highbd_sad32x16_avg_avx2, 10), + make_tuple(32, 16, &aom_highbd_sad32x16_avg_avx2, 12), + make_tuple(16, 32, &aom_highbd_sad16x32_avg_avx2, 8), + make_tuple(16, 32, &aom_highbd_sad16x32_avg_avx2, 10), + make_tuple(16, 32, &aom_highbd_sad16x32_avg_avx2, 12), + make_tuple(16, 16, &aom_highbd_sad16x16_avg_avx2, 8), + make_tuple(16, 16, &aom_highbd_sad16x16_avg_avx2, 10), + make_tuple(16, 16, &aom_highbd_sad16x16_avg_avx2, 12), + make_tuple(16, 8, &aom_highbd_sad16x8_avg_avx2, 8), + make_tuple(16, 8, &aom_highbd_sad16x8_avg_avx2, 10), + make_tuple(16, 8, &aom_highbd_sad16x8_avg_avx2, 12), +#endif +}; +INSTANTIATE_TEST_CASE_P(AVX2, SADavgTest, ::testing::ValuesIn(avg_avx2_tests)); + +const SadMxNx4Param x4d_avx2_tests[] = { +#if CONFIG_EXT_PARTITION + make_tuple(64, 128, &aom_sad64x128x4d_avx2, -1), + make_tuple(128, 64, &aom_sad128x64x4d_avx2, -1), + make_tuple(128, 128, &aom_sad128x128x4d_avx2, -1), +#endif + make_tuple(64, 64, &aom_sad64x64x4d_avx2, -1), + make_tuple(32, 64, &aom_sad32x64x4d_avx2, -1), + make_tuple(64, 32, &aom_sad64x32x4d_avx2, -1), + 
make_tuple(32, 32, &aom_sad32x32x4d_avx2, -1), +#if CONFIG_HIGHBITDEPTH +#if CONFIG_EXT_PARTITION + make_tuple(128, 128, &aom_highbd_sad128x128x4d_avx2, 8), + make_tuple(128, 128, &aom_highbd_sad128x128x4d_avx2, 10), + make_tuple(128, 128, &aom_highbd_sad128x128x4d_avx2, 12), + make_tuple(128, 64, &aom_highbd_sad128x64x4d_avx2, 8), + make_tuple(128, 64, &aom_highbd_sad128x64x4d_avx2, 10), + make_tuple(128, 64, &aom_highbd_sad128x64x4d_avx2, 12), + make_tuple(64, 128, &aom_highbd_sad64x128x4d_avx2, 8), + make_tuple(64, 128, &aom_highbd_sad64x128x4d_avx2, 10), + make_tuple(64, 128, &aom_highbd_sad64x128x4d_avx2, 12), +#endif + make_tuple(64, 64, &aom_highbd_sad64x64x4d_avx2, 8), + make_tuple(64, 64, &aom_highbd_sad64x64x4d_avx2, 10), + make_tuple(64, 64, &aom_highbd_sad64x64x4d_avx2, 12), + make_tuple(64, 32, &aom_highbd_sad64x32x4d_avx2, 8), + make_tuple(64, 32, &aom_highbd_sad64x32x4d_avx2, 10), + make_tuple(64, 32, &aom_highbd_sad64x32x4d_avx2, 12), + make_tuple(32, 64, &aom_highbd_sad32x64x4d_avx2, 8), + make_tuple(32, 64, &aom_highbd_sad32x64x4d_avx2, 10), + make_tuple(32, 64, &aom_highbd_sad32x64x4d_avx2, 12), + make_tuple(32, 32, &aom_highbd_sad32x32x4d_avx2, 8), + make_tuple(32, 32, &aom_highbd_sad32x32x4d_avx2, 10), + make_tuple(32, 32, &aom_highbd_sad32x32x4d_avx2, 12), + make_tuple(32, 16, &aom_highbd_sad32x16x4d_avx2, 8), + make_tuple(32, 16, &aom_highbd_sad32x16x4d_avx2, 10), + make_tuple(32, 16, &aom_highbd_sad32x16x4d_avx2, 12), + make_tuple(16, 32, &aom_highbd_sad16x32x4d_avx2, 8), + make_tuple(16, 32, &aom_highbd_sad16x32x4d_avx2, 10), + make_tuple(16, 32, &aom_highbd_sad16x32x4d_avx2, 12), + make_tuple(16, 16, &aom_highbd_sad16x16x4d_avx2, 8), + make_tuple(16, 16, &aom_highbd_sad16x16x4d_avx2, 10), + make_tuple(16, 16, &aom_highbd_sad16x16x4d_avx2, 12), + make_tuple(16, 8, &aom_highbd_sad16x8x4d_avx2, 8), + make_tuple(16, 8, &aom_highbd_sad16x8x4d_avx2, 10), + make_tuple(16, 8, &aom_highbd_sad16x8x4d_avx2, 12), +#endif +}; 
+INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::ValuesIn(x4d_avx2_tests));
+#endif  // HAVE_AVX2
+
+//------------------------------------------------------------------------------
+// MIPS functions
+//
+// Parameter tables for the MSA kernels; everything up to the matching #endif
+// is compiled only when HAVE_MSA is set.
+//
+// Each entry is (width, height, kernel, N). The first two fields repeat the
+// WxH that appears in the kernel's name, and N is -1 for every MSA entry.
+// NOTE(review): the high-bitdepth AVX2 tables earlier in this file put
+// 8/10/12 in the last field, so it is presumably a bit depth with -1 meaning
+// "not high bitdepth" -- confirm against the SadMxNParam typedef.
+#if HAVE_MSA
+// Plain SAD kernels, run through SADTest.
+const SadMxNParam msa_tests[] = {
+  make_tuple(64, 64, &aom_sad64x64_msa, -1),
+  make_tuple(64, 32, &aom_sad64x32_msa, -1),
+  make_tuple(32, 64, &aom_sad32x64_msa, -1),
+  make_tuple(32, 32, &aom_sad32x32_msa, -1),
+  make_tuple(32, 16, &aom_sad32x16_msa, -1),
+  make_tuple(16, 32, &aom_sad16x32_msa, -1),
+  make_tuple(16, 16, &aom_sad16x16_msa, -1),
+  make_tuple(16, 8, &aom_sad16x8_msa, -1),
+  make_tuple(8, 16, &aom_sad8x16_msa, -1),
+  make_tuple(8, 8, &aom_sad8x8_msa, -1),
+  make_tuple(8, 4, &aom_sad8x4_msa, -1),
+  make_tuple(4, 8, &aom_sad4x8_msa, -1),
+  make_tuple(4, 4, &aom_sad4x4_msa, -1),
+};
+INSTANTIATE_TEST_CASE_P(MSA, SADTest, ::testing::ValuesIn(msa_tests));
+
+// The same block sizes for the *_avg kernels, run through SADavgTest.
+const SadMxNAvgParam avg_msa_tests[] = {
+  make_tuple(64, 64, &aom_sad64x64_avg_msa, -1),
+  make_tuple(64, 32, &aom_sad64x32_avg_msa, -1),
+  make_tuple(32, 64, &aom_sad32x64_avg_msa, -1),
+  make_tuple(32, 32, &aom_sad32x32_avg_msa, -1),
+  make_tuple(32, 16, &aom_sad32x16_avg_msa, -1),
+  make_tuple(16, 32, &aom_sad16x32_avg_msa, -1),
+  make_tuple(16, 16, &aom_sad16x16_avg_msa, -1),
+  make_tuple(16, 8, &aom_sad16x8_avg_msa, -1),
+  make_tuple(8, 16, &aom_sad8x16_avg_msa, -1),
+  make_tuple(8, 8, &aom_sad8x8_avg_msa, -1),
+  make_tuple(8, 4, &aom_sad8x4_avg_msa, -1),
+  make_tuple(4, 8, &aom_sad4x8_avg_msa, -1),
+  make_tuple(4, 4, &aom_sad4x4_avg_msa, -1),
+};
+INSTANTIATE_TEST_CASE_P(MSA, SADavgTest, ::testing::ValuesIn(avg_msa_tests));
+
+// The same block sizes for the *x4d kernels, run through SADx4Test.
+const SadMxNx4Param x4d_msa_tests[] = {
+  make_tuple(64, 64, &aom_sad64x64x4d_msa, -1),
+  make_tuple(64, 32, &aom_sad64x32x4d_msa, -1),
+  make_tuple(32, 64, &aom_sad32x64x4d_msa, -1),
+  make_tuple(32, 32, &aom_sad32x32x4d_msa, -1),
+  make_tuple(32, 16, &aom_sad32x16x4d_msa, -1),
+  make_tuple(16, 32, &aom_sad16x32x4d_msa, -1),
+  make_tuple(16, 16, &aom_sad16x16x4d_msa, -1),
+  make_tuple(16, 8, &aom_sad16x8x4d_msa, -1),
+  make_tuple(8, 16, &aom_sad8x16x4d_msa, -1),
+  make_tuple(8, 8, &aom_sad8x8x4d_msa, -1),
+  make_tuple(8, 4, &aom_sad8x4x4d_msa, -1),
+  make_tuple(4, 8, &aom_sad4x8x4d_msa, -1),
+  make_tuple(4, 4, &aom_sad4x4x4d_msa, -1),
+};
+INSTANTIATE_TEST_CASE_P(MSA, SADx4Test, ::testing::ValuesIn(x4d_msa_tests));
+#endif  // HAVE_MSA
+
+}  // namespace
diff --git a/third_party/aom/test/scan_test.cc b/third_party/aom/test/scan_test.cc
new file mode 100644
index 000000000..16c831c8e
--- /dev/null
+++ b/third_party/aom/test/scan_test.cc
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */ + +#include "av1/common/common_data.h" +#include "av1/common/scan.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +TEST(ScanTest, av1_augment_prob) { + const TX_SIZE tx_size = TX_4X4; + const TX_TYPE tx_type = DCT_DCT; + const int tx1d_size = tx_size_wide[tx_size]; + uint32_t prob[16] = { 8, 8, 7, 7, 8, 8, 4, 2, 3, 3, 2, 2, 2, 2, 2, 2 }; + const uint32_t ref_prob[16] = { + 8, 8, 7, 7, 8, 8, 4, 2, 3, 3, 2, 2, 2, 2, 2, 2 + }; + av1_augment_prob(tx_size, tx_type, prob); + for (int r = 0; r < tx1d_size; ++r) { + for (int c = 0; c < tx1d_size; ++c) { + const uint32_t idx = r * tx1d_size + c; + EXPECT_EQ(ref_prob[idx], prob[idx] >> 16); + } + } + + const SCAN_ORDER *sc = get_default_scan(tx_size, tx_type, 0); + const uint32_t mask = (1 << 16) - 1; + for (int r = 0; r < tx1d_size; ++r) { + for (int c = 0; c < tx1d_size; ++c) { + const uint32_t ref_idx = r * tx1d_size + c; + const uint32_t scan_idx = mask ^ (prob[r * tx1d_size + c] & mask); + const uint32_t idx = sc->scan[scan_idx]; + EXPECT_EQ(ref_idx, idx); + } + } +} + +TEST(ScanTest, av1_update_sort_order) { + const TX_SIZE tx_size = TX_4X4; + const TX_TYPE tx_type = DCT_DCT; + const uint32_t prob[16] = { 15, 14, 11, 10, 13, 12, 9, 5, + 8, 7, 4, 2, 6, 3, 1, 0 }; + const int16_t ref_sort_order[16] = { 0, 1, 4, 5, 2, 3, 6, 8, + 9, 12, 7, 10, 13, 11, 14, 15 }; + int16_t sort_order[16]; + av1_update_sort_order(tx_size, tx_type, prob, sort_order); + for (int i = 0; i < 16; ++i) EXPECT_EQ(ref_sort_order[i], sort_order[i]); +} + +TEST(ScanTest, av1_update_scan_order) { + TX_SIZE tx_size = TX_4X4; + const TX_TYPE tx_type = DCT_DCT; + const uint32_t prob[16] = { 10, 12, 14, 9, 11, 13, 15, 5, + 8, 7, 4, 2, 6, 3, 1, 0 }; + int16_t sort_order[16]; + int16_t scan[16]; + int16_t iscan[16]; + const int16_t ref_iscan[16] = { 0, 1, 2, 6, 3, 4, 5, 10, + 7, 8, 11, 13, 9, 12, 14, 15 }; + + av1_update_sort_order(tx_size, tx_type, prob, sort_order); + av1_update_scan_order(tx_size, 
sort_order, scan, iscan); + + for (int i = 0; i < 16; ++i) { + EXPECT_EQ(ref_iscan[i], iscan[i]); + EXPECT_EQ(i, scan[ref_iscan[i]]); + } +} + +TEST(ScanTest, av1_update_neighbors) { + TX_SIZE tx_size = TX_4X4; + // raster order + const int16_t scan[16] = { 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15 }; + int16_t nb[(16 + 1) * 2]; + const int16_t ref_nb[(16 + 1) * 2] = { 0, 0, 0, 0, 1, 1, 2, 2, 0, + 1, 1, 4, 2, 5, 3, 6, 4, 5, + 5, 8, 6, 9, 7, 10, 8, 9, 9, + 12, 10, 13, 11, 14, 0, 0 }; + + // raster order's scan and iscan are the same + av1_update_neighbors(tx_size, scan, scan, nb); + + for (int i = 0; i < (16 + 1) * 2; ++i) { + EXPECT_EQ(ref_nb[i], nb[i]); + } +} + +} // namespace diff --git a/third_party/aom/test/selfguided_filter_test.cc b/third_party/aom/test/selfguided_filter_test.cc new file mode 100644 index 000000000..e87fe339a --- /dev/null +++ b/third_party/aom/test/selfguided_filter_test.cc @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <ctime> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./av1_rtcd.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/util.h" + +#include "av1/common/mv.h" +#include "av1/common/restoration.h" + +namespace { + +using std::tr1::tuple; +using std::tr1::make_tuple; +using libaom_test::ACMRandom; + +typedef tuple<> FilterTestParam; + +class AV1SelfguidedFilterTest + : public ::testing::TestWithParam<FilterTestParam> { + public: + virtual ~AV1SelfguidedFilterTest() {} + virtual void SetUp() {} + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + void RunSpeedTest() { + const int w = 256, h = 256; + const int NUM_ITERS = 2000; + int i, j; + + uint8_t *input = (uint8_t *)aom_memalign(16, w * h * sizeof(uint8_t)); + uint8_t *output = (uint8_t *)aom_memalign(16, w * h * sizeof(uint8_t)); + int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE); + memset(tmpbuf, 0, RESTORATION_TMPBUF_SIZE); + + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) input[i * w + j] = rnd.Rand16() & 0xFF; + + int xqd[2] = { + SGRPROJ_PRJ_MIN0 + + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0), + SGRPROJ_PRJ_MIN1 + + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1) + }; + // Fix a parameter set, since the speed depends slightly on r. + // Change this to test different combinations of values of r. + int eps = 15; + + av1_loop_restoration_precal(); + + std::clock_t start = std::clock(); + for (i = 0; i < NUM_ITERS; ++i) { + apply_selfguided_restoration(input, w, h, w, eps, xqd, output, w, tmpbuf); + } + std::clock_t end = std::clock(); + double elapsed = ((end - start) / (double)CLOCKS_PER_SEC); + + printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, w, h, + elapsed, elapsed * 1000000. 
/ NUM_ITERS); + + aom_free(input); + aom_free(output); + aom_free(tmpbuf); + } + + void RunCorrectnessTest() { + // Set the maximum width/height to test here. We actually test a small + // range of sizes *up to* this size, so that we can check, eg., + // the behaviour on tiles which are not a multiple of 4 wide. + const int max_w = 260, max_h = 260, stride = 672, out_stride = 672; + const int NUM_ITERS = 81; + int i, j, k; + + uint8_t *input = + (uint8_t *)aom_memalign(16, stride * max_h * sizeof(uint8_t)); + uint8_t *output = + (uint8_t *)aom_memalign(16, out_stride * max_h * sizeof(uint8_t)); + uint8_t *output2 = + (uint8_t *)aom_memalign(16, out_stride * max_h * sizeof(uint8_t)); + int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE); + memset(tmpbuf, 0, RESTORATION_TMPBUF_SIZE); + + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + av1_loop_restoration_precal(); + + for (i = 0; i < NUM_ITERS; ++i) { + for (j = 0; j < max_h; ++j) + for (k = 0; k < max_w; ++k) input[j * stride + k] = rnd.Rand16() & 0xFF; + + int xqd[2] = { + SGRPROJ_PRJ_MIN0 + + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0), + SGRPROJ_PRJ_MIN1 + + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1) + }; + int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS); + + // Test various tile sizes around 256x256 + int test_w = max_w - (i / 9); + int test_h = max_h - (i % 9); + + apply_selfguided_restoration(input, test_w, test_h, stride, eps, xqd, + output, out_stride, tmpbuf); + apply_selfguided_restoration_c(input, test_w, test_h, stride, eps, xqd, + output2, out_stride, tmpbuf); + for (j = 0; j < test_h; ++j) + for (k = 0; k < test_w; ++k) + ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]); + } + + aom_free(input); + aom_free(output); + aom_free(output2); + aom_free(tmpbuf); + } +}; + +TEST_P(AV1SelfguidedFilterTest, SpeedTest) { RunSpeedTest(); } +TEST_P(AV1SelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); } + +const 
FilterTestParam params[] = { make_tuple() }; + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_CASE_P(SSE4_1, AV1SelfguidedFilterTest, + ::testing::ValuesIn(params)); +#endif + +#if CONFIG_HIGHBITDEPTH + +typedef tuple<int> HighbdFilterTestParam; + +class AV1HighbdSelfguidedFilterTest + : public ::testing::TestWithParam<HighbdFilterTestParam> { + public: + virtual ~AV1HighbdSelfguidedFilterTest() {} + virtual void SetUp() {} + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + void RunSpeedTest() { + const int w = 256, h = 256; + const int NUM_ITERS = 2000; + int i, j; + int bit_depth = GET_PARAM(0); + int mask = (1 << bit_depth) - 1; + + uint16_t *input = (uint16_t *)aom_memalign(16, w * h * sizeof(uint16_t)); + uint16_t *output = (uint16_t *)aom_memalign(16, w * h * sizeof(uint16_t)); + int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE); + memset(tmpbuf, 0, RESTORATION_TMPBUF_SIZE); + + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) input[i * w + j] = rnd.Rand16() & mask; + + int xqd[2] = { + SGRPROJ_PRJ_MIN0 + + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0), + SGRPROJ_PRJ_MIN1 + + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1) + }; + // Fix a parameter set, since the speed depends slightly on r. + // Change this to test different combinations of values of r. + int eps = 15; + + av1_loop_restoration_precal(); + + std::clock_t start = std::clock(); + for (i = 0; i < NUM_ITERS; ++i) { + apply_selfguided_restoration_highbd(input, w, h, w, bit_depth, eps, xqd, + output, w, tmpbuf); + } + std::clock_t end = std::clock(); + double elapsed = ((end - start) / (double)CLOCKS_PER_SEC); + + printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, w, h, + elapsed, elapsed * 1000000. / NUM_ITERS); + + aom_free(input); + aom_free(output); + aom_free(tmpbuf); + } + + void RunCorrectnessTest() { + // Set the maximum width/height to test here. 
We actually test a small + // range of sizes *up to* this size, so that we can check, eg., + // the behaviour on tiles which are not a multiple of 4 wide. + const int max_w = 260, max_h = 260, stride = 672, out_stride = 672; + const int NUM_ITERS = 81; + int i, j, k; + int bit_depth = GET_PARAM(0); + int mask = (1 << bit_depth) - 1; + + uint16_t *input = + (uint16_t *)aom_memalign(16, stride * max_h * sizeof(uint16_t)); + uint16_t *output = + (uint16_t *)aom_memalign(16, out_stride * max_h * sizeof(uint16_t)); + uint16_t *output2 = + (uint16_t *)aom_memalign(16, out_stride * max_h * sizeof(uint16_t)); + int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE); + memset(tmpbuf, 0, RESTORATION_TMPBUF_SIZE); + + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + av1_loop_restoration_precal(); + + for (i = 0; i < NUM_ITERS; ++i) { + for (j = 0; j < max_h; ++j) + for (k = 0; k < max_w; ++k) input[j * stride + k] = rnd.Rand16() & mask; + + int xqd[2] = { + SGRPROJ_PRJ_MIN0 + + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0), + SGRPROJ_PRJ_MIN1 + + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1) + }; + int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS); + + // Test various tile sizes around 256x256 + int test_w = max_w - (i / 9); + int test_h = max_h - (i % 9); + + apply_selfguided_restoration_highbd(input, test_w, test_h, stride, + bit_depth, eps, xqd, output, + out_stride, tmpbuf); + apply_selfguided_restoration_highbd_c(input, test_w, test_h, stride, + bit_depth, eps, xqd, output2, + out_stride, tmpbuf); + for (j = 0; j < test_h; ++j) + for (k = 0; k < test_w; ++k) + ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]); + } + + aom_free(input); + aom_free(output); + aom_free(output2); + aom_free(tmpbuf); + } +}; + +TEST_P(AV1HighbdSelfguidedFilterTest, SpeedTest) { RunSpeedTest(); } +TEST_P(AV1HighbdSelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); } + +const HighbdFilterTestParam highbd_params[] = { 
make_tuple(8), make_tuple(10), + make_tuple(12) }; + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdSelfguidedFilterTest, + ::testing::ValuesIn(highbd_params)); +#endif +#endif + +} // namespace diff --git a/third_party/aom/test/set_maps.sh b/third_party/aom/test/set_maps.sh new file mode 100755 index 000000000..4f59b06d6 --- /dev/null +++ b/third_party/aom/test/set_maps.sh @@ -0,0 +1,52 @@ +#!/bin/sh +## Copyright (c) 2016, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## +## This file tests the libaom set_maps example. To add new tests to this file, +## do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to set_maps_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +# Environment check: $YUV_RAW_INPUT is required, and set_maps must exist in +# $LIBAOM_BIN_PATH. +set_maps_verify_environment() { + if [ ! -e "${YUV_RAW_INPUT}" ]; then + echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH." + return 1 + fi + if [ -z "$(aom_tool_path set_maps)" ]; then + elog "set_maps not found. It must exist in LIBAOM_BIN_PATH or its parent." + return 1 + fi +} + +# Runs set_maps using the codec specified by $1. 
+set_maps() {
+  # Locate the example binary and derive a per-codec output path.
+  local encoder="$(aom_tool_path set_maps)"
+  local codec="$1"
+  local output_file="${AOM_TEST_OUTPUT_DIR}/set_maps_${codec}.ivf"
+
+  # Argument order: codec, width, height, input file, output file.
+  # NOTE(review): ${devnull} is left unquoted, presumably so that it expands
+  # to a redirection that eval then applies -- confirm in tools_common.sh.
+  eval "${AOM_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
+    "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \
+    ${devnull}
+
+  # The run counts as a pass only if an output file was left behind; the
+  # exit status of the encoder itself is not inspected.
+  [ -e "${output_file}" ] || return 1
+}
+
+# AV1 variant of the test. When av1_encode_available does not print "yes" the
+# body is skipped and the function returns success.
+set_maps_av1() {
+  if [ "$(av1_encode_available)" = "yes" ]; then
+    set_maps av1 || return 1
+  fi
+}
+
+# List of test functions handed to run_tests below.
+set_maps_tests="set_maps_av1"
+
+run_tests set_maps_verify_environment "${set_maps_tests}"
diff --git a/third_party/aom/test/simd_cmp_impl.h b/third_party/aom/test/simd_cmp_impl.h
new file mode 100644
index 000000000..28bd64a5b
--- /dev/null
+++ b/third_party/aom/test/simd_cmp_impl.h
@@ -0,0 +1,1212 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <assert.h>
+#include <string>
+#include "./aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "aom_dsp/aom_simd.h"
+#undef SIMD_INLINE
+#define SIMD_INLINE static  // Don't enforce inlining
+#include "aom_dsp/simd/v128_intrinsics_c.h"
+
+// Machine tuned code goes into this file. This file is included from
+// simd_cmp_sse2.cc, simd_cmp_ssse3.cc etc which define the macros
+// ARCH (=neon, sse2, ssse3, etc), SIMD_NAMESPACE and ARCH_POSTFIX().
+
+using libaom_test::ACMRandom;
+
+namespace SIMD_NAMESPACE {
+
+// Wrap templates around intrinsics using immediate values
+//
+// The shift/alignment amount is a template argument, so every instantiation
+// passes a compile-time constant to the underlying intrinsic and each amount
+// becomes a distinct function that can be listed in the m[] table below.
+template <int shift>
+v64 imm_v64_shl_n_byte(v64 a) {
+  return v64_shl_n_byte(a, shift);
+}
+template <int shift>
+v64 imm_v64_shr_n_byte(v64 a) {
+  return v64_shr_n_byte(a, shift);
+}
+template <int shift>
+v64 imm_v64_shl_n_8(v64 a) {
+  return v64_shl_n_8(a, shift);
+}
+template <int shift>
+v64 imm_v64_shr_n_u8(v64 a) {
+  return v64_shr_n_u8(a, shift);
+}
+template <int shift>
+v64 imm_v64_shr_n_s8(v64 a) {
+  return v64_shr_n_s8(a, shift);
+}
+template <int shift>
+v64 imm_v64_shl_n_16(v64 a) {
+  return v64_shl_n_16(a, shift);
+}
+template <int shift>
+v64 imm_v64_shr_n_u16(v64 a) {
+  return v64_shr_n_u16(a, shift);
+}
+template <int shift>
+v64 imm_v64_shr_n_s16(v64 a) {
+  return v64_shr_n_s16(a, shift);
+}
+template <int shift>
+v64 imm_v64_shl_n_32(v64 a) {
+  return v64_shl_n_32(a, shift);
+}
+template <int shift>
+v64 imm_v64_shr_n_u32(v64 a) {
+  return v64_shr_n_u32(a, shift);
+}
+template <int shift>
+v64 imm_v64_shr_n_s32(v64 a) {
+  return v64_shr_n_s32(a, shift);
+}
+template <int shift>
+v64 imm_v64_align(v64 a, v64 b) {
+  return v64_align(a, b, shift);
+}
+
+// Wrap templates around corresponding C implementations of the above
+template <int shift>
+c_v64 c_imm_v64_shl_n_byte(c_v64 a) {
+  return c_v64_shl_n_byte(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_shr_n_byte(c_v64 a) {
+  return c_v64_shr_n_byte(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_shl_n_8(c_v64 a) {
+  return c_v64_shl_n_8(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_shr_n_u8(c_v64 a) {
+  return c_v64_shr_n_u8(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_shr_n_s8(c_v64 a) {
+  return c_v64_shr_n_s8(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_shl_n_16(c_v64 a) {
+  return c_v64_shl_n_16(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_shr_n_u16(c_v64 a) {
+  return c_v64_shr_n_u16(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_shr_n_s16(c_v64 a) {
+  return c_v64_shr_n_s16(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_shl_n_32(c_v64 a) {
+  return c_v64_shl_n_32(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_shr_n_u32(c_v64 a) {
+  return c_v64_shr_n_u32(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_shr_n_s32(c_v64 a) {
+  return c_v64_shr_n_s32(a, shift);
+}
+template <int shift>
+c_v64 c_imm_v64_align(c_v64 a, c_v64 b) {
+  return c_v64_align(a, b, shift);
+}
+
+// 128-bit counterparts of the v64 immediate wrappers above.
+template <int shift>
+v128 imm_v128_shl_n_byte(v128 a) {
+  return v128_shl_n_byte(a, shift);
+}
+template <int shift>
+v128 imm_v128_shr_n_byte(v128 a) {
+  return v128_shr_n_byte(a, shift);
+}
+template <int shift>
+v128 imm_v128_shl_n_8(v128 a) {
+  return v128_shl_n_8(a, shift);
+}
+template <int shift>
+v128 imm_v128_shr_n_u8(v128 a) {
+  return v128_shr_n_u8(a, shift);
+}
+template <int shift>
+v128 imm_v128_shr_n_s8(v128 a) {
+  return v128_shr_n_s8(a, shift);
+}
+template <int shift>
+v128 imm_v128_shl_n_16(v128 a) {
+  return v128_shl_n_16(a, shift);
+}
+template <int shift>
+v128 imm_v128_shr_n_u16(v128 a) {
+  return v128_shr_n_u16(a, shift);
+}
+template <int shift>
+v128 imm_v128_shr_n_s16(v128 a) {
+  return v128_shr_n_s16(a, shift);
+}
+template <int shift>
+v128 imm_v128_shl_n_32(v128 a) {
+  return v128_shl_n_32(a, shift);
+}
+template <int shift>
+v128 imm_v128_shr_n_u32(v128 a) {
+  return v128_shr_n_u32(a, shift);
+}
+template <int shift>
+v128 imm_v128_shr_n_s32(v128 a) {
+  return v128_shr_n_s32(a, shift);
+}
+template <int shift>
+v128 imm_v128_align(v128 a, v128 b) {
+  return v128_align(a, b, shift);
+}
+
+// C reference versions of the v128 immediate wrappers.
+template <int shift>
+c_v128 c_imm_v128_shl_n_byte(c_v128 a) {
+  return c_v128_shl_n_byte(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shr_n_byte(c_v128 a) {
+  return c_v128_shr_n_byte(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shl_n_8(c_v128 a) {
+  return c_v128_shl_n_8(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shr_n_u8(c_v128 a) {
+  return c_v128_shr_n_u8(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shr_n_s8(c_v128 a) {
+  return c_v128_shr_n_s8(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shl_n_16(c_v128 a) {
+  return c_v128_shl_n_16(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shr_n_u16(c_v128 a) {
+  return c_v128_shr_n_u16(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shr_n_s16(c_v128 a) {
+  return c_v128_shr_n_s16(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shl_n_32(c_v128 a) {
+  return c_v128_shl_n_32(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shr_n_u32(c_v128 a) {
+  return c_v128_shr_n_u32(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shr_n_s32(c_v128 a) {
+  return c_v128_shr_n_s32(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_align(c_v128 a, c_v128 b) {
+  return c_v128_align(a, b, shift);
+}
+
+// Wrappers around the SAD and SSD functions
+//
+// Each wrapper folds the init / accumulate / sum sequence into a single
+// two-argument call returning uint32_t. The leading "::" selects the
+// global-namespace intrinsic instead of the same-named wrapper defined here.
+uint32_t v64_sad_u8(v64 a, v64 b) {
+  return v64_sad_u8_sum(::v64_sad_u8(v64_sad_u8_init(), a, b));
+}
+uint32_t v64_ssd_u8(v64 a, v64 b) {
+  return v64_ssd_u8_sum(::v64_ssd_u8(v64_ssd_u8_init(), a, b));
+}
+
+uint32_t c_v64_sad_u8(c_v64 a, c_v64 b) {
+  return c_v64_sad_u8_sum(::c_v64_sad_u8(c_v64_sad_u8_init(), a, b));
+}
+uint32_t c_v64_ssd_u8(c_v64 a, c_v64 b) {
+  return c_v64_ssd_u8_sum(::c_v64_ssd_u8(c_v64_ssd_u8_init(), a, b));
+}
+uint32_t v128_sad_u8(v128 a, v128 b) {
+  return v128_sad_u8_sum(::v128_sad_u8(v128_sad_u8_init(), a, b));
+}
+uint32_t v128_ssd_u8(v128 a, v128 b) {
+  return v128_ssd_u8_sum(::v128_ssd_u8(v128_ssd_u8_init(), a, b));
+}
+uint32_t c_v128_sad_u8(c_v128 a, c_v128 b) {
+  return c_v128_sad_u8_sum(::c_v128_sad_u8(c_v128_sad_u8_init(), a, b));
+}
+uint32_t c_v128_ssd_u8(c_v128 a, c_v128 b) {
+  return c_v128_ssd_u8_sum(::c_v128_ssd_u8(c_v128_ssd_u8_init(), a, b));
+}
+
+namespace {
+
+// Common pointer type that table entries are cast to, so functions with
+// different signatures can be stored side by side.
+typedef void (*fptr)();
+
+// One table row: the function's name as text, the C reference implementation
+// and the implementation under test.
+typedef struct {
+  const char *name;
+  fptr ref;
+  fptr simd;
+} mapping;
+
+// Builds a mapping row for `name`: the stringized name, then c_<name> and
+// <name>, both cast to fptr.
+#define MAP(name) \
+  { \
+    #name, reinterpret_cast < fptr >
(c_##name), \ + reinterpret_cast < fptr > (name) \ + } + +const mapping m[] = { MAP(v64_sad_u8), + MAP(v64_ssd_u8), + MAP(v64_add_8), + MAP(v64_add_16), + MAP(v64_sadd_s16), + MAP(v64_add_32), + MAP(v64_sub_8), + MAP(v64_ssub_u8), + MAP(v64_ssub_s8), + MAP(v64_sub_16), + MAP(v64_ssub_s16), + MAP(v64_ssub_u16), + MAP(v64_sub_32), + MAP(v64_ziplo_8), + MAP(v64_ziphi_8), + MAP(v64_ziplo_16), + MAP(v64_ziphi_16), + MAP(v64_ziplo_32), + MAP(v64_ziphi_32), + MAP(v64_pack_s32_s16), + MAP(v64_pack_s16_u8), + MAP(v64_pack_s16_s8), + MAP(v64_unziphi_8), + MAP(v64_unziplo_8), + MAP(v64_unziphi_16), + MAP(v64_unziplo_16), + MAP(v64_or), + MAP(v64_xor), + MAP(v64_and), + MAP(v64_andn), + MAP(v64_mullo_s16), + MAP(v64_mulhi_s16), + MAP(v64_mullo_s32), + MAP(v64_madd_s16), + MAP(v64_madd_us8), + MAP(v64_avg_u8), + MAP(v64_rdavg_u8), + MAP(v64_avg_u16), + MAP(v64_min_u8), + MAP(v64_max_u8), + MAP(v64_min_s8), + MAP(v64_max_s8), + MAP(v64_min_s16), + MAP(v64_max_s16), + MAP(v64_cmpgt_s8), + MAP(v64_cmplt_s8), + MAP(v64_cmpeq_8), + MAP(v64_cmpgt_s16), + MAP(v64_cmplt_s16), + MAP(v64_cmpeq_16), + MAP(v64_shuffle_8), + MAP(imm_v64_align<1>), + MAP(imm_v64_align<2>), + MAP(imm_v64_align<3>), + MAP(imm_v64_align<4>), + MAP(imm_v64_align<5>), + MAP(imm_v64_align<6>), + MAP(imm_v64_align<7>), + MAP(v64_abs_s8), + MAP(v64_abs_s16), + MAP(v64_unpacklo_u8_s16), + MAP(v64_unpackhi_u8_s16), + MAP(v64_unpacklo_s8_s16), + MAP(v64_unpackhi_s8_s16), + MAP(v64_unpacklo_u16_s32), + MAP(v64_unpacklo_s16_s32), + MAP(v64_unpackhi_u16_s32), + MAP(v64_unpackhi_s16_s32), + MAP(imm_v64_shr_n_byte<1>), + MAP(imm_v64_shr_n_byte<2>), + MAP(imm_v64_shr_n_byte<3>), + MAP(imm_v64_shr_n_byte<4>), + MAP(imm_v64_shr_n_byte<5>), + MAP(imm_v64_shr_n_byte<6>), + MAP(imm_v64_shr_n_byte<7>), + MAP(imm_v64_shl_n_byte<1>), + MAP(imm_v64_shl_n_byte<2>), + MAP(imm_v64_shl_n_byte<3>), + MAP(imm_v64_shl_n_byte<4>), + MAP(imm_v64_shl_n_byte<5>), + MAP(imm_v64_shl_n_byte<6>), + MAP(imm_v64_shl_n_byte<7>), + 
MAP(imm_v64_shl_n_8<1>), + MAP(imm_v64_shl_n_8<2>), + MAP(imm_v64_shl_n_8<3>), + MAP(imm_v64_shl_n_8<4>), + MAP(imm_v64_shl_n_8<5>), + MAP(imm_v64_shl_n_8<6>), + MAP(imm_v64_shl_n_8<7>), + MAP(imm_v64_shr_n_u8<1>), + MAP(imm_v64_shr_n_u8<2>), + MAP(imm_v64_shr_n_u8<3>), + MAP(imm_v64_shr_n_u8<4>), + MAP(imm_v64_shr_n_u8<5>), + MAP(imm_v64_shr_n_u8<6>), + MAP(imm_v64_shr_n_u8<7>), + MAP(imm_v64_shr_n_s8<1>), + MAP(imm_v64_shr_n_s8<2>), + MAP(imm_v64_shr_n_s8<3>), + MAP(imm_v64_shr_n_s8<4>), + MAP(imm_v64_shr_n_s8<5>), + MAP(imm_v64_shr_n_s8<6>), + MAP(imm_v64_shr_n_s8<7>), + MAP(imm_v64_shl_n_16<1>), + MAP(imm_v64_shl_n_16<2>), + MAP(imm_v64_shl_n_16<4>), + MAP(imm_v64_shl_n_16<6>), + MAP(imm_v64_shl_n_16<8>), + MAP(imm_v64_shl_n_16<10>), + MAP(imm_v64_shl_n_16<12>), + MAP(imm_v64_shl_n_16<14>), + MAP(imm_v64_shr_n_u16<1>), + MAP(imm_v64_shr_n_u16<2>), + MAP(imm_v64_shr_n_u16<4>), + MAP(imm_v64_shr_n_u16<6>), + MAP(imm_v64_shr_n_u16<8>), + MAP(imm_v64_shr_n_u16<10>), + MAP(imm_v64_shr_n_u16<12>), + MAP(imm_v64_shr_n_u16<14>), + MAP(imm_v64_shr_n_s16<1>), + MAP(imm_v64_shr_n_s16<2>), + MAP(imm_v64_shr_n_s16<4>), + MAP(imm_v64_shr_n_s16<6>), + MAP(imm_v64_shr_n_s16<8>), + MAP(imm_v64_shr_n_s16<10>), + MAP(imm_v64_shr_n_s16<12>), + MAP(imm_v64_shr_n_s16<14>), + MAP(imm_v64_shl_n_32<1>), + MAP(imm_v64_shl_n_32<4>), + MAP(imm_v64_shl_n_32<8>), + MAP(imm_v64_shl_n_32<12>), + MAP(imm_v64_shl_n_32<16>), + MAP(imm_v64_shl_n_32<20>), + MAP(imm_v64_shl_n_32<24>), + MAP(imm_v64_shl_n_32<28>), + MAP(imm_v64_shr_n_u32<1>), + MAP(imm_v64_shr_n_u32<4>), + MAP(imm_v64_shr_n_u32<8>), + MAP(imm_v64_shr_n_u32<12>), + MAP(imm_v64_shr_n_u32<16>), + MAP(imm_v64_shr_n_u32<20>), + MAP(imm_v64_shr_n_u32<24>), + MAP(imm_v64_shr_n_u32<28>), + MAP(imm_v64_shr_n_s32<1>), + MAP(imm_v64_shr_n_s32<4>), + MAP(imm_v64_shr_n_s32<8>), + MAP(imm_v64_shr_n_s32<12>), + MAP(imm_v64_shr_n_s32<16>), + MAP(imm_v64_shr_n_s32<20>), + MAP(imm_v64_shr_n_s32<24>), + MAP(imm_v64_shr_n_s32<28>), + MAP(v64_shl_8), + 
MAP(v64_shr_u8), + MAP(v64_shr_s8), + MAP(v64_shl_16), + MAP(v64_shr_u16), + MAP(v64_shr_s16), + MAP(v64_shl_32), + MAP(v64_shr_u32), + MAP(v64_shr_s32), + MAP(v64_hadd_u8), + MAP(v64_hadd_s16), + MAP(v64_dotp_s16), + MAP(v64_dotp_su8), + MAP(v64_u64), + MAP(v64_low_u32), + MAP(v64_high_u32), + MAP(v64_low_s32), + MAP(v64_high_s32), + MAP(v64_dup_8), + MAP(v64_dup_16), + MAP(v64_dup_32), + MAP(v64_from_32), + MAP(v64_zero), + MAP(v64_from_16), + MAP(v128_sad_u8), + MAP(v128_ssd_u8), + MAP(v128_add_8), + MAP(v128_add_16), + MAP(v128_sadd_s16), + MAP(v128_add_32), + MAP(v128_sub_8), + MAP(v128_ssub_u8), + MAP(v128_ssub_s8), + MAP(v128_sub_16), + MAP(v128_ssub_s16), + MAP(v128_ssub_u16), + MAP(v128_sub_32), + MAP(v128_ziplo_8), + MAP(v128_ziphi_8), + MAP(v128_ziplo_16), + MAP(v128_ziphi_16), + MAP(v128_ziplo_32), + MAP(v128_ziphi_32), + MAP(v128_ziplo_64), + MAP(v128_ziphi_64), + MAP(v128_unziphi_8), + MAP(v128_unziplo_8), + MAP(v128_unziphi_16), + MAP(v128_unziplo_16), + MAP(v128_unziphi_32), + MAP(v128_unziplo_32), + MAP(v128_pack_s32_s16), + MAP(v128_pack_s16_u8), + MAP(v128_pack_s16_s8), + MAP(v128_or), + MAP(v128_xor), + MAP(v128_and), + MAP(v128_andn), + MAP(v128_mullo_s16), + MAP(v128_mulhi_s16), + MAP(v128_mullo_s32), + MAP(v128_madd_s16), + MAP(v128_madd_us8), + MAP(v128_avg_u8), + MAP(v128_rdavg_u8), + MAP(v128_avg_u16), + MAP(v128_min_u8), + MAP(v128_max_u8), + MAP(v128_min_s8), + MAP(v128_max_s8), + MAP(v128_min_s16), + MAP(v128_max_s16), + MAP(v128_cmpgt_s8), + MAP(v128_cmplt_s8), + MAP(v128_cmpeq_8), + MAP(v128_cmpgt_s16), + MAP(v128_cmpeq_16), + MAP(v128_cmplt_s16), + MAP(v128_shuffle_8), + MAP(imm_v128_align<1>), + MAP(imm_v128_align<2>), + MAP(imm_v128_align<3>), + MAP(imm_v128_align<4>), + MAP(imm_v128_align<5>), + MAP(imm_v128_align<6>), + MAP(imm_v128_align<7>), + MAP(imm_v128_align<8>), + MAP(imm_v128_align<9>), + MAP(imm_v128_align<10>), + MAP(imm_v128_align<11>), + MAP(imm_v128_align<12>), + MAP(imm_v128_align<13>), + MAP(imm_v128_align<14>), + 
MAP(imm_v128_align<15>), + MAP(v128_abs_s8), + MAP(v128_abs_s16), + MAP(v128_padd_s16), + MAP(v128_unpacklo_u16_s32), + MAP(v128_unpacklo_s16_s32), + MAP(v128_unpackhi_u16_s32), + MAP(v128_unpackhi_s16_s32), + MAP(imm_v128_shr_n_byte<1>), + MAP(imm_v128_shr_n_byte<2>), + MAP(imm_v128_shr_n_byte<3>), + MAP(imm_v128_shr_n_byte<4>), + MAP(imm_v128_shr_n_byte<5>), + MAP(imm_v128_shr_n_byte<6>), + MAP(imm_v128_shr_n_byte<7>), + MAP(imm_v128_shr_n_byte<8>), + MAP(imm_v128_shr_n_byte<9>), + MAP(imm_v128_shr_n_byte<10>), + MAP(imm_v128_shr_n_byte<11>), + MAP(imm_v128_shr_n_byte<12>), + MAP(imm_v128_shr_n_byte<13>), + MAP(imm_v128_shr_n_byte<14>), + MAP(imm_v128_shr_n_byte<15>), + MAP(imm_v128_shl_n_byte<1>), + MAP(imm_v128_shl_n_byte<2>), + MAP(imm_v128_shl_n_byte<3>), + MAP(imm_v128_shl_n_byte<4>), + MAP(imm_v128_shl_n_byte<5>), + MAP(imm_v128_shl_n_byte<6>), + MAP(imm_v128_shl_n_byte<7>), + MAP(imm_v128_shl_n_byte<8>), + MAP(imm_v128_shl_n_byte<9>), + MAP(imm_v128_shl_n_byte<10>), + MAP(imm_v128_shl_n_byte<11>), + MAP(imm_v128_shl_n_byte<12>), + MAP(imm_v128_shl_n_byte<13>), + MAP(imm_v128_shl_n_byte<14>), + MAP(imm_v128_shl_n_byte<15>), + MAP(imm_v128_shl_n_8<1>), + MAP(imm_v128_shl_n_8<2>), + MAP(imm_v128_shl_n_8<3>), + MAP(imm_v128_shl_n_8<4>), + MAP(imm_v128_shl_n_8<5>), + MAP(imm_v128_shl_n_8<6>), + MAP(imm_v128_shl_n_8<7>), + MAP(imm_v128_shr_n_u8<1>), + MAP(imm_v128_shr_n_u8<2>), + MAP(imm_v128_shr_n_u8<3>), + MAP(imm_v128_shr_n_u8<4>), + MAP(imm_v128_shr_n_u8<5>), + MAP(imm_v128_shr_n_u8<6>), + MAP(imm_v128_shr_n_u8<7>), + MAP(imm_v128_shr_n_s8<1>), + MAP(imm_v128_shr_n_s8<2>), + MAP(imm_v128_shr_n_s8<3>), + MAP(imm_v128_shr_n_s8<4>), + MAP(imm_v128_shr_n_s8<5>), + MAP(imm_v128_shr_n_s8<6>), + MAP(imm_v128_shr_n_s8<7>), + MAP(imm_v128_shl_n_16<1>), + MAP(imm_v128_shl_n_16<2>), + MAP(imm_v128_shl_n_16<4>), + MAP(imm_v128_shl_n_16<6>), + MAP(imm_v128_shl_n_16<8>), + MAP(imm_v128_shl_n_16<10>), + MAP(imm_v128_shl_n_16<12>), + MAP(imm_v128_shl_n_16<14>), + 
MAP(imm_v128_shr_n_u16<1>), + MAP(imm_v128_shr_n_u16<2>), + MAP(imm_v128_shr_n_u16<4>), + MAP(imm_v128_shr_n_u16<6>), + MAP(imm_v128_shr_n_u16<8>), + MAP(imm_v128_shr_n_u16<10>), + MAP(imm_v128_shr_n_u16<12>), + MAP(imm_v128_shr_n_u16<14>), + MAP(imm_v128_shr_n_s16<1>), + MAP(imm_v128_shr_n_s16<2>), + MAP(imm_v128_shr_n_s16<4>), + MAP(imm_v128_shr_n_s16<6>), + MAP(imm_v128_shr_n_s16<8>), + MAP(imm_v128_shr_n_s16<10>), + MAP(imm_v128_shr_n_s16<12>), + MAP(imm_v128_shr_n_s16<14>), + MAP(imm_v128_shl_n_32<1>), + MAP(imm_v128_shl_n_32<4>), + MAP(imm_v128_shl_n_32<8>), + MAP(imm_v128_shl_n_32<12>), + MAP(imm_v128_shl_n_32<16>), + MAP(imm_v128_shl_n_32<20>), + MAP(imm_v128_shl_n_32<24>), + MAP(imm_v128_shl_n_32<28>), + MAP(imm_v128_shr_n_u32<1>), + MAP(imm_v128_shr_n_u32<4>), + MAP(imm_v128_shr_n_u32<8>), + MAP(imm_v128_shr_n_u32<12>), + MAP(imm_v128_shr_n_u32<16>), + MAP(imm_v128_shr_n_u32<20>), + MAP(imm_v128_shr_n_u32<24>), + MAP(imm_v128_shr_n_u32<28>), + MAP(imm_v128_shr_n_s32<1>), + MAP(imm_v128_shr_n_s32<4>), + MAP(imm_v128_shr_n_s32<8>), + MAP(imm_v128_shr_n_s32<12>), + MAP(imm_v128_shr_n_s32<16>), + MAP(imm_v128_shr_n_s32<20>), + MAP(imm_v128_shr_n_s32<24>), + MAP(imm_v128_shr_n_s32<28>), + MAP(v128_from_v64), + MAP(v128_zip_8), + MAP(v128_zip_16), + MAP(v128_zip_32), + MAP(v128_mul_s16), + MAP(v128_unpack_u8_s16), + MAP(v128_unpack_s8_s16), + MAP(v128_unpack_u16_s32), + MAP(v128_unpack_s16_s32), + MAP(v128_shl_8), + MAP(v128_shr_u8), + MAP(v128_shr_s8), + MAP(v128_shl_16), + MAP(v128_shr_u16), + MAP(v128_shr_s16), + MAP(v128_shl_32), + MAP(v128_shr_u32), + MAP(v128_shr_s32), + MAP(v128_hadd_u8), + MAP(v128_dotp_s16), + MAP(v128_low_u32), + MAP(v128_low_v64), + MAP(v128_high_v64), + MAP(v128_from_64), + MAP(v128_from_32), + MAP(v128_zero), + MAP(v128_dup_8), + MAP(v128_dup_16), + MAP(v128_dup_32), + MAP(v128_unpacklo_u8_s16), + MAP(v128_unpackhi_u8_s16), + MAP(v128_unpacklo_s8_s16), + MAP(v128_unpackhi_s8_s16), + MAP(u32_load_unaligned), + 
MAP(u32_store_unaligned), + MAP(v64_load_unaligned), + MAP(v64_store_unaligned), + MAP(v128_load_unaligned), + MAP(v128_store_unaligned), + { NULL, NULL, NULL } }; +#undef MAP + +// Map reference functions to machine tuned functions. Since the +// functions depend on machine tuned types, the non-machine tuned +// instantiations of the test can't refer to these functions directly, +// so we refer to them by name and do the mapping here. +void Map(const char *name, fptr *ref, fptr *simd) { + unsigned int i; + for (i = 0; m[i].name && strcmp(name, m[i].name); i++) { + } + + *ref = m[i].ref; + *simd = m[i].simd; +} + +// Used for printing errors in TestSimd1Arg and TestSimd2Args +std::string Print(const uint8_t *a, int size) { + std::string text = "0x"; + for (int i = 0; i < size; i++) { + const uint8_t c = a[!CONFIG_BIG_ENDIAN ? size - 1 - i : i]; + // Same as snprintf(..., ..., "%02x", c) + text += (c >> 4) + '0' + ((c >> 4) > 9) * ('a' - '0' - 10); + text += (c & 15) + '0' + ((c & 15) > 9) * ('a' - '0' - 10); + } + + return text; +} + +// Used in TestSimd1Arg and TestSimd2Args to restrict argument ranges +void SetMask(uint8_t *s, int size, uint32_t mask, uint32_t maskwidth) { + switch (maskwidth) { + case 0: { + break; + } + case 8: { + for (int i = 0; i < size; i++) s[i] &= mask; + break; + } + case 16: { + uint16_t *t = reinterpret_cast<uint16_t *>(s); + assert(!(reinterpret_cast<uintptr_t>(s) & 1)); + for (int i = 0; i < size / 2; i++) t[i] &= mask; + break; + } + case 32: { + uint32_t *t = reinterpret_cast<uint32_t *>(s); + assert(!(reinterpret_cast<uintptr_t>(s) & 3)); + for (int i = 0; i < size / 4; i++) t[i] &= mask; + break; + } + case 64: { + uint64_t *t = reinterpret_cast<uint64_t *>(s); + assert(!(reinterpret_cast<uintptr_t>(s) & 7)); + for (int i = 0; i < size / 8; i++) t[i] &= mask; + break; + } + default: { + FAIL() << "Unsupported mask width"; + break; + } + } +} + +// We need some extra load/store functions +void u64_store_aligned(void *p, uint64_t 
a) { + v64_store_aligned(p, v64_from_64(a)); +} +void s32_store_aligned(void *p, int32_t a) { + u32_store_aligned(p, static_cast<uint32_t>(a)); +} +void s64_store_aligned(void *p, int64_t a) { + v64_store_aligned(p, v64_from_64(static_cast<uint64_t>(a))); +} + +void c_u64_store_aligned(void *p, uint64_t a) { + c_v64_store_aligned(p, c_v64_from_64(a)); +} + +void c_s32_store_aligned(void *p, int32_t a) { + c_u32_store_aligned(p, static_cast<uint32_t>(a)); +} + +void c_s64_store_aligned(void *p, int64_t a) { + c_v64_store_aligned(p, c_v64_from_64(static_cast<uint64_t>(a))); +} + +uint64_t u64_load_aligned(const void *p) { + return v64_u64(v64_load_aligned(p)); +} +uint16_t u16_load_aligned(const void *p) { + return *(reinterpret_cast<const uint16_t *>(p)); +} +uint8_t u8_load_aligned(const void *p) { + return *(reinterpret_cast<const uint8_t *>(p)); +} + +uint64_t c_u64_load_aligned(const void *p) { + return c_v64_u64(c_v64_load_aligned(p)); +} +uint16_t c_u16_load_aligned(const void *p) { + return *(reinterpret_cast<const uint16_t *>(p)); +} +uint8_t c_u8_load_aligned(const void *p) { + return *(reinterpret_cast<const uint8_t *>(p)); +} + +// CompareSimd1Arg and CompareSimd2Args compare intrinsics taking 1 or +// 2 arguments respectively with their corresponding C reference. +// Ideally, the loads and stores should have gone into the template +// parameter list, but v64 and v128 could be typedef'ed to the same +// type (which is the case on x86) and then we can't instantiate both +// v64 and v128, so the function return and argument types, including +// the always differing types in the C equivalent are used instead. +// The function arguments must be void pointers and then go through a +// cast to avoid matching errors in the branches eliminated by the +// typeid tests in the calling function. 
// Runs one single-argument intrinsic and its C reference on the same input
// and compares what they store.
//
// store/load/simd are the type-erased (fptr) store helper, load helper and
// intrinsic; c_store/c_load/c_simd are their C reference counterparts.  Each
// pointer is cast back to the signature given by the template arguments
// (Ret/Arg for the intrinsic, CRet/CArg for the reference) before use.
// The input is loaded from `a`; the intrinsic result is stored to `d` and
// the reference result to `ref_d`.
//
// Returns the memcmp() of the first sizeof(CRet) bytes of ref_d and d, so 0
// means the two results are identical.
template <typename Ret, typename Arg, typename CRet, typename CArg>
int CompareSimd1Arg(fptr store, fptr load, fptr simd, void *d, fptr c_store,
                    fptr c_load, fptr c_simd, void *ref_d, const void *a) {
  void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store;
  Arg (*const my_load)(const void *) = (Arg(*const)(const void *))load;
  Ret (*const my_simd)(Arg) = (Ret(*const)(Arg))simd;
  void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store;
  CArg (*const my_c_load)(const void *) = (CArg(*const)(const void *))c_load;
  CRet (*const my_c_simd)(CArg) = (CRet(*const)(CArg))c_simd;

  // Call reference and intrinsic
  my_c_store(ref_d, my_c_simd(my_c_load(a)));
  my_store(d, my_simd(my_load(a)));

  // Compare results
  return memcmp(ref_d, d, sizeof(CRet));
}

// Two-argument version of the comparison: the first argument is loaded from
// `a` with load1/c_load1 and the second from `b` with load2/c_load2.  The
// intrinsic result is stored to `d`, the reference result to `ref_d`, and
// the memcmp() of their first sizeof(CRet) bytes is returned (0 on match).
template <typename Ret, typename Arg1, typename Arg2, typename CRet,
          typename CArg1, typename CArg2>
int CompareSimd2Args(fptr store, fptr load1, fptr load2, fptr simd, void *d,
                     fptr c_store, fptr c_load1, fptr c_load2, fptr c_simd,
                     void *ref_d, const void *a, const void *b) {
  void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store;
  Arg1 (*const my_load1)(const void *) = (Arg1(*const)(const void *))load1;
  Arg2 (*const my_load2)(const void *) = (Arg2(*const)(const void *))load2;
  Ret (*const my_simd)(Arg1, Arg2) = (Ret(*const)(Arg1, Arg2))simd;
  void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store;
  CArg1 (*const my_c_load1)(const void *) =
      (CArg1(*const)(const void *))c_load1;
  CArg2 (*const my_c_load2)(const void *) =
      (CArg2(*const)(const void *))c_load2;
  CRet (*const my_c_simd)(CArg1, CArg2) = (CRet(*const)(CArg1, CArg2))c_simd;

  // Call reference and intrinsic
  my_c_store(ref_d, my_c_simd(my_c_load1(a), my_c_load2(b)));
  my_store(d, my_simd(my_load1(a), my_load2(b)));

  // Compare results
  return memcmp(ref_d, d, sizeof(CRet));
}

}  // namespace

// Compares the single-argument intrinsic registered under `name` with its C
// reference for up to `iterations` random inputs.
//
// CRet and CArg are the return and argument types of the C reference; the
// typeid tests below use them to pick the branch that supplies the matching
// load/store helpers.  When `maskwidth` is non-zero the random argument is
// passed through SetMask(s, sizeof(CArg), mask, maskwidth) before each call.
//
// The loop stops at the first mismatch or as soon as the running test has
// recorded a failure.  The final EXPECT_EQ reports the last input together
// with both results.
template <typename CRet, typename CArg>
void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
                  const char *name) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  fptr ref_simd;
  fptr simd;
  int error = 0;
  DECLARE_ALIGNED(32, uint8_t, s[sizeof(CArg)]);
  DECLARE_ALIGNED(32, uint8_t, d[sizeof(CRet)]);
  DECLARE_ALIGNED(32, uint8_t, ref_d[sizeof(CRet)]);
  memset(ref_d, 0, sizeof(ref_d));
  memset(d, 0, sizeof(d));

  // Map() leaves both pointers NULL when `name` is not in the table.
  // FAIL() is a fatal gtest failure and returns from this function.
  Map(name, &ref_simd, &simd);
  if (simd == NULL || ref_simd == NULL) {
    FAIL() << "Internal error: Unknown intrinsic function " << name;
  }
  for (unsigned int count = 0;
       count < iterations && !error && !testing::Test::HasFailure(); count++) {
    // Fresh random argument for every iteration.
    for (unsigned int c = 0; c < sizeof(CArg); c++) s[c] = rnd.Rand8();

    if (maskwidth) {
      SetMask(s, sizeof(CArg), mask, maskwidth);
    }

    // One branch per supported (return type, argument type) combination.
    // The tag comment in each branch reads <return>_<argument>.
    if (typeid(CRet) == typeid(c_v64) && typeid(CArg) == typeid(c_v64)) {
      // V64_V64
      error = CompareSimd1Arg<v64, v64, CRet, CArg>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg) == typeid(uint8_t)) {
      // V64_U8
      error = CompareSimd1Arg<v64, uint8_t, CRet, CArg>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(u8_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg) == typeid(uint16_t)) {
      // V64_U16
      error = CompareSimd1Arg<v64, uint16_t, CRet, CArg>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(u16_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg) == typeid(uint32_t)) {
      // V64_U32
      error = CompareSimd1Arg<v64, uint32_t, CRet, CArg>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(uint64_t) &&
               typeid(CArg) == typeid(c_v64)) {
      // U64_V64
      error = CompareSimd1Arg<uint64_t, v64, CRet, CArg>(
          reinterpret_cast<fptr>(u64_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u64_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(int64_t) &&
               typeid(CArg) == typeid(c_v64)) {
      // S64_V64
      error = CompareSimd1Arg<int64_t, v64, CRet, CArg>(
          reinterpret_cast<fptr>(s64_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_s64_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(uint32_t) &&
               typeid(CArg) == typeid(c_v64)) {
      // U32_V64
      error = CompareSimd1Arg<uint32_t, v64, CRet, CArg>(
          reinterpret_cast<fptr>(u32_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u32_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(int32_t) &&
               typeid(CArg) == typeid(c_v64)) {
      // S32_V64
      error = CompareSimd1Arg<int32_t, v64, CRet, CArg>(
          reinterpret_cast<fptr>(s32_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_s32_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(uint32_t) &&
               typeid(CArg) == typeid(c_v128)) {
      // U32_V128
      error = CompareSimd1Arg<uint32_t, v128, CRet, CArg>(
          reinterpret_cast<fptr>(u32_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u32_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(uint64_t) &&
               typeid(CArg) == typeid(c_v128)) {
      // U64_V128
      error = CompareSimd1Arg<uint64_t, v128, CRet, CArg>(
          reinterpret_cast<fptr>(u64_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u64_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg) == typeid(c_v128)) {
      // V64_V128
      error = CompareSimd1Arg<v64, v128, CRet, CArg>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg) == typeid(c_v128)) {
      // V128_V128
      error = CompareSimd1Arg<v128, v128, CRet, CArg>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg) == typeid(c_v64)) {
      // V128_V64
      error = CompareSimd1Arg<v128, v64, CRet, CArg>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg) == typeid(uint8_t)) {
      // V128_U8
      error = CompareSimd1Arg<v128, uint8_t, CRet, CArg>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(u8_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg) == typeid(uint16_t)) {
      // V128_U16
      error = CompareSimd1Arg<v128, uint16_t, CRet, CArg>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(u16_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg) == typeid(uint32_t)) {
      // V128_U32
      error = CompareSimd1Arg<v128, uint32_t, CRet, CArg>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
    } else {
      // No branch handles this CRet/CArg combination.
      FAIL() << "Internal error: Unknown intrinsic function "
             << typeid(CRet).name() << " " << name << "(" << typeid(CArg).name()
             << ")";
    }
  }

  EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
                      << Print(s, sizeof(s)) << ") -> " << Print(d, sizeof(d))
                      << " (simd), " << Print(ref_d, sizeof(ref_d)) << " (ref)";
}

// Compares the two-argument intrinsic registered under `name` with its C
// reference for up to `iterations` random input pairs.
//
// CRet, CArg1 and CArg2 are the return and argument types of the C
// reference and select the branch with the matching load/store helpers.
// When `maskwidth` is non-zero only the second argument is passed through
// SetMask(); the first argument is always left fully random.
//
// The loop stops at the first mismatch or as soon as the running test has
// recorded a failure.  The final EXPECT_EQ reports the last inputs together
// with both results.
template <typename CRet, typename CArg1, typename CArg2>
void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
                   const char *name) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  fptr ref_simd;
  fptr simd;
  int error = 0;
  DECLARE_ALIGNED(32, uint8_t, s1[sizeof(CArg1)]);
  DECLARE_ALIGNED(32, uint8_t, s2[sizeof(CArg2)]);
  DECLARE_ALIGNED(32, uint8_t, d[sizeof(CRet)]);
  DECLARE_ALIGNED(32, uint8_t, ref_d[sizeof(CRet)]);
  memset(ref_d, 0, sizeof(ref_d));
  memset(d, 0, sizeof(d));

  // Map() leaves both pointers NULL when `name` is not in the table.
  // FAIL() is a fatal gtest failure and returns from this function.
  Map(name, &ref_simd, &simd);
  if (simd == NULL || ref_simd == NULL) {
    FAIL() << "Internal error: Unknown intrinsic function " << name;
  }

  for (unsigned int count = 0;
       count < iterations && !error && !testing::Test::HasFailure(); count++) {
    // Fresh random arguments for every iteration.
    for (unsigned int c = 0; c < sizeof(CArg1); c++) s1[c] = rnd.Rand8();

    for (unsigned int c = 0; c < sizeof(CArg2); c++) s2[c] = rnd.Rand8();

    if (maskwidth) SetMask(s2, sizeof(CArg2), mask, maskwidth);

    // One branch per supported signature.  The tag comment in each branch
    // reads <return>_<first argument><second argument>.
    if (typeid(CRet) == typeid(c_v64) && typeid(CArg1) == typeid(c_v64) &&
        typeid(CArg2) == typeid(c_v64)) {
      // V64_V64V64
      error = CompareSimd2Args<v64, v64, v64, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg1) == typeid(uint32_t) &&
               typeid(CArg2) == typeid(uint32_t)) {
      // V64_U32U32
      error = CompareSimd2Args<v64, uint32_t, uint32_t, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(u32_load_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(uint32_t) &&
               typeid(CArg1) == typeid(c_v64) &&
               typeid(CArg2) == typeid(c_v64)) {
      // U32_V64V64
      error = CompareSimd2Args<uint32_t, v64, v64, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(u32_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u32_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(int64_t) &&
               typeid(CArg1) == typeid(c_v64) &&
               typeid(CArg2) == typeid(c_v64)) {
      // S64_V64V64
      error = CompareSimd2Args<int64_t, v64, v64, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(s64_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_s64_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg1) == typeid(c_v64) &&
               typeid(CArg2) == typeid(uint32_t)) {
      // V64_V64U32
      error = CompareSimd2Args<v64, v64, uint32_t, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg1) == typeid(c_v128) &&
               typeid(CArg2) == typeid(c_v128)) {
      // V128_V128V128
      error = CompareSimd2Args<v128, v128, v128, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(uint32_t) &&
               typeid(CArg1) == typeid(c_v128) &&
               typeid(CArg2) == typeid(c_v128)) {
      // U32_V128V128
      error = CompareSimd2Args<uint32_t, v128, v128, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(u32_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u32_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(int64_t) &&
               typeid(CArg1) == typeid(c_v128) &&
               typeid(CArg2) == typeid(c_v128)) {
      // S64_V128V128
      error = CompareSimd2Args<int64_t, v128, v128, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(s64_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_s64_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg1) == typeid(uint64_t) &&
               typeid(CArg2) == typeid(uint64_t)) {
      // V128_U64U64
      error = CompareSimd2Args<v128, uint64_t, uint64_t, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(u64_load_aligned),
          reinterpret_cast<fptr>(u64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_u64_load_aligned),
          reinterpret_cast<fptr>(c_u64_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg1) == typeid(c_v64) &&
               typeid(CArg2) == typeid(c_v64)) {
      // V128_V64V64
      error = CompareSimd2Args<v128, v64, v64, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg1) == typeid(c_v128) &&
               typeid(CArg2) == typeid(uint32_t)) {
      // V128_V128U32
      error = CompareSimd2Args<v128, v128, uint32_t, CRet, CArg1, CArg2>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else {
      // No branch handles this CRet/CArg1/CArg2 combination.
      FAIL() << "Internal error: Unknown intrinsic function "
             << typeid(CRet).name() << " " << name << "("
             << typeid(CArg1).name() << ", " << typeid(CArg2).name() << ")";
    }
  }

  EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
                      << Print(s1, sizeof(s1)) << ", " << Print(s2, sizeof(s2))
                      << ") -> " << Print(d, sizeof(d)) << " (simd), "
                      << Print(ref_d, sizeof(ref_d)) << " (ref)";
}

// Explicit instantiations to make the functions callable from other files.
// Every signature instantiated here needs a matching typeid branch in
// TestSimd1Arg or TestSimd2Args above.
template void TestSimd1Arg<c_v64, uint8_t>(uint32_t, uint32_t, uint32_t,
                                           const char *);
template void TestSimd1Arg<c_v64, uint16_t>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd1Arg<c_v64, uint32_t>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd1Arg<c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
                                         const char *);
template void TestSimd1Arg<uint32_t, c_v64>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd1Arg<int32_t, c_v64>(uint32_t, uint32_t, uint32_t,
                                           const char *);
template void TestSimd1Arg<uint64_t, c_v64>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd1Arg<int64_t, c_v64>(uint32_t, uint32_t, uint32_t,
                                           const char *);
template void TestSimd2Args<c_v64, uint32_t, uint32_t>(uint32_t, uint32_t,
                                                       uint32_t, const char *);
template void TestSimd2Args<c_v64, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
                                                 const char *);
template void TestSimd2Args<c_v64, c_v64, uint32_t>(uint32_t, uint32_t,
                                                    uint32_t, const char *);
template void TestSimd2Args<int64_t, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
                                                   const char *);
template void TestSimd2Args<uint32_t, c_v64, c_v64>(uint32_t, uint32_t,
                                                    uint32_t, const char *);
template void TestSimd1Arg<c_v128, c_v128>(uint32_t, uint32_t, uint32_t,
                                           const char *);
template void TestSimd1Arg<c_v128, uint8_t>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd1Arg<c_v128, uint16_t>(uint32_t, uint32_t, uint32_t,
                                             const char *);
template void
TestSimd1Arg<c_v128, uint32_t>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<c_v128, c_v64>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<uint32_t, c_v128>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<uint64_t, c_v128>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<c_v64, c_v128>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd2Args<c_v128, c_v128, c_v128>(uint32_t, uint32_t, + uint32_t, const char *); +template void TestSimd2Args<c_v128, c_v128, uint32_t>(uint32_t, uint32_t, + uint32_t, const char *); +template void TestSimd2Args<c_v128, uint64_t, uint64_t>(uint32_t, uint32_t, + uint32_t, const char *); +template void TestSimd2Args<c_v128, c_v64, c_v64>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd2Args<int64_t, c_v128, c_v128>(uint32_t, uint32_t, + uint32_t, const char *); +template void TestSimd2Args<uint32_t, c_v128, c_v128>(uint32_t, uint32_t, + uint32_t, const char *); + +} // namespace SIMD_NAMESPACE diff --git a/third_party/aom/test/simd_cmp_neon.cc b/third_party/aom/test/simd_cmp_neon.cc new file mode 100644 index 000000000..c8004cc8b --- /dev/null +++ b/third_party/aom/test/simd_cmp_neon.cc @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#if defined(__OPTIMIZE__) && __OPTIMIZE__ +#define ARCH NEON +#define ARCH_POSTFIX(name) name##_neon +#define SIMD_NAMESPACE simd_test_neon +#include "./simd_cmp_impl.h" +#endif diff --git a/third_party/aom/test/simd_cmp_sse2.cc b/third_party/aom/test/simd_cmp_sse2.cc new file mode 100644 index 000000000..67cb43c10 --- /dev/null +++ b/third_party/aom/test/simd_cmp_sse2.cc @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \ + (!defined(__GNUC__) && !defined(_DEBUG)) +#define ARCH SSE2 +#define ARCH_POSTFIX(name) name##_sse2 +#define SIMD_NAMESPACE simd_test_sse2 +#include "./simd_cmp_impl.h" +#endif diff --git a/third_party/aom/test/simd_cmp_sse4.cc b/third_party/aom/test/simd_cmp_sse4.cc new file mode 100644 index 000000000..ba826d898 --- /dev/null +++ b/third_party/aom/test/simd_cmp_sse4.cc @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \ + (!defined(__GNUC__) && !defined(_DEBUG)) +#define ARCH SSE4_1 +#define ARCH_POSTFIX(name) name##_sse4_1 +#define SIMD_NAMESPACE simd_test_sse4_1 +#include "./simd_cmp_impl.h" +#endif diff --git a/third_party/aom/test/simd_cmp_ssse3.cc b/third_party/aom/test/simd_cmp_ssse3.cc new file mode 100644 index 000000000..a6c7000fd --- /dev/null +++ b/third_party/aom/test/simd_cmp_ssse3.cc @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \ + (!defined(__GNUC__) && !defined(_DEBUG)) +#define ARCH SSSE3 +#define ARCH_POSTFIX(name) name##_ssse3 +#define SIMD_NAMESPACE simd_test_ssse3 +#include "./simd_cmp_impl.h" +#endif diff --git a/third_party/aom/test/simd_impl.h b/third_party/aom/test/simd_impl.h new file mode 100644 index 000000000..5cfda675d --- /dev/null +++ b/third_party/aom/test/simd_impl.h @@ -0,0 +1,594 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
*/

#define SIMD_CHECK 1
#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "aom_dsp/aom_simd_inline.h"
#include "aom_dsp/simd/v128_intrinsics_c.h"

namespace SIMD_NAMESPACE {

// Value-parameterized fixture shared by all intrinsic tests.  The test
// parameter is a tuple; SetUp() copies its three elements, in order, into
// `mask`, `maskwidth` and `name` so the test bodies can pass them on.
// TearDown() calls libaom_test::ClearSystemState() after every test.
template <typename param_signature>
class TestIntrinsic : public ::testing::TestWithParam<param_signature> {
 public:
  virtual ~TestIntrinsic() {}
  virtual void SetUp() {
    mask = std::tr1::get<0>(this->GetParam());
    maskwidth = std::tr1::get<1>(this->GetParam());
    name = std::tr1::get<2>(this->GetParam());
  }

  virtual void TearDown() { libaom_test::ClearSystemState(); }

 protected:
  uint32_t mask, maskwidth;
  const char *name;
};

// Create one typedef for each function signature.  Every typedef names the
// same TestIntrinsic instantiation (a <uint32_t, uint32_t, const char *>
// tuple); only the alias, passed through ARCH_POSTFIX, differs.
#define TYPEDEF_SIMD(name)                                                    \
  typedef TestIntrinsic<std::tr1::tuple<uint32_t, uint32_t, const char *> > \
      ARCH_POSTFIX(name)

TYPEDEF_SIMD(V64_U8);
TYPEDEF_SIMD(V64_U16);
TYPEDEF_SIMD(V64_U32);
TYPEDEF_SIMD(V64_V64);
TYPEDEF_SIMD(U32_V64);
TYPEDEF_SIMD(S32_V64);
TYPEDEF_SIMD(U64_V64);
TYPEDEF_SIMD(S64_V64);
TYPEDEF_SIMD(V64_U32U32);
TYPEDEF_SIMD(V64_V64V64);
TYPEDEF_SIMD(S64_V64V64);
TYPEDEF_SIMD(V64_V64U32);
TYPEDEF_SIMD(U32_V64V64);
TYPEDEF_SIMD(V128_V64);
TYPEDEF_SIMD(V128_V128);
TYPEDEF_SIMD(U32_V128);
TYPEDEF_SIMD(U64_V128);
TYPEDEF_SIMD(V64_V128);
TYPEDEF_SIMD(V128_U8);
TYPEDEF_SIMD(V128_U16);
TYPEDEF_SIMD(V128_U32);
TYPEDEF_SIMD(V128_U64U64);
TYPEDEF_SIMD(V128_V64V64);
TYPEDEF_SIMD(V128_V128V128);
TYPEDEF_SIMD(S64_V128V128);
TYPEDEF_SIMD(V128_V128U32);
TYPEDEF_SIMD(U32_V128V128);

// Google Test allows up to 50 tests per case, so split the largest
typedef ARCH_POSTFIX(V64_V64) ARCH_POSTFIX(V64_V64_Part2);
typedef ARCH_POSTFIX(V64_V64V64) ARCH_POSTFIX(V64_V64V64_Part2);
typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part2);
typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part3);
typedef ARCH_POSTFIX(V128_V128V128) ARCH_POSTFIX(V128_V128V128_Part2);

// These functions are machine tuned and are only declared here; their
// definitions are located elsewhere.
template <typename c_ret, typename c_arg>
void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
                  const char *name);

template <typename c_ret, typename c_arg1, typename c_arg2>
void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
                   const char *name);

// Passed as the `iterations` argument by every test below.
const int kIterations = 65536;

// Add a macro layer since TEST_P will quote the name so we need to
// expand it first with the prefix.
#define MY_TEST_P(name, test) TEST_P(name, test)

// Each test below forwards the fixture's mask, maskwidth and name to the
// TestSimd1Arg/TestSimd2Args instantiation whose C types match the
// signature named by the fixture alias.
MY_TEST_P(ARCH_POSTFIX(V64_U8), TestIntrinsics) {
  TestSimd1Arg<c_v64, uint8_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_U16), TestIntrinsics) {
  TestSimd1Arg<c_v64, uint16_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_U32), TestIntrinsics) {
  TestSimd1Arg<c_v64, uint32_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_V64), TestIntrinsics) {
  TestSimd1Arg<c_v64, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U64_V64), TestIntrinsics) {
  TestSimd1Arg<uint64_t, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(S64_V64), TestIntrinsics) {
  TestSimd1Arg<int64_t, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U32_V64), TestIntrinsics) {
  TestSimd1Arg<uint32_t, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(S32_V64), TestIntrinsics) {
  TestSimd1Arg<int32_t, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_U32U32), TestIntrinsics) {
  TestSimd2Args<c_v64, uint32_t, uint32_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_V64V64), TestIntrinsics) {
  TestSimd2Args<c_v64, c_v64, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(S64_V64V64), TestIntrinsics) {
  TestSimd2Args<int64_t, c_v64, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U32_V64V64), TestIntrinsics) {
  TestSimd2Args<uint32_t, c_v64, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_V64U32), TestIntrinsics) {
  TestSimd2Args<c_v64, c_v64, uint32_t>(kIterations, mask, maskwidth, name);
}

// Google Test allows up to 50 tests per case, so split the largest
MY_TEST_P(ARCH_POSTFIX(V64_V64_Part2), TestIntrinsics) {
  TestSimd1Arg<c_v64, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_V64V64_Part2), TestIntrinsics) {
  TestSimd2Args<c_v64, c_v64, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U32_V128), TestIntrinsics) {
  TestSimd1Arg<uint32_t, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U64_V128), TestIntrinsics) {
  TestSimd1Arg<uint64_t, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_V128), TestIntrinsics) {
  TestSimd1Arg<c_v64, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128), TestIntrinsics) {
  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_U8), TestIntrinsics) {
  TestSimd1Arg<c_v128, uint8_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_U16), TestIntrinsics) {
  TestSimd1Arg<c_v128, uint16_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_U32), TestIntrinsics) {
  TestSimd1Arg<c_v128, uint32_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V64), TestIntrinsics) {
  TestSimd1Arg<c_v128, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128V128), TestIntrinsics) {
  TestSimd2Args<c_v128, c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U32_V128V128), TestIntrinsics) {
  TestSimd2Args<uint32_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(S64_V128V128), TestIntrinsics) {
  TestSimd2Args<int64_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_U64U64), TestIntrinsics) {
  TestSimd2Args<c_v128, uint64_t, uint64_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V64V64), TestIntrinsics) {
  TestSimd2Args<c_v128, c_v64, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128U32), TestIntrinsics) {
  TestSimd2Args<c_v128, c_v128, uint32_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128V128_Part2), TestIntrinsics) {
  TestSimd2Args<c_v128, c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128_Part2), TestIntrinsics) {
  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128_Part3), TestIntrinsics) {
  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

// Add a macro layer since INSTANTIATE_TEST_CASE_P will quote the name
// so we need to expand it first with the prefix
#define INSTANTIATE(name, type, ...)
\ + INSTANTIATE_TEST_CASE_P(name, type, ::testing::Values(__VA_ARGS__)) + +#define SIMD_TUPLE(name, mask, maskwidth) \ + std::tr1::make_tuple(mask, maskwidth, static_cast<const char *>(#name)) + +INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V64V64), + (SIMD_TUPLE(v64_sad_u8, 0U, 0U), SIMD_TUPLE(v64_ssd_u8, 0U, 0U))); + +INSTANTIATE( + ARCH, ARCH_POSTFIX(V64_V64V64), SIMD_TUPLE(v64_add_8, 0U, 0U), + SIMD_TUPLE(v64_add_16, 0U, 0U), SIMD_TUPLE(v64_sadd_s16, 0U, 0U), + SIMD_TUPLE(v64_add_32, 0U, 0U), SIMD_TUPLE(v64_sub_8, 0U, 0U), + SIMD_TUPLE(v64_ssub_u8, 0U, 0U), SIMD_TUPLE(v64_ssub_s8, 0U, 0U), + SIMD_TUPLE(v64_sub_16, 0U, 0U), SIMD_TUPLE(v64_ssub_s16, 0U, 0U), + SIMD_TUPLE(v64_ssub_u16, 0U, 0U), SIMD_TUPLE(v64_sub_32, 0U, 0U), + SIMD_TUPLE(v64_ziplo_8, 0U, 0U), SIMD_TUPLE(v64_ziphi_8, 0U, 0U), + SIMD_TUPLE(v64_ziplo_16, 0U, 0U), SIMD_TUPLE(v64_ziphi_16, 0U, 0U), + SIMD_TUPLE(v64_ziplo_32, 0U, 0U), SIMD_TUPLE(v64_ziphi_32, 0U, 0U), + SIMD_TUPLE(v64_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v64_pack_s16_u8, 0U, 0U), + SIMD_TUPLE(v64_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v64_unziphi_8, 0U, 0U), + SIMD_TUPLE(v64_unziplo_8, 0U, 0U), SIMD_TUPLE(v64_unziphi_16, 0U, 0U), + SIMD_TUPLE(v64_unziplo_16, 0U, 0U), SIMD_TUPLE(v64_or, 0U, 0U), + SIMD_TUPLE(v64_xor, 0U, 0U), SIMD_TUPLE(v64_and, 0U, 0U), + SIMD_TUPLE(v64_andn, 0U, 0U), SIMD_TUPLE(v64_mullo_s16, 0U, 0U), + SIMD_TUPLE(v64_mulhi_s16, 0U, 0U), SIMD_TUPLE(v64_mullo_s32, 0U, 0U), + SIMD_TUPLE(v64_madd_s16, 0U, 0U), SIMD_TUPLE(v64_madd_us8, 0U, 0U), + SIMD_TUPLE(v64_avg_u8, 0U, 0U), SIMD_TUPLE(v64_rdavg_u8, 0U, 0U), + SIMD_TUPLE(v64_avg_u16, 0U, 0U), SIMD_TUPLE(v64_min_u8, 0U, 0U), + SIMD_TUPLE(v64_max_u8, 0U, 0U), SIMD_TUPLE(v64_min_s8, 0U, 0U), + SIMD_TUPLE(v64_max_s8, 0U, 0U), SIMD_TUPLE(v64_min_s16, 0U, 0U), + SIMD_TUPLE(v64_max_s16, 0U, 0U), SIMD_TUPLE(v64_cmpgt_s8, 0U, 0U), + SIMD_TUPLE(v64_cmplt_s8, 0U, 0U), SIMD_TUPLE(v64_cmpeq_8, 0U, 0U), + SIMD_TUPLE(v64_cmpgt_s16, 0U, 0U), SIMD_TUPLE(v64_cmplt_s16, 0U, 0U), + SIMD_TUPLE(v64_cmpeq_16, 
0U, 0U)); + +INSTANTIATE( + ARCH, ARCH_POSTFIX(V64_V64V64_Part2), SIMD_TUPLE(v64_shuffle_8, 7U, 8U), + SIMD_TUPLE(imm_v64_align<1>, 0U, 0U), SIMD_TUPLE(imm_v64_align<2>, 0U, 0U), + SIMD_TUPLE(imm_v64_align<3>, 0U, 0U), SIMD_TUPLE(imm_v64_align<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_align<5>, 0U, 0U), SIMD_TUPLE(imm_v64_align<6>, 0U, 0U), + SIMD_TUPLE(imm_v64_align<7>, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64), SIMD_TUPLE(v64_abs_s8, 0U, 0U), + SIMD_TUPLE(v64_abs_s16, 0U, 0U), + SIMD_TUPLE(v64_unpacklo_u8_s16, 0U, 0U), + SIMD_TUPLE(v64_unpackhi_u8_s16, 0U, 0U), + SIMD_TUPLE(v64_unpacklo_s8_s16, 0U, 0U), + SIMD_TUPLE(v64_unpackhi_s8_s16, 0U, 0U), + SIMD_TUPLE(v64_unpacklo_u16_s32, 0U, 0U), + SIMD_TUPLE(v64_unpacklo_s16_s32, 0U, 0U), + SIMD_TUPLE(v64_unpackhi_u16_s32, 0U, 0U), + SIMD_TUPLE(v64_unpackhi_s16_s32, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_byte<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_byte<2>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_byte<3>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_byte<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_byte<5>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_byte<6>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_byte<7>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_byte<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_byte<2>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_byte<3>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_byte<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_byte<5>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_byte<6>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_byte<7>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_8<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_8<2>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_8<3>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_8<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_8<5>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_8<6>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_8<7>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u8<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u8<2>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u8<3>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u8<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u8<5>, 0U, 0U), + 
SIMD_TUPLE(imm_v64_shr_n_u8<6>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u8<7>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s8<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s8<2>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s8<3>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s8<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s8<5>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s8<6>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s8<7>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_16<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_16<2>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_16<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_16<6>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_16<8>, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64_Part2), + SIMD_TUPLE(imm_v64_shl_n_16<10>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_16<12>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_16<14>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u16<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u16<2>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u16<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u16<6>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u16<8>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u16<10>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u16<12>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u16<14>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s16<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s16<2>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s16<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s16<6>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s16<8>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s16<10>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s16<12>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s16<14>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_32<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_32<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_32<8>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_32<12>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_32<16>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_32<20>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_32<24>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_32<28>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u32<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u32<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u32<8>, 0U, 0U), + 
SIMD_TUPLE(imm_v64_shr_n_u32<12>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u32<16>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u32<20>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u32<24>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u32<28>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s32<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s32<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s32<8>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s32<12>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s32<16>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s32<20>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s32<24>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s32<28>, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64U32), SIMD_TUPLE(v64_shl_8, 7U, 32U), + SIMD_TUPLE(v64_shr_u8, 7U, 32U), SIMD_TUPLE(v64_shr_s8, 7U, 32U), + SIMD_TUPLE(v64_shl_16, 15U, 32U), SIMD_TUPLE(v64_shr_u16, 15U, 32U), + SIMD_TUPLE(v64_shr_s16, 15U, 32U), SIMD_TUPLE(v64_shl_32, 31U, 32U), + SIMD_TUPLE(v64_shr_u32, 31U, 32U), + SIMD_TUPLE(v64_shr_s32, 31U, 32U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V64), SIMD_TUPLE(v64_hadd_u8, 0U, 0U), + SIMD_TUPLE(v64_u64, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V64), SIMD_TUPLE(v64_hadd_s16, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V64), SIMD_TUPLE(v64_low_u32, 0U, 0U), + SIMD_TUPLE(v64_high_u32, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(S32_V64), SIMD_TUPLE(v64_low_s32, 0U, 0U), + SIMD_TUPLE(v64_high_s32, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V64V64), SIMD_TUPLE(v64_dotp_s16, 0U, 0U), + SIMD_TUPLE(v64_dotp_su8, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U8), SIMD_TUPLE(v64_dup_8, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U16), SIMD_TUPLE(v64_dup_16, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U32), SIMD_TUPLE(v64_dup_32, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U32U32), SIMD_TUPLE(v64_from_32, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V128V128), SIMD_TUPLE(v128_sad_u8, 0U, 0U), + SIMD_TUPLE(v128_ssd_u8, 0U, 0U)); + +INSTANTIATE( + ARCH, ARCH_POSTFIX(V128_V128V128), SIMD_TUPLE(v128_add_8, 0U, 0U), + 
SIMD_TUPLE(v128_add_16, 0U, 0U), SIMD_TUPLE(v128_sadd_s16, 0U, 0U), + SIMD_TUPLE(v128_add_32, 0U, 0U), SIMD_TUPLE(v128_sub_8, 0U, 0U), + SIMD_TUPLE(v128_ssub_u8, 0U, 0U), SIMD_TUPLE(v128_ssub_s8, 0U, 0U), + SIMD_TUPLE(v128_sub_16, 0U, 0U), SIMD_TUPLE(v128_ssub_s16, 0U, 0U), + SIMD_TUPLE(v128_ssub_u16, 0U, 0U), SIMD_TUPLE(v128_sub_32, 0U, 0U), + SIMD_TUPLE(v128_ziplo_8, 0U, 0U), SIMD_TUPLE(v128_ziphi_8, 0U, 0U), + SIMD_TUPLE(v128_ziplo_16, 0U, 0U), SIMD_TUPLE(v128_ziphi_16, 0U, 0U), + SIMD_TUPLE(v128_ziplo_32, 0U, 0U), SIMD_TUPLE(v128_ziphi_32, 0U, 0U), + SIMD_TUPLE(v128_ziplo_64, 0U, 0U), SIMD_TUPLE(v128_ziphi_64, 0U, 0U), + SIMD_TUPLE(v128_unziphi_8, 0U, 0U), SIMD_TUPLE(v128_unziplo_8, 0U, 0U), + SIMD_TUPLE(v128_unziphi_16, 0U, 0U), SIMD_TUPLE(v128_unziplo_16, 0U, 0U), + SIMD_TUPLE(v128_unziphi_32, 0U, 0U), SIMD_TUPLE(v128_unziplo_32, 0U, 0U), + SIMD_TUPLE(v128_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v128_pack_s16_u8, 0U, 0U), + SIMD_TUPLE(v128_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v128_or, 0U, 0U), + SIMD_TUPLE(v128_xor, 0U, 0U), SIMD_TUPLE(v128_and, 0U, 0U), + SIMD_TUPLE(v128_andn, 0U, 0U), SIMD_TUPLE(v128_mullo_s16, 0U, 0U), + SIMD_TUPLE(v128_mulhi_s16, 0U, 0U), SIMD_TUPLE(v128_mullo_s32, 0U, 0U), + SIMD_TUPLE(v128_madd_s16, 0U, 0U), SIMD_TUPLE(v128_madd_us8, 0U, 0U), + SIMD_TUPLE(v128_avg_u8, 0U, 0U), SIMD_TUPLE(v128_rdavg_u8, 0U, 0U), + SIMD_TUPLE(v128_avg_u16, 0U, 0U), SIMD_TUPLE(v128_min_u8, 0U, 0U), + SIMD_TUPLE(v128_max_u8, 0U, 0U), SIMD_TUPLE(v128_min_s8, 0U, 0U), + SIMD_TUPLE(v128_max_s8, 0U, 0U), SIMD_TUPLE(v128_min_s16, 0U, 0U), + SIMD_TUPLE(v128_max_s16, 0U, 0U), SIMD_TUPLE(v128_cmpgt_s8, 0U, 0U), + SIMD_TUPLE(v128_cmplt_s8, 0U, 0U), SIMD_TUPLE(v128_cmpeq_8, 0U, 0U), + SIMD_TUPLE(v128_cmpgt_s16, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128V128_Part2), + SIMD_TUPLE(v128_cmpeq_16, 0U, 0U), + SIMD_TUPLE(v128_cmplt_s16, 0U, 0U), + SIMD_TUPLE(v128_shuffle_8, 15U, 8U), + SIMD_TUPLE(imm_v128_align<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<2>, 0U, 0U), + 
SIMD_TUPLE(imm_v128_align<3>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<5>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<6>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<7>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<8>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<9>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<10>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<11>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<12>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<13>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<14>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<15>, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128), SIMD_TUPLE(v128_abs_s8, 0U, 0U), + SIMD_TUPLE(v128_abs_s16, 0U, 0U), SIMD_TUPLE(v128_padd_s16, 0U, 0U), + SIMD_TUPLE(v128_unpacklo_u8_s16, 0U, 0U), + SIMD_TUPLE(v128_unpacklo_s8_s16, 0U, 0U), + SIMD_TUPLE(v128_unpacklo_u16_s32, 0U, 0U), + SIMD_TUPLE(v128_unpacklo_s16_s32, 0U, 0U), + SIMD_TUPLE(v128_unpackhi_u8_s16, 0U, 0U), + SIMD_TUPLE(v128_unpackhi_s8_s16, 0U, 0U), + SIMD_TUPLE(v128_unpackhi_u16_s32, 0U, 0U), + SIMD_TUPLE(v128_unpackhi_s16_s32, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<2>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<3>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<5>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<6>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<7>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<8>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<9>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<10>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<11>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<12>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<13>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<14>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<15>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<2>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<3>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<5>, 0U, 0U), + 
SIMD_TUPLE(imm_v128_shl_n_byte<6>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<7>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<8>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<9>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<10>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<11>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<12>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<13>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<14>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<15>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_8<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_8<2>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_8<3>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_8<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_8<5>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_8<6>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_8<7>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u8<1>, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part2), + SIMD_TUPLE(imm_v128_shr_n_u8<2>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u8<3>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u8<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u8<5>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u8<6>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u8<7>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s8<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s8<2>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s8<3>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s8<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s8<5>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s8<6>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s8<7>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_16<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_16<2>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_16<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_16<6>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_16<8>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_16<10>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_16<12>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_16<14>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u16<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u16<2>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u16<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u16<6>, 0U, 0U), + 
SIMD_TUPLE(imm_v128_shr_n_u16<8>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u16<10>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u16<12>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u16<14>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s16<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s16<2>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s16<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s16<6>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s16<8>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s16<10>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s16<12>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s16<14>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_32<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_32<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_32<8>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_32<12>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_32<16>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_32<20>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_32<24>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_32<28>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u32<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u32<4>, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part3), + SIMD_TUPLE(imm_v128_shr_n_u32<8>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u32<12>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u32<16>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u32<20>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u32<24>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u32<28>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s32<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s32<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s32<8>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s32<12>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s32<16>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s32<20>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s32<24>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s32<28>, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V64V64), SIMD_TUPLE(v128_from_v64, 0U, 0U), + SIMD_TUPLE(v128_zip_8, 0U, 0U), SIMD_TUPLE(v128_zip_16, 0U, 0U), + SIMD_TUPLE(v128_zip_32, 0U, 0U), SIMD_TUPLE(v128_mul_s16, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U64U64), SIMD_TUPLE(v128_from_64, 0U, 0U)); + 
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V64), + SIMD_TUPLE(v128_unpack_u8_s16, 0U, 0U), + SIMD_TUPLE(v128_unpack_s8_s16, 0U, 0U), + SIMD_TUPLE(v128_unpack_u16_s32, 0U, 0U), + SIMD_TUPLE(v128_unpack_s16_s32, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128U32), SIMD_TUPLE(v128_shl_8, 7U, 32U), + SIMD_TUPLE(v128_shr_u8, 7U, 32U), SIMD_TUPLE(v128_shr_s8, 7U, 32U), + SIMD_TUPLE(v128_shl_16, 15U, 32U), + SIMD_TUPLE(v128_shr_u16, 15U, 32U), + SIMD_TUPLE(v128_shr_s16, 15U, 32U), + SIMD_TUPLE(v128_shl_32, 31U, 32U), + SIMD_TUPLE(v128_shr_u32, 31U, 32U), + SIMD_TUPLE(v128_shr_s32, 31U, 32U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V128), SIMD_TUPLE(v128_low_u32, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V128), SIMD_TUPLE(v128_hadd_u8, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V128), SIMD_TUPLE(v128_low_v64, 0U, 0U), + SIMD_TUPLE(v128_high_v64, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U8), SIMD_TUPLE(v128_dup_8, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U16), SIMD_TUPLE(v128_dup_16, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U32), SIMD_TUPLE(v128_dup_32, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V128V128), + SIMD_TUPLE(v128_dotp_s16, 0U, 0U)); + +} // namespace SIMD_NAMESPACE diff --git a/third_party/aom/test/simd_neon_test.cc b/third_party/aom/test/simd_neon_test.cc new file mode 100644 index 000000000..0565fb4e2 --- /dev/null +++ b/third_party/aom/test/simd_neon_test.cc @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#if defined(__OPTIMIZE__) && __OPTIMIZE__ +#define ARCH NEON +#define ARCH_POSTFIX(name) name##_neon +#define SIMD_NAMESPACE simd_test_neon +#include "./simd_impl.h" +#endif diff --git a/third_party/aom/test/simd_sse2_test.cc b/third_party/aom/test/simd_sse2_test.cc new file mode 100644 index 000000000..a0b49d77e --- /dev/null +++ b/third_party/aom/test/simd_sse2_test.cc @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \ + (!defined(__GNUC__) && !defined(_DEBUG)) +#define ARCH SSE2 +#define ARCH_POSTFIX(name) name##_sse2 +#define SIMD_NAMESPACE simd_test_sse2 +#include "./simd_impl.h" +#endif diff --git a/third_party/aom/test/simd_sse4_test.cc b/third_party/aom/test/simd_sse4_test.cc new file mode 100644 index 000000000..73c96427f --- /dev/null +++ b/third_party/aom/test/simd_sse4_test.cc @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \ + (!defined(__GNUC__) && !defined(_DEBUG)) +#define ARCH SSE4_1 +#define ARCH_POSTFIX(name) name##_sse4_1 +#define SIMD_NAMESPACE simd_test_sse4_1 +#include "./simd_impl.h" +#endif diff --git a/third_party/aom/test/simd_ssse3_test.cc b/third_party/aom/test/simd_ssse3_test.cc new file mode 100644 index 000000000..9ebeeef1b --- /dev/null +++ b/third_party/aom/test/simd_ssse3_test.cc @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \ + (!defined(__GNUC__) && !defined(_DEBUG)) +#define ARCH SSSE3 +#define ARCH_POSTFIX(name) name##_ssse3 +#define SIMD_NAMESPACE simd_test_ssse3 +#include "./simd_impl.h" +#endif diff --git a/third_party/aom/test/simple_decoder.sh b/third_party/aom/test/simple_decoder.sh new file mode 100755 index 000000000..ac3a07b18 --- /dev/null +++ b/third_party/aom/test/simple_decoder.sh @@ -0,0 +1,58 @@ +#!/bin/sh +## Copyright (c) 2016, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+## +## This file tests the libaom simple_decoder example code. To add new tests to +## this file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to simple_decoder_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +# Environment check: Make sure input is available: +simple_decoder_verify_environment() { + if [ ! "$(av1_encode_available)" = "yes" ] && [ ! -e "${AV1_IVF_FILE}" ]; then + return 1 + fi +} + +# Runs simple_decoder using $1 as input file. $2 is the codec name, and is used +# solely to name the output file. +simple_decoder() { + local decoder="${LIBAOM_BIN_PATH}/simple_decoder${AOM_TEST_EXE_SUFFIX}" + local input_file="$1" + local codec="$2" + local output_file="${AOM_TEST_OUTPUT_DIR}/simple_decoder_${codec}.raw" + + if [ ! -x "${decoder}" ]; then + elog "${decoder} does not exist or is not executable." + return 1 + fi + + eval "${AOM_TEST_PREFIX}" "${decoder}" "${input_file}" "${output_file}" \ + ${devnull} + + [ -e "${output_file}" ] || return 1 +} + +simple_decoder_av1() { + if [ "$(av1_decode_available)" = "yes" ]; then + if [ ! -e "${AV1_IVF_FILE}" ]; then + local file="${AOM_TEST_OUTPUT_DIR}/test_encode.ivf" + encode_yuv_raw_input_av1 "${file}" --ivf + simple_decoder "${file}" av1 || return 1 + else + simple_decoder "${AV1_IVF_FILE}" av1 || return 1 + fi + fi +} + +simple_decoder_tests="simple_decoder_av1" + +run_tests simple_decoder_verify_environment "${simple_decoder_tests}" diff --git a/third_party/aom/test/simple_encoder.sh b/third_party/aom/test/simple_encoder.sh new file mode 100755 index 000000000..5cd6b46a1 --- /dev/null +++ b/third_party/aom/test/simple_encoder.sh @@ -0,0 +1,53 @@ +#!/bin/sh +## Copyright (c) 2016, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## +## This file tests the libaom simple_encoder example. To add new tests to this +## file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to simple_encoder_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +# Environment check: $YUV_RAW_INPUT is required. +simple_encoder_verify_environment() { + if [ ! -e "${YUV_RAW_INPUT}" ]; then + echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH." + return 1 + fi +} + +# Runs simple_encoder using the codec specified by $1. The trailing arguments +# passed below (9999 0 5) are presumably the keyframe interval, the +# error-resilient flag and the frame limit, i.e. 5 frames rather than the 100 +# an earlier version of this comment stated -- TODO confirm against the +# simple_encoder usage text. Fails if the encoder binary is missing or the +# output file was not created. +simple_encoder() { + local encoder="${LIBAOM_BIN_PATH}/simple_encoder${AOM_TEST_EXE_SUFFIX}" + local codec="$1" + local output_file="${AOM_TEST_OUTPUT_DIR}/simple_encoder_${codec}.ivf" + + if [ ! -x "${encoder}" ]; then + elog "${encoder} does not exist or is not executable." + return 1 + fi + + eval "${AOM_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \ + "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 9999 0 5 \ + ${devnull} + + [ -e "${output_file}" ] || return 1 +} + + +simple_encoder_av1() { + if [ "$(av1_encode_available)" = "yes" ]; then + simple_encoder av1 || return 1 + fi +} + +simple_encoder_tests="simple_encoder_av1" + +run_tests simple_encoder_verify_environment "${simple_encoder_tests}" diff --git a/third_party/aom/test/subtract_test.cc b/third_party/aom/test/subtract_test.cc new file mode 100644 index 000000000..c90ca8d56 --- /dev/null +++ b/third_party/aom/test/subtract_test.cc @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_config.h" +#include "./aom_dsp_rtcd.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/util.h" +#if CONFIG_AV1 +#include "av1/common/blockd.h" +#endif +#include "aom_mem/aom_mem.h" +#include "aom_ports/mem.h" + +#define USE_SPEED_TEST (0) + +typedef void (*SubtractFunc)(int rows, int cols, int16_t *diff_ptr, + ptrdiff_t diff_stride, const uint8_t *src_ptr, + ptrdiff_t src_stride, const uint8_t *pred_ptr, + ptrdiff_t pred_stride); + +namespace { + +class AV1SubtractBlockTest : public ::testing::TestWithParam<SubtractFunc> { + public: + virtual void TearDown() { libaom_test::ClearSystemState(); } +}; + +using libaom_test::ACMRandom; + +TEST_P(AV1SubtractBlockTest, SimpleSubtract) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + // FIXME(rbultje) split in its own file + for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES; + bsize = static_cast<BLOCK_SIZE>(static_cast<int>(bsize) + 1)) { + const int block_width = block_size_wide[bsize]; + const int block_height = block_size_high[bsize]; + int16_t *diff = reinterpret_cast<int16_t *>( + aom_memalign(16, sizeof(*diff) * block_width * block_height * 2)); + uint8_t *pred = reinterpret_cast<uint8_t *>( + aom_memalign(16, block_width * block_height * 2)); + uint8_t *src = reinterpret_cast<uint8_t *>( + aom_memalign(16, block_width * block_height * 2)); + + for (int n = 0; n < 
100; n++) { + for (int r = 0; r < block_height; ++r) { + for (int c = 0; c < block_width * 2; ++c) { + src[r * block_width * 2 + c] = rnd.Rand8(); + pred[r * block_width * 2 + c] = rnd.Rand8(); + } + } + + GetParam()(block_height, block_width, diff, block_width, src, block_width, + pred, block_width); + + for (int r = 0; r < block_height; ++r) { + for (int c = 0; c < block_width; ++c) { + EXPECT_EQ(diff[r * block_width + c], + (src[r * block_width + c] - pred[r * block_width + c])) + << "r = " << r << ", c = " << c << ", bs = " << bsize; + } + } + + GetParam()(block_height, block_width, diff, block_width * 2, src, + block_width * 2, pred, block_width * 2); + + for (int r = 0; r < block_height; ++r) { + for (int c = 0; c < block_width; ++c) { + EXPECT_EQ( + diff[r * block_width * 2 + c], + (src[r * block_width * 2 + c] - pred[r * block_width * 2 + c])) + << "r = " << r << ", c = " << c << ", bs = " << bsize; + } + } + } + aom_free(diff); + aom_free(pred); + aom_free(src); + } +} + +INSTANTIATE_TEST_CASE_P(C, AV1SubtractBlockTest, + ::testing::Values(aom_subtract_block_c)); + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P(SSE2, AV1SubtractBlockTest, + ::testing::Values(aom_subtract_block_sse2)); +#endif +#if HAVE_NEON +INSTANTIATE_TEST_CASE_P(NEON, AV1SubtractBlockTest, + ::testing::Values(aom_subtract_block_neon)); +#endif +#if HAVE_MSA +INSTANTIATE_TEST_CASE_P(MSA, AV1SubtractBlockTest, + ::testing::Values(aom_subtract_block_msa)); +#endif + +typedef void (*HBDSubtractFunc)(int rows, int cols, int16_t *diff_ptr, + ptrdiff_t diff_stride, const uint8_t *src_ptr, + ptrdiff_t src_stride, const uint8_t *pred_ptr, + ptrdiff_t pred_stride, int bd); + +using ::std::tr1::get; +using ::std::tr1::make_tuple; +using ::std::tr1::tuple; + +// <width, height, bit_depth, subtract> +typedef tuple<int, int, int, HBDSubtractFunc> Params; + +#if CONFIG_HIGHBITDEPTH +class AV1HBDSubtractBlockTest : public ::testing::TestWithParam<Params> { + public: + virtual void SetUp() { + block_width_ = 
GET_PARAM(0); + block_height_ = GET_PARAM(1); + bit_depth_ = static_cast<aom_bit_depth_t>(GET_PARAM(2)); + func_ = GET_PARAM(3); + + rnd_.Reset(ACMRandom::DeterministicSeed()); + + const size_t max_width = 128; + const size_t max_block_size = max_width * max_width; + src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>( + aom_memalign(16, max_block_size * sizeof(uint16_t)))); + pred_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>( + aom_memalign(16, max_block_size * sizeof(uint16_t)))); + diff_ = reinterpret_cast<int16_t *>( + aom_memalign(16, max_block_size * sizeof(int16_t))); + } + + virtual void TearDown() { + aom_free(CONVERT_TO_SHORTPTR(src_)); + aom_free(CONVERT_TO_SHORTPTR(pred_)); + aom_free(diff_); + } + + protected: + void RunForSpeed(); + void CheckResult(); + + private: + ACMRandom rnd_; + int block_height_; + int block_width_; + aom_bit_depth_t bit_depth_; + HBDSubtractFunc func_; + uint8_t *src_; + uint8_t *pred_; + int16_t *diff_; +}; + +void AV1HBDSubtractBlockTest::RunForSpeed() { + const int test_num = 200000; + const int max_width = 128; + const int max_block_size = max_width * max_width; + const int mask = (1 << bit_depth_) - 1; + int i, j; + + for (j = 0; j < max_block_size; ++j) { + CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask; + CONVERT_TO_SHORTPTR(pred_)[j] = rnd_.Rand16() & mask; + } + + for (i = 0; i < test_num; ++i) { + func_(block_height_, block_width_, diff_, block_width_, src_, block_width_, + pred_, block_width_, bit_depth_); + } +} + +void AV1HBDSubtractBlockTest::CheckResult() { + const int test_num = 100; + const int max_width = 128; + const int max_block_size = max_width * max_width; + const int mask = (1 << bit_depth_) - 1; + int i, j; + + for (i = 0; i < test_num; ++i) { + for (j = 0; j < max_block_size; ++j) { + CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask; + CONVERT_TO_SHORTPTR(pred_)[j] = rnd_.Rand16() & mask; + } + + func_(block_height_, block_width_, diff_, block_width_, src_, block_width_, + pred_, 
block_width_, bit_depth_); + + for (int r = 0; r < block_height_; ++r) { + for (int c = 0; c < block_width_; ++c) { + EXPECT_EQ(diff_[r * block_width_ + c], + (CONVERT_TO_SHORTPTR(src_)[r * block_width_ + c] - + CONVERT_TO_SHORTPTR(pred_)[r * block_width_ + c])) + << "r = " << r << ", c = " << c << ", test: " << i; + } + } + } +} + +TEST_P(AV1HBDSubtractBlockTest, CheckResult) { CheckResult(); } + +#if USE_SPEED_TEST +TEST_P(AV1HBDSubtractBlockTest, CheckSpeed) { RunForSpeed(); } +#endif // USE_SPEED_TEST + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P( + SSE2, AV1HBDSubtractBlockTest, + ::testing::Values(make_tuple(4, 4, 12, &aom_highbd_subtract_block_sse2), + make_tuple(4, 4, 12, &aom_highbd_subtract_block_c), + make_tuple(4, 8, 12, &aom_highbd_subtract_block_sse2), + make_tuple(4, 8, 12, &aom_highbd_subtract_block_c), + make_tuple(8, 4, 12, &aom_highbd_subtract_block_sse2), + make_tuple(8, 4, 12, &aom_highbd_subtract_block_c), + make_tuple(8, 8, 12, &aom_highbd_subtract_block_sse2), + make_tuple(8, 8, 12, &aom_highbd_subtract_block_c), + make_tuple(8, 16, 12, &aom_highbd_subtract_block_sse2), + make_tuple(8, 16, 12, &aom_highbd_subtract_block_c), + make_tuple(16, 8, 12, &aom_highbd_subtract_block_sse2), + make_tuple(16, 8, 12, &aom_highbd_subtract_block_c), + make_tuple(16, 16, 12, &aom_highbd_subtract_block_sse2), + make_tuple(16, 16, 12, &aom_highbd_subtract_block_c), + make_tuple(16, 32, 12, &aom_highbd_subtract_block_sse2), + make_tuple(16, 32, 12, &aom_highbd_subtract_block_c), + make_tuple(32, 16, 12, &aom_highbd_subtract_block_sse2), + make_tuple(32, 16, 12, &aom_highbd_subtract_block_c), + make_tuple(32, 32, 12, &aom_highbd_subtract_block_sse2), + make_tuple(32, 32, 12, &aom_highbd_subtract_block_c), + make_tuple(32, 64, 12, &aom_highbd_subtract_block_sse2), + make_tuple(32, 64, 12, &aom_highbd_subtract_block_c), + make_tuple(64, 32, 12, &aom_highbd_subtract_block_sse2), + make_tuple(64, 32, 12, &aom_highbd_subtract_block_c), + make_tuple(64, 64, 12, 
&aom_highbd_subtract_block_sse2), + make_tuple(64, 64, 12, &aom_highbd_subtract_block_c), + make_tuple(64, 128, 12, &aom_highbd_subtract_block_sse2), + make_tuple(64, 128, 12, &aom_highbd_subtract_block_c), + make_tuple(128, 64, 12, &aom_highbd_subtract_block_sse2), + make_tuple(128, 64, 12, &aom_highbd_subtract_block_c), + make_tuple(128, 128, 12, &aom_highbd_subtract_block_sse2), + make_tuple(128, 128, 12, &aom_highbd_subtract_block_c))); +#endif // HAVE_SSE2 +#endif // CONFIG_HIGHBITDEPTH +} // namespace diff --git a/third_party/aom/test/sum_squares_test.cc b/third_party/aom/test/sum_squares_test.cc new file mode 100644 index 000000000..b8701c196 --- /dev/null +++ b/third_party/aom/test/sum_squares_test.cc @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <cmath> +#include <cstdlib> +#include <string> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_config.h" +#include "./aom_dsp_rtcd.h" +#include "aom_ports/mem.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "test/function_equivalence_test.h" + +using libaom_test::ACMRandom; +using libaom_test::FunctionEquivalenceTest; + +namespace { +const int kNumIterations = 10000; + +static const int16_t kInt13Max = (1 << 12) - 1; + +typedef uint64_t (*SSI16Func)(const int16_t *src, int stride, int width, + int height); +typedef libaom_test::FuncParam<SSI16Func> TestFuncs; + +class SumSquaresTest : public ::testing::TestWithParam<TestFuncs> { + public: + virtual ~SumSquaresTest() {} + virtual void SetUp() { params_ = this->GetParam(); } + + virtual void TearDown() { libaom_test::ClearSystemState(); } + + protected: + TestFuncs params_; +}; + +TEST_P(SumSquaresTest, OperationCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, int16_t, src[256 * 256]); + + int failed = 0; + + const int msb = 11; // Up to 12 bit input + const int limit = 1 << (msb + 1); + + for (int k = 0; k < kNumIterations; k++) { + int width = 4 * rnd(32); // Up to 128x128 + int height = 4 * rnd(32); // Up to 128x128 + int stride = 4 << rnd(7); // Up to 256 stride + while (stride < width) { // Make sure it's valid + stride = 4 << rnd(7); + } + + for (int ii = 0; ii < height; ii++) { + for (int jj = 0; jj < width; jj++) { + src[ii * stride + jj] = rnd(2) ? 
rnd(limit) : -rnd(limit); + } + } + + const uint64_t res_ref = params_.ref_func(src, stride, width, height); + uint64_t res_tst; + ASM_REGISTER_STATE_CHECK(res_tst = + params_.tst_func(src, stride, width, height)); + + if (!failed) { + failed = res_ref != res_tst; + EXPECT_EQ(res_ref, res_tst) + << "Error: Sum Squares Test" + << " C output does not match optimized output."; + } + } +} + +TEST_P(SumSquaresTest, ExtremeValues) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, int16_t, src[256 * 256]); + + int failed = 0; + + const int msb = 11; // Up to 12 bit input + const int limit = 1 << (msb + 1); + + for (int k = 0; k < kNumIterations; k++) { + int width = 4 * rnd(32); // Up to 128x128 + int height = 4 * rnd(32); // Up to 128x128 + int stride = 4 << rnd(7); // Up to 256 stride + while (stride < width) { // Make sure it's valid + stride = 4 << rnd(7); + } + + int val = rnd(2) ? limit - 1 : -(limit - 1); + for (int ii = 0; ii < height; ii++) { + for (int jj = 0; jj < width; jj++) { + src[ii * stride + jj] = val; + } + } + + const uint64_t res_ref = params_.ref_func(src, stride, width, height); + uint64_t res_tst; + ASM_REGISTER_STATE_CHECK(res_tst = + params_.tst_func(src, stride, width, height)); + + if (!failed) { + failed = res_ref != res_tst; + EXPECT_EQ(res_ref, res_tst) + << "Error: Sum Squares Test" + << " C output does not match optimized output."; + } + } +} + +#if HAVE_SSE2 + +INSTANTIATE_TEST_CASE_P( + SSE2, SumSquaresTest, + ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c, + &aom_sum_squares_2d_i16_sse2))); + +#endif // HAVE_SSE2 + +////////////////////////////////////////////////////////////////////////////// +// 1D version +////////////////////////////////////////////////////////////////////////////// + +typedef uint64_t (*F1D)(const int16_t *src, uint32_t N); +typedef libaom_test::FuncParam<F1D> TestFuncs1D; + +class SumSquares1DTest : public FunctionEquivalenceTest<F1D> { + protected: + static const int kIterations = 
1000; + static const int kMaxSize = 256; +}; + +TEST_P(SumSquares1DTest, RandomValues) { + DECLARE_ALIGNED(16, int16_t, src[kMaxSize * kMaxSize]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + for (int i = 0; i < kMaxSize * kMaxSize; ++i) + src[i] = rng_(kInt13Max * 2 + 1) - kInt13Max; + + const int N = rng_(2) ? rng_(kMaxSize * kMaxSize + 1 - kMaxSize) + kMaxSize + : rng_(kMaxSize) + 1; + + const uint64_t ref_res = params_.ref_func(src, N); + uint64_t tst_res; + ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(src, N)); + + ASSERT_EQ(ref_res, tst_res); + } +} + +TEST_P(SumSquares1DTest, ExtremeValues) { + DECLARE_ALIGNED(16, int16_t, src[kMaxSize * kMaxSize]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + if (rng_(2)) { + for (int i = 0; i < kMaxSize * kMaxSize; ++i) src[i] = kInt13Max; + } else { + for (int i = 0; i < kMaxSize * kMaxSize; ++i) src[i] = -kInt13Max; + } + + const int N = rng_(2) ? rng_(kMaxSize * kMaxSize + 1 - kMaxSize) + kMaxSize + : rng_(kMaxSize) + 1; + + const uint64_t ref_res = params_.ref_func(src, N); + uint64_t tst_res; + ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(src, N)); + + ASSERT_EQ(ref_res, tst_res); + } +} + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P(SSE2, SumSquares1DTest, + ::testing::Values(TestFuncs1D( + aom_sum_squares_i16_c, aom_sum_squares_i16_sse2))); + +#endif // HAVE_SSE2 +} // namespace diff --git a/third_party/aom/test/superframe_test.cc b/third_party/aom/test/superframe_test.cc new file mode 100644 index 000000000..0f54baeaf --- /dev/null +++ b/third_party/aom/test/superframe_test.cc @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include <climits> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" + +namespace { + +const int kTestMode = 0; +const int kTileCols = 1; +const int kTileRows = 2; + +typedef std::tr1::tuple<libaom_test::TestMode, int, int> SuperframeTestParam; + +class SuperframeTest + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWithParam<SuperframeTestParam> { + protected: + SuperframeTest() + : EncoderTest(GET_PARAM(0)), modified_buf_(NULL), last_sf_pts_(0) {} + virtual ~SuperframeTest() {} + + virtual void SetUp() { + InitializeConfig(); + const SuperframeTestParam input = GET_PARAM(1); + const libaom_test::TestMode mode = std::tr1::get<kTestMode>(input); + SetMode(mode); + sf_count_ = 0; + sf_count_max_ = INT_MAX; + n_tile_cols_ = std::tr1::get<kTileCols>(input); + n_tile_rows_ = std::tr1::get<kTileRows>(input); + } + + virtual void TearDown() { delete[] modified_buf_; } + + virtual void PreEncodeFrameHook(libaom_test::VideoSource *video, + libaom_test::Encoder *encoder) { + if (video->frame() == 1) { + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_CPUUSED, 2); + encoder->Control(AV1E_SET_TILE_COLUMNS, n_tile_cols_); + encoder->Control(AV1E_SET_TILE_ROWS, n_tile_rows_); +#if CONFIG_LOOPFILTERING_ACROSS_TILES + encoder->Control(AV1E_SET_TILE_LOOPFILTER, 0); +#endif // CONFIG_LOOPFILTERING_ACROSS_TILES + } + } + + virtual const aom_codec_cx_pkt_t *MutateEncoderOutputHook( + const aom_codec_cx_pkt_t *pkt) { + if (pkt->kind != 
AOM_CODEC_CX_FRAME_PKT) return pkt; + + const uint8_t *buffer = reinterpret_cast<uint8_t *>(pkt->data.frame.buf); + const uint8_t marker = buffer[pkt->data.frame.sz - 1]; + const int frames = (marker & 0x7) + 1; + const int mag = ((marker >> 3) & 3) + 1; + const unsigned int index_sz = 2 + mag * (frames - 1); + if ((marker & 0xe0) == 0xc0 && pkt->data.frame.sz >= index_sz && + buffer[pkt->data.frame.sz - index_sz] == marker) { + // frame is a superframe. strip off the index. + if (modified_buf_) delete[] modified_buf_; + modified_buf_ = new uint8_t[pkt->data.frame.sz - index_sz]; + memcpy(modified_buf_, pkt->data.frame.buf, pkt->data.frame.sz - index_sz); + modified_pkt_ = *pkt; + modified_pkt_.data.frame.buf = modified_buf_; + modified_pkt_.data.frame.sz -= index_sz; + + sf_count_++; + last_sf_pts_ = pkt->data.frame.pts; + return &modified_pkt_; + } + + // Make sure we do a few frames after the last SF + abort_ |= + sf_count_ > sf_count_max_ && pkt->data.frame.pts - last_sf_pts_ >= 5; + return pkt; + } + + int sf_count_; + int sf_count_max_; + aom_codec_cx_pkt_t modified_pkt_; + uint8_t *modified_buf_; + aom_codec_pts_t last_sf_pts_; + + private: + int n_tile_cols_; + int n_tile_rows_; +}; + +TEST_P(SuperframeTest, TestSuperframeIndexIsOptional) { + sf_count_max_ = 0; // early exit on successful test. + cfg_.g_lag_in_frames = 25; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 40); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +#if CONFIG_EXT_REFS + // NOTE: The use of BWDREF_FRAME will enable the coding of more non-show + // frames besides ALTREF_FRAME. + EXPECT_GE(sf_count_, 1); +#else + EXPECT_EQ(sf_count_, 1); +#endif // CONFIG_EXT_REFS +} + +// The superframe index is currently mandatory with both ANS and DAALA_EC due +// to the decoder starting at the end of the buffer. +#if CONFIG_EXT_TILE +// Single tile does not work with ANS (see comment above). 
+#if CONFIG_ANS || CONFIG_DAALA_EC +const int tile_col_values[] = { 1, 2 }; +#else +const int tile_col_values[] = { 1, 2, 32 }; +#endif +const int tile_row_values[] = { 1, 2, 32 }; +AV1_INSTANTIATE_TEST_CASE( + SuperframeTest, + ::testing::Combine(::testing::Values(::libaom_test::kTwoPassGood), + ::testing::ValuesIn(tile_col_values), + ::testing::ValuesIn(tile_row_values))); +#else +#if !CONFIG_ANS && !CONFIG_DAALA_EC +AV1_INSTANTIATE_TEST_CASE( + SuperframeTest, + ::testing::Combine(::testing::Values(::libaom_test::kTwoPassGood), + ::testing::Values(0), ::testing::Values(0))); +#endif // !CONFIG_ANS +#endif // CONFIG_EXT_TILE +} // namespace diff --git a/third_party/aom/test/test-data.mk b/third_party/aom/test/test-data.mk new file mode 100644 index 000000000..168144a00 --- /dev/null +++ b/third_party/aom/test/test-data.mk @@ -0,0 +1,45 @@ +LIBAOM_TEST_SRCS-yes += test-data.mk + +# Encoder test source +LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.yuv +LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv + +LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_420.y4m +LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_422.y4m +LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_444.y4m +LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_440.yuv +LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_420.y4m +LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_422.y4m +LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_444.y4m +LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_440.yuv +LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420_a10-1.y4m +LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420.y4m +LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_422.y4m +LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_444.y4m +LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_440.yuv + +LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += desktop_credits.y4m +LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += 
niklas_1280_720_30.y4m +LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += rush_hour_444.y4m +LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += screendata.y4m +LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += niklas_640_480_30.yuv + +ifeq ($(CONFIG_DECODE_PERF_TESTS),yes) +# Encode / Decode test +LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += niklas_1280_720_30.yuv +endif # CONFIG_DECODE_PERF_TESTS + +ifeq ($(CONFIG_ENCODE_PERF_TESTS),yes) +LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += desktop_640_360_30.yuv +LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += kirland_640_480_30.yuv +LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += macmarcomoving_640_480_30.yuv +LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += macmarcostationary_640_480_30.yuv +LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += niklas_1280_720_30.yuv +LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += tacomanarrows_640_480_30.yuv +LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += tacomasmallcameramovement_640_480_30.yuv +LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += thaloundeskmtg_640_480_30.yuv +endif # CONFIG_ENCODE_PERF_TESTS + +# sort and remove duplicates +LIBAOM_TEST_DATA-yes := $(sort $(LIBAOM_TEST_DATA-yes)) + diff --git a/third_party/aom/test/test-data.sha1 b/third_party/aom/test/test-data.sha1 new file mode 100644 index 000000000..3d9bfc7c4 --- /dev/null +++ b/third_party/aom/test/test-data.sha1 @@ -0,0 +1,28 @@ +d5dfb0151c9051f8c85999255645d7a23916d3c0 *hantro_collage_w352h288.yuv +b87815bf86020c592ccc7a846ba2e28ec8043902 *hantro_odd.yuv +a432f96ff0a787268e2f94a8092ab161a18d1b06 *park_joy_90p_10_420.y4m +0b194cc312c3a2e84d156a221b0a5eb615dfddc5 *park_joy_90p_10_422.y4m +ff0e0a21dc2adc95b8c1b37902713700655ced17 *park_joy_90p_10_444.y4m +c934da6fb8cc54ee2a8c17c54cf6076dac37ead0 *park_joy_90p_10_440.yuv +614c32ae1eca391e867c70d19974f0d62664dd99 *park_joy_90p_12_420.y4m +c92825f1ea25c5c37855083a69faac6ac4641a9e *park_joy_90p_12_422.y4m +b592189b885b6cc85db55cc98512a197d73d3b34 *park_joy_90p_12_444.y4m +82c1bfcca368c2f22bad7d693d690d5499ecdd11 *park_joy_90p_12_440.yuv 
+b9e1e90aece2be6e2c90d89e6ab2372d5f8c792d *park_joy_90p_8_420_a10-1.y4m +4e0eb61e76f0684188d9bc9f3ce61f6b6b77bb2c *park_joy_90p_8_420.y4m +7a193ff7dfeb96ba5f82b2afd7afa9e1fe83d947 *park_joy_90p_8_422.y4m +bdb7856e6bc93599bdda05c2e773a9f22b6c6d03 *park_joy_90p_8_444.y4m +81e1f3843748438b8f2e71db484eb22daf72e939 *park_joy_90p_8_440.yuv +b1f1c3ec79114b9a0651af24ce634afb44a9a419 *rush_hour_444.y4m +eb438c6540eb429f74404eedfa3228d409c57874 *desktop_640_360_30.yuv +89e70ebd22c27d275fe14dc2f1a41841a6d8b9ab *kirland_640_480_30.yuv +33c533192759e5bb4f07abfbac389dc259db4686 *macmarcomoving_640_480_30.yuv +8bfaab121080821b8f03b23467911e59ec59b8fe *macmarcostationary_640_480_30.yuv +70894878d916a599842d9ad0dcd24e10c13e5467 *niklas_640_480_30.yuv +8784b6df2d8cc946195a90ac00540500d2e522e4 *tacomanarrows_640_480_30.yuv +edd86a1f5e62fd9da9a9d46078247759c2638009 *tacomasmallcameramovement_640_480_30.yuv +9a70e8b7d14fba9234d0e51dce876635413ce444 *thaloundeskmtg_640_480_30.yuv +e7d315dbf4f3928779e0dc624311196d44491d32 *niklas_1280_720_30.yuv +717da707afcaa1f692ff1946f291054eb75a4f06 *screendata.y4m +9cfc855459e7549fd015c79e8eca512b2f2cb7e3 *niklas_1280_720_30.y4m +5b5763b388b1b52a81bb82b39f7ec25c4bd3d0e1 *desktop_credits.y4m diff --git a/third_party/aom/test/test.cmake b/third_party/aom/test/test.cmake new file mode 100644 index 000000000..8d3ab7059 --- /dev/null +++ b/third_party/aom/test/test.cmake @@ -0,0 +1,315 @@ +## +## Copyright (c) 2017, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+## +include("${AOM_ROOT}/test/test_data_util.cmake") + +set(AOM_UNIT_TEST_WRAPPER_SOURCES + "${AOM_CONFIG_DIR}/usage_exit.c" + "${AOM_ROOT}/test/test_libaom.cc") + +set(AOM_UNIT_TEST_COMMON_SOURCES + "${AOM_ROOT}/test/acm_random.h" + "${AOM_ROOT}/test/clear_system_state.h" + "${AOM_ROOT}/test/codec_factory.h" + "${AOM_ROOT}/test/convolve_test.cc" + "${AOM_ROOT}/test/function_equivalence_test.h" + "${AOM_ROOT}/test/md5_helper.h" + "${AOM_ROOT}/test/register_state_check.h" + "${AOM_ROOT}/test/transform_test_base.h" + "${AOM_ROOT}/test/util.h" + "${AOM_ROOT}/test/video_source.h") + +if (CONFIG_ACCOUNTING) + set(AOM_UNIT_TEST_COMMON_SOURCES + ${AOM_UNIT_TEST_COMMON_SOURCES} + "${AOM_ROOT}/test/accounting_test.cc") +endif () + +if (CONFIG_ADAPT_SCAN) + set(AOM_UNIT_TEST_COMMON_SOURCES + ${AOM_UNIT_TEST_COMMON_SOURCES} + "${AOM_ROOT}/test/scan_test.cc") +endif () + +if (CONFIG_GLOBAL_MOTION OR CONFIG_WARPED_MOTION) + if (HAVE_SSE2) + set(AOM_UNIT_TEST_COMMON_SOURCES + ${AOM_UNIT_TEST_COMMON_SOURCES} + "${AOM_ROOT}/test/warp_filter_test.cc" + "${AOM_ROOT}/test/warp_filter_test_util.cc" + "${AOM_ROOT}/test/warp_filter_test_util.h") + endif () +endif () + +set(AOM_UNIT_TEST_DECODER_SOURCES + "${AOM_ROOT}/test/decode_api_test.cc" + "${AOM_ROOT}/test/decode_test_driver.cc" + "${AOM_ROOT}/test/decode_test_driver.h" + "${AOM_ROOT}/test/ivf_video_source.h") + +set(AOM_UNIT_TEST_ENCODER_SOURCES + "${AOM_ROOT}/test/altref_test.cc" + "${AOM_ROOT}/test/aq_segment_test.cc" + "${AOM_ROOT}/test/datarate_test.cc" + "${AOM_ROOT}/test/dct16x16_test.cc" + "${AOM_ROOT}/test/dct32x32_test.cc" + "${AOM_ROOT}/test/encode_api_test.cc" + "${AOM_ROOT}/test/encode_test_driver.cc" + "${AOM_ROOT}/test/encode_test_driver.h" + "${AOM_ROOT}/test/error_resilience_test.cc" + "${AOM_ROOT}/test/i420_video_source.h" + "${AOM_ROOT}/test/sad_test.cc" + "${AOM_ROOT}/test/y4m_test.cc" + "${AOM_ROOT}/test/y4m_video_source.h" + "${AOM_ROOT}/test/yuv_video_source.h") + +set(AOM_DECODE_PERF_TEST_SOURCES 
"${AOM_ROOT}/test/decode_perf_test.cc") +set(AOM_ENCODE_PERF_TEST_SOURCES "${AOM_ROOT}/test/encode_perf_test.cc") +set(AOM_UNIT_TEST_WEBM_SOURCES "${AOM_ROOT}/test/webm_video_source.h") + +set(AOM_TEST_INTRA_PRED_SPEED_SOURCES + "${AOM_CONFIG_DIR}/usage_exit.c" + "${AOM_ROOT}/test/test_intra_pred_speed.cc") + +if (CONFIG_AV1) + set(AOM_UNIT_TEST_COMMON_SOURCES + ${AOM_UNIT_TEST_COMMON_SOURCES} + "${AOM_ROOT}/test/av1_convolve_optimz_test.cc" + "${AOM_ROOT}/test/av1_convolve_test.cc" + "${AOM_ROOT}/test/av1_fwd_txfm1d_test.cc" + "${AOM_ROOT}/test/av1_fwd_txfm2d_test.cc" + "${AOM_ROOT}/test/av1_inv_txfm1d_test.cc" + "${AOM_ROOT}/test/av1_inv_txfm2d_test.cc" + "${AOM_ROOT}/test/av1_txfm_test.cc" + "${AOM_ROOT}/test/av1_txfm_test.h" + "${AOM_ROOT}/test/intrapred_test.cc" + "${AOM_ROOT}/test/lpf_8_test.cc" + "${AOM_ROOT}/test/simd_cmp_impl.h") + + if (CONFIG_CDEF) + set(AOM_UNIT_TEST_COMMON_SOURCES + ${AOM_UNIT_TEST_COMMON_SOURCES} + "${AOM_ROOT}/test/clpf_test.cc") + endif () + + if (CONFIG_FILTER_INTRA) + if (HAVE_SSE4_1) + set(AOM_UNIT_TEST_COMMON_SOURCES + ${AOM_UNIT_TEST_COMMON_SOURCES} + # TODO: not sure if this intrinsics or a wrapper calling intrin/asm. 
+ #"${AOM_ROOT}/test/filterintra_predictors_test.cc") + ) + endif () + endif () + + set(AOM_UNIT_TEST_COMMON_INTRIN_NEON + ${AOM_UNIT_TEST_COMMON_INTRIN_NEON} + "${AOM_ROOT}/test/simd_cmp_neon.cc" + "${AOM_ROOT}/test/simd_neon_test.cc") + set(AOM_UNIT_TEST_COMMON_INTRIN_SSE2 + ${AOM_UNIT_TEST_COMMON_INTRIN_SSE2} + "${AOM_ROOT}/test/simd_cmp_sse2.cc") + set(AOM_UNIT_TEST_COMMON_INTRIN_SSSE3 + ${AOM_UNIT_TEST_COMMON_INTRIN_SSSE3} + "${AOM_ROOT}/test/simd_cmp_ssse3.cc") + set(AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1 + ${AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1} + "${AOM_ROOT}/test/simd_cmp_sse4.cc") +endif () + +if (CONFIG_AV1_ENCODER) + set(AOM_UNIT_TEST_ENCODER_SOURCES + ${AOM_UNIT_TEST_ENCODER_SOURCES} + "${AOM_ROOT}/test/active_map_test.cc" + "${AOM_ROOT}/test/arf_freq_test.cc" + "${AOM_ROOT}/test/av1_dct_test.cc" + "${AOM_ROOT}/test/av1_fht16x16_test.cc" + "${AOM_ROOT}/test/av1_fht8x8_test.cc" + "${AOM_ROOT}/test/av1_inv_txfm_test.cc" + "${AOM_ROOT}/test/avg_test.cc" + "${AOM_ROOT}/test/blend_a64_mask_1d_test.cc" + "${AOM_ROOT}/test/blend_a64_mask_test.cc" + "${AOM_ROOT}/test/borders_test.cc" + "${AOM_ROOT}/test/cpu_speed_test.cc" + "${AOM_ROOT}/test/end_to_end_test.cc" + "${AOM_ROOT}/test/error_block_test.cc" + "${AOM_ROOT}/test/fdct4x4_test.cc" + "${AOM_ROOT}/test/fdct8x8_test.cc" + "${AOM_ROOT}/test/frame_size_tests.cc" + "${AOM_ROOT}/test/hadamard_test.cc" + "${AOM_ROOT}/test/lossless_test.cc" + "${AOM_ROOT}/test/minmax_test.cc" + "${AOM_ROOT}/test/subtract_test.cc" + "${AOM_ROOT}/test/sum_squares_test.cc" + "${AOM_ROOT}/test/variance_test.cc") + + if (CONFIG_EXT_INTER) + set(AOM_UNIT_TEST_ENCODER_SOURCES + ${AOM_UNIT_TEST_ENCODER_SOURCES} + "${AOM_ROOT}/test/av1_wedge_utils_test.cc" + "${AOM_ROOT}/test/masked_sad_test.cc" + "${AOM_ROOT}/test/masked_variance_test.cc") + endif () + + if (CONFIG_EXT_TX) + set(AOM_UNIT_TEST_ENCODER_SOURCES + ${AOM_UNIT_TEST_ENCODER_SOURCES} + "${AOM_ROOT}/test/av1_fht16x32_test.cc" + "${AOM_ROOT}/test/av1_fht16x8_test.cc" + 
"${AOM_ROOT}/test/av1_fht32x16_test.cc" + "${AOM_ROOT}/test/av1_fht4x4_test.cc" + "${AOM_ROOT}/test/av1_fht4x8_test.cc" + "${AOM_ROOT}/test/av1_fht8x16_test.cc" + "${AOM_ROOT}/test/av1_fht8x4_test.cc" + "${AOM_ROOT}/test/fht32x32_test.cc") + endif () + + if (CONFIG_MOTION_VAR) + set(AOM_UNIT_TEST_ENCODER_SOURCES + ${AOM_UNIT_TEST_ENCODER_SOURCES} + "${AOM_ROOT}/test/obmc_sad_test.cc" + "${AOM_ROOT}/test/obmc_variance_test.cc") + endif () +endif () + +if (CONFIG_AV1_DECODER AND CONFIG_AV1_ENCODER) + set(AOM_UNIT_TEST_COMMON_SOURCES + ${AOM_UNIT_TEST_COMMON_SOURCES} + "${AOM_ROOT}/test/divu_small_test.cc" + "${AOM_ROOT}/test/ethread_test.cc" + "${AOM_ROOT}/test/idct8x8_test.cc" + "${AOM_ROOT}/test/partial_idct_test.cc" + "${AOM_ROOT}/test/superframe_test.cc" + "${AOM_ROOT}/test/binary_codes_test.cc" + "${AOM_ROOT}/test/tile_independence_test.cc") + + if (CONFIG_ANS) + set(AOM_UNIT_TEST_COMMON_SOURCES + ${AOM_UNIT_TEST_COMMON_SOURCES} + "${AOM_ROOT}/test/ans_codec_test.cc" + "${AOM_ROOT}/test/ans_test.cc") + else () + set(AOM_UNIT_TEST_COMMON_SOURCES + ${AOM_UNIT_TEST_COMMON_SOURCES} + "${AOM_ROOT}/test/boolcoder_test.cc") + endif () + + if (CONFIG_EXT_TILE) + set(AOM_UNIT_TEST_COMMON_SOURCES + ${AOM_UNIT_TEST_COMMON_SOURCES} + "${AOM_ROOT}/test/av1_ext_tile_test.cc") + endif () +endif () + +if (CONFIG_HIGHBITDEPTH) + if (CONFIG_AV1) + set(AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1 + ${AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1} + "${AOM_ROOT}/test/av1_highbd_iht_test.cc" + "${AOM_ROOT}/test/av1_quantize_test.cc") + endif () + + if (CONFIG_INTERNAL_STATS) + set(AOM_UNIT_TEST_COMMON_SOURCES + ${AOM_UNIT_TEST_COMMON_SOURCES} + "${AOM_ROOT}/test/hbd_metrics_test.cc") + endif () +endif () + +if (CONFIG_UNIT_TESTS) + if (MSVC) + # Force static run time to avoid collisions with googletest. 
+ include("${AOM_ROOT}/build/cmake/msvc_runtime.cmake") + endif () + include_directories( + "${AOM_ROOT}/third_party/googletest/src/googletest/src" + "${AOM_ROOT}/third_party/googletest/src/googletest/include") + add_subdirectory("${AOM_ROOT}/third_party/googletest/src/googletest" + EXCLUDE_FROM_ALL) + + # Generate a stub file containing the C function usage_exit(); this is + # required because of the test dependency on aom_common_app_util. + # Specifically, the function die() in tools_common.c calls usage_exit() to + # terminate the program on the caller's behalf. + file(WRITE "${AOM_CONFIG_DIR}/usage_exit.c" "void usage_exit(void) {}") +endif () + +# Setup the targets for CONFIG_UNIT_TESTS. The libaom and app util targets must +# exist before this function is called. +function (setup_aom_test_targets) + add_library(test_aom_common OBJECT ${AOM_UNIT_TEST_COMMON_SOURCES}) + add_library(test_aom_decoder OBJECT ${AOM_UNIT_TEST_DECODER_SOURCES}) + add_library(test_aom_encoder OBJECT ${AOM_UNIT_TEST_ENCODER_SOURCES}) + + set(AOM_LIB_TARGETS ${AOM_LIB_TARGETS} test_aom_common test_aom_decoder + test_aom_encoder PARENT_SCOPE) + + add_executable(test_libaom ${AOM_UNIT_TEST_WRAPPER_SOURCES} + $<TARGET_OBJECTS:aom_common_app_util> + $<TARGET_OBJECTS:test_aom_common>) + + if (CONFIG_DECODERS) + target_sources(test_libaom PUBLIC + $<TARGET_OBJECTS:aom_decoder_app_util> + $<TARGET_OBJECTS:test_aom_decoder>) + + if (CONFIG_DECODE_PERF_TESTS AND CONFIG_WEBM_IO) + target_sources(test_libaom PUBLIC ${AOM_DECODE_PERF_TEST_SOURCES}) + endif () + endif () + + if (CONFIG_ENCODERS) + target_sources(test_libaom PUBLIC + $<TARGET_OBJECTS:test_aom_encoder> + $<TARGET_OBJECTS:aom_encoder_app_util>) + + if (CONFIG_ENCODE_PERF_TESTS) + target_sources(test_libaom PUBLIC ${AOM_ENCODE_PERF_TEST_SOURCES}) + endif () + endif () + + target_link_libraries(test_libaom PUBLIC aom gtest) + + add_executable(test_intra_pred_speed + ${AOM_TEST_INTRA_PRED_SPEED_SOURCES} + 
$<TARGET_OBJECTS:aom_common_app_util>) + target_link_libraries(test_intra_pred_speed PUBLIC aom gtest) + + if (CONFIG_LIBYUV) + target_sources(test_libaom PUBLIC $<TARGET_OBJECTS:yuv>) + endif () + if (CONFIG_WEBM_IO) + target_sources(test_libaom PUBLIC ${AOM_UNIT_TEST_WEBM_SOURCES} + $<TARGET_OBJECTS:webm>) + endif () + if (HAVE_SSE2) + add_intrinsics_source_to_target("-msse2" "test_libaom" + "AOM_UNIT_TEST_COMMON_INTRIN_SSE2") + endif () + if (HAVE_SSSE3) + add_intrinsics_source_to_target("-mssse3" "test_libaom" + "AOM_UNIT_TEST_COMMON_INTRIN_SSSE3") + endif () + if (HAVE_SSE4_1) + add_intrinsics_source_to_target("-msse4.1" "test_libaom" + "AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1") + endif () + if (HAVE_NEON) + add_intrinsics_source_to_target("${AOM_NEON_INTRIN_FLAG}" "test_libaom" + "AOM_UNIT_TEST_COMMON_INTRIN_NEON") + endif () + + add_custom_target(testdata + COMMAND ${CMAKE_COMMAND} + -DAOM_CONFIG_DIR="${AOM_CONFIG_DIR}" + -DAOM_ROOT="${AOM_ROOT}" + -P "${AOM_ROOT}/test/test_worker.cmake" + SOURCES ${AOM_TEST_DATA_LIST}) +endfunction () diff --git a/third_party/aom/test/test.mk b/third_party/aom/test/test.mk new file mode 100644 index 000000000..fb0ab371e --- /dev/null +++ b/third_party/aom/test/test.mk @@ -0,0 +1,241 @@ +## +## Copyright (c) 2017, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+## + +LIBAOM_TEST_SRCS-yes += acm_random.h +LIBAOM_TEST_SRCS-yes += clear_system_state.h +LIBAOM_TEST_SRCS-yes += codec_factory.h +LIBAOM_TEST_SRCS-yes += md5_helper.h +LIBAOM_TEST_SRCS-yes += register_state_check.h +LIBAOM_TEST_SRCS-yes += test.mk +LIBAOM_TEST_SRCS-yes += test_libaom.cc +LIBAOM_TEST_SRCS-yes += util.h +LIBAOM_TEST_SRCS-yes += video_source.h +LIBAOM_TEST_SRCS-yes += transform_test_base.h +LIBAOM_TEST_SRCS-yes += function_equivalence_test.h +LIBAOM_TEST_SRCS-yes += warp_filter_test_util.h + +## +## BLACK BOX TESTS +## +## Black box tests only use the public API. +## +LIBAOM_TEST_SRCS-yes += ../md5_utils.h ../md5_utils.c +LIBAOM_TEST_SRCS-$(CONFIG_DECODERS) += ivf_video_source.h +LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS) += ../y4minput.h ../y4minput.c +LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS) += altref_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS) += aq_segment_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS) += datarate_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS) += encode_api_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h +#LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS) += realtime_test.cc +#LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS) += resize_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS) += y4m_video_source.h +LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS) += yuv_video_source.h + +#LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += level_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += active_map_refresh_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += active_map_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += borders_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += cpu_speed_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += frame_size_tests.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += lossless_test.cc + +LIBAOM_TEST_SRCS-yes += decode_test_driver.cc +LIBAOM_TEST_SRCS-yes += decode_test_driver.h +LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS) += encode_test_driver.cc +LIBAOM_TEST_SRCS-yes += encode_test_driver.h + 
+## IVF writing. +LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS) += ../ivfenc.c ../ivfenc.h + +## Y4m parsing. +LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS) += y4m_test.cc ../y4menc.c ../y4menc.h + +## WebM Parsing +ifeq ($(CONFIG_WEBM_IO), yes) +LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvparser.cc +LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvreader.cc +LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvparser.h +LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvreader.h +LIBAOM_TEST_SRCS-$(CONFIG_DECODERS) += $(LIBWEBM_PARSER_SRCS) +LIBAOM_TEST_SRCS-$(CONFIG_DECODERS) += ../tools_common.h +LIBAOM_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.cc +LIBAOM_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.h +LIBAOM_TEST_SRCS-$(CONFIG_DECODERS) += webm_video_source.h +endif + +LIBAOM_TEST_SRCS-$(CONFIG_DECODERS) += decode_api_test.cc + +# Currently we only support decoder perf tests for av1. Also they read from WebM +# files, so WebM IO is required. +ifeq ($(CONFIG_DECODE_PERF_TESTS)$(CONFIG_AV1_DECODER)$(CONFIG_WEBM_IO), \ + yesyesyes) +LIBAOM_TEST_SRCS-yes += decode_perf_test.cc +endif + +# encode perf tests are av1 only +ifeq ($(CONFIG_ENCODE_PERF_TESTS)$(CONFIG_AV1_ENCODER), yesyes) +LIBAOM_TEST_SRCS-yes += encode_perf_test.cc +endif + +## Multi-codec / unconditional black box tests. +ifeq ($(findstring yes,$(CONFIG_AV1_ENCODER)),yes) +LIBAOM_TEST_SRCS-yes += active_map_refresh_test.cc +LIBAOM_TEST_SRCS-yes += active_map_test.cc +LIBAOM_TEST_SRCS-yes += end_to_end_test.cc +endif + +## +## WHITE BOX TESTS +## +## Whitebox tests invoke functions not exposed via the public API. Certain +## shared library builds don't make these functions accessible. +## +ifeq ($(CONFIG_SHARED),) + +## AV1 +ifeq ($(CONFIG_AV1),yes) + +# These tests require both the encoder and decoder to be built. 
+ifeq ($(CONFIG_AV1_ENCODER)$(CONFIG_AV1_DECODER),yesyes) +# IDCT test currently depends on FDCT function +LIBAOM_TEST_SRCS-yes += idct8x8_test.cc +LIBAOM_TEST_SRCS-yes += partial_idct_test.cc +LIBAOM_TEST_SRCS-yes += superframe_test.cc +LIBAOM_TEST_SRCS-yes += tile_independence_test.cc +LIBAOM_TEST_SRCS-yes += ethread_test.cc +LIBAOM_TEST_SRCS-yes += motion_vector_test.cc +ifneq ($(CONFIG_ANS),yes) +LIBAOM_TEST_SRCS-yes += binary_codes_test.cc +endif +ifeq ($(CONFIG_EXT_TILE),yes) +LIBAOM_TEST_SRCS-yes += av1_ext_tile_test.cc +endif +ifeq ($(CONFIG_ANS),yes) +LIBAOM_TEST_SRCS-yes += ans_test.cc +LIBAOM_TEST_SRCS-yes += ans_codec_test.cc +else +LIBAOM_TEST_SRCS-yes += boolcoder_test.cc +ifeq ($(CONFIG_ACCOUNTING),yes) +LIBAOM_TEST_SRCS-yes += accounting_test.cc +endif +endif +LIBAOM_TEST_SRCS-yes += divu_small_test.cc +#LIBAOM_TEST_SRCS-yes += encoder_parms_get_to_decoder.cc +endif + +LIBAOM_TEST_SRCS-$(CONFIG_ADAPT_SCAN) += scan_test.cc +LIBAOM_TEST_SRCS-yes += convolve_test.cc +LIBAOM_TEST_SRCS-yes += lpf_8_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_CDEF) += dering_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_CDEF) += clpf_test.cc +LIBAOM_TEST_SRCS-yes += simd_cmp_impl.h +LIBAOM_TEST_SRCS-$(HAVE_SSE2) += simd_cmp_sse2.cc +LIBAOM_TEST_SRCS-$(HAVE_SSSE3) += simd_cmp_ssse3.cc +LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += simd_cmp_sse4.cc +LIBAOM_TEST_SRCS-$(HAVE_NEON) += simd_cmp_neon.cc +LIBAOM_TEST_SRCS-yes += simd_impl.h +LIBAOM_TEST_SRCS-$(HAVE_SSE2) += simd_sse2_test.cc +LIBAOM_TEST_SRCS-$(HAVE_SSSE3) += simd_ssse3_test.cc +LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += simd_sse4_test.cc +LIBAOM_TEST_SRCS-$(HAVE_NEON) += simd_neon_test.cc +LIBAOM_TEST_SRCS-yes += intrapred_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_INTRABC) += intrabc_test.cc +#LIBAOM_TEST_SRCS-$(CONFIG_AV1_DECODER) += av1_thread_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += dct16x16_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += dct32x32_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += fdct4x4_test.cc 
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += fdct8x8_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += hadamard_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += minmax_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += variance_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += error_block_test.cc +#LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_quantize_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += subtract_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += arf_freq_test.cc + + +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_inv_txfm_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_dct_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht4x4_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht8x8_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht16x16_test.cc +ifeq ($(CONFIG_EXT_TX),yes) +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht4x8_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht8x4_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht8x16_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht16x8_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht16x32_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht32x16_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += fht32x32_test.cc +endif + +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += sum_squares_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += subtract_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += blend_a64_mask_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += blend_a64_mask_1d_test.cc + +ifeq ($(CONFIG_EXT_INTER),yes) +LIBAOM_TEST_SRCS-$(HAVE_SSSE3) += masked_variance_test.cc +LIBAOM_TEST_SRCS-$(HAVE_SSSE3) += masked_sad_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_wedge_utils_test.cc +endif + +## Skip the unit test written for 4-tap filter intra predictor, because we +## revert to 3-tap filter. 
+## ifeq ($(CONFIG_FILTER_INTRA),yes) +## LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += filterintra_predictors_test.cc +## endif + +ifeq ($(CONFIG_MOTION_VAR),yes) +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += obmc_sad_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += obmc_variance_test.cc +endif + +ifeq ($(CONFIG_HIGHBITDEPTH),yes) +ifeq ($(CONFIG_AV1_ENCODER),yes) +LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += av1_quantize_test.cc +LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += av1_highbd_iht_test.cc +endif +endif # CONFIG_HIGHBITDEPTH +endif # AV1 + +## Multi-codec / unconditional whitebox tests. + +ifeq ($(CONFIG_AV1_ENCODER),yes) +LIBAOM_TEST_SRCS-yes += avg_test.cc +endif +ifeq ($(CONFIG_INTERNAL_STATS),yes) +LIBAOM_TEST_SRCS-$(CONFIG_HIGHBITDEPTH) += hbd_metrics_test.cc +endif +LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1) += av1_txfm_test.h +LIBAOM_TEST_SRCS-$(CONFIG_AV1) += av1_txfm_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fwd_txfm1d_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_inv_txfm1d_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fwd_txfm2d_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_inv_txfm2d_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1) += av1_convolve_test.cc +LIBAOM_TEST_SRCS-$(CONFIG_AV1) += av1_convolve_optimz_test.cc +ifneq ($(findstring yes,$(CONFIG_GLOBAL_MOTION) $(CONFIG_WARPED_MOTION)),) +LIBAOM_TEST_SRCS-$(HAVE_SSE2) += warp_filter_test.cc warp_filter_test_util.cc +endif +ifeq ($(CONFIG_LOOP_RESTORATION),yes) +LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += selfguided_filter_test.cc +endif + +TEST_INTRA_PRED_SPEED_SRCS-yes := test_intra_pred_speed.cc +TEST_INTRA_PRED_SPEED_SRCS-yes += ../md5_utils.h ../md5_utils.c + +endif # CONFIG_SHARED + +include $(SRC_PATH_BARE)/test/test-data.mk diff --git a/third_party/aom/test/test_data_util.cmake b/third_party/aom/test/test_data_util.cmake new file mode 100644 index 000000000..f096e4e12 --- /dev/null +++ b/third_party/aom/test/test_data_util.cmake @@ -0,0 +1,76 @@ +## +## 
Copyright (c) 2017, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## + +# Parses test/test-data.sha1 and writes captured file names and checksums to +# $out_files and $out_checksums as lists. +function (make_test_data_lists out_files out_checksums) + if (NOT AOM_TEST_DATA_LIST OR NOT EXISTS "${AOM_TEST_DATA_LIST}") + message(FATAL_ERROR "AOM_TEST_DATA_LIST (${AOM_TEST_DATA_LIST}) missing or " + "variable empty.") + endif () + + # Read test-data.sha1 into $files_and_checksums. $files_and_checksums becomes + # a list with an entry for each line from $AOM_TEST_DATA_LIST. + file(STRINGS "${AOM_TEST_DATA_LIST}" files_and_checksums) + + # Iterate over the list of lines and split it into $checksums and $filenames. + foreach (line ${files_and_checksums}) + string(FIND "${line}" " *" delim_pos) + + math(EXPR filename_pos "${delim_pos} + 2") + string(SUBSTRING "${line}" 0 ${delim_pos} checksum) + string(SUBSTRING "${line}" ${filename_pos} -1 filename) + + set(checksums ${checksums} ${checksum}) + set(filenames ${filenames} ${filename}) + endforeach () + + if (NOT checksums OR NOT filenames) + message(FATAL_ERROR "Parsing of ${AOM_TEST_DATA_LIST} failed.") + endif () + + set(${out_checksums} ${checksums} PARENT_SCOPE) + set(${out_files} ${filenames} PARENT_SCOPE) +endfunction () + +# Appends each file name in $test_files to $test_dir and adds the result path to +# $out_path_list. 
+function (expand_test_file_paths test_files test_dir out_path_list) + foreach (filename ${${test_files}}) + set(path_list ${path_list} "${test_dir}/${filename}") + endforeach () + set(${out_path_list} ${path_list} PARENT_SCOPE) +endfunction () + +function (check_file local_path expected_checksum out_needs_update) + if (EXISTS "${local_path}") + file(SHA1 "${local_path}" file_checksum) + else () + set(${out_needs_update} 1 PARENT_SCOPE) + return () + endif () + + if ("${file_checksum}" STREQUAL "${expected_checksum}") + unset(${out_needs_update} PARENT_SCOPE) + else () + set(${out_needs_update} 1 PARENT_SCOPE) + endif () +endfunction () + +# Downloads data from $file_url, confirms that $file_checksum matches, and +# writes it to $local_path. +function (download_test_file file_url file_checksum local_path) + message("Downloading ${file_url} ...") + file(DOWNLOAD "${file_url}" "${local_path}" + SHOW_PROGRESS + EXPECTED_HASH SHA1=${file_checksum}) + message("Download of ${file_url} complete.") +endfunction () diff --git a/third_party/aom/test/test_intra_pred_speed.cc b/third_party/aom/test/test_intra_pred_speed.cc new file mode 100644 index 000000000..c4253628e --- /dev/null +++ b/third_party/aom/test/test_intra_pred_speed.cc @@ -0,0 +1,515 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+*/ + +// Test and time AOM intra-predictor functions + +#include <stdio.h> +#include <string.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_dsp_rtcd.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/md5_helper.h" +#include "aom/aom_integer.h" +#include "aom_ports/mem.h" +#include "aom_ports/aom_timer.h" + +// ----------------------------------------------------------------------------- + +namespace { + +typedef void (*AvxPredFunc)(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left); + +#if CONFIG_ALT_INTRA +const int kNumAv1IntraFuncs = 14; +#else +const int kNumAv1IntraFuncs = 13; +#endif // CONFIG_ALT_INTRA +const char *kAv1IntraPredNames[kNumAv1IntraFuncs] = { + "DC_PRED", "DC_LEFT_PRED", "DC_TOP_PRED", "DC_128_PRED", "V_PRED", + "H_PRED", "D45_PRED", "D135_PRED", "D117_PRED", "D153_PRED", + "D207_PRED", "D63_PRED", "TM_PRED", +#if CONFIG_ALT_INTRA + "SMOOTH_PRED" +#endif // CONFIG_ALT_INTRA +}; + +void TestIntraPred(const char name[], AvxPredFunc const *pred_funcs, + const char *const pred_func_names[], int num_funcs, + const char *const signatures[], int /*block_size*/, + int num_pixels_per_test) { + libaom_test::ACMRandom rnd(libaom_test::ACMRandom::DeterministicSeed()); + const int kBPS = 32; + const int kTotalPixels = 32 * kBPS; + DECLARE_ALIGNED(16, uint8_t, src[kTotalPixels]); + DECLARE_ALIGNED(16, uint8_t, ref_src[kTotalPixels]); + DECLARE_ALIGNED(16, uint8_t, left[2 * kBPS]); + DECLARE_ALIGNED(16, uint8_t, above_mem[2 * kBPS + 16]); + uint8_t *const above = above_mem + 16; + for (int i = 0; i < kTotalPixels; ++i) ref_src[i] = rnd.Rand8(); + for (int i = 0; i < kBPS; ++i) left[i] = rnd.Rand8(); + for (int i = -1; i < kBPS; ++i) above[i] = rnd.Rand8(); + const int kNumTests = static_cast<int>(2.e10 / num_pixels_per_test); + + // Fill up bottom-left and top-right pixels. 
+ for (int i = kBPS; i < 2 * kBPS; ++i) { + left[i] = rnd.Rand8(); + above[i] = rnd.Rand8(); + } + + for (int k = 0; k < num_funcs; ++k) { + if (pred_funcs[k] == NULL) continue; + memcpy(src, ref_src, sizeof(src)); + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int num_tests = 0; num_tests < kNumTests; ++num_tests) { + pred_funcs[k](src, kBPS, above, left); + } + libaom_test::ClearSystemState(); + aom_usec_timer_mark(&timer); + const int elapsed_time = + static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000); + libaom_test::MD5 md5; + md5.Add(src, sizeof(src)); + printf("Mode %s[%12s]: %5d ms MD5: %s\n", name, pred_func_names[k], + elapsed_time, md5.Get()); + EXPECT_STREQ(signatures[k], md5.Get()); + } +} + +void TestIntraPred4(AvxPredFunc const *pred_funcs) { + static const char *const kSignatures[kNumAv1IntraFuncs] = { + "4334156168b34ab599d9b5b30f522fe9", + "bc4649d5ba47c7ff178d92e475960fb0", + "8d316e5933326dcac24e1064794b5d12", + "a27270fed024eafd762c95de85f4da51", + "c33dff000d4256c2b8f3bf9e9bab14d2", + "44d8cddc2ad8f79b8ed3306051722b4f", + "df62e96dfcb25d8a435482756a6fa990", + "ecb0d56ae5f677ea45127ce9d5c058e4", + "0b7936841f6813da818275944895b574", + "9117972ef64f91a58ff73e1731c81db2", + "46d493dccf6e5356c6f3c0c73b7dd141", + "b852f42e6c4991d415400332d567872f", +#if CONFIG_ALT_INTRA + "828c49a4248993cce4876fa26eab697f", + "718c8cee9011f92ef31f77a9a7560010" +#else + "309a618577b27c648f9c5ee45252bc8f", +#endif // CONFIG_ALT_INTRA + }; + TestIntraPred("Intra4", pred_funcs, kAv1IntraPredNames, kNumAv1IntraFuncs, + kSignatures, 4, 4 * 4 * kNumAv1IntraFuncs); +} + +void TestIntraPred8(AvxPredFunc const *pred_funcs) { + static const char *const kSignatures[kNumAv1IntraFuncs] = { + "7694ddeeefed887faf9d339d18850928", + "7d726b1213591b99f736be6dec65065b", + "19c5711281357a485591aaf9c96c0a67", + "ba6b66877a089e71cd938e3b8c40caac", + "802440c93317e0f8ba93fab02ef74265", + "9e09a47a15deb0b9d8372824f9805080", + "a2fd4b66e1a667a3e582588934a7e4bd", + 
"78339c1c60bb1d67d248ab8c4da08b7f", + "5c97d70f7d47de1882a6cd86c165c8a9", + "8182bf60688b42205acd95e59e967157", + "9d69fcaf12398e67242d3fcf5cf2267e", + "7a09adb0fa6c2bf889a99dd816622feb", +#if CONFIG_ALT_INTRA + "f6ade499c626d38eb70661184b79bc57", + "1ad5b106c79b792e514ba25e87139b5e" +#else + "815b75c8e0d91cc1ae766dc5d3e445a3", +#endif // CONFIG_ALT_INTRA + }; + TestIntraPred("Intra8", pred_funcs, kAv1IntraPredNames, kNumAv1IntraFuncs, + kSignatures, 8, 8 * 8 * kNumAv1IntraFuncs); +} + +void TestIntraPred16(AvxPredFunc const *pred_funcs) { + static const char *const kSignatures[kNumAv1IntraFuncs] = { + "b40dbb555d5d16a043dc361e6694fe53", + "fb08118cee3b6405d64c1fd68be878c6", + "6c190f341475c837cc38c2e566b64875", + "db5c34ccbe2c7f595d9b08b0dc2c698c", + "a62cbfd153a1f0b9fed13e62b8408a7a", + "143df5b4c89335e281103f610f5052e4", + "404944b521d16f6edd160feeeb31ff35", + "7841fae7d4d47b519322e6a03eeed9dc", + "f6ebed3f71cbcf8d6d0516ce87e11093", + "3cc480297dbfeed01a1c2d78dd03d0c5", + "fbd607f15da218c5390a5b183b634a10", + "f7063ccbc29f87303d5c3d0555b08944", +#if CONFIG_ALT_INTRA + "7adcaaa3554eb71a81fc48cb9043984b", + "c0acea4397c1b4d54a21bbcec5731dff" +#else + "b8a41aa968ec108af447af4217cba91b", +#endif // CONFIG_ALT_INTRA + }; + TestIntraPred("Intra16", pred_funcs, kAv1IntraPredNames, kNumAv1IntraFuncs, + kSignatures, 16, 16 * 16 * kNumAv1IntraFuncs); +} + +void TestIntraPred32(AvxPredFunc const *pred_funcs) { + static const char *const kSignatures[kNumAv1IntraFuncs] = { + "558541656d84f9ae7896db655826febe", + "b3587a1f9a01495fa38c8cd3c8e2a1bf", + "4c6501e64f25aacc55a2a16c7e8f0255", + "b3b01379ba08916ef6b1b35f7d9ad51c", + "0f1eb38b6cbddb3d496199ef9f329071", + "911c06efb9ed1c3b4c104b232b55812f", + "b4f9f177a8a259514f039cfb403a61e3", + "0a6d584a44f8db9aa7ade2e2fdb9fc9e", + "b01c9076525216925f3456f034fb6eee", + "d267e20ad9e5cd2915d1a47254d3d149", + "3c45418137114cb6cef4c7a7baf4855c", + "d520125ebd512c63c301bf67fea8e059", +#if CONFIG_ALT_INTRA + 
"297e8fbb5d33c29b12b228fa9d7c40a4", + "31b9296d70dd82238c87173e6d5e65fd" +#else + "9e1370c6d42e08d357d9612c93a71cfc", +#endif // CONFIG_ALT_INTRA + }; + TestIntraPred("Intra32", pred_funcs, kAv1IntraPredNames, kNumAv1IntraFuncs, + kSignatures, 32, 32 * 32 * kNumAv1IntraFuncs); +} + +} // namespace + +// Defines a test case for |arch| (e.g., C, SSE2, ...) passing the predictors +// to |test_func|. The test name is 'arch.test_func', e.g., C.TestIntraPred4. +#define INTRA_PRED_TEST(arch, test_func, dc, dc_left, dc_top, dc_128, v, h, \ + d45e, d135, d117, d153, d207e, d63e, tm, smooth) \ + TEST(arch, test_func) { \ + static const AvxPredFunc aom_intra_pred[] = { \ + dc, dc_left, dc_top, dc_128, v, h, d45e, \ + d135, d117, d153, d207e, d63e, tm, smooth \ + }; \ + test_func(aom_intra_pred); \ + } + +// ----------------------------------------------------------------------------- +// 4x4 + +#if CONFIG_ALT_INTRA +#define tm_pred_func aom_paeth_predictor_4x4_c +#define smooth_pred_func aom_smooth_predictor_4x4_c +#else +#define tm_pred_func aom_tm_predictor_4x4_c +#define smooth_pred_func NULL +#endif // CONFIG_ALT_INTRA + +INTRA_PRED_TEST(C, TestIntraPred4, aom_dc_predictor_4x4_c, + aom_dc_left_predictor_4x4_c, aom_dc_top_predictor_4x4_c, + aom_dc_128_predictor_4x4_c, aom_v_predictor_4x4_c, + aom_h_predictor_4x4_c, aom_d45e_predictor_4x4_c, + aom_d135_predictor_4x4_c, aom_d117_predictor_4x4_c, + aom_d153_predictor_4x4_c, aom_d207e_predictor_4x4_c, + aom_d63e_predictor_4x4_c, tm_pred_func, smooth_pred_func) + +#undef tm_pred_func +#undef smooth_pred_func + +#if HAVE_SSE2 +#if CONFIG_ALT_INTRA +#define tm_pred_func NULL +#else +#define tm_pred_func aom_tm_predictor_4x4_sse2 +#endif // CONFIG_ALT_INTRA + +INTRA_PRED_TEST(SSE2, TestIntraPred4, aom_dc_predictor_4x4_sse2, + aom_dc_left_predictor_4x4_sse2, aom_dc_top_predictor_4x4_sse2, + aom_dc_128_predictor_4x4_sse2, aom_v_predictor_4x4_sse2, + aom_h_predictor_4x4_sse2, NULL, NULL, NULL, NULL, NULL, NULL, + tm_pred_func, NULL) 
+ +#undef tm_pred_func +#endif // HAVE_SSE2 + +#if HAVE_SSSE3 +INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, aom_d153_predictor_4x4_ssse3, NULL, + aom_d63e_predictor_4x4_ssse3, NULL, NULL) +#endif // HAVE_SSSE3 + +#if HAVE_DSPR2 +#if CONFIG_ALT_INTRA +#define tm_pred_func NULL +#else +#define tm_pred_func aom_tm_predictor_4x4_dspr2 +#endif // CONFIG_ALT_INTRA +INTRA_PRED_TEST(DSPR2, TestIntraPred4, aom_dc_predictor_4x4_dspr2, NULL, NULL, + NULL, NULL, aom_h_predictor_4x4_dspr2, NULL, NULL, NULL, NULL, + NULL, NULL, tm_pred_func, NULL) +#undef tm_pred_func +#endif // HAVE_DSPR2 + +#if HAVE_NEON +#if CONFIG_ALT_INTRA +#define tm_pred_func NULL +#else +#define tm_pred_func aom_tm_predictor_4x4_neon +#endif // CONFIG_ALT_INTRA +INTRA_PRED_TEST(NEON, TestIntraPred4, aom_dc_predictor_4x4_neon, + aom_dc_left_predictor_4x4_neon, aom_dc_top_predictor_4x4_neon, + aom_dc_128_predictor_4x4_neon, aom_v_predictor_4x4_neon, + aom_h_predictor_4x4_neon, NULL, aom_d135_predictor_4x4_neon, + NULL, NULL, NULL, NULL, tm_pred_func, NULL) +#undef tm_pred_func +#endif // HAVE_NEON + +#if HAVE_MSA +#if CONFIG_ALT_INTRA +#define tm_pred_func NULL +#else +#define tm_pred_func aom_tm_predictor_4x4_msa +#endif // CONFIG_ALT_INTRA +INTRA_PRED_TEST(MSA, TestIntraPred4, aom_dc_predictor_4x4_msa, + aom_dc_left_predictor_4x4_msa, aom_dc_top_predictor_4x4_msa, + aom_dc_128_predictor_4x4_msa, aom_v_predictor_4x4_msa, + aom_h_predictor_4x4_msa, NULL, NULL, NULL, NULL, NULL, NULL, + tm_pred_func, NULL) +#undef tm_pred_func +#endif // HAVE_MSA + +// ----------------------------------------------------------------------------- +// 8x8 + +#if CONFIG_ALT_INTRA +#define tm_pred_func aom_paeth_predictor_8x8_c +#define smooth_pred_func aom_smooth_predictor_8x8_c +#else +#define tm_pred_func aom_tm_predictor_8x8_c +#define smooth_pred_func NULL +#endif // CONFIG_ALT_INTRA +INTRA_PRED_TEST(C, TestIntraPred8, aom_dc_predictor_8x8_c, + aom_dc_left_predictor_8x8_c, 
aom_dc_top_predictor_8x8_c, + aom_dc_128_predictor_8x8_c, aom_v_predictor_8x8_c, + aom_h_predictor_8x8_c, aom_d45e_predictor_8x8_c, + aom_d135_predictor_8x8_c, aom_d117_predictor_8x8_c, + aom_d153_predictor_8x8_c, aom_d207e_predictor_8x8_c, + aom_d63e_predictor_8x8_c, tm_pred_func, smooth_pred_func) +#undef tm_pred_func +#undef smooth_pred_func + +#if HAVE_SSE2 +#if CONFIG_ALT_INTRA +#define tm_pred_func NULL +#else +#define tm_pred_func aom_tm_predictor_8x8_sse2 +#endif // CONFIG_ALT_INTRA +INTRA_PRED_TEST(SSE2, TestIntraPred8, aom_dc_predictor_8x8_sse2, + aom_dc_left_predictor_8x8_sse2, aom_dc_top_predictor_8x8_sse2, + aom_dc_128_predictor_8x8_sse2, aom_v_predictor_8x8_sse2, + aom_h_predictor_8x8_sse2, NULL, NULL, NULL, NULL, NULL, NULL, + tm_pred_func, NULL) +#undef tm_pred_func +#endif // HAVE_SSE2 + +#if HAVE_SSSE3 +INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, aom_d153_predictor_8x8_ssse3, NULL, NULL, NULL, + NULL) +#endif // HAVE_SSSE3 + +#if HAVE_DSPR2 +#if CONFIG_ALT_INTRA +#define tm_pred_func NULL +#else +#define tm_pred_func aom_tm_predictor_8x8_dspr2 +#endif // CONFIG_ALT_INTRA +INTRA_PRED_TEST(DSPR2, TestIntraPred8, aom_dc_predictor_8x8_dspr2, NULL, NULL, + NULL, NULL, aom_h_predictor_8x8_dspr2, NULL, NULL, NULL, NULL, + NULL, NULL, tm_pred_func, NULL) +#undef tm_pred_func +#endif // HAVE_DSPR2 + +#if HAVE_NEON +#if CONFIG_ALT_INTRA +#define tm_pred_func NULL +#else +#define tm_pred_func aom_tm_predictor_8x8_neon +#endif // CONFIG_ALT_INTRA +INTRA_PRED_TEST(NEON, TestIntraPred8, aom_dc_predictor_8x8_neon, + aom_dc_left_predictor_8x8_neon, aom_dc_top_predictor_8x8_neon, + aom_dc_128_predictor_8x8_neon, aom_v_predictor_8x8_neon, + aom_h_predictor_8x8_neon, NULL, NULL, NULL, NULL, NULL, NULL, + tm_pred_func, NULL) +#undef tm_pred_func +#endif // HAVE_NEON + +#if HAVE_MSA +#if CONFIG_ALT_INTRA +#define tm_pred_func NULL +#else +#define tm_pred_func aom_tm_predictor_8x8_msa +#endif // CONFIG_ALT_INTRA 
+INTRA_PRED_TEST(MSA, TestIntraPred8, aom_dc_predictor_8x8_msa, + aom_dc_left_predictor_8x8_msa, aom_dc_top_predictor_8x8_msa, + aom_dc_128_predictor_8x8_msa, aom_v_predictor_8x8_msa, + aom_h_predictor_8x8_msa, NULL, NULL, NULL, NULL, NULL, NULL, + tm_pred_func, NULL) +#undef tm_pred_func +#endif // HAVE_MSA + +// ----------------------------------------------------------------------------- +// 16x16 + +#if CONFIG_ALT_INTRA +#define tm_pred_func aom_paeth_predictor_16x16_c +#define smooth_pred_func aom_smooth_predictor_16x16_c +#else +#define tm_pred_func aom_tm_predictor_16x16_c +#define smooth_pred_func NULL +#endif // CONFIG_ALT_INTRA +INTRA_PRED_TEST(C, TestIntraPred16, aom_dc_predictor_16x16_c, + aom_dc_left_predictor_16x16_c, aom_dc_top_predictor_16x16_c, + aom_dc_128_predictor_16x16_c, aom_v_predictor_16x16_c, + aom_h_predictor_16x16_c, aom_d45e_predictor_16x16_c, + aom_d135_predictor_16x16_c, aom_d117_predictor_16x16_c, + aom_d153_predictor_16x16_c, aom_d207e_predictor_16x16_c, + aom_d63e_predictor_16x16_c, tm_pred_func, smooth_pred_func) +#undef tm_pred_func +#undef smooth_pred_func + +#if HAVE_SSE2 +#if CONFIG_ALT_INTRA +#define tm_pred_func NULL +#else +#define tm_pred_func aom_tm_predictor_16x16_sse2 +#endif // CONFIG_ALT_INTRA +INTRA_PRED_TEST(SSE2, TestIntraPred16, aom_dc_predictor_16x16_sse2, + aom_dc_left_predictor_16x16_sse2, + aom_dc_top_predictor_16x16_sse2, + aom_dc_128_predictor_16x16_sse2, aom_v_predictor_16x16_sse2, + aom_h_predictor_16x16_sse2, NULL, NULL, NULL, NULL, NULL, NULL, + tm_pred_func, NULL) +#undef tm_pred_func +#endif // HAVE_SSE2 + +#if HAVE_SSSE3 +INTRA_PRED_TEST(SSSE3, TestIntraPred16, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, aom_d153_predictor_16x16_ssse3, NULL, NULL, + NULL, NULL) +#endif // HAVE_SSSE3 + +#if HAVE_DSPR2 +INTRA_PRED_TEST(DSPR2, TestIntraPred16, aom_dc_predictor_16x16_dspr2, NULL, + NULL, NULL, NULL, aom_h_predictor_16x16_dspr2, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL) +#endif // 
HAVE_DSPR2 + +#if HAVE_NEON +#if CONFIG_ALT_INTRA +#define tm_pred_func NULL +#else +#define tm_pred_func aom_tm_predictor_16x16_neon +#endif // CONFIG_ALT_INTRA +INTRA_PRED_TEST(NEON, TestIntraPred16, aom_dc_predictor_16x16_neon, + aom_dc_left_predictor_16x16_neon, + aom_dc_top_predictor_16x16_neon, + aom_dc_128_predictor_16x16_neon, aom_v_predictor_16x16_neon, + aom_h_predictor_16x16_neon, NULL, NULL, NULL, NULL, NULL, NULL, + tm_pred_func, NULL) +#undef tm_pred_func +#endif // HAVE_NEON + +#if HAVE_MSA +#if CONFIG_ALT_INTRA +#define tm_pred_func NULL +#else +#define tm_pred_func aom_tm_predictor_16x16_msa +#endif // CONFIG_ALT_INTRA +INTRA_PRED_TEST(MSA, TestIntraPred16, aom_dc_predictor_16x16_msa, + aom_dc_left_predictor_16x16_msa, aom_dc_top_predictor_16x16_msa, + aom_dc_128_predictor_16x16_msa, aom_v_predictor_16x16_msa, + aom_h_predictor_16x16_msa, NULL, NULL, NULL, NULL, NULL, NULL, + tm_pred_func, NULL) +#undef tm_pred_func +#endif // HAVE_MSA + +// ----------------------------------------------------------------------------- +// 32x32 + +#if CONFIG_ALT_INTRA +#define tm_pred_func aom_paeth_predictor_32x32_c +#define smooth_pred_func aom_smooth_predictor_32x32_c +#else +#define tm_pred_func aom_tm_predictor_32x32_c +#define smooth_pred_func NULL +#endif // CONFIG_ALT_INTRA +INTRA_PRED_TEST(C, TestIntraPred32, aom_dc_predictor_32x32_c, + aom_dc_left_predictor_32x32_c, aom_dc_top_predictor_32x32_c, + aom_dc_128_predictor_32x32_c, aom_v_predictor_32x32_c, + aom_h_predictor_32x32_c, aom_d45e_predictor_32x32_c, + aom_d135_predictor_32x32_c, aom_d117_predictor_32x32_c, + aom_d153_predictor_32x32_c, aom_d207e_predictor_32x32_c, + aom_d63e_predictor_32x32_c, tm_pred_func, smooth_pred_func) +#undef tm_pred_func +#undef smooth_pred_func + +#if HAVE_SSE2 +#if CONFIG_ALT_INTRA +#define tm_pred_func NULL +#else +#define tm_pred_func aom_tm_predictor_32x32_sse2 +#endif // CONFIG_ALT_INTRA +INTRA_PRED_TEST(SSE2, TestIntraPred32, aom_dc_predictor_32x32_sse2, + 
aom_dc_left_predictor_32x32_sse2, + aom_dc_top_predictor_32x32_sse2, + aom_dc_128_predictor_32x32_sse2, aom_v_predictor_32x32_sse2, + aom_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL, NULL, NULL, + tm_pred_func, NULL) +#undef tm_pred_func +#endif // HAVE_SSE2 + +#if HAVE_SSSE3 +INTRA_PRED_TEST(SSSE3, TestIntraPred32, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, aom_d153_predictor_32x32_ssse3, NULL, NULL, + NULL, NULL) +#endif // HAVE_SSSE3 + +#if HAVE_NEON +#if CONFIG_ALT_INTRA +#define tm_pred_func NULL +#else +#define tm_pred_func aom_tm_predictor_32x32_neon +#endif // CONFIG_ALT_INTRA +INTRA_PRED_TEST(NEON, TestIntraPred32, aom_dc_predictor_32x32_neon, + aom_dc_left_predictor_32x32_neon, + aom_dc_top_predictor_32x32_neon, + aom_dc_128_predictor_32x32_neon, aom_v_predictor_32x32_neon, + aom_h_predictor_32x32_neon, NULL, NULL, NULL, NULL, NULL, NULL, + tm_pred_func, NULL) +#undef tm_pred_func +#endif // HAVE_NEON + +#if HAVE_MSA +#if CONFIG_ALT_INTRA +#define tm_pred_func NULL +#else +#define tm_pred_func aom_tm_predictor_32x32_msa +#endif // CONFIG_ALT_INTRA +INTRA_PRED_TEST(MSA, TestIntraPred32, aom_dc_predictor_32x32_msa, + aom_dc_left_predictor_32x32_msa, aom_dc_top_predictor_32x32_msa, + aom_dc_128_predictor_32x32_msa, aom_v_predictor_32x32_msa, + aom_h_predictor_32x32_msa, NULL, NULL, NULL, NULL, NULL, NULL, + tm_pred_func, NULL) +#undef tm_pred_func +#endif // HAVE_MSA + +#include "test/test_libaom.cc" diff --git a/third_party/aom/test/test_libaom.cc b/third_party/aom/test/test_libaom.cc new file mode 100644 index 000000000..6d83ce66e --- /dev/null +++ b/third_party/aom/test/test_libaom.cc @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include <string> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_config.h" +#if ARCH_X86 || ARCH_X86_64 +#include "aom_ports/x86.h" +#endif +extern "C" { +#if CONFIG_AV1 +extern void av1_rtcd(); +#endif // CONFIG_AV1 +extern void aom_dsp_rtcd(); +extern void aom_scale_rtcd(); +} + +#if ARCH_X86 || ARCH_X86_64 +static void append_negative_gtest_filter(const char *str) { + std::string filter = ::testing::FLAGS_gtest_filter; + // Negative patterns begin with one '-' followed by a ':' separated list. + if (filter.find('-') == std::string::npos) filter += '-'; + filter += str; + ::testing::FLAGS_gtest_filter = filter; +} +#endif // ARCH_X86 || ARCH_X86_64 + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + +#if ARCH_X86 || ARCH_X86_64 + const int simd_caps = x86_simd_caps(); + if (!(simd_caps & HAS_MMX)) append_negative_gtest_filter(":MMX.*:MMX/*"); + if (!(simd_caps & HAS_SSE)) append_negative_gtest_filter(":SSE.*:SSE/*"); + if (!(simd_caps & HAS_SSE2)) append_negative_gtest_filter(":SSE2.*:SSE2/*"); + if (!(simd_caps & HAS_SSE3)) append_negative_gtest_filter(":SSE3.*:SSE3/*"); + if (!(simd_caps & HAS_SSSE3)) + append_negative_gtest_filter(":SSSE3.*:SSSE3/*"); + if (!(simd_caps & HAS_SSE4_1)) + append_negative_gtest_filter(":SSE4_1.*:SSE4_1/*"); + if (!(simd_caps & HAS_AVX)) append_negative_gtest_filter(":AVX.*:AVX/*"); + if (!(simd_caps & HAS_AVX2)) append_negative_gtest_filter(":AVX2.*:AVX2/*"); +#endif // ARCH_X86 || ARCH_X86_64 + +#if !CONFIG_SHARED +// Shared library builds don't support whitebox tests +// that exercise internal symbols. 
+ +#if CONFIG_AV1 + av1_rtcd(); +#endif // CONFIG_AV1 + aom_dsp_rtcd(); + aom_scale_rtcd(); +#endif // !CONFIG_SHARED + + return RUN_ALL_TESTS(); +} diff --git a/third_party/aom/test/test_worker.cmake b/third_party/aom/test/test_worker.cmake new file mode 100644 index 000000000..fa1d58130 --- /dev/null +++ b/third_party/aom/test/test_worker.cmake @@ -0,0 +1,50 @@ +## +## Copyright (c) 2017, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## +if (NOT AOM_ROOT OR NOT AOM_CONFIG_DIR) + message(FATAL_ERROR "AOM_ROOT AND AOM_CONFIG_DIR must be defined.") +endif () + +set(AOM_TEST_DATA_LIST "${AOM_ROOT}/test/test-data.sha1") +set(AOM_TEST_DATA_URL "http://downloads.webmproject.org/test_data/libvpx") +set(AOM_TEST_DATA_PATH "$ENV{LIBAOM_TEST_DATA_PATH}") + +include("${AOM_ROOT}/test/test_data_util.cmake") + +# Quoted so an empty (unset) path is still a valid left operand for STREQUAL. +if ("${AOM_TEST_DATA_PATH}" STREQUAL "") + message(WARNING "Writing test data to ${AOM_CONFIG_DIR}, set " + "$LIBAOM_TEST_DATA_PATH in your environment to avoid this warning.") + set(AOM_TEST_DATA_PATH "${AOM_CONFIG_DIR}") +endif () + +if (NOT EXISTS "${AOM_TEST_DATA_PATH}") + file(MAKE_DIRECTORY "${AOM_TEST_DATA_PATH}") +endif () + +make_test_data_lists("AOM_TEST_DATA_FILES" "AOM_TEST_DATA_CHECKSUMS") +expand_test_file_paths("AOM_TEST_DATA_FILES" "${AOM_TEST_DATA_PATH}" + "AOM_TEST_DATA_FILE_PATHS") +expand_test_file_paths("AOM_TEST_DATA_FILES" "${AOM_TEST_DATA_URL}" + "AOM_TEST_DATA_URLS") +list(LENGTH AOM_TEST_DATA_FILES num_files) +math(EXPR num_files "${num_files} - 1") + +foreach (file_num RANGE ${num_files}) + 
list(GET AOM_TEST_DATA_FILES ${file_num} filename) + list(GET AOM_TEST_DATA_CHECKSUMS ${file_num} checksum) + list(GET AOM_TEST_DATA_FILE_PATHS ${file_num} filepath) + list(GET AOM_TEST_DATA_URLS ${file_num} url) + + check_file("${filepath}" "${checksum}" "needs_download") + if (needs_download) + download_test_file("${url}" "${checksum}" "${filepath}") + endif () +endforeach () diff --git a/third_party/aom/test/tile_independence_test.cc b/third_party/aom/test/tile_independence_test.cc new file mode 100644 index 000000000..a29051f2f --- /dev/null +++ b/third_party/aom/test/tile_independence_test.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+*/ + +#include <cstdio> +#include <cstdlib> +#include <string> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +#include "test/md5_helper.h" +#include "aom_mem/aom_mem.h" + +namespace { +class TileIndependenceTest + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWith2Params<int, int> { + protected: + TileIndependenceTest() + : EncoderTest(GET_PARAM(0)), md5_fw_order_(), md5_inv_order_(), + n_tile_cols_(GET_PARAM(1)), n_tile_rows_(GET_PARAM(2)) { + init_flags_ = AOM_CODEC_USE_PSNR; + aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t(); + cfg.w = 704; + cfg.h = 144; + cfg.threads = 1; + fw_dec_ = codec_->CreateDecoder(cfg, 0); + inv_dec_ = codec_->CreateDecoder(cfg, 0); + inv_dec_->Control(AV1_INVERT_TILE_DECODE_ORDER, 1); + +#if CONFIG_AV1 && CONFIG_EXT_TILE + if (fw_dec_->IsAV1() && inv_dec_->IsAV1()) { + fw_dec_->Control(AV1_SET_DECODE_TILE_ROW, -1); + fw_dec_->Control(AV1_SET_DECODE_TILE_COL, -1); + inv_dec_->Control(AV1_SET_DECODE_TILE_ROW, -1); + inv_dec_->Control(AV1_SET_DECODE_TILE_COL, -1); + } +#endif + } + + virtual ~TileIndependenceTest() { + delete fw_dec_; + delete inv_dec_; + } + + virtual void SetUp() { + InitializeConfig(); + SetMode(libaom_test::kTwoPassGood); + } + + virtual void PreEncodeFrameHook(libaom_test::VideoSource *video, + libaom_test::Encoder *encoder) { + if (video->frame() == 1) { + encoder->Control(AV1E_SET_TILE_COLUMNS, n_tile_cols_); + encoder->Control(AV1E_SET_TILE_ROWS, n_tile_rows_); +#if CONFIG_EXT_TILE + encoder->Control(AV1E_SET_TILE_ENCODING_MODE, 0); // TILE_NORMAL +#endif // CONFIG_EXT_TILE +#if CONFIG_LOOPFILTERING_ACROSS_TILES + encoder->Control(AV1E_SET_TILE_LOOPFILTER, 0); +#endif // CONFIG_LOOPFILTERING_ACROSS_TILES + SetCpuUsed(encoder); + } + } + + virtual void SetCpuUsed(libaom_test::Encoder *encoder) { + static const int kCpuUsed = 3; + 
encoder->Control(AOME_SET_CPUUSED, kCpuUsed); + } + + void UpdateMD5(::libaom_test::Decoder *dec, const aom_codec_cx_pkt_t *pkt, + ::libaom_test::MD5 *md5) { + const aom_codec_err_t res = dec->DecodeFrame( + reinterpret_cast<uint8_t *>(pkt->data.frame.buf), pkt->data.frame.sz); + if (res != AOM_CODEC_OK) { + abort_ = true; + ASSERT_EQ(AOM_CODEC_OK, res); + } + const aom_image_t *img = dec->GetDxData().Next(); + md5->Add(img); + } + + virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) { + UpdateMD5(fw_dec_, pkt, &md5_fw_order_); + UpdateMD5(inv_dec_, pkt, &md5_inv_order_); + } + + void DoTest() { + const aom_rational timebase = { 33333333, 1000000000 }; + cfg_.g_timebase = timebase; + cfg_.rc_target_bitrate = 500; + cfg_.g_lag_in_frames = 12; + cfg_.rc_end_usage = AOM_VBR; + + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 704, 576, + timebase.den, timebase.num, 0, 5); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + const char *md5_fw_str = md5_fw_order_.Get(); + const char *md5_inv_str = md5_inv_order_.Get(); + ASSERT_STREQ(md5_fw_str, md5_inv_str); + } + + ::libaom_test::MD5 md5_fw_order_, md5_inv_order_; + ::libaom_test::Decoder *fw_dec_, *inv_dec_; + + private: + int n_tile_cols_; + int n_tile_rows_; +}; + +// run an encode with 2 or 4 tiles, and do the decode both in normal and +// inverted tile ordering. Ensure that the MD5 of the output in both cases +// is identical. If so, tiles are considered independent and the test passes. 
+TEST_P(TileIndependenceTest, MD5Match) { DoTest(); } + +class TileIndependenceTestLarge : public TileIndependenceTest { + virtual void SetCpuUsed(libaom_test::Encoder *encoder) { + static const int kCpuUsed = 0; + encoder->Control(AOME_SET_CPUUSED, kCpuUsed); + } +}; + +TEST_P(TileIndependenceTestLarge, MD5Match) { DoTest(); } + +#if CONFIG_EXT_TILE +AV1_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Values(1, 2, 32), + ::testing::Values(1, 2, 32)); +AV1_INSTANTIATE_TEST_CASE(TileIndependenceTestLarge, + ::testing::Values(1, 2, 32), + ::testing::Values(1, 2, 32)); +#else +AV1_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Values(0, 1), + ::testing::Values(0, 1)); +AV1_INSTANTIATE_TEST_CASE(TileIndependenceTestLarge, ::testing::Values(0, 1), + ::testing::Values(0, 1)); +#endif // CONFIG_EXT_TILE +} // namespace diff --git a/third_party/aom/test/tools_common.sh b/third_party/aom/test/tools_common.sh new file mode 100755 index 000000000..254e6b296 --- /dev/null +++ b/third_party/aom/test/tools_common.sh @@ -0,0 +1,454 @@ +#!/bin/sh +## Copyright (c) 2016, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## +## This file contains shell code shared by test scripts for libaom tools. + +# Use $AOM_TEST_TOOLS_COMMON_SH as a pseudo include guard. 
if [ -z "${AOM_TEST_TOOLS_COMMON_SH}" ]; then
AOM_TEST_TOOLS_COMMON_SH=included

set -e
# Redirection appended (via eval) to tool command lines; cleared by
# --show-program-output.
devnull='> /dev/null 2>&1'
AOM_TEST_PREFIX=""

# Echoes all arguments to stderr.
elog() {
  echo "$@" 1>&2
}

# Echoes all arguments to stdout when verbose output is enabled.
vlog() {
  if [ "${AOM_TEST_VERBOSE_OUTPUT}" = "yes" ]; then
    echo "$@"
  fi
}

# Sets $AOM_TOOL_TEST to the name specified by positional parameter one.
test_begin() {
  AOM_TOOL_TEST="${1}"
}

# Clears the AOM_TOOL_TEST variable after confirming that $AOM_TOOL_TEST matches
# positional parameter one.
test_end() {
  if [ "$1" != "${AOM_TOOL_TEST}" ]; then
    echo "FAIL completed test mismatch!."
    echo " completed test: ${1}"
    echo " active test: ${AOM_TOOL_TEST}."
    return 1
  fi
  AOM_TOOL_TEST='<unset>'
}

# Echoes the target configuration being tested.
test_configuration_target() {
  aom_config_mk="${LIBAOM_CONFIG_PATH}/config.mk"
  # Find the TOOLCHAIN line, split it using ':=' as the field separator, and
  # print the last field to get the value. Then pipe the value to tr to consume
  # any leading/trailing spaces while allowing tr to echo the output to stdout.
  awk -F ':=' '/TOOLCHAIN/ { print $NF }' "${aom_config_mk}" | tr -d ' '
}

# Trap function used for failure reports and tool output directory removal.
# When the contents of $AOM_TOOL_TEST do not match the string '<unset>', reports
# failure of test stored in $AOM_TOOL_TEST.
cleanup() {
  if [ -n "${AOM_TOOL_TEST}" ] && [ "${AOM_TOOL_TEST}" != '<unset>' ]; then
    echo "FAIL: $AOM_TOOL_TEST"
  fi
  if [ -n "${AOM_TEST_OUTPUT_DIR}" ] && [ -d "${AOM_TEST_OUTPUT_DIR}" ]; then
    rm -rf "${AOM_TEST_OUTPUT_DIR}"
  fi
}

# Echoes the git hash portion of the VERSION_STRING variable defined in
# $LIBAOM_CONFIG_PATH/config.mk to stdout, or the version number string when
# no git hash is contained in VERSION_STRING.
config_hash() {
  aom_config_mk="${LIBAOM_CONFIG_PATH}/config.mk"
  # Find VERSION_STRING line, split it with "-g" and print the last field to
  # output the git hash to stdout.
  aom_version=$(awk -F -g '/VERSION_STRING/ {print $NF}' "${aom_config_mk}")
  # Handle two situations here:
  # 1. The default case: $aom_version is a git hash, so echo it unchanged.
  # 2. When being run a non-dev tree, the -g portion is not present in the
  #    version string: It's only the version number.
  #    In this case $aom_version is something like 'VERSION_STRING=v1.3.0', so
  #    we echo only what is after the '='.
  echo "${aom_version##*=}"
}

# Echoes the short form of the current git hash.
current_hash() {
  if git --version > /dev/null 2>&1; then
    (cd "$(dirname "${0}")"
    git rev-parse --short HEAD)
  else
    # Return the config hash if git is unavailable: Fail silently, git hashes
    # are used only for warnings.
    config_hash
  fi
}

# Echoes warnings to stdout when the git hash recorded in config.mk (see
# config_hash) does not match the current git hash.
check_git_hashes() {
  hash_at_configure_time=$(config_hash)
  hash_now=$(current_hash)

  if [ "${hash_at_configure_time}" != "${hash_now}" ]; then
    echo "Warning: git hash has changed since last configure."
  fi
}

# $1 is the name of an environment variable containing a directory name to
# test.
test_env_var_dir() {
  local dir=$(eval echo "\${$1}")
  if [ ! -d "${dir}" ]; then
    elog "'${dir}': No such directory"
    elog "The $1 environment variable must be set to a valid directory."
    return 1
  fi
}

# This script requires that the LIBAOM_BIN_PATH, LIBAOM_CONFIG_PATH, and
# LIBAOM_TEST_DATA_PATH variables are in the environment: Confirm that
# the variables are set and that they all evaluate to directory paths.
verify_aom_test_environment() {
  test_env_var_dir "LIBAOM_BIN_PATH" \
    && test_env_var_dir "LIBAOM_CONFIG_PATH" \
    && test_env_var_dir "LIBAOM_TEST_DATA_PATH"
}

# Greps aom_config.h in LIBAOM_CONFIG_PATH for positional parameter one, which
# should be a LIBAOM preprocessor flag. Echoes yes to stdout when the feature
# is available.
aom_config_option_enabled() {
  aom_config_option="${1}"
  aom_config_file="${LIBAOM_CONFIG_PATH}/aom_config.h"
  config_line=$(grep "${aom_config_option}" "${aom_config_file}")
  # grep -E replaces the obsolescent egrep, which warns on stderr with recent
  # GNU grep.
  if echo "${config_line}" | grep -E -q '1$'; then
    echo yes
  fi
}

# Echoes yes when output of test_configuration_target() contains win32 or win64.
is_windows_target() {
  if test_configuration_target \
     | grep -q -e win32 -e win64 > /dev/null 2>&1; then
    echo yes
  fi
}

# Echoes path to $1 when it's executable and exists in ${LIBAOM_BIN_PATH}, or an
# empty string. Caller is responsible for testing the string once the function
# returns.
aom_tool_path() {
  # Plain `local`: `local readonly name=...` also declares a local variable
  # called "readonly" and does not make `name` read-only.
  local tool_name="$1"
  local tool_path="${LIBAOM_BIN_PATH}/${tool_name}${AOM_TEST_EXE_SUFFIX}"
  if [ ! -x "${tool_path}" ]; then
    # Try one directory up: when running via examples.sh the tool could be in
    # the parent directory of $LIBAOM_BIN_PATH.
    tool_path="${LIBAOM_BIN_PATH}/../${tool_name}${AOM_TEST_EXE_SUFFIX}"
  fi

  if [ ! -x "${tool_path}" ]; then
    tool_path=""
  fi
  echo "${tool_path}"
}

# Echoes yes to stdout when the file named by positional parameter one exists
# in LIBAOM_BIN_PATH, and is executable.
aom_tool_available() {
  local tool_name="$1"
  local tool="${LIBAOM_BIN_PATH}/${tool_name}${AOM_TEST_EXE_SUFFIX}"
  [ -x "${tool}" ] && echo yes
}

# Echoes yes to stdout when aom_config_option_enabled() reports yes for
# CONFIG_AV1_DECODER.
av1_decode_available() {
  [ "$(aom_config_option_enabled CONFIG_AV1_DECODER)" = "yes" ] && echo yes
}

# Echoes yes to stdout when aom_config_option_enabled() reports yes for
# CONFIG_AV1_ENCODER.
av1_encode_available() {
  [ "$(aom_config_option_enabled CONFIG_AV1_ENCODER)" = "yes" ] && echo yes
}

# Echoes yes to stdout when aom_config_option_enabled() reports yes for
# CONFIG_WEBM_IO.
webm_io_available() {
  [ "$(aom_config_option_enabled CONFIG_WEBM_IO)" = "yes" ] && echo yes
}

# Filters strings from $1 using the filter specified by $2. Filter behavior
# depends on the presence of $3. When $3 is present, strings that match the
# filter are excluded. When $3 is omitted, strings matching the filter are
# included.
# The filtered result is echoed to stdout.
filter_strings() {
  strings=${1}
  filter=${2}
  exclude=${3}
  # Start from an empty result so a value left behind by an earlier call in the
  # same shell is never appended to.
  filtered_strings=""

  if [ -n "${exclude}" ]; then
    # When positional parameter three exists the caller wants to remove strings.
    # Tell grep to invert matches using the -v argument.
    exclude='-v'
  else
    unset exclude
  fi

  if [ -n "${filter}" ]; then
    for s in ${strings}; do
      if echo "${s}" | grep -E -q ${exclude} "${filter}" > /dev/null 2>&1; then
        filtered_strings="${filtered_strings} ${s}"
      fi
    done
  else
    filtered_strings="${strings}"
  fi
  echo "${filtered_strings}"
}

# Runs user test functions passed via positional parameters one and two.
# Functions in positional parameter one are treated as environment verification
# functions and are run unconditionally. Functions in positional parameter two
# are run according to the rules specified in aom_test_usage().
run_tests() {
  local env_tests="verify_aom_test_environment $1"
  local tests_to_filter="$2"
  local test_name="${AOM_TEST_NAME}"

  if [ -z "${test_name}" ]; then
    test_name="$(basename "${0%.*}")"
  fi

  if [ "${AOM_TEST_RUN_DISABLED_TESTS}" != "yes" ]; then
    # Filter out DISABLED tests.
    tests_to_filter=$(filter_strings "${tests_to_filter}" ^DISABLED exclude)
  fi

  if [ -n "${AOM_TEST_FILTER}" ]; then
    # Remove tests not matching the user's filter. The filter is quoted so that
    # a value containing whitespace is not split into a second word, which
    # filter_strings would interpret as its "exclude" argument.
    tests_to_filter=$(filter_strings "${tests_to_filter}" "${AOM_TEST_FILTER}")
  fi

  # User requested test listing: Dump test names and return.
  if [ "${AOM_TEST_LIST_TESTS}" = "yes" ]; then
    for test_name in $tests_to_filter; do
      echo ${test_name}
    done
    return
  fi

  # Don't bother with the environment tests if everything else was disabled.
  [ -z "${tests_to_filter}" ] && return

  # Combine environment and actual tests.
  local tests_to_run="${env_tests} ${tests_to_filter}"

  check_git_hashes

  # Run tests.
  for test in ${tests_to_run}; do
    test_begin "${test}"
    vlog " RUN ${test}"
    "${test}"
    vlog " PASS ${test}"
    test_end "${test}"
  done

  local tested_config="$(test_configuration_target) @ $(current_hash)"
  echo "${test_name}: Done, all tests pass for ${tested_config}."
}

# Prints the command line help shared by all test scripts.
aom_test_usage() {
cat << EOF
  Usage: ${0##*/} [arguments]
    --bin-path <path to libaom binaries directory>
    --config-path <path to libaom config directory>
    --filter <filter>: User test filter. Only tests matching filter are run.
    --run-disabled-tests: Run disabled tests.
    --help: Display this message and exit.
    --test-data-path <path to libaom test data directory>
    --show-program-output: Shows output from all programs being tested.
    --prefix: Allows for a user specified prefix to be inserted before all test
              programs. Grants the ability, for example, to run test programs
              within valgrind.
    --list-tests: List all test names and exit without actually running tests.
    --verbose: Verbose output.

    When the --bin-path option is not specified the script attempts to use
    \$LIBAOM_BIN_PATH and then the current directory.

    When the --config-path option is not specified the script attempts to use
    \$LIBAOM_CONFIG_PATH and then the current directory.

    When the --test-data-path option is not specified the script attempts to use
    \$LIBAOM_TEST_DATA_PATH and then the current directory.
EOF
}

# Returns non-zero (failure) when required environment variables are empty
# strings.
aom_test_check_environment() {
  if [ -z "${LIBAOM_BIN_PATH}" ] || \
     [ -z "${LIBAOM_CONFIG_PATH}" ] || \
     [ -z "${LIBAOM_TEST_DATA_PATH}" ]; then
    return 1
  fi
}

# Echo aomenc command line parameters allowing use of a raw yuv file as
# input to aomenc.
yuv_raw_input() {
  echo ""${YUV_RAW_INPUT}"
       --width="${YUV_RAW_INPUT_WIDTH}"
       --height="${YUV_RAW_INPUT_HEIGHT}""
}

# Do a small encode for testing decoders.
# $1 is the output file; any remaining arguments are passed to aomenc.
encode_yuv_raw_input_av1() {
  if [ "$(av1_encode_available)" = "yes" ]; then
    local output="$1"
    local encoder="$(aom_tool_path aomenc)"
    shift
    eval "${encoder}" $(yuv_raw_input) \
      --codec=av1 \
      $@ \
      --limit=5 \
      --output="${output}" \
      ${devnull}

    if [ ! -e "${output}" ]; then
      elog "Output file does not exist."
      return 1
    fi
  fi
}

# Parse the command line.
while [ -n "$1" ]; do
  case "$1" in
    --bin-path)
      LIBAOM_BIN_PATH="$2"
      shift
      ;;
    --config-path)
      LIBAOM_CONFIG_PATH="$2"
      shift
      ;;
    --filter)
      AOM_TEST_FILTER="$2"
      shift
      ;;
    --run-disabled-tests)
      AOM_TEST_RUN_DISABLED_TESTS=yes
      ;;
    --help)
      aom_test_usage
      exit
      ;;
    --test-data-path)
      LIBAOM_TEST_DATA_PATH="$2"
      shift
      ;;
    --prefix)
      AOM_TEST_PREFIX="$2"
      shift
      ;;
    --verbose)
      AOM_TEST_VERBOSE_OUTPUT=yes
      ;;
    --show-program-output)
      devnull=
      # Recorded only so the verbose configuration dump below can report it.
      AOM_TEST_SHOW_PROGRAM_OUTPUT=yes
      ;;
    --list-tests)
      AOM_TEST_LIST_TESTS=yes
      ;;
    *)
      aom_test_usage
      exit 1
      ;;
  esac
  shift
done

# Handle running the tests from a build directory without arguments when running
# the tests on *nix/macosx.
LIBAOM_BIN_PATH="${LIBAOM_BIN_PATH:-.}"
LIBAOM_CONFIG_PATH="${LIBAOM_CONFIG_PATH:-.}"
LIBAOM_TEST_DATA_PATH="${LIBAOM_TEST_DATA_PATH:-.}"

# Create a temporary directory for output files, and a trap to clean it up.
if [ -n "${TMPDIR}" ]; then
  AOM_TEST_TEMP_ROOT="${TMPDIR}"
elif [ -n "${TEMPDIR}" ]; then
  AOM_TEST_TEMP_ROOT="${TEMPDIR}"
else
  AOM_TEST_TEMP_ROOT=/tmp
fi

AOM_TEST_OUTPUT_DIR="${AOM_TEST_TEMP_ROOT}/aom_test_$$"

if ! mkdir -p "${AOM_TEST_OUTPUT_DIR}" || \
   [ ! -d "${AOM_TEST_OUTPUT_DIR}" ]; then
  echo "${0##*/}: Cannot create output directory, giving up."
  echo "${0##*/}: AOM_TEST_OUTPUT_DIR=${AOM_TEST_OUTPUT_DIR}"
  exit 1
fi

if [ "$(is_windows_target)" = "yes" ]; then
  AOM_TEST_EXE_SUFFIX=".exe"
fi

# Variables shared by tests.
VP8_IVF_FILE="${LIBAOM_TEST_DATA_PATH}/vp80-00-comprehensive-001.ivf"
AV1_IVF_FILE="${LIBAOM_TEST_DATA_PATH}/vp90-2-09-subpixel-00.ivf"

AV1_WEBM_FILE="${LIBAOM_TEST_DATA_PATH}/vp90-2-00-quantizer-00.webm"
AV1_FPM_WEBM_FILE="${LIBAOM_TEST_DATA_PATH}/vp90-2-07-frame_parallel-1.webm"
AV1_LT_50_FRAMES_WEBM_FILE="${LIBAOM_TEST_DATA_PATH}/vp90-2-02-size-32x08.webm"

YUV_RAW_INPUT="${LIBAOM_TEST_DATA_PATH}/hantro_collage_w352h288.yuv"
YUV_RAW_INPUT_WIDTH=352
YUV_RAW_INPUT_HEIGHT=288

Y4M_NOSQ_PAR_INPUT="${LIBAOM_TEST_DATA_PATH}/park_joy_90p_8_420_a10-1.y4m"
Y4M_720P_INPUT="${LIBAOM_TEST_DATA_PATH}/niklas_1280_720_30.y4m"

# Setup a trap function to clean up after tests complete.
trap cleanup EXIT

# Dump the effective configuration (verbose mode only). The first file entry
# reports VP8_IVF_FILE, the variable actually defined above.
vlog "$(basename "${0%.*}") test configuration:
  LIBAOM_BIN_PATH=${LIBAOM_BIN_PATH}
  LIBAOM_CONFIG_PATH=${LIBAOM_CONFIG_PATH}
  LIBAOM_TEST_DATA_PATH=${LIBAOM_TEST_DATA_PATH}
  VP8_IVF_FILE=${VP8_IVF_FILE}
  AV1_IVF_FILE=${AV1_IVF_FILE}
  AV1_WEBM_FILE=${AV1_WEBM_FILE}
  AOM_TEST_EXE_SUFFIX=${AOM_TEST_EXE_SUFFIX}
  AOM_TEST_FILTER=${AOM_TEST_FILTER}
  AOM_TEST_LIST_TESTS=${AOM_TEST_LIST_TESTS}
  AOM_TEST_OUTPUT_DIR=${AOM_TEST_OUTPUT_DIR}
  AOM_TEST_PREFIX=${AOM_TEST_PREFIX}
  AOM_TEST_RUN_DISABLED_TESTS=${AOM_TEST_RUN_DISABLED_TESTS}
  AOM_TEST_SHOW_PROGRAM_OUTPUT=${AOM_TEST_SHOW_PROGRAM_OUTPUT}
  AOM_TEST_TEMP_ROOT=${AOM_TEST_TEMP_ROOT}
  AOM_TEST_VERBOSE_OUTPUT=${AOM_TEST_VERBOSE_OUTPUT}
  YUV_RAW_INPUT=${YUV_RAW_INPUT}
  YUV_RAW_INPUT_WIDTH=${YUV_RAW_INPUT_WIDTH}
  YUV_RAW_INPUT_HEIGHT=${YUV_RAW_INPUT_HEIGHT}
  Y4M_NOSQ_PAR_INPUT=${Y4M_NOSQ_PAR_INPUT}"

fi  # End $AOM_TEST_TOOLS_COMMON_SH pseudo include guard.
diff --git a/third_party/aom/test/transform_test_base.h b/third_party/aom/test/transform_test_base.h new file mode 100644 index 000000000..4c1a55496 --- /dev/null +++ b/third_party/aom/test/transform_test_base.h @@ -0,0 +1,367 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#ifndef TEST_TRANSFORM_TEST_BASE_H_ +#define TEST_TRANSFORM_TEST_BASE_H_ + +#include "./aom_config.h" +#include "aom_mem/aom_mem.h" +#include "aom/aom_codec.h" + +namespace libaom_test { + +// Note: +// Same constant are defined in av1/common/av1_entropy.h and +// av1/common/entropy.h. Goal is to make this base class +// to use for future codec transform testing. But including +// either of them would lead to compiling error when we do +// unit test for another codec. Suggest to move the definition +// to a aom header file. 
+const int kDctMaxValue = 16384; + +typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride, + int tx_type); + +typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride, + int tx_type); + +class TransformTestBase { + public: + virtual ~TransformTestBase() {} + + protected: + virtual void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) = 0; + + virtual void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) = 0; + + void RunAccuracyCheck(uint32_t ref_max_error, double ref_avg_error) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + uint32_t max_error = 0; + int64_t total_error = 0; + const int count_test_block = 10000; + + int16_t *test_input_block = reinterpret_cast<int16_t *>( + aom_memalign(16, sizeof(int16_t) * num_coeffs_)); + tran_low_t *test_temp_block = reinterpret_cast<tran_low_t *>( + aom_memalign(16, sizeof(tran_low_t) * num_coeffs_)); + uint8_t *dst = reinterpret_cast<uint8_t *>( + aom_memalign(16, sizeof(uint8_t) * num_coeffs_)); + uint8_t *src = reinterpret_cast<uint8_t *>( + aom_memalign(16, sizeof(uint8_t) * num_coeffs_)); +#if CONFIG_HIGHBITDEPTH + uint16_t *dst16 = reinterpret_cast<uint16_t *>( + aom_memalign(16, sizeof(uint16_t) * num_coeffs_)); + uint16_t *src16 = reinterpret_cast<uint16_t *>( + aom_memalign(16, sizeof(uint16_t) * num_coeffs_)); +#endif + + for (int i = 0; i < count_test_block; ++i) { + // Initialize a test block with input range [-255, 255]. 
+ for (int j = 0; j < num_coeffs_; ++j) { + if (bit_depth_ == AOM_BITS_8) { + src[j] = rnd.Rand8(); + dst[j] = rnd.Rand8(); + test_input_block[j] = src[j] - dst[j]; +#if CONFIG_HIGHBITDEPTH + } else { + src16[j] = rnd.Rand16() & mask_; + dst16[j] = rnd.Rand16() & mask_; + test_input_block[j] = src16[j] - dst16[j]; +#endif + } + } + + ASM_REGISTER_STATE_CHECK( + RunFwdTxfm(test_input_block, test_temp_block, pitch_)); + if (bit_depth_ == AOM_BITS_8) { + ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_)); +#if CONFIG_HIGHBITDEPTH + } else { + ASM_REGISTER_STATE_CHECK( + RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_)); +#endif + } + + for (int j = 0; j < num_coeffs_; ++j) { +#if CONFIG_HIGHBITDEPTH + const int diff = + bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j]; +#else + ASSERT_EQ(AOM_BITS_8, bit_depth_); + const int diff = dst[j] - src[j]; +#endif + const uint32_t error = diff * diff; + if (max_error < error) max_error = error; + total_error += error; + } + } + + double avg_error = total_error * 1. / count_test_block / num_coeffs_; + + EXPECT_GE(ref_max_error, max_error) + << "Error: FHT/IHT has an individual round trip error > " + << ref_max_error; + + EXPECT_GE(ref_avg_error, avg_error) + << "Error: FHT/IHT has average round trip error > " << ref_avg_error + << " per block"; + + aom_free(test_input_block); + aom_free(test_temp_block); + aom_free(dst); + aom_free(src); +#if CONFIG_HIGHBITDEPTH + aom_free(dst16); + aom_free(src16); +#endif + } + + void RunCoeffCheck() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 5000; + + // Use a stride value which is not the width of any transform, to catch + // cases where the transforms use the stride incorrectly. 
+ int stride = 96; + + int16_t *input_block = reinterpret_cast<int16_t *>( + aom_memalign(16, sizeof(int16_t) * stride * height_)); + tran_low_t *output_ref_block = reinterpret_cast<tran_low_t *>( + aom_memalign(16, sizeof(tran_low_t) * num_coeffs_)); + tran_low_t *output_block = reinterpret_cast<tran_low_t *>( + aom_memalign(16, sizeof(tran_low_t) * num_coeffs_)); + + for (int i = 0; i < count_test_block; ++i) { + int j, k; + for (j = 0; j < height_; ++j) { + for (k = 0; k < pitch_; ++k) { + int in_idx = j * stride + k; + int out_idx = j * pitch_ + k; + input_block[in_idx] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_); + if (bit_depth_ == AOM_BITS_8) { + output_block[out_idx] = output_ref_block[out_idx] = rnd.Rand8(); +#if CONFIG_HIGHBITDEPTH + } else { + output_block[out_idx] = output_ref_block[out_idx] = + rnd.Rand16() & mask_; +#endif + } + } + } + + fwd_txfm_ref(input_block, output_ref_block, stride, tx_type_); + ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, stride)); + + // The minimum quant value is 4. + for (j = 0; j < height_; ++j) { + for (k = 0; k < pitch_; ++k) { + int out_idx = j * pitch_ + k; + ASSERT_EQ(output_block[out_idx], output_ref_block[out_idx]) + << "Error: not bit-exact result at index: " << out_idx + << " at test block: " << i; + } + } + } + aom_free(input_block); + aom_free(output_ref_block); + aom_free(output_block); + } + + void RunInvCoeffCheck() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 5000; + + // Use a stride value which is not the width of any transform, to catch + // cases where the transforms use the stride incorrectly. 
+ int stride = 96; + + int16_t *input_block = reinterpret_cast<int16_t *>( + aom_memalign(16, sizeof(int16_t) * num_coeffs_)); + tran_low_t *trans_block = reinterpret_cast<tran_low_t *>( + aom_memalign(16, sizeof(tran_low_t) * num_coeffs_)); + uint8_t *output_block = reinterpret_cast<uint8_t *>( + aom_memalign(16, sizeof(uint8_t) * stride * height_)); + uint8_t *output_ref_block = reinterpret_cast<uint8_t *>( + aom_memalign(16, sizeof(uint8_t) * stride * height_)); + + for (int i = 0; i < count_test_block; ++i) { + // Initialize a test block with input range [-mask_, mask_]. + int j, k; + for (j = 0; j < height_; ++j) { + for (k = 0; k < pitch_; ++k) { + int in_idx = j * pitch_ + k; + int out_idx = j * stride + k; + input_block[in_idx] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_); + output_ref_block[out_idx] = rnd.Rand16() & mask_; + output_block[out_idx] = output_ref_block[out_idx]; + } + } + + fwd_txfm_ref(input_block, trans_block, pitch_, tx_type_); + + inv_txfm_ref(trans_block, output_ref_block, stride, tx_type_); + ASM_REGISTER_STATE_CHECK(RunInvTxfm(trans_block, output_block, stride)); + + for (j = 0; j < height_; ++j) { + for (k = 0; k < pitch_; ++k) { + int out_idx = j * stride + k; + ASSERT_EQ(output_block[out_idx], output_ref_block[out_idx]) + << "Error: not bit-exact result at index: " << out_idx + << " j = " << j << " k = " << k << " at test block: " << i; + } + } + } + aom_free(input_block); + aom_free(trans_block); + aom_free(output_ref_block); + aom_free(output_block); + } + + void RunMemCheck() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 5000; + + int16_t *input_extreme_block = reinterpret_cast<int16_t *>( + aom_memalign(16, sizeof(int16_t) * num_coeffs_)); + tran_low_t *output_ref_block = reinterpret_cast<tran_low_t *>( + aom_memalign(16, sizeof(tran_low_t) * num_coeffs_)); + tran_low_t *output_block = reinterpret_cast<tran_low_t *>( + aom_memalign(16, sizeof(tran_low_t) * num_coeffs_)); + + for (int i = 
0; i < count_test_block; ++i) { + // Initialize a test block with input range [-mask_, mask_]. + for (int j = 0; j < num_coeffs_; ++j) { + input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_; + } + if (i == 0) { + for (int j = 0; j < num_coeffs_; ++j) input_extreme_block[j] = mask_; + } else if (i == 1) { + for (int j = 0; j < num_coeffs_; ++j) input_extreme_block[j] = -mask_; + } + + fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_); + ASM_REGISTER_STATE_CHECK( + RunFwdTxfm(input_extreme_block, output_block, pitch_)); + + int row_length = FindRowLength(); + // The minimum quant value is 4. + for (int j = 0; j < num_coeffs_; ++j) { + EXPECT_EQ(output_block[j], output_ref_block[j]) + << "Not bit-exact at test index: " << i << ", " + << "j = " << j << std::endl; + EXPECT_GE(row_length * kDctMaxValue << (bit_depth_ - 8), + abs(output_block[j])) + << "Error: NxN FDCT has coefficient larger than N*DCT_MAX_VALUE"; + } + } + aom_free(input_extreme_block); + aom_free(output_ref_block); + aom_free(output_block); + } + + void RunInvAccuracyCheck(int limit) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 1000; + + int16_t *in = reinterpret_cast<int16_t *>( + aom_memalign(16, sizeof(int16_t) * num_coeffs_)); + tran_low_t *coeff = reinterpret_cast<tran_low_t *>( + aom_memalign(16, sizeof(tran_low_t) * num_coeffs_)); + uint8_t *dst = reinterpret_cast<uint8_t *>( + aom_memalign(16, sizeof(uint8_t) * num_coeffs_)); + uint8_t *src = reinterpret_cast<uint8_t *>( + aom_memalign(16, sizeof(uint8_t) * num_coeffs_)); + +#if CONFIG_HIGHBITDEPTH + uint16_t *dst16 = reinterpret_cast<uint16_t *>( + aom_memalign(16, sizeof(uint16_t) * num_coeffs_)); + uint16_t *src16 = reinterpret_cast<uint16_t *>( + aom_memalign(16, sizeof(uint16_t) * num_coeffs_)); +#endif + + for (int i = 0; i < count_test_block; ++i) { + // Initialize a test block with input range [-mask_, mask_]. 
+ for (int j = 0; j < num_coeffs_; ++j) { + if (bit_depth_ == AOM_BITS_8) { + src[j] = rnd.Rand8(); + dst[j] = rnd.Rand8(); + in[j] = src[j] - dst[j]; +#if CONFIG_HIGHBITDEPTH + } else { + src16[j] = rnd.Rand16() & mask_; + dst16[j] = rnd.Rand16() & mask_; + in[j] = src16[j] - dst16[j]; +#endif + } + } + + fwd_txfm_ref(in, coeff, pitch_, tx_type_); + + if (bit_depth_ == AOM_BITS_8) { + ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_)); +#if CONFIG_HIGHBITDEPTH + } else { + ASM_REGISTER_STATE_CHECK( + RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_)); +#endif + } + + for (int j = 0; j < num_coeffs_; ++j) { +#if CONFIG_HIGHBITDEPTH + const int diff = + bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j]; +#else + const int diff = dst[j] - src[j]; +#endif + const uint32_t error = diff * diff; + EXPECT_GE(static_cast<uint32_t>(limit), error) + << "Error: 4x4 IDCT has error " << error << " at index " << j; + } + } + aom_free(in); + aom_free(coeff); + aom_free(dst); + aom_free(src); +#if CONFIG_HIGHBITDEPTH + aom_free(src16); + aom_free(dst16); +#endif + } + + int pitch_; + int height_; + int tx_type_; + FhtFunc fwd_txfm_ref; + IhtFunc inv_txfm_ref; + aom_bit_depth_t bit_depth_; + int mask_; + int num_coeffs_; + + private: + // Assume transform size is 4x4, 8x8, 16x16,... + int FindRowLength() const { + int row = 4; + if (16 == num_coeffs_) { + row = 4; + } else if (64 == num_coeffs_) { + row = 8; + } else if (256 == num_coeffs_) { + row = 16; + } else if (1024 == num_coeffs_) { + row = 32; + } + return row; + } +}; + +} // namespace libaom_test + +#endif // TEST_TRANSFORM_TEST_BASE_H_ diff --git a/third_party/aom/test/twopass_encoder.sh b/third_party/aom/test/twopass_encoder.sh new file mode 100755 index 000000000..3abb7628b --- /dev/null +++ b/third_party/aom/test/twopass_encoder.sh @@ -0,0 +1,54 @@ +#!/bin/sh +## Copyright (c) 2016, Alliance for Open Media. 
All rights reserved
##
## This source code is subject to the terms of the BSD 2 Clause License and
## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
## was not distributed with this source code in the LICENSE file, you can
## obtain it at www.aomedia.org/license/software. If the Alliance for Open
## Media Patent License 1.0 was not distributed with this source code in the
## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
##
## This file tests the libaom twopass_encoder example. To add new tests to this
## file, do the following:
##   1. Write a shell function (this is your test).
##   2. Add the function to twopass_encoder_tests (on a new line).
##
# Quoted so that a script location containing whitespace still resolves.
. "$(dirname "$0")/tools_common.sh"

# Environment check: $YUV_RAW_INPUT is required.
twopass_encoder_verify_environment() {
  if [ ! -e "${YUV_RAW_INPUT}" ]; then
    echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH."
    return 1
  fi
}

# Runs twopass_encoder using the codec specified by $1 with a frame limit of
# 7. Fails when the encoder binary is missing or no output file is produced.
twopass_encoder() {
  local encoder="${LIBAOM_BIN_PATH}/twopass_encoder${AOM_TEST_EXE_SUFFIX}"
  local codec="$1"
  local output_file="${AOM_TEST_OUTPUT_DIR}/twopass_encoder_${codec}.ivf"
  local limit=7

  if [ ! -x "${encoder}" ]; then
    elog "${encoder} does not exist or is not executable."
    return 1
  fi

  # eval is needed so that the redirection stored in ${devnull} takes effect.
  eval "${AOM_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
    "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" "${limit}" \
    ${devnull}

  [ -e "${output_file}" ] || return 1
}

# Runs the AV1 variant only when the AV1 encoder is part of the build.
twopass_encoder_av1() {
  if [ "$(av1_encode_available)" = "yes" ]; then
    twopass_encoder av1 || return 1
  fi
}

twopass_encoder_tests="twopass_encoder_av1"

run_tests twopass_encoder_verify_environment "${twopass_encoder_tests}"
diff --git a/third_party/aom/test/user_priv_test.cc b/third_party/aom/test/user_priv_test.cc new file mode 100644 index 000000000..3052b27b1 --- /dev/null +++ b/third_party/aom/test/user_priv_test.cc @@ -0,0 +1,101 @@
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <cstdio>
#include <cstdlib>
#include <string>
#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
#include "./aom_config.h"
#include "test/acm_random.h"
#include "test/codec_factory.h"
#include "test/decode_test_driver.h"
#include "test/ivf_video_source.h"
#include "test/md5_helper.h"
#include "test/util.h"
#if CONFIG_WEBM_IO
#include "test/webm_video_source.h"
#endif
#include "aom_mem/aom_mem.h"
#include "aom/aom.h"

namespace {

using std::string;
using libaom_test::ACMRandom;

#if CONFIG_WEBM_IO

void CheckUserPrivateData(void *user_priv, int *target) {
  // actual pointer value should be the same as expected.
+ EXPECT_EQ(reinterpret_cast<void *>(target), user_priv) + << "user_priv pointer value does not match."; +} + +// Decodes |filename|. Passes in user_priv data when calling DecodeFrame and +// compares the user_priv from return img with the original user_priv to see if +// they match. Both the pointer values and the values inside the addresses +// should match. +string DecodeFile(const string &filename) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + libaom_test::WebMVideoSource video(filename); + video.Init(); + + aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t(); + libaom_test::AV1Decoder decoder(cfg, 0); + + libaom_test::MD5 md5; + int frame_num = 0; + for (video.Begin(); !::testing::Test::HasFailure() && video.cxdata(); + video.Next()) { + void *user_priv = reinterpret_cast<void *>(&frame_num); + const aom_codec_err_t res = + decoder.DecodeFrame(video.cxdata(), video.frame_size(), + (frame_num == 0) ? NULL : user_priv); + if (res != AOM_CODEC_OK) { + EXPECT_EQ(AOM_CODEC_OK, res) << decoder.DecodeError(); + break; + } + libaom_test::DxDataIterator dec_iter = decoder.GetDxData(); + const aom_image_t *img = NULL; + + // Get decompressed data. + while ((img = dec_iter.Next())) { + if (frame_num == 0) { + CheckUserPrivateData(img->user_priv, NULL); + } else { + CheckUserPrivateData(img->user_priv, &frame_num); + + // Also test ctrl_get_reference api. + struct av1_ref_frame ref; + // Randomly fetch a reference frame. + ref.idx = rnd.Rand8() % 3; + decoder.Control(AV1_GET_REFERENCE, &ref); + + CheckUserPrivateData(ref.img.user_priv, NULL); + } + md5.Add(img); + } + + frame_num++; + } + return string(md5.Get()); +} + +TEST(UserPrivTest, VideoDecode) { + // no tiles or frame parallel; this exercises the decoding to test the + // user_priv. 
+ EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc", + DecodeFile("av10-2-03-size-226x226.webm").c_str()); +} + +#endif // CONFIG_WEBM_IO + +} // namespace diff --git a/third_party/aom/test/util.h b/third_party/aom/test/util.h new file mode 100644 index 000000000..a20fab65c --- /dev/null +++ b/third_party/aom/test/util.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#ifndef TEST_UTIL_H_ +#define TEST_UTIL_H_ + +#include <stdio.h> +#include <math.h> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "aom/aom_image.h" + +// Macros +#define GET_PARAM(k) std::tr1::get<k>(GetParam()) + +inline double compute_psnr(const aom_image_t *img1, const aom_image_t *img2) { + assert((img1->fmt == img2->fmt) && (img1->d_w == img2->d_w) && + (img1->d_h == img2->d_h)); + + const unsigned int width_y = img1->d_w; + const unsigned int height_y = img1->d_h; + unsigned int i, j; + + int64_t sqrerr = 0; + for (i = 0; i < height_y; ++i) + for (j = 0; j < width_y; ++j) { + int64_t d = img1->planes[AOM_PLANE_Y][i * img1->stride[AOM_PLANE_Y] + j] - + img2->planes[AOM_PLANE_Y][i * img2->stride[AOM_PLANE_Y] + j]; + sqrerr += d * d; + } + double mse = static_cast<double>(sqrerr) / (width_y * height_y); + double psnr = 100.0; + if (mse > 0.0) { + psnr = 10 * log10(255.0 * 255.0 / mse); + } + return psnr; +} + +#endif // TEST_UTIL_H_ diff --git a/third_party/aom/test/variance_test.cc b/third_party/aom/test/variance_test.cc new file mode 100644 index 
000000000..5b1003ca7 --- /dev/null +++ b/third_party/aom/test/variance_test.cc @@ -0,0 +1,1385 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. +*/ + +#include <cstdlib> +#include <new> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "./aom_config.h" +#include "./aom_dsp_rtcd.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "aom/aom_codec.h" +#include "aom/aom_integer.h" +#include "aom_mem/aom_mem.h" +#include "aom_ports/mem.h" + +namespace { + +typedef unsigned int (*VarianceMxNFunc)(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + unsigned int *sse); +typedef unsigned int (*SubpixVarMxNFunc)(const uint8_t *a, int a_stride, + int xoffset, int yoffset, + const uint8_t *b, int b_stride, + unsigned int *sse); +typedef unsigned int (*SubpixAvgVarMxNFunc)(const uint8_t *a, int a_stride, + int xoffset, int yoffset, + const uint8_t *b, int b_stride, + uint32_t *sse, + const uint8_t *second_pred); +typedef unsigned int (*Get4x4SseFunc)(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride); +typedef unsigned int (*SumOfSquaresFunction)(const int16_t *src); + +using libaom_test::ACMRandom; + +// Truncate high bit depth results by downshifting (with rounding) by: +// 2 * (bit_depth - 8) for sse +// (bit_depth - 8) for se +static void RoundHighBitDepth(int bit_depth, int64_t *se, uint64_t *sse) { + switch (bit_depth) { + case AOM_BITS_12: + *sse = (*sse + 
128) >> 8; + *se = (*se + 8) >> 4; + break; + case AOM_BITS_10: + *sse = (*sse + 8) >> 4; + *se = (*se + 2) >> 2; + break; + case AOM_BITS_8: + default: break; + } +} + +static unsigned int mb_ss_ref(const int16_t *src) { + unsigned int res = 0; + for (int i = 0; i < 256; ++i) { + res += src[i] * src[i]; + } + return res; +} + +/* Note: + * Our codebase calculates the "diff" value in the variance algorithm by + * (src - ref). + */ +static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref, int l2w, + int l2h, int src_stride, int ref_stride, + uint32_t *sse_ptr, bool use_high_bit_depth_, + aom_bit_depth_t bit_depth) { + int64_t se = 0; + uint64_t sse = 0; + const int w = 1 << l2w; + const int h = 1 << l2h; + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + int diff; + if (!use_high_bit_depth_) { + diff = src[y * src_stride + x] - ref[y * ref_stride + x]; + se += diff; + sse += diff * diff; +#if CONFIG_HIGHBITDEPTH + } else { + diff = CONVERT_TO_SHORTPTR(src)[y * src_stride + x] - + CONVERT_TO_SHORTPTR(ref)[y * ref_stride + x]; + se += diff; + sse += diff * diff; +#endif // CONFIG_HIGHBITDEPTH + } + } + } + RoundHighBitDepth(bit_depth, &se, &sse); + *sse_ptr = static_cast<uint32_t>(sse); + return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h))); +} + +/* The subpel reference functions differ from the codec version in one aspect: + * they calculate the bilinear factors directly instead of using a lookup table + * and therefore upshift xoff and yoff by 1. Only every other calculated value + * is used so the codec version shrinks the table to save space and maintain + * compatibility with vp8. 
 */
// Reference sub-pixel variance of a (1 << l2w) x (1 << l2h) block.
// |ref| is read with a row pitch of (w + 1) samples, and one extra row, so the
// right and lower neighbours needed for interpolation are available; |src| is
// read with a row pitch of w. |xoff| and |yoff| are doubled before use. The
// depth-rounded SSE is written to |sse_ptr|.
static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
                                    int l2w, int l2h, int xoff, int yoff,
                                    uint32_t *sse_ptr, bool use_high_bit_depth_,
                                    aom_bit_depth_t bit_depth) {
  int64_t se = 0;
  uint64_t sse = 0;
  const int w = 1 << l2w;
  const int h = 1 << l2h;

  xoff <<= 1;
  yoff <<= 1;

  for (int y = 0; y < h; y++) {
    for (int x = 0; x < w; x++) {
      // Bilinear interpolation at a 16th pel step.
      if (!use_high_bit_depth_) {
        // a1/a2: current row, b1/b2: next row; blend horizontally first,
        // then vertically.
        const int a1 = ref[(w + 1) * (y + 0) + x + 0];
        const int a2 = ref[(w + 1) * (y + 0) + x + 1];
        const int b1 = ref[(w + 1) * (y + 1) + x + 0];
        const int b2 = ref[(w + 1) * (y + 1) + x + 1];
        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
        const int r = a + (((b - a) * yoff + 8) >> 4);
        const int diff = r - src[w * y + x];
        se += diff;
        sse += diff * diff;
#if CONFIG_HIGHBITDEPTH
      } else {
        // Same computation on the 16-bit views of the buffers.
        uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
        uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
        const int a1 = ref16[(w + 1) * (y + 0) + x + 0];
        const int a2 = ref16[(w + 1) * (y + 0) + x + 1];
        const int b1 = ref16[(w + 1) * (y + 1) + x + 0];
        const int b2 = ref16[(w + 1) * (y + 1) + x + 1];
        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
        const int r = a + (((b - a) * yoff + 8) >> 4);
        const int diff = r - src16[w * y + x];
        se += diff;
        sse += diff * diff;
#endif  // CONFIG_HIGHBITDEPTH
      }
    }
  }
  RoundHighBitDepth(bit_depth, &se, &sse);
  *sse_ptr = static_cast<uint32_t>(sse);
  // sse minus (se * se) / (w * h).
  return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
}

// Same as subpel_variance_ref(), except that each interpolated sample is
// averaged (rounding up) with the co-located sample of |second_pred| before
// the difference against |src| is taken. |second_pred| is read with a row
// pitch of w.
static uint32_t subpel_avg_variance_ref(const uint8_t *ref, const uint8_t *src,
                                        const uint8_t *second_pred, int l2w,
                                        int l2h, int xoff, int yoff,
                                        uint32_t *sse_ptr,
                                        bool use_high_bit_depth,
                                        aom_bit_depth_t bit_depth) {
  int64_t se = 0;
  uint64_t sse = 0;
  const int w = 1 << l2w;
  const int h = 1 << l2h;

  xoff <<= 1;
  yoff <<= 1;

  for (int y = 0; y < h; y++) {
    for (int x = 0; x < w; x++) {
      // bilinear interpolation at a 16th pel step
      if (!use_high_bit_depth) {
        const int a1 = ref[(w + 1) * (y + 0) + x + 0];
        const int a2 = ref[(w + 1) * (y + 0) + x + 1];
        const int b1 = ref[(w + 1) * (y + 1) + x + 0];
        const int b2 = ref[(w + 1) * (y + 1) + x + 1];
        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
        const int r = a + (((b - a) * yoff + 8) >> 4);
        const int diff =
            ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
        se += diff;
        sse += diff * diff;
#if CONFIG_HIGHBITDEPTH
      } else {
        const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
        const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
        const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred);
        const int a1 = ref16[(w + 1) * (y + 0) + x + 0];
        const int a2 = ref16[(w + 1) * (y + 0) + x + 1];
        const int b1 = ref16[(w + 1) * (y + 1) + x + 0];
        const int b2 = ref16[(w + 1) * (y + 1) + x + 1];
        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
        const int r = a + (((b - a) * yoff + 8) >> 4);
        const int diff = ((r + sec16[w * y + x] + 1) >> 1) - src16[w * y + x];
        se += diff;
        sse += diff * diff;
#endif  // CONFIG_HIGHBITDEPTH
      }
    }
  }
  RoundHighBitDepth(bit_depth, &se, &sse);
  *sse_ptr = static_cast<uint32_t>(sse);
  return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
}

////////////////////////////////////////////////////////////////////////////////

// Fixture for sum-of-squares functions; the function under test is the test
// parameter itself.
class SumOfSquaresTest : public ::testing::TestWithParam<SumOfSquaresFunction> {
 public:
  SumOfSquaresTest() : func_(GetParam()) {}

  virtual ~SumOfSquaresTest() { libaom_test::ClearSystemState(); }

 protected:
  void ConstTest();
  void RefTest();

  SumOfSquaresFunction func_;
  ACMRandom rnd_;
};

// Fills all 256 entries with the same value v (0..255) and expects
// 256 * v * v back.
void SumOfSquaresTest::ConstTest() {
  int16_t mem[256];
  unsigned int res;
  for (int v = 0; v < 256; ++v) {
    for (int i = 0; i < 256; ++i) {
      mem[i] = v;
    }
    ASM_REGISTER_STATE_CHECK(res = func_(mem));
    EXPECT_EQ(256u * (v * v), res);
  }
}

// Compares the function under test against mb_ss_ref() on 100 random inputs
// whose entries are differences of two random bytes.
void SumOfSquaresTest::RefTest() {
  int16_t mem[256];
  for (int i = 0; i < 100; ++i) {
    for (int j = 0; j < 256; ++j) {
      mem[j] = rnd_.Rand8() - rnd_.Rand8();
    }

    const unsigned int expected = mb_ss_ref(mem);
    unsigned int res;
    ASM_REGISTER_STATE_CHECK(res = func_(mem));
    EXPECT_EQ(expected, res);
  }
}

////////////////////////////////////////////////////////////////////////////////
// Encapsulating struct to store the function to test along with
// some testing context.
// Can be used for MSE, SSE, Variance, etc.

template <typename Func>
struct TestParams {
  // A |bit_depth_value| greater than zero selects the high bit depth path and
  // is used as the bit depth; zero (the default) means plain 8-bit.
  TestParams(int log2w = 0, int log2h = 0, Func function = NULL,
             int bit_depth_value = 0)
      : log2width(log2w), log2height(log2h), func(function) {
    use_high_bit_depth = (bit_depth_value > 0);
    if (use_high_bit_depth) {
      bit_depth = static_cast<aom_bit_depth_t>(bit_depth_value);
    } else {
      bit_depth = AOM_BITS_8;
    }
    width = 1 << log2width;
    height = 1 << log2height;
    block_size = width * height;
    // Largest sample value representable at |bit_depth|.
    mask = (1u << bit_depth) - 1;
  }

  int log2width, log2height;
  int width, height;
  int block_size;
  Func func;
  aom_bit_depth_t bit_depth;
  bool use_high_bit_depth;
  uint32_t mask;
};

// Streams the block dimensions, function address and bit depth of |p|.
template <typename Func>
std::ostream &operator<<(std::ostream &os, const TestParams<Func> &p) {
  return os << "log2width/height:" << p.log2width << "/" << p.log2height
            << " function:" << reinterpret_cast<const void *>(p.func)
            << " bit-depth:" << p.bit_depth;
}

// Main class for testing a function type
template <typename FunctionType>
class MainTestClass
    : public ::testing::TestWithParam<TestParams<FunctionType> > {
 public:
  // Allocates one block each for src_ and ref_, sized for 8- or 16-bit
  // samples as selected by the test parameters.
  virtual void SetUp() {
    params_ = this->GetParam();

    rnd_.Reset(ACMRandom::DeterministicSeed());
    const size_t unit =
        use_high_bit_depth() ? sizeof(uint16_t) : sizeof(uint8_t);
    // src_ comes from aom_memalign() and ref_ from new[]; TearDown() releases
    // each with the matching deallocator.
    src_ = reinterpret_cast<uint8_t *>(aom_memalign(16, block_size() * unit));
    ref_ = new uint8_t[block_size() * unit];
    ASSERT_TRUE(src_ != NULL);
    ASSERT_TRUE(ref_ != NULL);
#if CONFIG_HIGHBITDEPTH
    if (use_high_bit_depth()) {
      // TODO(skal): remove!
      src_ = CONVERT_TO_BYTEPTR(src_);
      ref_ = CONVERT_TO_BYTEPTR(ref_);
    }
#endif
  }

  virtual void TearDown() {
#if CONFIG_HIGHBITDEPTH
    if (use_high_bit_depth()) {
      // TODO(skal): remove!
      // Undo the CONVERT_TO_BYTEPTR() applied in SetUp() so the pointers
      // handed to aom_free()/delete[] are the ones originally allocated.
      src_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(src_));
      ref_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(ref_));
    }
#endif

    aom_free(src_);
    delete[] ref_;
    src_ = NULL;
    ref_ = NULL;
    libaom_test::ClearSystemState();
  }

 protected:
  // We could sub-class MainTestClass into dedicated class for Variance
  // and MSE/SSE, but it involves a lot of 'this->xxx' dereferencing
  // to access top class fields xxx. That's cumbersome, so for now we'll just
  // implement the testing methods here:

  // Variance tests
  void ZeroTest();
  void RefTest();
  void RefStrideTest();
  void OneQuarterTest();

  // MSE/SSE tests
  void RefTestMse();
  void RefTestSse();
  void MaxTestMse();
  void MaxTestSse();

 protected:
  ACMRandom rnd_;
  uint8_t *src_;
  uint8_t *ref_;
  TestParams<FunctionType> params_;

  // some relay helpers
  bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
  // Number of bits above 8 in the configured bit depth, used as a left-shift
  // amount when scaling 8-bit values up.
  int byte_shift() const { return params_.bit_depth - 8; }
  int block_size() const { return params_.block_size; }
  int width() const { return params_.width; }
  uint32_t mask() const { return params_.mask; }
};

////////////////////////////////////////////////////////////////////////////////
// Tests related to variance.
+ +template <typename VarianceFunctionType> +void MainTestClass<VarianceFunctionType>::ZeroTest() { + for (int i = 0; i <= 255; ++i) { + if (!use_high_bit_depth()) { + memset(src_, i, block_size()); + } else { + uint16_t *const src16 = CONVERT_TO_SHORTPTR(src_); + for (int k = 0; k < block_size(); ++k) src16[k] = i << byte_shift(); + } + for (int j = 0; j <= 255; ++j) { + if (!use_high_bit_depth()) { + memset(ref_, j, block_size()); + } else { + uint16_t *const ref16 = CONVERT_TO_SHORTPTR(ref_); + for (int k = 0; k < block_size(); ++k) ref16[k] = j << byte_shift(); + } + unsigned int sse, var; + ASM_REGISTER_STATE_CHECK( + var = params_.func(src_, width(), ref_, width(), &sse)); + EXPECT_EQ(0u, var) << "src values: " << i << " ref values: " << j; + } + } +} + +template <typename VarianceFunctionType> +void MainTestClass<VarianceFunctionType>::RefTest() { + for (int i = 0; i < 10; ++i) { + for (int j = 0; j < block_size(); j++) { + if (!use_high_bit_depth()) { + src_[j] = rnd_.Rand8(); + ref_[j] = rnd_.Rand8(); +#if CONFIG_HIGHBITDEPTH + } else { + CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); + CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); +#endif // CONFIG_HIGHBITDEPTH + } + } + unsigned int sse1, sse2, var1, var2; + const int stride = width(); + ASM_REGISTER_STATE_CHECK( + var1 = params_.func(src_, stride, ref_, stride, &sse1)); + var2 = + variance_ref(src_, ref_, params_.log2width, params_.log2height, stride, + stride, &sse2, use_high_bit_depth(), params_.bit_depth); + EXPECT_EQ(sse1, sse2) << "Error at test index: " << i; + EXPECT_EQ(var1, var2) << "Error at test index: " << i; + } +} + +template <typename VarianceFunctionType> +void MainTestClass<VarianceFunctionType>::RefStrideTest() { + for (int i = 0; i < 10; ++i) { + const int ref_stride = (i & 1) * width(); + const int src_stride = ((i >> 1) & 1) * width(); + for (int j = 0; j < block_size(); j++) { + const int ref_ind = (j / width()) * ref_stride + j % width(); + const int src_ind = (j 
/ width()) * src_stride + j % width(); + if (!use_high_bit_depth()) { + src_[src_ind] = rnd_.Rand8(); + ref_[ref_ind] = rnd_.Rand8(); +#if CONFIG_HIGHBITDEPTH + } else { + CONVERT_TO_SHORTPTR(src_)[src_ind] = rnd_.Rand16() & mask(); + CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() & mask(); +#endif // CONFIG_HIGHBITDEPTH + } + } + unsigned int sse1, sse2; + unsigned int var1, var2; + + ASM_REGISTER_STATE_CHECK( + var1 = params_.func(src_, src_stride, ref_, ref_stride, &sse1)); + var2 = variance_ref(src_, ref_, params_.log2width, params_.log2height, + src_stride, ref_stride, &sse2, use_high_bit_depth(), + params_.bit_depth); + EXPECT_EQ(sse1, sse2) << "Error at test index: " << i; + EXPECT_EQ(var1, var2) << "Error at test index: " << i; + } +} + +template <typename VarianceFunctionType> +void MainTestClass<VarianceFunctionType>::OneQuarterTest() { + const int half = block_size() / 2; + if (!use_high_bit_depth()) { + memset(src_, 255, block_size()); + memset(ref_, 255, half); + memset(ref_ + half, 0, half); +#if CONFIG_HIGHBITDEPTH + } else { + aom_memset16(CONVERT_TO_SHORTPTR(src_), 255 << byte_shift(), block_size()); + aom_memset16(CONVERT_TO_SHORTPTR(ref_), 255 << byte_shift(), half); + aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, 0, half); +#endif // CONFIG_HIGHBITDEPTH + } + unsigned int sse, var, expected; + ASM_REGISTER_STATE_CHECK( + var = params_.func(src_, width(), ref_, width(), &sse)); + expected = block_size() * 255 * 255 / 4; + EXPECT_EQ(expected, var); +} + +//////////////////////////////////////////////////////////////////////////////// +// Tests related to MSE / SSE. 
+ +template <typename FunctionType> +void MainTestClass<FunctionType>::RefTestMse() { + for (int i = 0; i < 10; ++i) { + for (int j = 0; j < block_size(); ++j) { + src_[j] = rnd_.Rand8(); + ref_[j] = rnd_.Rand8(); + } + unsigned int sse1, sse2; + const int stride = width(); + ASM_REGISTER_STATE_CHECK(params_.func(src_, stride, ref_, stride, &sse1)); + variance_ref(src_, ref_, params_.log2width, params_.log2height, stride, + stride, &sse2, false, AOM_BITS_8); + EXPECT_EQ(sse1, sse2); + } +} + +template <typename FunctionType> +void MainTestClass<FunctionType>::RefTestSse() { + for (int i = 0; i < 10; ++i) { + for (int j = 0; j < block_size(); ++j) { + src_[j] = rnd_.Rand8(); + ref_[j] = rnd_.Rand8(); + } + unsigned int sse2; + unsigned int var1; + const int stride = width(); + ASM_REGISTER_STATE_CHECK(var1 = params_.func(src_, stride, ref_, stride)); + variance_ref(src_, ref_, params_.log2width, params_.log2height, stride, + stride, &sse2, false, AOM_BITS_8); + EXPECT_EQ(var1, sse2); + } +} + +template <typename FunctionType> +void MainTestClass<FunctionType>::MaxTestMse() { + memset(src_, 255, block_size()); + memset(ref_, 0, block_size()); + unsigned int sse; + ASM_REGISTER_STATE_CHECK(params_.func(src_, width(), ref_, width(), &sse)); + const unsigned int expected = block_size() * 255 * 255; + EXPECT_EQ(expected, sse); +} + +template <typename FunctionType> +void MainTestClass<FunctionType>::MaxTestSse() { + memset(src_, 255, block_size()); + memset(ref_, 0, block_size()); + unsigned int var; + ASM_REGISTER_STATE_CHECK(var = params_.func(src_, width(), ref_, width())); + const unsigned int expected = block_size() * 255 * 255; + EXPECT_EQ(expected, var); +} + +//////////////////////////////////////////////////////////////////////////////// + +using ::std::tr1::get; +using ::std::tr1::make_tuple; +using ::std::tr1::tuple; + +template <typename SubpelVarianceFunctionType> +class SubpelVarianceTest + : public ::testing::TestWithParam< + tuple<int, int, 
SubpelVarianceFunctionType, int> > { + public: + virtual void SetUp() { + const tuple<int, int, SubpelVarianceFunctionType, int> ¶ms = + this->GetParam(); + log2width_ = get<0>(params); + width_ = 1 << log2width_; + log2height_ = get<1>(params); + height_ = 1 << log2height_; + subpel_variance_ = get<2>(params); + if (get<3>(params)) { + bit_depth_ = (aom_bit_depth_t)get<3>(params); + use_high_bit_depth_ = true; + } else { + bit_depth_ = AOM_BITS_8; + use_high_bit_depth_ = false; + } + mask_ = (1 << bit_depth_) - 1; + + rnd_.Reset(ACMRandom::DeterministicSeed()); + block_size_ = width_ * height_; + if (!use_high_bit_depth_) { + src_ = reinterpret_cast<uint8_t *>(aom_memalign(16, block_size_)); + sec_ = reinterpret_cast<uint8_t *>(aom_memalign(16, block_size_)); + ref_ = new uint8_t[block_size_ + width_ + height_ + 1]; +#if CONFIG_HIGHBITDEPTH + } else { + src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>( + aom_memalign(16, block_size_ * sizeof(uint16_t)))); + sec_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>( + aom_memalign(16, block_size_ * sizeof(uint16_t)))); + ref_ = + CONVERT_TO_BYTEPTR(new uint16_t[block_size_ + width_ + height_ + 1]); +#endif // CONFIG_HIGHBITDEPTH + } + ASSERT_TRUE(src_ != NULL); + ASSERT_TRUE(sec_ != NULL); + ASSERT_TRUE(ref_ != NULL); + } + + virtual void TearDown() { + if (!use_high_bit_depth_) { + aom_free(src_); + delete[] ref_; + aom_free(sec_); +#if CONFIG_HIGHBITDEPTH + } else { + aom_free(CONVERT_TO_SHORTPTR(src_)); + delete[] CONVERT_TO_SHORTPTR(ref_); + aom_free(CONVERT_TO_SHORTPTR(sec_)); +#endif // CONFIG_HIGHBITDEPTH + } + libaom_test::ClearSystemState(); + } + + protected: + void RefTest(); + void ExtremeRefTest(); + + ACMRandom rnd_; + uint8_t *src_; + uint8_t *ref_; + uint8_t *sec_; + bool use_high_bit_depth_; + aom_bit_depth_t bit_depth_; + int width_, log2width_; + int height_, log2height_; + int block_size_, mask_; + SubpelVarianceFunctionType subpel_variance_; +}; + +template <typename 
SubpelVarianceFunctionType> +void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() { + for (int x = 0; x < 8; ++x) { + for (int y = 0; y < 8; ++y) { + if (!use_high_bit_depth_) { + for (int j = 0; j < block_size_; j++) { + src_[j] = rnd_.Rand8(); + } + for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) { + ref_[j] = rnd_.Rand8(); + } +#if CONFIG_HIGHBITDEPTH + } else { + for (int j = 0; j < block_size_; j++) { + CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask_; + } + for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) { + CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask_; + } +#endif // CONFIG_HIGHBITDEPTH + } + unsigned int sse1, sse2; + unsigned int var1; + ASM_REGISTER_STATE_CHECK( + var1 = subpel_variance_(ref_, width_ + 1, x, y, src_, width_, &sse1)); + const unsigned int var2 = + subpel_variance_ref(ref_, src_, log2width_, log2height_, x, y, &sse2, + use_high_bit_depth_, bit_depth_); + EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y; + EXPECT_EQ(var1, var2) << "at position " << x << ", " << y; + } + } +} + +template <typename SubpelVarianceFunctionType> +void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() { + // Compare against reference. + // Src: Set the first half of values to 0, the second half to the maximum. + // Ref: Set the first half of values to the maximum, the second half to 0. 
+ for (int x = 0; x < 8; ++x) { + for (int y = 0; y < 8; ++y) { + const int half = block_size_ / 2; + if (!use_high_bit_depth_) { + memset(src_, 0, half); + memset(src_ + half, 255, half); + memset(ref_, 255, half); + memset(ref_ + half, 0, half + width_ + height_ + 1); +#if CONFIG_HIGHBITDEPTH + } else { + aom_memset16(CONVERT_TO_SHORTPTR(src_), mask_, half); + aom_memset16(CONVERT_TO_SHORTPTR(src_) + half, 0, half); + aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, half); + aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, mask_, + half + width_ + height_ + 1); +#endif // CONFIG_HIGHBITDEPTH + } + unsigned int sse1, sse2; + unsigned int var1; + ASM_REGISTER_STATE_CHECK( + var1 = subpel_variance_(ref_, width_ + 1, x, y, src_, width_, &sse1)); + const unsigned int var2 = + subpel_variance_ref(ref_, src_, log2width_, log2height_, x, y, &sse2, + use_high_bit_depth_, bit_depth_); + EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y; + EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y; + } + } +} + +template <> +void SubpelVarianceTest<SubpixAvgVarMxNFunc>::RefTest() { + for (int x = 0; x < 8; ++x) { + for (int y = 0; y < 8; ++y) { + if (!use_high_bit_depth_) { + for (int j = 0; j < block_size_; j++) { + src_[j] = rnd_.Rand8(); + sec_[j] = rnd_.Rand8(); + } + for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) { + ref_[j] = rnd_.Rand8(); + } +#if CONFIG_HIGHBITDEPTH + } else { + for (int j = 0; j < block_size_; j++) { + CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask_; + CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask_; + } + for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) { + CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask_; + } +#endif // CONFIG_HIGHBITDEPTH + } + uint32_t sse1, sse2; + uint32_t var1, var2; + ASM_REGISTER_STATE_CHECK(var1 = + subpel_variance_(ref_, width_ + 1, x, y, + src_, width_, &sse1, sec_)); + var2 = subpel_avg_variance_ref(ref_, src_, sec_, log2width_, log2height_, + x, y, &sse2, 
use_high_bit_depth_, + static_cast<aom_bit_depth_t>(bit_depth_)); + EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y; + EXPECT_EQ(var1, var2) << "at position " << x << ", " << y; + } + } +} + +typedef MainTestClass<Get4x4SseFunc> AvxSseTest; +typedef MainTestClass<VarianceMxNFunc> AvxMseTest; +typedef MainTestClass<VarianceMxNFunc> AvxVarianceTest; +typedef SubpelVarianceTest<SubpixVarMxNFunc> AvxSubpelVarianceTest; +typedef SubpelVarianceTest<SubpixAvgVarMxNFunc> AvxSubpelAvgVarianceTest; + +TEST_P(AvxSseTest, RefSse) { RefTestSse(); } +TEST_P(AvxSseTest, MaxSse) { MaxTestSse(); } +TEST_P(AvxMseTest, RefMse) { RefTestMse(); } +TEST_P(AvxMseTest, MaxMse) { MaxTestMse(); } +TEST_P(AvxVarianceTest, Zero) { ZeroTest(); } +TEST_P(AvxVarianceTest, Ref) { RefTest(); } +TEST_P(AvxVarianceTest, RefStride) { RefStrideTest(); } +TEST_P(AvxVarianceTest, OneQuarter) { OneQuarterTest(); } +TEST_P(SumOfSquaresTest, Const) { ConstTest(); } +TEST_P(SumOfSquaresTest, Ref) { RefTest(); } +TEST_P(AvxSubpelVarianceTest, Ref) { RefTest(); } +TEST_P(AvxSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); } +TEST_P(AvxSubpelAvgVarianceTest, Ref) { RefTest(); } + +INSTANTIATE_TEST_CASE_P(C, SumOfSquaresTest, + ::testing::Values(aom_get_mb_ss_c)); + +typedef TestParams<Get4x4SseFunc> SseParams; +INSTANTIATE_TEST_CASE_P(C, AvxSseTest, + ::testing::Values(SseParams(2, 2, + &aom_get4x4sse_cs_c))); + +typedef TestParams<VarianceMxNFunc> MseParams; +INSTANTIATE_TEST_CASE_P(C, AvxMseTest, + ::testing::Values(MseParams(4, 4, &aom_mse16x16_c), + MseParams(4, 3, &aom_mse16x8_c), + MseParams(3, 4, &aom_mse8x16_c), + MseParams(3, 3, &aom_mse8x8_c))); + +typedef TestParams<VarianceMxNFunc> VarianceParams; +INSTANTIATE_TEST_CASE_P( + C, AvxVarianceTest, + ::testing::Values(VarianceParams(6, 6, &aom_variance64x64_c), + VarianceParams(6, 5, &aom_variance64x32_c), + VarianceParams(5, 6, &aom_variance32x64_c), + VarianceParams(5, 5, &aom_variance32x32_c), + VarianceParams(5, 4, 
&aom_variance32x16_c), + VarianceParams(4, 5, &aom_variance16x32_c), + VarianceParams(4, 4, &aom_variance16x16_c), + VarianceParams(4, 3, &aom_variance16x8_c), + VarianceParams(3, 4, &aom_variance8x16_c), + VarianceParams(3, 3, &aom_variance8x8_c), + VarianceParams(3, 2, &aom_variance8x4_c), + VarianceParams(2, 3, &aom_variance4x8_c), + VarianceParams(2, 2, &aom_variance4x4_c))); + +INSTANTIATE_TEST_CASE_P( + C, AvxSubpelVarianceTest, + ::testing::Values(make_tuple(6, 6, &aom_sub_pixel_variance64x64_c, 0), + make_tuple(6, 5, &aom_sub_pixel_variance64x32_c, 0), + make_tuple(5, 6, &aom_sub_pixel_variance32x64_c, 0), + make_tuple(5, 5, &aom_sub_pixel_variance32x32_c, 0), + make_tuple(5, 4, &aom_sub_pixel_variance32x16_c, 0), + make_tuple(4, 5, &aom_sub_pixel_variance16x32_c, 0), + make_tuple(4, 4, &aom_sub_pixel_variance16x16_c, 0), + make_tuple(4, 3, &aom_sub_pixel_variance16x8_c, 0), + make_tuple(3, 4, &aom_sub_pixel_variance8x16_c, 0), + make_tuple(3, 3, &aom_sub_pixel_variance8x8_c, 0), + make_tuple(3, 2, &aom_sub_pixel_variance8x4_c, 0), + make_tuple(2, 3, &aom_sub_pixel_variance4x8_c, 0), + make_tuple(2, 2, &aom_sub_pixel_variance4x4_c, 0))); + +INSTANTIATE_TEST_CASE_P( + C, AvxSubpelAvgVarianceTest, + ::testing::Values(make_tuple(6, 6, &aom_sub_pixel_avg_variance64x64_c, 0), + make_tuple(6, 5, &aom_sub_pixel_avg_variance64x32_c, 0), + make_tuple(5, 6, &aom_sub_pixel_avg_variance32x64_c, 0), + make_tuple(5, 5, &aom_sub_pixel_avg_variance32x32_c, 0), + make_tuple(5, 4, &aom_sub_pixel_avg_variance32x16_c, 0), + make_tuple(4, 5, &aom_sub_pixel_avg_variance16x32_c, 0), + make_tuple(4, 4, &aom_sub_pixel_avg_variance16x16_c, 0), + make_tuple(4, 3, &aom_sub_pixel_avg_variance16x8_c, 0), + make_tuple(3, 4, &aom_sub_pixel_avg_variance8x16_c, 0), + make_tuple(3, 3, &aom_sub_pixel_avg_variance8x8_c, 0), + make_tuple(3, 2, &aom_sub_pixel_avg_variance8x4_c, 0), + make_tuple(2, 3, &aom_sub_pixel_avg_variance4x8_c, 0), + make_tuple(2, 2, &aom_sub_pixel_avg_variance4x4_c, 0))); 
+ +#if CONFIG_HIGHBITDEPTH +typedef MainTestClass<VarianceMxNFunc> AvxHBDMseTest; +typedef MainTestClass<VarianceMxNFunc> AvxHBDVarianceTest; +typedef SubpelVarianceTest<SubpixVarMxNFunc> AvxHBDSubpelVarianceTest; +typedef SubpelVarianceTest<SubpixAvgVarMxNFunc> AvxHBDSubpelAvgVarianceTest; + +TEST_P(AvxHBDMseTest, RefMse) { RefTestMse(); } +TEST_P(AvxHBDMseTest, MaxMse) { MaxTestMse(); } +TEST_P(AvxHBDVarianceTest, Zero) { ZeroTest(); } +TEST_P(AvxHBDVarianceTest, Ref) { RefTest(); } +TEST_P(AvxHBDVarianceTest, RefStride) { RefStrideTest(); } +TEST_P(AvxHBDVarianceTest, OneQuarter) { OneQuarterTest(); } +TEST_P(AvxHBDSubpelVarianceTest, Ref) { RefTest(); } +TEST_P(AvxHBDSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); } +TEST_P(AvxHBDSubpelAvgVarianceTest, Ref) { RefTest(); } + +/* TODO(debargha): This test does not support the highbd version +INSTANTIATE_TEST_CASE_P( + C, AvxHBDMseTest, + ::testing::Values(make_tuple(4, 4, &aom_highbd_12_mse16x16_c), + make_tuple(4, 4, &aom_highbd_12_mse16x8_c), + make_tuple(4, 4, &aom_highbd_12_mse8x16_c), + make_tuple(4, 4, &aom_highbd_12_mse8x8_c), + make_tuple(4, 4, &aom_highbd_10_mse16x16_c), + make_tuple(4, 4, &aom_highbd_10_mse16x8_c), + make_tuple(4, 4, &aom_highbd_10_mse8x16_c), + make_tuple(4, 4, &aom_highbd_10_mse8x8_c), + make_tuple(4, 4, &aom_highbd_8_mse16x16_c), + make_tuple(4, 4, &aom_highbd_8_mse16x8_c), + make_tuple(4, 4, &aom_highbd_8_mse8x16_c), + make_tuple(4, 4, &aom_highbd_8_mse8x8_c))); +*/ + +const VarianceParams kArrayHBDVariance_c[] = { +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + VarianceParams(7, 7, &aom_highbd_12_variance128x128_c, 12), + VarianceParams(7, 6, &aom_highbd_12_variance128x64_c, 12), + VarianceParams(6, 7, &aom_highbd_12_variance64x128_c, 12), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + VarianceParams(6, 6, &aom_highbd_12_variance64x64_c, 12), + VarianceParams(6, 5, &aom_highbd_12_variance64x32_c, 12), + VarianceParams(5, 6, &aom_highbd_12_variance32x64_c, 12), + VarianceParams(5, 5, 
&aom_highbd_12_variance32x32_c, 12), + VarianceParams(5, 4, &aom_highbd_12_variance32x16_c, 12), + VarianceParams(4, 5, &aom_highbd_12_variance16x32_c, 12), + VarianceParams(4, 4, &aom_highbd_12_variance16x16_c, 12), + VarianceParams(4, 3, &aom_highbd_12_variance16x8_c, 12), + VarianceParams(3, 4, &aom_highbd_12_variance8x16_c, 12), + VarianceParams(3, 3, &aom_highbd_12_variance8x8_c, 12), + VarianceParams(3, 2, &aom_highbd_12_variance8x4_c, 12), + VarianceParams(2, 3, &aom_highbd_12_variance4x8_c, 12), + VarianceParams(2, 2, &aom_highbd_12_variance4x4_c, 12), +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + VarianceParams(7, 7, &aom_highbd_10_variance128x128_c, 10), + VarianceParams(7, 6, &aom_highbd_10_variance128x64_c, 10), + VarianceParams(6, 7, &aom_highbd_10_variance64x128_c, 10), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + VarianceParams(6, 6, &aom_highbd_10_variance64x64_c, 10), + VarianceParams(6, 5, &aom_highbd_10_variance64x32_c, 10), + VarianceParams(5, 6, &aom_highbd_10_variance32x64_c, 10), + VarianceParams(5, 5, &aom_highbd_10_variance32x32_c, 10), + VarianceParams(5, 4, &aom_highbd_10_variance32x16_c, 10), + VarianceParams(4, 5, &aom_highbd_10_variance16x32_c, 10), + VarianceParams(4, 4, &aom_highbd_10_variance16x16_c, 10), + VarianceParams(4, 3, &aom_highbd_10_variance16x8_c, 10), + VarianceParams(3, 4, &aom_highbd_10_variance8x16_c, 10), + VarianceParams(3, 3, &aom_highbd_10_variance8x8_c, 10), + VarianceParams(3, 2, &aom_highbd_10_variance8x4_c, 10), + VarianceParams(2, 3, &aom_highbd_10_variance4x8_c, 10), + VarianceParams(2, 2, &aom_highbd_10_variance4x4_c, 10), +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + VarianceParams(7, 7, &aom_highbd_8_variance128x128_c, 8), + VarianceParams(7, 6, &aom_highbd_8_variance128x64_c, 8), + VarianceParams(6, 7, &aom_highbd_8_variance64x128_c, 8), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + VarianceParams(6, 6, &aom_highbd_8_variance64x64_c, 8), + VarianceParams(6, 5, &aom_highbd_8_variance64x32_c, 8), + 
VarianceParams(5, 6, &aom_highbd_8_variance32x64_c, 8), + VarianceParams(5, 5, &aom_highbd_8_variance32x32_c, 8), + VarianceParams(5, 4, &aom_highbd_8_variance32x16_c, 8), + VarianceParams(4, 5, &aom_highbd_8_variance16x32_c, 8), + VarianceParams(4, 4, &aom_highbd_8_variance16x16_c, 8), + VarianceParams(4, 3, &aom_highbd_8_variance16x8_c, 8), + VarianceParams(3, 4, &aom_highbd_8_variance8x16_c, 8), + VarianceParams(3, 3, &aom_highbd_8_variance8x8_c, 8), + VarianceParams(3, 2, &aom_highbd_8_variance8x4_c, 8), + VarianceParams(2, 3, &aom_highbd_8_variance4x8_c, 8), + VarianceParams(2, 2, &aom_highbd_8_variance4x4_c, 8) +}; +INSTANTIATE_TEST_CASE_P(C, AvxHBDVarianceTest, + ::testing::ValuesIn(kArrayHBDVariance_c)); + +#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + SSE4_1, AvxHBDVarianceTest, + ::testing::Values( + VarianceParams(2, 2, &aom_highbd_8_variance4x4_sse4_1, 8), + VarianceParams(2, 2, &aom_highbd_10_variance4x4_sse4_1, 10), + VarianceParams(2, 2, &aom_highbd_12_variance4x4_sse4_1, 12))); +#endif // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH + +const AvxHBDSubpelVarianceTest::ParamType kArrayHBDSubpelVariance_c[] = { +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(7, 7, &aom_highbd_8_sub_pixel_variance128x128_c, 8), + make_tuple(7, 6, &aom_highbd_8_sub_pixel_variance128x64_c, 8), + make_tuple(6, 7, &aom_highbd_8_sub_pixel_variance64x128_c, 8), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(6, 6, &aom_highbd_8_sub_pixel_variance64x64_c, 8), + make_tuple(6, 5, &aom_highbd_8_sub_pixel_variance64x32_c, 8), + make_tuple(5, 6, &aom_highbd_8_sub_pixel_variance32x64_c, 8), + make_tuple(5, 5, &aom_highbd_8_sub_pixel_variance32x32_c, 8), + make_tuple(5, 4, &aom_highbd_8_sub_pixel_variance32x16_c, 8), + make_tuple(4, 5, &aom_highbd_8_sub_pixel_variance16x32_c, 8), + make_tuple(4, 4, &aom_highbd_8_sub_pixel_variance16x16_c, 8), + make_tuple(4, 3, &aom_highbd_8_sub_pixel_variance16x8_c, 8), + make_tuple(3, 4, 
&aom_highbd_8_sub_pixel_variance8x16_c, 8), + make_tuple(3, 3, &aom_highbd_8_sub_pixel_variance8x8_c, 8), + make_tuple(3, 2, &aom_highbd_8_sub_pixel_variance8x4_c, 8), + make_tuple(2, 3, &aom_highbd_8_sub_pixel_variance4x8_c, 8), + make_tuple(2, 2, &aom_highbd_8_sub_pixel_variance4x4_c, 8), +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(7, 7, &aom_highbd_10_sub_pixel_variance128x128_c, 10), + make_tuple(7, 6, &aom_highbd_10_sub_pixel_variance128x64_c, 10), + make_tuple(6, 7, &aom_highbd_10_sub_pixel_variance64x128_c, 10), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(6, 6, &aom_highbd_10_sub_pixel_variance64x64_c, 10), + make_tuple(6, 5, &aom_highbd_10_sub_pixel_variance64x32_c, 10), + make_tuple(5, 6, &aom_highbd_10_sub_pixel_variance32x64_c, 10), + make_tuple(5, 5, &aom_highbd_10_sub_pixel_variance32x32_c, 10), + make_tuple(5, 4, &aom_highbd_10_sub_pixel_variance32x16_c, 10), + make_tuple(4, 5, &aom_highbd_10_sub_pixel_variance16x32_c, 10), + make_tuple(4, 4, &aom_highbd_10_sub_pixel_variance16x16_c, 10), + make_tuple(4, 3, &aom_highbd_10_sub_pixel_variance16x8_c, 10), + make_tuple(3, 4, &aom_highbd_10_sub_pixel_variance8x16_c, 10), + make_tuple(3, 3, &aom_highbd_10_sub_pixel_variance8x8_c, 10), + make_tuple(3, 2, &aom_highbd_10_sub_pixel_variance8x4_c, 10), + make_tuple(2, 3, &aom_highbd_10_sub_pixel_variance4x8_c, 10), + make_tuple(2, 2, &aom_highbd_10_sub_pixel_variance4x4_c, 10), +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(7, 7, &aom_highbd_12_sub_pixel_variance128x128_c, 12), + make_tuple(7, 6, &aom_highbd_12_sub_pixel_variance128x64_c, 12), + make_tuple(6, 7, &aom_highbd_12_sub_pixel_variance64x128_c, 12), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(6, 6, &aom_highbd_12_sub_pixel_variance64x64_c, 12), + make_tuple(6, 5, &aom_highbd_12_sub_pixel_variance64x32_c, 12), + make_tuple(5, 6, &aom_highbd_12_sub_pixel_variance32x64_c, 12), + make_tuple(5, 5, &aom_highbd_12_sub_pixel_variance32x32_c, 12), + make_tuple(5, 4, 
&aom_highbd_12_sub_pixel_variance32x16_c, 12), + make_tuple(4, 5, &aom_highbd_12_sub_pixel_variance16x32_c, 12), + make_tuple(4, 4, &aom_highbd_12_sub_pixel_variance16x16_c, 12), + make_tuple(4, 3, &aom_highbd_12_sub_pixel_variance16x8_c, 12), + make_tuple(3, 4, &aom_highbd_12_sub_pixel_variance8x16_c, 12), + make_tuple(3, 3, &aom_highbd_12_sub_pixel_variance8x8_c, 12), + make_tuple(3, 2, &aom_highbd_12_sub_pixel_variance8x4_c, 12), + make_tuple(2, 3, &aom_highbd_12_sub_pixel_variance4x8_c, 12), + make_tuple(2, 2, &aom_highbd_12_sub_pixel_variance4x4_c, 12), +}; +INSTANTIATE_TEST_CASE_P(C, AvxHBDSubpelVarianceTest, + ::testing::ValuesIn(kArrayHBDSubpelVariance_c)); + +const AvxHBDSubpelAvgVarianceTest::ParamType kArrayHBDSubpelAvgVariance_c[] = { +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(7, 7, &aom_highbd_8_sub_pixel_avg_variance128x128_c, 8), + make_tuple(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_c, 8), + make_tuple(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_c, 8), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_c, 8), + make_tuple(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_c, 8), + make_tuple(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_c, 8), + make_tuple(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_c, 8), + make_tuple(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_c, 8), + make_tuple(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_c, 8), + make_tuple(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_c, 8), + make_tuple(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_c, 8), + make_tuple(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_c, 8), + make_tuple(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_c, 8), + make_tuple(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_c, 8), + make_tuple(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_c, 8), + make_tuple(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_c, 8), +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(7, 7, 
&aom_highbd_10_sub_pixel_avg_variance128x128_c, 10), + make_tuple(7, 6, &aom_highbd_10_sub_pixel_avg_variance128x64_c, 10), + make_tuple(6, 7, &aom_highbd_10_sub_pixel_avg_variance64x128_c, 10), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_c, 10), + make_tuple(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_c, 10), + make_tuple(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_c, 10), + make_tuple(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_c, 10), + make_tuple(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_c, 10), + make_tuple(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_c, 10), + make_tuple(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_c, 10), + make_tuple(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_c, 10), + make_tuple(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_c, 10), + make_tuple(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_c, 10), + make_tuple(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_c, 10), + make_tuple(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_c, 10), + make_tuple(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_c, 10), +#if CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_c, 12), + make_tuple(7, 6, &aom_highbd_12_sub_pixel_avg_variance128x64_c, 12), + make_tuple(6, 7, &aom_highbd_12_sub_pixel_avg_variance64x128_c, 12), +#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION + make_tuple(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_c, 12), + make_tuple(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_c, 12), + make_tuple(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_c, 12), + make_tuple(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_c, 12), + make_tuple(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_c, 12), + make_tuple(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_c, 12), + make_tuple(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_c, 12), + make_tuple(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_c, 12), 
+ make_tuple(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_c, 12), + make_tuple(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_c, 12), + make_tuple(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_c, 12), + make_tuple(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_c, 12), + make_tuple(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_c, 12) +}; +INSTANTIATE_TEST_CASE_P(C, AvxHBDSubpelAvgVarianceTest, + ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_c)); +#endif // CONFIG_HIGHBITDEPTH + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P(SSE2, SumOfSquaresTest, + ::testing::Values(aom_get_mb_ss_sse2)); + +INSTANTIATE_TEST_CASE_P(SSE2, AvxMseTest, + ::testing::Values(MseParams(4, 4, &aom_mse16x16_sse2), + MseParams(4, 3, &aom_mse16x8_sse2), + MseParams(3, 4, &aom_mse8x16_sse2), + MseParams(3, 3, &aom_mse8x8_sse2))); + +INSTANTIATE_TEST_CASE_P( + SSE2, AvxVarianceTest, + ::testing::Values(VarianceParams(6, 6, &aom_variance64x64_sse2), + VarianceParams(6, 5, &aom_variance64x32_sse2), + VarianceParams(5, 6, &aom_variance32x64_sse2), + VarianceParams(5, 5, &aom_variance32x32_sse2), + VarianceParams(5, 4, &aom_variance32x16_sse2), + VarianceParams(4, 5, &aom_variance16x32_sse2), + VarianceParams(4, 4, &aom_variance16x16_sse2), + VarianceParams(4, 3, &aom_variance16x8_sse2), + VarianceParams(3, 4, &aom_variance8x16_sse2), + VarianceParams(3, 3, &aom_variance8x8_sse2), + VarianceParams(3, 2, &aom_variance8x4_sse2), + VarianceParams(2, 3, &aom_variance4x8_sse2), + VarianceParams(2, 2, &aom_variance4x4_sse2))); + +INSTANTIATE_TEST_CASE_P( + SSE2, AvxSubpelVarianceTest, + ::testing::Values(make_tuple(6, 6, &aom_sub_pixel_variance64x64_sse2, 0), + make_tuple(6, 5, &aom_sub_pixel_variance64x32_sse2, 0), + make_tuple(5, 6, &aom_sub_pixel_variance32x64_sse2, 0), + make_tuple(5, 5, &aom_sub_pixel_variance32x32_sse2, 0), + make_tuple(5, 4, &aom_sub_pixel_variance32x16_sse2, 0), + make_tuple(4, 5, &aom_sub_pixel_variance16x32_sse2, 0), + make_tuple(4, 4, &aom_sub_pixel_variance16x16_sse2, 0), + 
make_tuple(4, 3, &aom_sub_pixel_variance16x8_sse2, 0), + make_tuple(3, 4, &aom_sub_pixel_variance8x16_sse2, 0), + make_tuple(3, 3, &aom_sub_pixel_variance8x8_sse2, 0), + make_tuple(3, 2, &aom_sub_pixel_variance8x4_sse2, 0), + make_tuple(2, 3, &aom_sub_pixel_variance4x8_sse2, 0), + make_tuple(2, 2, &aom_sub_pixel_variance4x4_sse2, 0))); + +INSTANTIATE_TEST_CASE_P( + SSE2, AvxSubpelAvgVarianceTest, + ::testing::Values( + make_tuple(6, 6, &aom_sub_pixel_avg_variance64x64_sse2, 0), + make_tuple(6, 5, &aom_sub_pixel_avg_variance64x32_sse2, 0), + make_tuple(5, 6, &aom_sub_pixel_avg_variance32x64_sse2, 0), + make_tuple(5, 5, &aom_sub_pixel_avg_variance32x32_sse2, 0), + make_tuple(5, 4, &aom_sub_pixel_avg_variance32x16_sse2, 0), + make_tuple(4, 5, &aom_sub_pixel_avg_variance16x32_sse2, 0), + make_tuple(4, 4, &aom_sub_pixel_avg_variance16x16_sse2, 0), + make_tuple(4, 3, &aom_sub_pixel_avg_variance16x8_sse2, 0), + make_tuple(3, 4, &aom_sub_pixel_avg_variance8x16_sse2, 0), + make_tuple(3, 3, &aom_sub_pixel_avg_variance8x8_sse2, 0), + make_tuple(3, 2, &aom_sub_pixel_avg_variance8x4_sse2, 0), + make_tuple(2, 3, &aom_sub_pixel_avg_variance4x8_sse2, 0), + make_tuple(2, 2, &aom_sub_pixel_avg_variance4x4_sse2, 0))); + +#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + SSE4_1, AvxSubpelVarianceTest, + ::testing::Values( + make_tuple(2, 2, &aom_highbd_8_sub_pixel_variance4x4_sse4_1, 8), + make_tuple(2, 2, &aom_highbd_10_sub_pixel_variance4x4_sse4_1, 10), + make_tuple(2, 2, &aom_highbd_12_sub_pixel_variance4x4_sse4_1, 12))); + +INSTANTIATE_TEST_CASE_P( + SSE4_1, AvxSubpelAvgVarianceTest, + ::testing::Values( + make_tuple(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_sse4_1, 8), + make_tuple(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_sse4_1, 10), + make_tuple(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_sse4_1, 12))); +#endif // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH + +#if CONFIG_HIGHBITDEPTH +/* TODO(debargha): This test does not support the highbd version 
+INSTANTIATE_TEST_CASE_P( + SSE2, AvxHBDMseTest, + ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_sse2), + MseParams(4, 3, &aom_highbd_12_mse16x8_sse2), + MseParams(3, 4, &aom_highbd_12_mse8x16_sse2), + MseParams(3, 3, &aom_highbd_12_mse8x8_sse2), + MseParams(4, 4, &aom_highbd_10_mse16x16_sse2), + MseParams(4, 3, &aom_highbd_10_mse16x8_sse2), + MseParams(3, 4, &aom_highbd_10_mse8x16_sse2), + MseParams(3, 3, &aom_highbd_10_mse8x8_sse2), + MseParams(4, 4, &aom_highbd_8_mse16x16_sse2), + MseParams(4, 3, &aom_highbd_8_mse16x8_sse2), + MseParams(3, 4, &aom_highbd_8_mse8x16_sse2), + MseParams(3, 3, &aom_highbd_8_mse8x8_sse2))); +*/ + +INSTANTIATE_TEST_CASE_P( + SSE2, AvxHBDVarianceTest, + ::testing::Values( + VarianceParams(6, 6, &aom_highbd_12_variance64x64_sse2, 12), + VarianceParams(6, 5, &aom_highbd_12_variance64x32_sse2, 12), + VarianceParams(5, 6, &aom_highbd_12_variance32x64_sse2, 12), + VarianceParams(5, 5, &aom_highbd_12_variance32x32_sse2, 12), + VarianceParams(5, 4, &aom_highbd_12_variance32x16_sse2, 12), + VarianceParams(4, 5, &aom_highbd_12_variance16x32_sse2, 12), + VarianceParams(4, 4, &aom_highbd_12_variance16x16_sse2, 12), + VarianceParams(4, 3, &aom_highbd_12_variance16x8_sse2, 12), + VarianceParams(3, 4, &aom_highbd_12_variance8x16_sse2, 12), + VarianceParams(3, 3, &aom_highbd_12_variance8x8_sse2, 12), + VarianceParams(6, 6, &aom_highbd_10_variance64x64_sse2, 10), + VarianceParams(6, 5, &aom_highbd_10_variance64x32_sse2, 10), + VarianceParams(5, 6, &aom_highbd_10_variance32x64_sse2, 10), + VarianceParams(5, 5, &aom_highbd_10_variance32x32_sse2, 10), + VarianceParams(5, 4, &aom_highbd_10_variance32x16_sse2, 10), + VarianceParams(4, 5, &aom_highbd_10_variance16x32_sse2, 10), + VarianceParams(4, 4, &aom_highbd_10_variance16x16_sse2, 10), + VarianceParams(4, 3, &aom_highbd_10_variance16x8_sse2, 10), + VarianceParams(3, 4, &aom_highbd_10_variance8x16_sse2, 10), + VarianceParams(3, 3, &aom_highbd_10_variance8x8_sse2, 10), + VarianceParams(6, 6, 
&aom_highbd_8_variance64x64_sse2, 8), + VarianceParams(6, 5, &aom_highbd_8_variance64x32_sse2, 8), + VarianceParams(5, 6, &aom_highbd_8_variance32x64_sse2, 8), + VarianceParams(5, 5, &aom_highbd_8_variance32x32_sse2, 8), + VarianceParams(5, 4, &aom_highbd_8_variance32x16_sse2, 8), + VarianceParams(4, 5, &aom_highbd_8_variance16x32_sse2, 8), + VarianceParams(4, 4, &aom_highbd_8_variance16x16_sse2, 8), + VarianceParams(4, 3, &aom_highbd_8_variance16x8_sse2, 8), + VarianceParams(3, 4, &aom_highbd_8_variance8x16_sse2, 8), + VarianceParams(3, 3, &aom_highbd_8_variance8x8_sse2, 8))); + +INSTANTIATE_TEST_CASE_P( + SSE2, AvxHBDSubpelVarianceTest, + ::testing::Values( + make_tuple(6, 6, &aom_highbd_12_sub_pixel_variance64x64_sse2, 12), + make_tuple(6, 5, &aom_highbd_12_sub_pixel_variance64x32_sse2, 12), + make_tuple(5, 6, &aom_highbd_12_sub_pixel_variance32x64_sse2, 12), + make_tuple(5, 5, &aom_highbd_12_sub_pixel_variance32x32_sse2, 12), + make_tuple(5, 4, &aom_highbd_12_sub_pixel_variance32x16_sse2, 12), + make_tuple(4, 5, &aom_highbd_12_sub_pixel_variance16x32_sse2, 12), + make_tuple(4, 4, &aom_highbd_12_sub_pixel_variance16x16_sse2, 12), + make_tuple(4, 3, &aom_highbd_12_sub_pixel_variance16x8_sse2, 12), + make_tuple(3, 4, &aom_highbd_12_sub_pixel_variance8x16_sse2, 12), + make_tuple(3, 3, &aom_highbd_12_sub_pixel_variance8x8_sse2, 12), + make_tuple(3, 2, &aom_highbd_12_sub_pixel_variance8x4_sse2, 12), + make_tuple(6, 6, &aom_highbd_10_sub_pixel_variance64x64_sse2, 10), + make_tuple(6, 5, &aom_highbd_10_sub_pixel_variance64x32_sse2, 10), + make_tuple(5, 6, &aom_highbd_10_sub_pixel_variance32x64_sse2, 10), + make_tuple(5, 5, &aom_highbd_10_sub_pixel_variance32x32_sse2, 10), + make_tuple(5, 4, &aom_highbd_10_sub_pixel_variance32x16_sse2, 10), + make_tuple(4, 5, &aom_highbd_10_sub_pixel_variance16x32_sse2, 10), + make_tuple(4, 4, &aom_highbd_10_sub_pixel_variance16x16_sse2, 10), + make_tuple(4, 3, &aom_highbd_10_sub_pixel_variance16x8_sse2, 10), + make_tuple(3, 4, 
&aom_highbd_10_sub_pixel_variance8x16_sse2, 10), + make_tuple(3, 3, &aom_highbd_10_sub_pixel_variance8x8_sse2, 10), + make_tuple(3, 2, &aom_highbd_10_sub_pixel_variance8x4_sse2, 10), + make_tuple(6, 6, &aom_highbd_8_sub_pixel_variance64x64_sse2, 8), + make_tuple(6, 5, &aom_highbd_8_sub_pixel_variance64x32_sse2, 8), + make_tuple(5, 6, &aom_highbd_8_sub_pixel_variance32x64_sse2, 8), + make_tuple(5, 5, &aom_highbd_8_sub_pixel_variance32x32_sse2, 8), + make_tuple(5, 4, &aom_highbd_8_sub_pixel_variance32x16_sse2, 8), + make_tuple(4, 5, &aom_highbd_8_sub_pixel_variance16x32_sse2, 8), + make_tuple(4, 4, &aom_highbd_8_sub_pixel_variance16x16_sse2, 8), + make_tuple(4, 3, &aom_highbd_8_sub_pixel_variance16x8_sse2, 8), + make_tuple(3, 4, &aom_highbd_8_sub_pixel_variance8x16_sse2, 8), + make_tuple(3, 3, &aom_highbd_8_sub_pixel_variance8x8_sse2, 8), + make_tuple(3, 2, &aom_highbd_8_sub_pixel_variance8x4_sse2, 8))); + +INSTANTIATE_TEST_CASE_P( + SSE2, AvxHBDSubpelAvgVarianceTest, + ::testing::Values( + make_tuple(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_sse2, 12), + make_tuple(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_sse2, 12), + make_tuple(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_sse2, 12), + make_tuple(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_sse2, 12), + make_tuple(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_sse2, 12), + make_tuple(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_sse2, 12), + make_tuple(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_sse2, 12), + make_tuple(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_sse2, 12), + make_tuple(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_sse2, 12), + make_tuple(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_sse2, 12), + make_tuple(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_sse2, 12), + make_tuple(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_sse2, 10), + make_tuple(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_sse2, 10), + make_tuple(5, 6, 
&aom_highbd_10_sub_pixel_avg_variance32x64_sse2, 10), + make_tuple(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_sse2, 10), + make_tuple(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_sse2, 10), + make_tuple(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_sse2, 10), + make_tuple(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_sse2, 10), + make_tuple(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_sse2, 10), + make_tuple(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_sse2, 10), + make_tuple(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_sse2, 10), + make_tuple(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_sse2, 10), + make_tuple(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_sse2, 8), + make_tuple(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_sse2, 8), + make_tuple(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_sse2, 8), + make_tuple(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_sse2, 8), + make_tuple(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_sse2, 8), + make_tuple(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_sse2, 8), + make_tuple(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_sse2, 8), + make_tuple(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_sse2, 8), + make_tuple(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_sse2, 8), + make_tuple(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_sse2, 8), + make_tuple(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_sse2, 8))); +#endif // CONFIG_HIGHBITDEPTH +#endif // HAVE_SSE2 + +#if HAVE_SSSE3 +INSTANTIATE_TEST_CASE_P( + SSSE3, AvxSubpelVarianceTest, + ::testing::Values(make_tuple(6, 6, &aom_sub_pixel_variance64x64_ssse3, 0), + make_tuple(6, 5, &aom_sub_pixel_variance64x32_ssse3, 0), + make_tuple(5, 6, &aom_sub_pixel_variance32x64_ssse3, 0), + make_tuple(5, 5, &aom_sub_pixel_variance32x32_ssse3, 0), + make_tuple(5, 4, &aom_sub_pixel_variance32x16_ssse3, 0), + make_tuple(4, 5, &aom_sub_pixel_variance16x32_ssse3, 0), + make_tuple(4, 4, &aom_sub_pixel_variance16x16_ssse3, 0), + make_tuple(4, 3, 
&aom_sub_pixel_variance16x8_ssse3, 0), + make_tuple(3, 4, &aom_sub_pixel_variance8x16_ssse3, 0), + make_tuple(3, 3, &aom_sub_pixel_variance8x8_ssse3, 0), + make_tuple(3, 2, &aom_sub_pixel_variance8x4_ssse3, 0), + make_tuple(2, 3, &aom_sub_pixel_variance4x8_ssse3, 0), + make_tuple(2, 2, &aom_sub_pixel_variance4x4_ssse3, 0))); + +INSTANTIATE_TEST_CASE_P( + SSSE3, AvxSubpelAvgVarianceTest, + ::testing::Values( + make_tuple(6, 6, &aom_sub_pixel_avg_variance64x64_ssse3, 0), + make_tuple(6, 5, &aom_sub_pixel_avg_variance64x32_ssse3, 0), + make_tuple(5, 6, &aom_sub_pixel_avg_variance32x64_ssse3, 0), + make_tuple(5, 5, &aom_sub_pixel_avg_variance32x32_ssse3, 0), + make_tuple(5, 4, &aom_sub_pixel_avg_variance32x16_ssse3, 0), + make_tuple(4, 5, &aom_sub_pixel_avg_variance16x32_ssse3, 0), + make_tuple(4, 4, &aom_sub_pixel_avg_variance16x16_ssse3, 0), + make_tuple(4, 3, &aom_sub_pixel_avg_variance16x8_ssse3, 0), + make_tuple(3, 4, &aom_sub_pixel_avg_variance8x16_ssse3, 0), + make_tuple(3, 3, &aom_sub_pixel_avg_variance8x8_ssse3, 0), + make_tuple(3, 2, &aom_sub_pixel_avg_variance8x4_ssse3, 0), + make_tuple(2, 3, &aom_sub_pixel_avg_variance4x8_ssse3, 0), + make_tuple(2, 2, &aom_sub_pixel_avg_variance4x4_ssse3, 0))); +#endif // HAVE_SSSE3 + +#if HAVE_AVX2 +INSTANTIATE_TEST_CASE_P(AVX2, AvxMseTest, + ::testing::Values(MseParams(4, 4, &aom_mse16x16_avx2))); + +INSTANTIATE_TEST_CASE_P( + AVX2, AvxVarianceTest, + ::testing::Values(VarianceParams(6, 6, &aom_variance64x64_avx2), + VarianceParams(6, 5, &aom_variance64x32_avx2), + VarianceParams(5, 5, &aom_variance32x32_avx2), + VarianceParams(5, 4, &aom_variance32x16_avx2), + VarianceParams(4, 4, &aom_variance16x16_avx2))); + +INSTANTIATE_TEST_CASE_P( + AVX2, AvxSubpelVarianceTest, + ::testing::Values(make_tuple(6, 6, &aom_sub_pixel_variance64x64_avx2, 0), + make_tuple(5, 5, &aom_sub_pixel_variance32x32_avx2, 0))); + +INSTANTIATE_TEST_CASE_P( + AVX2, AvxSubpelAvgVarianceTest, + ::testing::Values( + make_tuple(6, 6, 
&aom_sub_pixel_avg_variance64x64_avx2, 0), + make_tuple(5, 5, &aom_sub_pixel_avg_variance32x32_avx2, 0))); +#endif // HAVE_AVX2 + +#if HAVE_MEDIA +INSTANTIATE_TEST_CASE_P(MEDIA, AvxMseTest, + ::testing::Values(MseParams(4, 4, + &aom_mse16x16_media))); + +INSTANTIATE_TEST_CASE_P( + MEDIA, AvxVarianceTest, + ::testing::Values(VarianceParams(4, 4, &aom_variance16x16_media), + VarianceParams(3, 3, &aom_variance8x8_media))); + +INSTANTIATE_TEST_CASE_P( + MEDIA, AvxSubpelVarianceTest, + ::testing::Values(make_tuple(4, 4, &aom_sub_pixel_variance16x16_media, 0), + make_tuple(3, 3, &aom_sub_pixel_variance8x8_media, 0))); +#endif // HAVE_MEDIA + +#if HAVE_NEON +INSTANTIATE_TEST_CASE_P(NEON, AvxSseTest, + ::testing::Values(SseParams(2, 2, + &aom_get4x4sse_cs_neon))); + +INSTANTIATE_TEST_CASE_P(NEON, AvxMseTest, + ::testing::Values(MseParams(4, 4, &aom_mse16x16_neon))); + +INSTANTIATE_TEST_CASE_P( + NEON, AvxVarianceTest, + ::testing::Values(VarianceParams(6, 6, &aom_variance64x64_neon), + VarianceParams(6, 5, &aom_variance64x32_neon), + VarianceParams(5, 6, &aom_variance32x64_neon), + VarianceParams(5, 5, &aom_variance32x32_neon), + VarianceParams(4, 4, &aom_variance16x16_neon), + VarianceParams(4, 3, &aom_variance16x8_neon), + VarianceParams(3, 4, &aom_variance8x16_neon), + VarianceParams(3, 3, &aom_variance8x8_neon))); + +INSTANTIATE_TEST_CASE_P( + NEON, AvxSubpelVarianceTest, + ::testing::Values(make_tuple(6, 6, &aom_sub_pixel_variance64x64_neon, 0), + make_tuple(5, 5, &aom_sub_pixel_variance32x32_neon, 0), + make_tuple(4, 4, &aom_sub_pixel_variance16x16_neon, 0), + make_tuple(3, 3, &aom_sub_pixel_variance8x8_neon, 0))); +#endif // HAVE_NEON + +#if HAVE_MSA +INSTANTIATE_TEST_CASE_P(MSA, SumOfSquaresTest, + ::testing::Values(aom_get_mb_ss_msa)); + +INSTANTIATE_TEST_CASE_P(MSA, AvxSseTest, + ::testing::Values(SseParams(2, 2, + &aom_get4x4sse_cs_msa))); + +INSTANTIATE_TEST_CASE_P(MSA, AvxMseTest, + ::testing::Values(MseParams(4, 4, &aom_mse16x16_msa), + MseParams(4, 3, 
&aom_mse16x8_msa), + MseParams(3, 4, &aom_mse8x16_msa), + MseParams(3, 3, &aom_mse8x8_msa))); + +INSTANTIATE_TEST_CASE_P( + MSA, AvxVarianceTest, + ::testing::Values(VarianceParams(6, 6, &aom_variance64x64_msa), + VarianceParams(6, 5, &aom_variance64x32_msa), + VarianceParams(5, 6, &aom_variance32x64_msa), + VarianceParams(5, 5, &aom_variance32x32_msa), + VarianceParams(5, 4, &aom_variance32x16_msa), + VarianceParams(4, 5, &aom_variance16x32_msa), + VarianceParams(4, 4, &aom_variance16x16_msa), + VarianceParams(4, 3, &aom_variance16x8_msa), + VarianceParams(3, 4, &aom_variance8x16_msa), + VarianceParams(3, 3, &aom_variance8x8_msa), + VarianceParams(3, 2, &aom_variance8x4_msa), + VarianceParams(2, 3, &aom_variance4x8_msa), + VarianceParams(2, 2, &aom_variance4x4_msa))); + +INSTANTIATE_TEST_CASE_P( + MSA, AvxSubpelVarianceTest, + ::testing::Values(make_tuple(2, 2, &aom_sub_pixel_variance4x4_msa, 0), + make_tuple(2, 3, &aom_sub_pixel_variance4x8_msa, 0), + make_tuple(3, 2, &aom_sub_pixel_variance8x4_msa, 0), + make_tuple(3, 3, &aom_sub_pixel_variance8x8_msa, 0), + make_tuple(3, 4, &aom_sub_pixel_variance8x16_msa, 0), + make_tuple(4, 3, &aom_sub_pixel_variance16x8_msa, 0), + make_tuple(4, 4, &aom_sub_pixel_variance16x16_msa, 0), + make_tuple(4, 5, &aom_sub_pixel_variance16x32_msa, 0), + make_tuple(5, 4, &aom_sub_pixel_variance32x16_msa, 0), + make_tuple(5, 5, &aom_sub_pixel_variance32x32_msa, 0), + make_tuple(5, 6, &aom_sub_pixel_variance32x64_msa, 0), + make_tuple(6, 5, &aom_sub_pixel_variance64x32_msa, 0), + make_tuple(6, 6, &aom_sub_pixel_variance64x64_msa, 0))); + +INSTANTIATE_TEST_CASE_P( + MSA, AvxSubpelAvgVarianceTest, + ::testing::Values(make_tuple(6, 6, &aom_sub_pixel_avg_variance64x64_msa, 0), + make_tuple(6, 5, &aom_sub_pixel_avg_variance64x32_msa, 0), + make_tuple(5, 6, &aom_sub_pixel_avg_variance32x64_msa, 0), + make_tuple(5, 5, &aom_sub_pixel_avg_variance32x32_msa, 0), + make_tuple(5, 4, &aom_sub_pixel_avg_variance32x16_msa, 0), + make_tuple(4, 5, 
&aom_sub_pixel_avg_variance16x32_msa, 0), + make_tuple(4, 4, &aom_sub_pixel_avg_variance16x16_msa, 0), + make_tuple(4, 3, &aom_sub_pixel_avg_variance16x8_msa, 0), + make_tuple(3, 4, &aom_sub_pixel_avg_variance8x16_msa, 0), + make_tuple(3, 3, &aom_sub_pixel_avg_variance8x8_msa, 0), + make_tuple(3, 2, &aom_sub_pixel_avg_variance8x4_msa, 0), + make_tuple(2, 3, &aom_sub_pixel_avg_variance4x8_msa, 0), + make_tuple(2, 2, &aom_sub_pixel_avg_variance4x4_msa, 0))); +#endif // HAVE_MSA +} // namespace diff --git a/third_party/aom/test/video_source.h b/third_party/aom/test/video_source.h new file mode 100644 index 000000000..e986ffb37 --- /dev/null +++ b/third_party/aom/test/video_source.h @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ +#ifndef TEST_VIDEO_SOURCE_H_ +#define TEST_VIDEO_SOURCE_H_ + +#if defined(_WIN32) +#undef NOMINMAX +#define NOMINMAX +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#endif +#include <cstdio> +#include <cstdlib> +#include <string> +#include "test/acm_random.h" +#include "aom/aom_encoder.h" + +namespace libaom_test { + +// Helper macros to ensure LIBAOM_TEST_DATA_PATH is a quoted string. 
+// These are undefined right below GetDataPath
+// NOTE: LIBAOM_TEST_DATA_PATH MUST NOT be a quoted string before
+// Stringification or the GetDataPath will fail at runtime
+#define TO_STRING(S) #S
+#define STRINGIFY(S) TO_STRING(S)
+
+// A simple function to encapsulate cross platform retrieval of test data path.
+// The LIBAOM_TEST_DATA_PATH environment variable wins when it is set;
+// otherwise the LIBAOM_TEST_DATA_PATH preprocessor symbol is used if it is
+// defined, and "." is the last resort.
+static std::string GetDataPath() {
+  const char *const data_path = getenv("LIBAOM_TEST_DATA_PATH");
+  if (data_path == NULL) {
+#ifdef LIBAOM_TEST_DATA_PATH
+    // In some environments, we cannot set environment variables
+    // Instead, we set the data path by using a preprocessor symbol
+    // which can be set from make files
+    return STRINGIFY(LIBAOM_TEST_DATA_PATH);
+#else
+    return ".";
+#endif
+  }
+  return data_path;
+}
+
+// Undefining stringification macros because they are not used elsewhere
+#undef TO_STRING
+#undef STRINGIFY
+
+// Opens "<data path>/<file_name>" for binary reading. The fopen() result is
+// returned unchanged, so callers must check it for NULL.
+inline FILE *OpenTestDataFile(const std::string &file_name) {
+  const std::string path_to_source = GetDataPath() + "/" + file_name;
+  return fopen(path_to_source.c_str(), "rb");
+}
+
+// Creates a temporary file for writing and returns its handle, or NULL on
+// failure. |file_name| is always cleared first. On Windows it receives the
+// generated path; elsewhere tmpfile() is used and |file_name| stays empty.
+static FILE *GetTempOutFile(std::string *file_name) {
+  file_name->clear();
+#if defined(_WIN32)
+  char fname[MAX_PATH];
+  char tmppath[MAX_PATH];
+  if (GetTempPathA(MAX_PATH, tmppath)) {
+    // Assume for now that the filename generated is unique per process
+    if (GetTempFileNameA(tmppath, "lvx", 0, fname)) {
+      file_name->assign(fname);
+      return fopen(fname, "wb+");
+    }
+  }
+  return NULL;
+#else
+  return tmpfile();
+#endif
+}
+
+// Owns a temporary output file for the lifetime of the object. The file is
+// closed on destruction and, when GetTempOutFile() reported a name for it,
+// removed from disk as well.
+class TempOutFile {
+ public:
+  TempOutFile() { file_ = GetTempOutFile(&file_name_); }
+  ~TempOutFile() {
+    CloseFile();
+    if (!file_name_.empty()) {
+      EXPECT_EQ(0, remove(file_name_.c_str()));
+    }
+  }
+  // May be NULL if the temporary file could not be created.
+  FILE *file() { return file_; }
+  // Empty when the file has no name on disk (the tmpfile() path).
+  const std::string &file_name() { return file_name_; }
+
+ protected:
+  // Closes the file if it is open; safe to call more than once.
+  void CloseFile() {
+    if (file_) {
+      fclose(file_);
+      file_ = NULL;
+    }
+  }
+  FILE *file_;
+  std::string file_name_;
+};
+
+// Abstract base class for test video sources, which provide a stream of
+// aom_image_t images with associated timestamps and duration.
+class VideoSource {
+ public:
+  virtual ~VideoSource() {}
+
+  // Prepare the stream for reading, rewind/open as necessary.
+  virtual void Begin() = 0;
+
+  // Advance the cursor to the next frame
+  virtual void Next() = 0;
+
+  // Get the current video frame, or NULL on End-Of-Stream.
+  virtual aom_image_t *img() const = 0;
+
+  // Get the presentation timestamp of the current frame.
+  virtual aom_codec_pts_t pts() const = 0;
+
+  // Get the current frame's duration
+  virtual unsigned long duration() const = 0;
+
+  // Get the timebase for the stream
+  virtual aom_rational_t timebase() const = 0;
+
+  // Get the current frame counter, starting at 0.
+  virtual unsigned int frame() const = 0;
+
+  // Get the current file limit.
+  virtual unsigned int limit() const = 0;
+};
+
+// In-memory source that produces |limit_| all-zero frames (100 frames of
+// 80x64 I420 by default). Subclasses override FillFrame() to change the
+// frame contents.
+class DummyVideoSource : public VideoSource {
+ public:
+  // raw_sz_ and frame_ are initialised here so that img(), frame() and pts()
+  // are well defined even if they are queried before Begin() is called.
+  DummyVideoSource()
+      : img_(NULL), raw_sz_(0), limit_(100), frame_(0), width_(80),
+        height_(64), format_(AOM_IMG_FMT_I420) {
+    ReallocImage();
+  }
+
+  virtual ~DummyVideoSource() { aom_img_free(img_); }
+
+  virtual void Begin() {
+    frame_ = 0;
+    FillFrame();
+  }
+
+  virtual void Next() {
+    ++frame_;
+    FillFrame();
+  }
+
+  // Returns the frame buffer until |limit_| frames have been produced, then
+  // NULL to signal end of stream.
+  virtual aom_image_t *img() const { return (frame_ < limit_) ? img_ : NULL; }
+
+  // Models a stream where Timebase = 1/FPS, so pts == frame.
+  virtual aom_codec_pts_t pts() const { return frame_; }
+
+  virtual unsigned long duration() const { return 1; }
+
+  virtual aom_rational_t timebase() const {
+    const aom_rational_t t = { 1, 30 };
+    return t;
+  }
+
+  virtual unsigned int frame() const { return frame_; }
+
+  virtual unsigned int limit() const { return limit_; }
+
+  void set_limit(unsigned int limit) { limit_ = limit; }
+
+  // Changes the frame dimensions; the image is reallocated only when the
+  // size actually changes.
+  void SetSize(unsigned int width, unsigned int height) {
+    if (width != width_ || height != height_) {
+      width_ = width;
+      height_ = height;
+      ReallocImage();
+    }
+  }
+
+  // Changes the pixel format; the image is reallocated only when the format
+  // actually changes.
+  void SetImageFormat(aom_img_fmt_t format) {
+    if (format_ != format) {
+      format_ = format;
+      ReallocImage();
+    }
+  }
+
+ protected:
+  // Writes the contents of the current frame; the default is all zeros.
+  virtual void FillFrame() {
+    if (img_) memset(img_->img_data, 0, raw_sz_);
+  }
+
+  // Frees the current image and allocates a new one for the current
+  // format/size, then recomputes the byte count that FillFrame() writes.
+  void ReallocImage() {
+    aom_img_free(img_);
+    img_ = aom_img_alloc(NULL, format_, width_, height_, 32);
+    // Allocation can fail; leave raw_sz_ at 0 rather than dereferencing NULL.
+    // FillFrame() already skips a NULL image.
+    raw_sz_ = img_ ? ((img_->w + 31) & ~31) * img_->h * img_->bps / 8 : 0;
+  }
+
+  aom_image_t *img_;
+  size_t raw_sz_;
+  unsigned int limit_;
+  unsigned int frame_;
+  unsigned int width_;
+  unsigned int height_;
+  aom_img_fmt_t format_;
+};
+
+// DummyVideoSource variant that alternates noise and blank frames, driven by
+// a reseedable ACMRandom so the same stream can be regenerated.
+class RandomVideoSource : public DummyVideoSource {
+ public:
+  RandomVideoSource(int seed = ACMRandom::DeterministicSeed())
+      : rnd_(seed), seed_(seed) {}
+
+ protected:
+  // Reset the RNG to get a matching stream for the second pass
+  virtual void Begin() {
+    frame_ = 0;
+    rnd_.Reset(seed_);
+    FillFrame();
+  }
+
+  // 15 frames of noise, followed by 15 static frames. Reset to 0 rather
+  // than holding previous frames to encourage keyframes to be thrown.
+  virtual void FillFrame() {
+    if (img_) {
+      if (frame_ % 30 < 15)
+        for (size_t i = 0; i < raw_sz_; ++i) img_->img_data[i] = rnd_.Rand8();
+      else
+        memset(img_->img_data, 0, raw_sz_);
+    }
+  }
+
+  ACMRandom rnd_;
+  int seed_;
+};
+
+// Abstract base class for test video sources, which provide a stream of
+// compressed frames (see cxdata() / frame_size()) to the decoder.
+class CompressedVideoSource {
+ public:
+  virtual ~CompressedVideoSource() {}
+
+  virtual void Init() = 0;
+
+  // Prepare the stream for reading, rewind/open as necessary.
+  virtual void Begin() = 0;
+
+  // Advance the cursor to the next frame
+  virtual void Next() = 0;
+
+  // Pointer to the current frame's data.
+  virtual const uint8_t *cxdata() const = 0;
+
+  // Size of the current frame's data.
+  virtual size_t frame_size() const = 0;
+
+  // Index of the current frame.
+  virtual unsigned int frame_number() const = 0;
+};
+
+}  // namespace libaom_test
+
+#endif  // TEST_VIDEO_SOURCE_H_
diff --git a/third_party/aom/test/warp_filter_test.cc b/third_party/aom/test/warp_filter_test.cc
new file mode 100644
index 000000000..fd6608bfc
--- /dev/null
+++ b/third_party/aom/test/warp_filter_test.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/warp_filter_test_util.h"
+
+using std::tr1::tuple;
+using std::tr1::make_tuple;
+using libaom_test::ACMRandom;
+using libaom_test::AV1WarpFilter::AV1WarpFilterTest;
+#if CONFIG_HIGHBITDEPTH
+using libaom_test::AV1HighbdWarpFilter::AV1HighbdWarpFilterTest;
+#endif
+
+namespace {
+
+// Compares av1_warp_affine_sse2 against the C implementation for every
+// parameter set returned by GetDefaultParams().
+// NOTE(review): the SIMD symbol is referenced unconditionally; presumably the
+// build only compiles this file when SSE2 is available -- confirm.
+TEST_P(AV1WarpFilterTest, CheckOutput) { RunCheckOutput(av1_warp_affine_sse2); }
+
+INSTANTIATE_TEST_CASE_P(SSE2, AV1WarpFilterTest,
+                        libaom_test::AV1WarpFilter::GetDefaultParams());
+
+#if CONFIG_HIGHBITDEPTH
+// High bit-depth counterpart: checks av1_highbd_warp_affine_ssse3 against the
+// C implementation.
+TEST_P(AV1HighbdWarpFilterTest, CheckOutput) {
+  RunCheckOutput(av1_highbd_warp_affine_ssse3);
+}
+
+INSTANTIATE_TEST_CASE_P(SSSE3, AV1HighbdWarpFilterTest,
+                        libaom_test::AV1HighbdWarpFilter::GetDefaultParams());
+#endif
+
+}  // namespace
diff --git a/third_party/aom/test/warp_filter_test_util.cc b/third_party/aom/test/warp_filter_test_util.cc
new file mode 100644
index 000000000..1ce265b60
--- /dev/null
+++ b/third_party/aom/test/warp_filter_test_util.cc
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "test/warp_filter_test_util.h"
+
+#include <vector>
+
+using std::tr1::tuple;
+using std::tr1::make_tuple;
+using std::vector;
+using libaom_test::ACMRandom;
+using libaom_test::AV1WarpFilter::AV1WarpFilterTest;
+using libaom_test::AV1WarpFilter::WarpTestParam;
+#if CONFIG_HIGHBITDEPTH
+using libaom_test::AV1HighbdWarpFilter::AV1HighbdWarpFilterTest;
+using libaom_test::AV1HighbdWarpFilter::HighbdWarpTestParam;
+#endif
+
+// Default parameter sets: (output width, output height, iteration count).
+::testing::internal::ParamGenerator<WarpTestParam>
+libaom_test::AV1WarpFilter::GetDefaultParams() {
+  const WarpTestParam defaultParams[] = {
+    make_tuple(4, 4, 50000),  make_tuple(8, 8, 50000),
+    make_tuple(64, 64, 1000), make_tuple(4, 16, 20000),
+    make_tuple(32, 8, 10000),
+  };
+  return ::testing::ValuesIn(defaultParams);
+}
+
+AV1WarpFilterTest::~AV1WarpFilterTest() {}
+void AV1WarpFilterTest::SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
+
+void AV1WarpFilterTest::TearDown() { libaom_test::ClearSystemState(); }
+
+// Returns a random model parameter: zero one time in eight, otherwise a
+// non-zero value with a random sign.
+int32_t AV1WarpFilterTest::random_param(int bits) {
+  // 1 in 8 chance of generating zero (arbitrarily chosen)
+  if (((rnd_.Rand8()) & 7) == 0) return 0;
+  // Otherwise, generate uniform values in the range
+  // [-(1 << bits), -1] U [1, 1 << bits]
+  // NOTE(review): Rand16() presumably supplies only 16 random bits, so for
+  // bits > 16 the upper part of this range is never produced -- confirm.
+  int32_t v = 1 + (rnd_.Rand16() & ((1 << bits) - 1));
+  if ((rnd_.Rand8()) & 1) return -v;
+  return v;
+}
+
+// Fills mat[0..5] with a random warp model and derives alpha/beta/gamma/delta
+// from it. Models whose derived parameters fall outside the range accepted
+// below are discarded and regenerated, so this only returns a valid model.
+void AV1WarpFilterTest::generate_model(int32_t *mat, int16_t *alpha,
+                                       int16_t *beta, int16_t *gamma,
+                                       int16_t *delta) {
+  while (1) {
+    mat[0] = random_param(WARPEDMODEL_PREC_BITS + 6);
+    mat[1] = random_param(WARPEDMODEL_PREC_BITS + 6);
+    mat[2] = (random_param(WARPEDMODEL_PREC_BITS - 3)) +
+             (1 << WARPEDMODEL_PREC_BITS);
+    mat[3] = random_param(WARPEDMODEL_PREC_BITS - 3);
+    // 50/50 chance of generating ROTZOOM vs. AFFINE models
+    if (rnd_.Rand8() & 1) {
+      // AFFINE
+      mat[4] = random_param(WARPEDMODEL_PREC_BITS - 3);
+      mat[5] = (random_param(WARPEDMODEL_PREC_BITS - 3)) +
+               (1 << WARPEDMODEL_PREC_BITS);
+    } else {
+      // ROTZOOM: the second row is determined by the first.
+      mat[4] = -mat[3];
+      mat[5] = mat[2];
+    }
+
+    // Calculate the derived parameters and check that they are suitable
+    // for the warp filter.
+    assert(mat[2] != 0);
+
+    *alpha = clamp(mat[2] - (1 << WARPEDMODEL_PREC_BITS), INT16_MIN, INT16_MAX);
+    *beta = clamp(mat[3], INT16_MIN, INT16_MAX);
+    *gamma = clamp(((int64_t)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) / mat[2],
+                   INT16_MIN, INT16_MAX);
+    *delta =
+        clamp(mat[5] - (((int64_t)mat[3] * mat[4] + (mat[2] / 2)) / mat[2]) -
+                  (1 << WARPEDMODEL_PREC_BITS),
+              INT16_MIN, INT16_MAX);
+
+    if ((4 * abs(*alpha) + 7 * abs(*beta) >= (1 << WARPEDMODEL_PREC_BITS)) ||
+        (4 * abs(*gamma) + 4 * abs(*delta) >= (1 << WARPEDMODEL_PREC_BITS)))
+      continue;
+
+    // We have a valid model, so finish
+    return;
+  }
+}
+
+// Runs |test_impl| and av1_warp_affine_c on the same random input and models
+// and asserts that both produce identical output pixels.
+void AV1WarpFilterTest::RunCheckOutput(warp_affine_func test_impl) {
+  const int w = 128, h = 128;
+  const int border = 16;
+  const int stride = w + 2 * border;
+  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
+  const int num_iters = GET_PARAM(2);
+  int i, j, sub_x, sub_y;
+
+  // The buffers are held in vectors so that they are released even when an
+  // ASSERT_* below fails and returns from this function early.
+  std::vector<uint8_t> input_buf(h * stride);
+  uint8_t *const input = &input_buf[0] + border;
+
+  // The warp functions always write rows with widths that are multiples of 8.
+  // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
+  const int output_n = ((out_w + 7) & ~7) * out_h;
+  std::vector<uint8_t> output_buf(output_n);
+  std::vector<uint8_t> output2_buf(output_n);
+  uint8_t *const output = &output_buf[0];
+  uint8_t *const output2 = &output2_buf[0];
+  // generate_model() only fills mat[0..5]; zero the rest so that nothing
+  // uninitialised is handed to the functions under test.
+  int32_t mat[8] = { 0 };
+  int16_t alpha, beta, gamma, delta;
+
+  // Generate an input block and extend its borders horizontally
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) input[i * stride + j] = rnd_.Rand8();
+  for (i = 0; i < h; ++i) {
+    memset(input + i * stride - border, input[i * stride], border);
+    memset(input + i * stride + w, input[i * stride + (w - 1)], border);
+  }
+
+  for (i = 0; i < num_iters; ++i) {
+    for (sub_x = 0; sub_x < 2; ++sub_x)
+      for (sub_y = 0; sub_y < 2; ++sub_y) {
+        generate_model(mat, &alpha, &beta, &gamma, &delta);
+        av1_warp_affine_c(mat, input, w, h, stride, output, 32, 32, out_w,
+                          out_h, out_w, sub_x, sub_y, 0, alpha, beta, gamma,
+                          delta);
+        test_impl(mat, input, w, h, stride, output2, 32, 32, out_w, out_h,
+                  out_w, sub_x, sub_y, 0, alpha, beta, gamma, delta);
+
+        for (j = 0; j < out_w * out_h; ++j)
+          ASSERT_EQ(output[j], output2[j])
+              << "Pixel mismatch at index " << j << " = (" << (j % out_w)
+              << ", " << (j / out_w) << ") on iteration " << i;
+      }
+  }
+}
+
+#if CONFIG_HIGHBITDEPTH
+// Default parameter sets: (output width, output height, iteration count,
+// bit depth).
+::testing::internal::ParamGenerator<HighbdWarpTestParam>
+libaom_test::AV1HighbdWarpFilter::GetDefaultParams() {
+  const HighbdWarpTestParam defaultParams[] = {
+    make_tuple(4, 4, 50000, 8),   make_tuple(8, 8, 50000, 8),
+    make_tuple(64, 64, 1000, 8),  make_tuple(4, 16, 20000, 8),
+    make_tuple(32, 8, 10000, 8),  make_tuple(4, 4, 50000, 10),
+    make_tuple(8, 8, 50000, 10),  make_tuple(64, 64, 1000, 10),
+    make_tuple(4, 16, 20000, 10), make_tuple(32, 8, 10000, 10),
+    make_tuple(4, 4, 50000, 12),  make_tuple(8, 8, 50000, 12),
+    make_tuple(64, 64, 1000, 12), make_tuple(4, 16, 20000, 12),
+    make_tuple(32, 8, 10000, 12),
+  };
+  return ::testing::ValuesIn(defaultParams);
+}
+
+AV1HighbdWarpFilterTest::~AV1HighbdWarpFilterTest() {}
+void AV1HighbdWarpFilterTest::SetUp() {
+  rnd_.Reset(ACMRandom::DeterministicSeed());
+}
+
+void AV1HighbdWarpFilterTest::TearDown() { libaom_test::ClearSystemState(); }
+
+// Returns a random model parameter: zero one time in eight, otherwise a
+// non-zero value with a random sign.
+int32_t AV1HighbdWarpFilterTest::random_param(int bits) {
+  // 1 in 8 chance of generating zero (arbitrarily chosen)
+  if (((rnd_.Rand8()) & 7) == 0) return 0;
+  // Otherwise, generate uniform values in the range
+  // [-(1 << bits), -1] U [1, 1 << bits]
+  // NOTE(review): Rand16() presumably supplies only 16 random bits, so for
+  // bits > 16 the upper part of this range is never produced -- confirm.
+  int32_t v = 1 + (rnd_.Rand16() & ((1 << bits) - 1));
+  if ((rnd_.Rand8()) & 1) return -v;
+  return v;
+}
+
+// Fills mat[0..5] with a random warp model and derives alpha/beta/gamma/delta
+// from it. Models whose derived parameters fall outside the range accepted
+// below are discarded and regenerated, so this only returns a valid model.
+void AV1HighbdWarpFilterTest::generate_model(int32_t *mat, int16_t *alpha,
+                                             int16_t *beta, int16_t *gamma,
+                                             int16_t *delta) {
+  while (1) {
+    mat[0] = random_param(WARPEDMODEL_PREC_BITS + 6);
+    mat[1] = random_param(WARPEDMODEL_PREC_BITS + 6);
+    mat[2] = (random_param(WARPEDMODEL_PREC_BITS - 3)) +
+             (1 << WARPEDMODEL_PREC_BITS);
+    mat[3] = random_param(WARPEDMODEL_PREC_BITS - 3);
+    // 50/50 chance of generating ROTZOOM vs. AFFINE models
+    if (rnd_.Rand8() & 1) {
+      // AFFINE
+      mat[4] = random_param(WARPEDMODEL_PREC_BITS - 3);
+      mat[5] = (random_param(WARPEDMODEL_PREC_BITS - 3)) +
+               (1 << WARPEDMODEL_PREC_BITS);
+    } else {
+      // ROTZOOM: the second row is determined by the first.
+      mat[4] = -mat[3];
+      mat[5] = mat[2];
+    }
+
+    // Calculate the derived parameters and check that they are suitable
+    // for the warp filter.
+    assert(mat[2] != 0);
+
+    *alpha = clamp(mat[2] - (1 << WARPEDMODEL_PREC_BITS), INT16_MIN, INT16_MAX);
+    *beta = clamp(mat[3], INT16_MIN, INT16_MAX);
+    *gamma = clamp(((int64_t)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) / mat[2],
+                   INT16_MIN, INT16_MAX);
+    *delta =
+        clamp(mat[5] - (((int64_t)mat[3] * mat[4] + (mat[2] / 2)) / mat[2]) -
+                  (1 << WARPEDMODEL_PREC_BITS),
+              INT16_MIN, INT16_MAX);
+
+    if ((4 * abs(*alpha) + 7 * abs(*beta) >= (1 << WARPEDMODEL_PREC_BITS)) ||
+        (4 * abs(*gamma) + 4 * abs(*delta) >= (1 << WARPEDMODEL_PREC_BITS)))
+      continue;
+
+    // We have a valid model, so finish
+    return;
+  }
+}
+
+// Runs |test_impl| and av1_highbd_warp_affine_c on the same random input and
+// models and asserts that both produce identical output pixels.
+void AV1HighbdWarpFilterTest::RunCheckOutput(
+    highbd_warp_affine_func test_impl) {
+  const int w = 128, h = 128;
+  const int border = 16;
+  const int stride = w + 2 * border;
+  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
+  const int num_iters = GET_PARAM(2);
+  const int bd = GET_PARAM(3);
+  // Input samples are masked to |bd| bits.
+  const int mask = (1 << bd) - 1;
+  int i, j, sub_x, sub_y;
+
+  // The warp functions always write rows with widths that are multiples of 8.
+  // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
+  const int output_n = ((out_w + 7) & ~7) * out_h;
+  // The buffers are held in vectors so that they are released even when an
+  // ASSERT_* below fails and returns from this function early.
+  std::vector<uint16_t> input_buf(h * stride);
+  std::vector<uint16_t> output_buf(output_n);
+  std::vector<uint16_t> output2_buf(output_n);
+  uint16_t *const input = &input_buf[0] + border;
+  uint16_t *const output = &output_buf[0];
+  uint16_t *const output2 = &output2_buf[0];
+  // generate_model() only fills mat[0..5]; zero the rest so that nothing
+  // uninitialised is handed to the functions under test.
+  int32_t mat[8] = { 0 };
+  int16_t alpha, beta, gamma, delta;
+
+  // Generate an input block and extend its borders horizontally
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) input[i * stride + j] = rnd_.Rand16() & mask;
+  for (i = 0; i < h; ++i) {
+    for (j = 0; j < border; ++j) {
+      input[i * stride - border + j] = input[i * stride];
+      input[i * stride + w + j] = input[i * stride + (w - 1)];
+    }
+  }
+
+  for (i = 0; i < num_iters; ++i) {
+    for (sub_x = 0; sub_x < 2; ++sub_x)
+      for (sub_y = 0; sub_y < 2; ++sub_y) {
+        generate_model(mat, &alpha, &beta, &gamma, &delta);
+
+        av1_highbd_warp_affine_c(mat, input, w, h, stride, output, 32, 32,
+                                 out_w, out_h, out_w, sub_x, sub_y, bd, 0,
+                                 alpha, beta, gamma, delta);
+        test_impl(mat, input, w, h, stride, output2, 32, 32, out_w, out_h,
+                  out_w, sub_x, sub_y, bd, 0, alpha, beta, gamma, delta);
+
+        for (j = 0; j < out_w * out_h; ++j)
+          ASSERT_EQ(output[j], output2[j])
+              << "Pixel mismatch at index " << j << " = (" << (j % out_w)
+              << ", " << (j / out_w) << ") on iteration " << i;
+      }
+  }
+}
+#endif  // CONFIG_HIGHBITDEPTH
diff --git a/third_party/aom/test/warp_filter_test_util.h b/third_party/aom/test/warp_filter_test_util.h
new file mode 100644
index 000000000..6a87e46d0
--- /dev/null
+++ b/third_party/aom/test/warp_filter_test_util.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software.
If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef TEST_WARP_FILTER_TEST_UTIL_H_
+#define TEST_WARP_FILTER_TEST_UTIL_H_
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+#include "./av1_rtcd.h"
+#include "./aom_dsp_rtcd.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+
+#include "av1/common/mv.h"
+
+namespace libaom_test {
+
+namespace AV1WarpFilter {
+
+// Signature shared by the C and SIMD 8-bit warp implementations under test.
+typedef void (*warp_affine_func)(int32_t *mat, uint8_t *ref, int width,
+                                 int height, int stride, uint8_t *pred,
+                                 int p_col, int p_row, int p_width,
+                                 int p_height, int p_stride, int subsampling_x,
+                                 int subsampling_y, int ref_frm, int16_t alpha,
+                                 int16_t beta, int16_t gamma, int16_t delta);
+
+// Test parameters: (output width, output height, number of iterations).
+typedef std::tr1::tuple<int, int, int> WarpTestParam;
+
+// Returns the default set of parameters to instantiate the test with.
+::testing::internal::ParamGenerator<WarpTestParam> GetDefaultParams();
+
+// Fixture that compares a warp implementation against the C reference.
+class AV1WarpFilterTest : public ::testing::TestWithParam<WarpTestParam> {
+ public:
+  virtual ~AV1WarpFilterTest();
+  // Reseeds rnd_ with the deterministic seed.
+  virtual void SetUp();
+
+  virtual void TearDown();
+
+ protected:
+  // Returns a random model parameter (zero one time in eight).
+  int32_t random_param(int bits);
+  // Fills mat[0..5] and the derived alpha/beta/gamma/delta with a random
+  // model, retrying until the derived values are accepted.
+  void generate_model(int32_t *mat, int16_t *alpha, int16_t *beta,
+                      int16_t *gamma, int16_t *delta);
+
+  // Asserts that |test_impl| matches av1_warp_affine_c pixel for pixel.
+  void RunCheckOutput(warp_affine_func test_impl);
+
+  libaom_test::ACMRandom rnd_;
+};
+
+}  // namespace AV1WarpFilter
+
+#if CONFIG_HIGHBITDEPTH
+namespace AV1HighbdWarpFilter {
+// Signature shared by the C and SIMD high bit-depth warp implementations;
+// differs from the 8-bit one by the 16-bit sample type and the |bd| argument.
+typedef void (*highbd_warp_affine_func)(
+    int32_t *mat, uint16_t *ref, int width, int height, int stride,
+    uint16_t *pred, int p_col, int p_row, int p_width, int p_height,
+    int p_stride, int subsampling_x, int subsampling_y, int bd, int ref_frm,
+    int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
+
+// Test parameters: (output width, output height, number of iterations,
+// bit depth).
+typedef std::tr1::tuple<int, int, int, int> HighbdWarpTestParam;
+
+// Returns the default set of parameters to instantiate the test with.
+::testing::internal::ParamGenerator<HighbdWarpTestParam> GetDefaultParams();
+
+// Fixture that compares a high bit-depth warp implementation against the C
+// reference.
+class AV1HighbdWarpFilterTest
+    : public ::testing::TestWithParam<HighbdWarpTestParam> {
+ public:
+  virtual ~AV1HighbdWarpFilterTest();
+  // Reseeds rnd_ with the deterministic seed.
+  virtual void SetUp();
+
+  virtual void TearDown();
+
+ protected:
+  // Returns a random model parameter (zero one time in eight).
+  int32_t random_param(int bits);
+  // Fills mat[0..5] and the derived alpha/beta/gamma/delta with a random
+  // model, retrying until the derived values are accepted.
+  void generate_model(int32_t *mat, int16_t *alpha, int16_t *beta,
+                      int16_t *gamma, int16_t *delta);
+
+  // Asserts that |test_impl| matches av1_highbd_warp_affine_c pixel for
+  // pixel.
+  void RunCheckOutput(highbd_warp_affine_func test_impl);
+
+  libaom_test::ACMRandom rnd_;
+};
+
+}  // namespace AV1HighbdWarpFilter
+#endif  // CONFIG_HIGHBITDEPTH
+
+}  // namespace libaom_test
+
+#endif  // TEST_WARP_FILTER_TEST_UTIL_H_
diff --git a/third_party/aom/test/webm_video_source.h b/third_party/aom/test/webm_video_source.h
new file mode 100644
index 000000000..286f69cbf
--- /dev/null
+++ b/third_party/aom/test/webm_video_source.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef TEST_WEBM_VIDEO_SOURCE_H_
+#define TEST_WEBM_VIDEO_SOURCE_H_
+#include <cstdarg>
+#include <cstdio>
+#include <cstdlib>
+#include <new>
+#include <string>
+#include "../tools_common.h"
+#include "../webmdec.h"
+#include "test/video_source.h"
+
+namespace libaom_test {
+
+// This class extends CompressedVideoSource to allow parsing of WebM files,
+// so that we can do actual file decodes.
+class WebMVideoSource : public CompressedVideoSource {
+ public:
+  // Only records |file_name| and allocates the parser contexts; the file
+  // itself is opened by Begin().
+  explicit WebMVideoSource(const std::string &file_name)
+      : file_name_(file_name), aom_ctx_(new AvxInputContext()),
+        webm_ctx_(new WebmInputContext()), buf_(NULL), buf_sz_(0), frame_(0),
+        end_of_file_(false) {}
+
+  // Closes the input file if one is open, then releases both contexts.
+  virtual ~WebMVideoSource() {
+    if (aom_ctx_->file != NULL) {
+      fclose(aom_ctx_->file);
+    }
+    webm_free(webm_ctx_);
+    delete aom_ctx_;
+    delete webm_ctx_;
+  }
+
+  // Nothing to set up ahead of Begin().
+  virtual void Init() {}
+
+  // Opens the test data file, verifies that it is WebM and reads the first
+  // frame.
+  virtual void Begin() {
+    aom_ctx_->file = OpenTestDataFile(file_name_);
+    ASSERT_TRUE(aom_ctx_->file != NULL) << "Input file open failed. Filename: "
+                                        << file_name_;
+
+    ASSERT_EQ(file_is_webm(webm_ctx_, aom_ctx_), 1) << "file is not WebM";
+
+    FillFrame();
+  }
+
+  // Moves on to the following frame.
+  virtual void Next() {
+    ++frame_;
+    FillFrame();
+  }
+
+  // Reads one frame into buf_/buf_sz_. A status of 1 from webm_read_frame()
+  // is recorded as end of file; a negative status fails the test.
+  void FillFrame() {
+    ASSERT_TRUE(aom_ctx_->file != NULL);
+    const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_);
+    ASSERT_GE(status, 0) << "webm_read_frame failed";
+    if (status == 1) end_of_file_ = true;
+  }
+
+  // Keeps reading frames (at least one) until the parser reports a key frame
+  // or the end of the file is reached. frame_ is advanced for every read.
+  void SeekToNextKeyFrame() {
+    ASSERT_TRUE(aom_ctx_->file != NULL);
+    for (;;) {
+      const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_);
+      ASSERT_GE(status, 0) << "webm_read_frame failed";
+      ++frame_;
+      if (status == 1) end_of_file_ = true;
+      if (webm_ctx_->is_key_frame || end_of_file_) break;
+    }
+  }
+
+  // Current frame data, or NULL once the end of the file has been seen.
+  virtual const uint8_t *cxdata() const {
+    if (end_of_file_) return NULL;
+    return buf_;
+  }
+  virtual size_t frame_size() const { return buf_sz_; }
+  virtual unsigned int frame_number() const { return frame_; }
+
+ protected:
+  std::string file_name_;
+  AvxInputContext *aom_ctx_;
+  WebmInputContext *webm_ctx_;
+  uint8_t *buf_;
+  size_t buf_sz_;
+  unsigned int frame_;
+  bool end_of_file_;
+};
+
+}  // namespace libaom_test
+
+#endif  // TEST_WEBM_VIDEO_SOURCE_H_
diff --git a/third_party/aom/test/y4m_test.cc b/third_party/aom/test/y4m_test.cc
new file mode 100644
index 000000000..fc9fff514
--- /dev/null
+++ b/third_party/aom/test/y4m_test.cc
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <string>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "./y4menc.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+
+namespace {
+
+using std::string;
+
+static const unsigned int kWidth = 160;
+static const unsigned int kHeight = 90;
+static const unsigned int kFrames = 10;
+
+// One test vector: the input file plus the properties expected of it.
+struct Y4mTestParam {
+  const char *filename;
+  unsigned int bit_depth;
+  aom_img_fmt format;
+  // Expected MD5 over the first kFrames frames.
+  const char *md5raw;
+};
+
+const Y4mTestParam kY4mTestVectors[] = {
+  { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420,
+    "e5406275b9fc6bb3436c31d4a05c1cab" },
+  { "park_joy_90p_8_422.y4m", 8, AOM_IMG_FMT_I422,
+    "284a47a47133b12884ec3a14e959a0b6" },
+  { "park_joy_90p_8_444.y4m", 8, AOM_IMG_FMT_I444,
+    "90517ff33843d85de712fd4fe60dbed0" },
+  { "park_joy_90p_10_420.y4m", 10, AOM_IMG_FMT_I42016,
+    "63f21f9f717d8b8631bd2288ee87137b" },
+  { "park_joy_90p_10_422.y4m", 10, AOM_IMG_FMT_I42216,
+    "48ab51fb540aed07f7ff5af130c9b605" },
+  { "park_joy_90p_10_444.y4m", 10, AOM_IMG_FMT_I44416,
+    "067bfd75aa85ff9bae91fa3e0edd1e3e" },
+  { "park_joy_90p_12_420.y4m", 12, AOM_IMG_FMT_I42016,
+    "9e6d8f6508c6e55625f6b697bc461cef" },
+  { "park_joy_90p_12_422.y4m", 12, AOM_IMG_FMT_I42216,
+    "b239c6b301c0b835485be349ca83a7e3" },
+  { "park_joy_90p_12_444.y4m", 12, AOM_IMG_FMT_I44416,
+    "5a6481a550821dab6d0192f5c63845e9" },
+};
+
+// Writes the three planes of |img| to |file| row by row, without the stride
+// padding. Chroma planes use the dimensions implied by the chroma shifts.
+// A short write is reported as a fatal test failure instead of being ignored.
+static void write_image_file(const aom_image_t *img, FILE *file) {
+  int plane, y;
+  for (plane = 0; plane < 3; ++plane) {
+    const unsigned char *buf = img->planes[plane];
+    const int stride = img->stride[plane];
+    const int bytes_per_sample = (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1;
+    const int h =
+        (plane ? (img->d_h + img->y_chroma_shift) >> img->y_chroma_shift
+               : img->d_h);
+    const int w =
+        (plane ? (img->d_w + img->x_chroma_shift) >> img->x_chroma_shift
+               : img->d_w);
+    for (y = 0; y < h; ++y) {
+      ASSERT_EQ(static_cast<size_t>(w), fwrite(buf, bytes_per_sample, w, file))
+          << "Short write on plane " << plane << ", row " << y;
+      buf += stride;
+    }
+  }
+}
+
+// Reads a y4m test vector and checks its header fields and frame MD5.
+class Y4mVideoSourceTest : public ::testing::TestWithParam<Y4mTestParam>,
+                           public ::libaom_test::Y4mVideoSource {
+ protected:
+  Y4mVideoSourceTest() : Y4mVideoSource("", 0, 0) {}
+
+  virtual ~Y4mVideoSourceTest() { CloseSource(); }
+
+  // Points the source at |file_name|, limits it to |limit| frames and opens
+  // it.
+  virtual void Init(const std::string &file_name, int limit) {
+    file_name_ = file_name;
+    start_ = 0;
+    limit_ = limit;
+    frame_ = 0;
+    Begin();
+  }
+
+  // Checks y4m header information
+  void HeaderChecks(unsigned int bit_depth, aom_img_fmt_t fmt) {
+    ASSERT_TRUE(input_file_ != NULL);
+    ASSERT_EQ(y4m_.pic_w, (int)kWidth);
+    ASSERT_EQ(y4m_.pic_h, (int)kHeight);
+    ASSERT_EQ(img()->d_w, kWidth);
+    ASSERT_EQ(img()->d_h, kHeight);
+    ASSERT_EQ(y4m_.bit_depth, bit_depth);
+    ASSERT_EQ(y4m_.aom_fmt, fmt);
+    if (fmt == AOM_IMG_FMT_I420 || fmt == AOM_IMG_FMT_I42016) {
+      ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 3 / 2);
+      ASSERT_EQ(img()->x_chroma_shift, 1U);
+      ASSERT_EQ(img()->y_chroma_shift, 1U);
+    }
+    if (fmt == AOM_IMG_FMT_I422 || fmt == AOM_IMG_FMT_I42216) {
+      ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 2);
+      ASSERT_EQ(img()->x_chroma_shift, 1U);
+      ASSERT_EQ(img()->y_chroma_shift, 0U);
+    }
+    if (fmt == AOM_IMG_FMT_I444 || fmt == AOM_IMG_FMT_I44416) {
+      ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 3);
+      ASSERT_EQ(img()->x_chroma_shift, 0U);
+      ASSERT_EQ(img()->y_chroma_shift, 0U);
+    }
+  }
+
+  // Checks MD5 of the raw frame data
+  void Md5Check(const string &expected_md5) {
+    ASSERT_TRUE(input_file_ != NULL);
+    libaom_test::MD5 md5;
+    for (unsigned int i = start_; i < limit_; i++) {
+      md5.Add(img());
+      Next();
+    }
+    ASSERT_EQ(string(md5.Get()), expected_md5);
+  }
+};
+
+TEST_P(Y4mVideoSourceTest, SourceTest) {
+  const Y4mTestParam t = GetParam();
+  Init(t.filename, kFrames);
+  HeaderChecks(t.bit_depth, t.format);
+  Md5Check(t.md5raw);
+}
+
+INSTANTIATE_TEST_CASE_P(C, Y4mVideoSourceTest,
+                        ::testing::ValuesIn(kY4mTestVectors));
+
+// Same checks as Y4mVideoSourceTest, but performed on a copy of the input
+// that has been written out with the y4m writer and read back.
+class Y4mVideoWriteTest : public Y4mVideoSourceTest {
+ protected:
+  Y4mVideoWriteTest() : tmpfile_(NULL) {}
+
+  virtual ~Y4mVideoWriteTest() {
+    // tmpfile_ owns and closes its FILE, so drop our alias to it before the
+    // base class runs CloseSource(). If the write path stopped before
+    // ReplaceInputFile(), input_file_ still refers to the original source and
+    // is left alone so that the base class closes it.
+    if (tmpfile_ != NULL && input_file_ == tmpfile_->file()) {
+      input_file_ = NULL;
+    }
+    delete tmpfile_;
+  }
+
+  // Closes the current source and restarts reading from |input_file|.
+  void ReplaceInputFile(FILE *input_file) {
+    CloseSource();
+    frame_ = 0;
+    input_file_ = input_file;
+    rewind(input_file_);
+    ReadSourceToStart();
+  }
+
+  // Writes out a y4m file and then reads it back
+  void WriteY4mAndReadBack() {
+    ASSERT_TRUE(input_file_ != NULL);
+    char buf[Y4M_BUFFER_SIZE] = { 0 };
+    const struct AvxRational framerate = { y4m_.fps_n, y4m_.fps_d };
+    tmpfile_ = new libaom_test::TempOutFile;
+    ASSERT_TRUE(tmpfile_->file() != NULL);
+    y4m_write_file_header(buf, sizeof(buf), kWidth, kHeight, &framerate,
+                          y4m_.aom_fmt, y4m_.bit_depth);
+    fputs(buf, tmpfile_->file());
+    for (unsigned int i = start_; i < limit_; i++) {
+      y4m_write_frame_header(buf, sizeof(buf));
+      fputs(buf, tmpfile_->file());
+      write_image_file(img(), tmpfile_->file());
+      Next();
+    }
+    ReplaceInputFile(tmpfile_->file());
+  }
+
+  // Opens the original vector, then swaps in the written-and-reread copy.
+  virtual void Init(const std::string &file_name, int limit) {
+    Y4mVideoSourceTest::Init(file_name, limit);
+    WriteY4mAndReadBack();
+  }
+  libaom_test::TempOutFile *tmpfile_;
+};
+
+TEST_P(Y4mVideoWriteTest, WriteTest) {
+  const Y4mTestParam t = GetParam();
+  Init(t.filename, kFrames);
+  HeaderChecks(t.bit_depth, t.format);
+  Md5Check(t.md5raw);
+}
+
+INSTANTIATE_TEST_CASE_P(C, Y4mVideoWriteTest,
+                        ::testing::ValuesIn(kY4mTestVectors));
+}  // namespace
diff --git a/third_party/aom/test/y4m_video_source.h b/third_party/aom/test/y4m_video_source.h
new file mode 100644
index 000000000..2279d7970
--- /dev/null
+++ b/third_party/aom/test/y4m_video_source.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0.
If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef TEST_Y4M_VIDEO_SOURCE_H_
+#define TEST_Y4M_VIDEO_SOURCE_H_
+#include <algorithm>
+#include <string>
+
+#include "test/video_source.h"
+#include "./y4minput.h"
+
+namespace libaom_test {
+
+// VideoSource backed by a y4m file from the test data directory, so that
+// tests can run encodes on real input.
+class Y4mVideoSource : public VideoSource {
+ public:
+  // |start| is the number of leading frames to skip and |limit| the frame
+  // index at which img() starts reporting end of stream.
+  Y4mVideoSource(const std::string &file_name, unsigned int start, int limit)
+      : file_name_(file_name), input_file_(NULL), img_(new aom_image_t()),
+        start_(start), limit_(limit), frame_(0), framerate_numerator_(0),
+        framerate_denominator_(0), y4m_() {}
+
+  virtual ~Y4mVideoSource() {
+    aom_img_free(img_.get());
+    CloseSource();
+  }
+
+  // (Re)opens the test data file, closing any previously opened source.
+  virtual void OpenSource() {
+    CloseSource();
+    input_file_ = OpenTestDataFile(file_name_);
+    ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
+                                     << file_name_;
+  }
+
+  // Parses the y4m header, records the frame rate, then skips |start_|
+  // frames and loads the frame that follows them.
+  virtual void ReadSourceToStart() {
+    ASSERT_TRUE(input_file_ != NULL);
+    ASSERT_FALSE(y4m_input_open(&y4m_, input_file_, NULL, 0, 0));
+    framerate_numerator_ = y4m_.fps_n;
+    framerate_denominator_ = y4m_.fps_d;
+    frame_ = 0;
+    unsigned int skipped = 0;
+    while (skipped < start_) {
+      Next();
+      skipped++;
+    }
+    FillFrame();
+  }
+
+  virtual void Begin() {
+    OpenSource();
+    ReadSourceToStart();
+  }
+
+  virtual void Next() {
+    ++frame_;
+    FillFrame();
+  }
+
+  // Current frame, or NULL once |limit_| frames have been consumed.
+  virtual aom_image_t *img() const {
+    if (frame_ < limit_) return img_.get();
+    return NULL;
+  }
+
+  // Models a stream where Timebase = 1/FPS, so pts == frame.
+  virtual aom_codec_pts_t pts() const { return frame_; }
+
+  virtual unsigned long duration() const { return 1; }
+
+  // The timebase is the reciprocal of the frame rate read from the header.
+  virtual aom_rational_t timebase() const {
+    const aom_rational_t t = { framerate_denominator_, framerate_numerator_ };
+    return t;
+  }
+
+  virtual unsigned int frame() const { return frame_; }
+
+  virtual unsigned int limit() const { return limit_; }
+
+  // Fetches the next frame of the file into img_.
+  virtual void FillFrame() {
+    ASSERT_TRUE(input_file_ != NULL);
+    // Read a frame from input_file.
+    y4m_input_fetch_frame(&y4m_, input_file_, img_.get());
+  }
+
+  // Swap buffers with another y4m source. This allows reading a new frame
+  // while keeping the old frame around. A whole Y4mSource is required and
+  // not just a aom_image_t because of how the y4m reader manipulates
+  // aom_image_t internals,
+  void SwapBuffers(Y4mVideoSource *other) {
+    std::swap(other->y4m_.dst_buf, y4m_.dst_buf);
+    aom_image_t *const theirs = other->img_.release();
+    aom_image_t *const mine = img_.release();
+    other->img_.reset(mine);
+    img_.reset(theirs);
+  }
+
+ protected:
+  // Releases the y4m reader state, resets it to a default-constructed value
+  // and closes the input file if one is open.
+  void CloseSource() {
+    y4m_input_close(&y4m_);
+    y4m_ = y4m_input();
+    if (input_file_ == NULL) return;
+    fclose(input_file_);
+    input_file_ = NULL;
+  }
+
+  std::string file_name_;
+  FILE *input_file_;
+  testing::internal::scoped_ptr<aom_image_t> img_;
+  unsigned int start_;
+  unsigned int limit_;
+  unsigned int frame_;
+  int framerate_numerator_;
+  int framerate_denominator_;
+  y4m_input y4m_;
+};
+
+}  // namespace libaom_test
+
+#endif  // TEST_Y4M_VIDEO_SOURCE_H_
diff --git a/third_party/aom/test/yuv_video_source.h b/third_party/aom/test/yuv_video_source.h
new file mode 100644
index 000000000..9ff76a8d8
--- /dev/null
+++ b/third_party/aom/test/yuv_video_source.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0.
If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef TEST_YUV_VIDEO_SOURCE_H_
+#define TEST_YUV_VIDEO_SOURCE_H_
+
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+
+#include "test/video_source.h"
+#include "aom/aom_image.h"
+
+namespace libaom_test {
+
+// This class extends VideoSource to allow parsing of raw YUV
+// formats of various color sampling and bit-depths so that we can
+// do actual file encodes.
+class YUVVideoSource : public VideoSource {
+ public:
+  // |start| is the index of the first frame to read and |limit| the frame
+  // index at which img() starts reporting end of stream.
+  YUVVideoSource(const std::string &file_name, aom_img_fmt format,
+                 unsigned int width, unsigned int height, int rate_numerator,
+                 int rate_denominator, unsigned int start, int limit)
+      : file_name_(file_name), input_file_(NULL), img_(NULL), raw_size_(0),
+        start_(start), limit_(limit), frame_(0), width_(0), height_(0),
+        format_(AOM_IMG_FMT_NONE), framerate_numerator_(rate_numerator),
+        framerate_denominator_(rate_denominator) {
+    // This initializes format_, raw_size_, width_, height_ and allocates img.
+    // raw_size_ is zeroed above so it is defined even if SetSize() bails out.
+    SetSize(width, height, format);
+  }
+
+  virtual ~YUVVideoSource() {
+    aom_img_free(img_);
+    if (input_file_) fclose(input_file_);
+  }
+
+  // (Re)opens the file, seeks to frame |start_| and loads it.
+  virtual void Begin() {
+    if (input_file_) fclose(input_file_);
+    input_file_ = OpenTestDataFile(file_name_);
+    ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
+                                     << file_name_;
+    if (start_) {
+      // Widen before multiplying so the byte offset is not truncated to the
+      // range of unsigned int, and fail loudly if the seek does not succeed.
+      const long offset =
+          static_cast<long>(raw_size_) * static_cast<long>(start_);
+      ASSERT_EQ(0, fseek(input_file_, offset, SEEK_SET))
+          << "Seek to start frame failed. Filename: " << file_name_;
+    }
+
+    frame_ = start_;
+    FillFrame();
+  }
+
+  virtual void Next() {
+    ++frame_;
+    FillFrame();
+  }
+
+  // Current frame, or NULL once |limit_| has been reached.
+  virtual aom_image_t *img() const { return (frame_ < limit_) ? img_ : NULL; }
+
+  // Models a stream where Timebase = 1/FPS, so pts == frame.
+  virtual aom_codec_pts_t pts() const { return frame_; }
+
+  virtual unsigned long duration() const { return 1; }
+
+  // The timebase is the reciprocal of the frame rate given at construction.
+  virtual aom_rational_t timebase() const {
+    const aom_rational_t t = { framerate_denominator_, framerate_numerator_ };
+    return t;
+  }
+
+  virtual unsigned int frame() const { return frame_; }
+
+  virtual unsigned int limit() const { return limit_; }
+
+  // Reallocates the image and recomputes the per-frame byte count when the
+  // size or format changes. Unsupported formats fail the test.
+  virtual void SetSize(unsigned int width, unsigned int height,
+                       aom_img_fmt format) {
+    if (width != width_ || height != height_ || format != format_) {
+      aom_img_free(img_);
+      img_ = aom_img_alloc(NULL, format, width, height, 1);
+      ASSERT_TRUE(img_ != NULL);
+      width_ = width;
+      height_ = height;
+      format_ = format;
+      switch (format) {
+        case AOM_IMG_FMT_I420: raw_size_ = width * height * 3 / 2; break;
+        case AOM_IMG_FMT_I422: raw_size_ = width * height * 2; break;
+        case AOM_IMG_FMT_I440: raw_size_ = width * height * 2; break;
+        case AOM_IMG_FMT_I444: raw_size_ = width * height * 3; break;
+        case AOM_IMG_FMT_I42016: raw_size_ = width * height * 3; break;
+        case AOM_IMG_FMT_I42216: raw_size_ = width * height * 4; break;
+        case AOM_IMG_FMT_I44016: raw_size_ = width * height * 4; break;
+        case AOM_IMG_FMT_I44416: raw_size_ = width * height * 6; break;
+        default: ASSERT_TRUE(0);
+      }
+    }
+  }
+
+  // Reads one frame into img_. When a whole frame cannot be read, |limit_|
+  // is pulled down to the current frame so that img() reports end of stream.
+  virtual void FillFrame() {
+    ASSERT_TRUE(input_file_ != NULL);
+    // The allocation in SetSize() may have failed; do not dereference NULL.
+    ASSERT_TRUE(img_ != NULL);
+    // Read a frame from input_file.
+    if (fread(img_->img_data, raw_size_, 1, input_file_) == 0) {
+      limit_ = frame_;
+    }
+  }
+
+ protected:
+  std::string file_name_;
+  FILE *input_file_;
+  aom_image_t *img_;
+  size_t raw_size_;
+  unsigned int start_;
+  unsigned int limit_;
+  unsigned int frame_;
+  unsigned int width_;
+  unsigned int height_;
+  aom_img_fmt format_;
+  int framerate_numerator_;
+  int framerate_denominator_;
+};
+
+}  // namespace libaom_test
+
+#endif  // TEST_YUV_VIDEO_SOURCE_H_ |