142 files changed, 31198 insertions, 0 deletions
diff --git a/third_party/aom/test/accounting_test.cc b/third_party/aom/test/accounting_test.cc
new file mode 100644
index 000000000..e8387d0dc
--- /dev/null
+++ b/third_party/aom/test/accounting_test.cc
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/bitreader.h"
+#include "aom_dsp/bitwriter.h"
+
+using libaom_test::ACMRandom;
+
+// Checks aom_reader symbol accounting: coalescing, reset and bit totals.
+TEST(AV1, TestAccounting) {
+  const int kSymbols = 1024;
+  const int kBufferSize = 10000;
+  uint8_t coded[kBufferSize];
+  aom_writer writer;
+  aom_start_encode(&writer, coded);
+  // Three zero bits per symbol, enough for the three reads per symbol below.
+  for (int n = 0; n < 3 * kSymbols; ++n) {
+    aom_write(&writer, 0, 32);
+  }
+  aom_stop_encode(&writer);
+  aom_reader reader;
+#if CONFIG_ANS && ANS_MAX_SYMBOLS
+  reader.window_size = 1 << 16;
+#endif
+  aom_reader_init(&reader, coded, writer.pos, NULL, NULL);
+
+  Accounting stats;
+  aom_accounting_init(&stats);
+  reader.accounting = &stats;
+  for (int n = 0; n < kSymbols; ++n) {
+    aom_read(&reader, 32, "A");
+  }
+  // Consecutive symbols that are the same are coalesced.
+  GTEST_ASSERT_EQ(stats.syms.num_syms, 1);
+  GTEST_ASSERT_EQ(stats.syms.syms[0].samples, (unsigned int)kSymbols);
+
+  aom_accounting_reset(&stats);
+  GTEST_ASSERT_EQ(stats.syms.num_syms, 0);
+
+  // Should record 2 * kSymbols accounting symbols.
+  aom_reader_init(&reader, coded, writer.pos, NULL, NULL);
+  reader.accounting = &stats;
+  for (int n = 0; n < kSymbols; ++n) {
+    aom_read(&reader, 32, "A");
+    aom_read(&reader, 32, "B");
+    aom_read(&reader, 32, "B");
+  }
+  GTEST_ASSERT_EQ(stats.syms.num_syms, kSymbols * 2);
+  // The recorded per-symbol bit counts must add up to the reader position.
+  uint32_t unaccounted = aom_reader_tell_frac(&reader);
+  for (int n = 0; n < stats.syms.num_syms; ++n) {
+    unaccounted -= stats.syms.syms[n].bits;
+  }
+  GTEST_ASSERT_EQ(unaccounted, 0U);
+
+  GTEST_ASSERT_EQ(aom_accounting_dictionary_lookup(&stats, "A"),
+                  aom_accounting_dictionary_lookup(&stats, "A"));
+  // Check for collisions. The current aom_accounting_hash function returns
+  // the same hash code for AB and BA.
+  GTEST_ASSERT_NE(aom_accounting_dictionary_lookup(&stats, "AB"),
+                  aom_accounting_dictionary_lookup(&stats, "BA"));
+}
diff --git a/third_party/aom/test/acm_random.h b/third_party/aom/test/acm_random.h
new file mode 100644
index 000000000..4842345ff
--- /dev/null
+++ b/third_party/aom/test/acm_random.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef TEST_ACM_RANDOM_H_
+#define TEST_ACM_RANDOM_H_
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "aom/aom_integer.h"
+
+namespace libaom_test {
+
+// Deterministic pseudo-random source for tests, backed by gtest's
+// testing::internal::Random.
+class ACMRandom {
+ public:
+  ACMRandom() : random_(DeterministicSeed()) {}
+  explicit ACMRandom(int seed) : random_(seed) {}
+
+  static int DeterministicSeed() { return 0xbaba; }
+
+  // Restarts the sequence from |seed|.
+  void Reset(int seed) { random_.Reseed(seed); }
+
+  // Forwards to Random::Generate(range).
+  int PseudoUniform(int range) { return random_.Generate(range); }
+  int operator()(int n) { return PseudoUniform(n); }
+
+  // The raw draw, unmodified.
+  uint32_t Rand31() { return Draw(); }
+
+  // Bits 15..30 of a draw.
+  uint16_t Rand16() { return static_cast<uint16_t>((Draw() >> 15) & 0xffff); }
+
+  // There's a bit more entropy in the upper bits of this implementation.
+  uint8_t Rand8() { return static_cast<uint8_t>((Draw() >> 23) & 0xff); }
+
+  int16_t Rand9Signed() {
+    // Use 9 bits: values between 255 (0x0FF) and -256 (0x100).
+    const int biased = static_cast<int>(random_.Generate(512));
+    return static_cast<int16_t>(biased - 256);
+  }
+
+  // Meant to exercise saturation behavior. As written: (r << 4) truncated to
+  // 8 bits when r < 128, otherwise r >> 4.
+  uint8_t Rand8Extremes() {
+    const uint8_t r = Rand8();
+    if (r < 128) return static_cast<uint8_t>(r << 4);
+    return static_cast<uint8_t>(r >> 4);
+  }
+
+ private:
+  // One draw over the generator's full range.
+  uint32_t Draw() {
+    return random_.Generate(testing::internal::Random::kMaxRange);
+  }
+
+  testing::internal::Random random_;
+};
+
+}  // namespace libaom_test
+
+#endif  // TEST_ACM_RANDOM_H_
diff --git a/third_party/aom/test/active_map_refresh_test.cc b/third_party/aom/test/active_map_refresh_test.cc
new file mode 100644
index 000000000..7ee86e7e6
--- /dev/null
+++ b/third_party/aom/test/active_map_refresh_test.cc
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <algorithm>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+
+namespace {
+
+// Returns 1 if any pixel of the 16x16 macroblock at (mb_r, mb_c) differs
+// between the two frames in any of the three planes, and 0 otherwise.
+int CheckMb(const aom_image_t &current, const aom_image_t &previous, int mb_r,
+            int mb_c) {
+  for (int plane = 0; plane < 3; ++plane) {
+    int row = std::max(16 * mb_r, 0);
+    int col_begin = std::max(16 * mb_c, 0);
+    int row_end = std::min(16 * mb_r + 16, static_cast<int>(current.d_h));
+    int col_end = std::min(16 * mb_c + 16, static_cast<int>(current.d_w));
+    if (plane > 0 && current.x_chroma_shift) {  // horizontally subsampled
+      col_begin >>= 1;
+      col_end = (col_end + 1) >> 1;  // round the end up
+    }
+    if (plane > 0 && current.y_chroma_shift) {  // vertically subsampled
+      row >>= 1;
+      row_end = (row_end + 1) >> 1;  // round the end up
+    }
+    for (; row < row_end; ++row) {
+      const int cur_base = current.stride[plane] * row;
+      const int prev_base = previous.stride[plane] * row;
+      for (int col = col_begin; col < col_end; ++col) {
+        const unsigned char a = current.planes[plane][cur_base + col];
+        if (a != previous.planes[plane][prev_base + col]) return 1;
+      }
+    }
+  }
+  return 0;
+}
+
+void GenerateMap(int mb_rows, int mb_cols, const aom_image_t &current,
+                 const aom_image_t &previous, uint8_t *map) {
+  for (int row = 0; row < mb_rows; ++row) {
+    uint8_t *const out = map + row * mb_cols;  // row-major, one byte per MB
+    for (int col = 0; col < mb_cols; ++col)
+      out[col] = CheckMb(current, previous, row, col);
+  }
+}
+
+const int kAqModeCyclicRefresh = 3;
+
+class ActiveMapRefreshTest
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int> {
+ protected:
+  ActiveMapRefreshTest()
+      : EncoderTest(GET_PARAM(0)), cpu_used_(0), y4m_holder_(NULL) {}
+  virtual ~ActiveMapRefreshTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+    cpu_used_ = GET_PARAM(2);
+  }
+  // From frame 2 on, marks macroblocks that differ from the previous frame.
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    ASSERT_TRUE(y4m_holder_ != NULL) << "y4m_holder_ was never set";
+    ::libaom_test::Y4mVideoSource *y4m_video =
+        static_cast<libaom_test::Y4mVideoSource *>(video);
+    if (video->frame() == 1) {
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      encoder->Control(AV1E_SET_AQ_MODE, kAqModeCyclicRefresh);
+    } else if (video->frame() >= 2 && video->img()) {
+      aom_image_t *current = video->img();
+      aom_image_t *previous = y4m_holder_->img();
+      ASSERT_TRUE(previous != NULL);
+      aom_active_map_t map = aom_active_map_t();
+      const int mb_width = (static_cast<int>(current->d_w) + 15) / 16;
+      const int mb_height = (static_cast<int>(current->d_h) + 15) / 16;
+      uint8_t *active_map = new uint8_t[mb_width * mb_height];
+      GenerateMap(mb_height, mb_width, *current, *previous, active_map);
+      map.cols = mb_width;
+      map.rows = mb_height;
+      map.active_map = active_map;
+      encoder->Control(AOME_SET_ACTIVEMAP, &map);
+      delete[] active_map;
+    }
+    if (video->img()) {
+      y4m_video->SwapBuffers(y4m_holder_);
+    }
+  }
+
+  int cpu_used_;  // AOME_SET_CPUUSED value, test parameter 2
+  ::libaom_test::Y4mVideoSource *y4m_holder_;  // previous frame, not owned
+};
+
+TEST_P(ActiveMapRefreshTest, Test) {
+  cfg_.g_profile = 1;
+  cfg_.g_pass = AOM_RC_ONE_PASS;
+  cfg_.g_lag_in_frames = 0;
+  cfg_.kf_max_dist = 90000;
+  cfg_.rc_end_usage = AOM_CBR;
+  cfg_.rc_target_bitrate = 600;
+  cfg_.rc_min_quantizer = 8;
+  cfg_.rc_max_quantizer = 30;
+  cfg_.rc_resize_allowed = 0;
+  // The encode hook swaps buffers with |holder| to keep the previous frame.
+  ::libaom_test::Y4mVideoSource source("desktop_credits.y4m", 0, 10);
+  ::libaom_test::Y4mVideoSource holder("desktop_credits.y4m", 0, 10);
+  holder.Begin();
+  y4m_holder_ = &holder;
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&source));
+}
+
+AV1_INSTANTIATE_TEST_CASE(ActiveMapRefreshTest,
+                          ::testing::Values(::libaom_test::kRealTime),
+                          ::testing::Range(5, 6));  // cpu-used 5 only
+}  // namespace
diff --git a/third_party/aom/test/active_map_test.cc b/third_party/aom/test/active_map_test.cc
new file mode 100644
index 000000000..a926b0faf
--- /dev/null
+++ b/third_party/aom/test/active_map_test.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <climits>
+#include <vector>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+class ActiveMapTest
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int> {
+ protected:
+  static const int kWidth = 208;
+  static const int kHeight = 144;
+
+  ActiveMapTest() : EncoderTest(GET_PARAM(0)) {}
+  virtual ~ActiveMapTest() {}
+  // Takes the test mode and the cpu-used value from the test parameters.
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+    cpu_used_ = GET_PARAM(2);
+  }
+  // Frame 1 sets cpu-used, frame 3 installs a 13x9 map, frame 15 passes NULL.
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      return;
+    }
+    const bool install_map = video->frame() == 3;
+    const bool remove_map = video->frame() == 15;
+    if (!install_map && !remove_map) return;
+    /* clang-format off */
+    uint8_t active_map[9 * 13] = {
+      1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
+      1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
+      1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
+      1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
+      0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1,
+      0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1,
+      0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1,
+      0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1,
+      1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0,
+    };
+    /* clang-format on */
+    aom_active_map_t map = aom_active_map_t();
+    map.cols = (kWidth + 15) / 16;
+    map.rows = (kHeight + 15) / 16;
+    if (install_map) {
+      ASSERT_EQ(map.cols, 13u);
+      ASSERT_EQ(map.rows, 9u);
+    }
+    // Frame 15 passes a NULL map pointer instead of the table.
+    map.active_map = install_map ? active_map : NULL;
+    encoder->Control(AOME_SET_ACTIVEMAP, &map);
+  }
+  // Encodes the kWidth x kHeight clip with one-pass CBR settings.
+  void DoTest() {
+    // Validate that this non multiple of 64 wide clip encodes
+    cfg_.g_pass = AOM_RC_ONE_PASS;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.kf_max_dist = 90000;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.rc_target_bitrate = 400;
+    cfg_.rc_resize_allowed = 0;
+    ::libaom_test::I420VideoSource clip("hantro_odd.yuv", kWidth, kHeight, 30,
+                                        1, 0, 20);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&clip));
+  }
+
+  int cpu_used_;
+};
+
+TEST_P(ActiveMapTest, Test) { DoTest(); }
+// Same test body; instantiated below with a different cpu-used range.
+class ActiveMapTestLarge : public ActiveMapTest {};
+
+TEST_P(ActiveMapTestLarge, Test) { DoTest(); }
+
+AV1_INSTANTIATE_TEST_CASE(ActiveMapTestLarge,
+                          ::testing::Values(::libaom_test::kRealTime),
+                          ::testing::Range(0, 5));  // cpu-used 0..4
+
+AV1_INSTANTIATE_TEST_CASE(ActiveMapTest,
+                          ::testing::Values(::libaom_test::kRealTime),
+                          ::testing::Range(5, 9));  // cpu-used 5..8
+
+}  // namespace
diff --git a/third_party/aom/test/altref_test.cc b/third_party/aom/test/altref_test.cc
new file mode 100644
index 000000000..6dd8b5186
--- /dev/null
+++ b/third_party/aom/test/altref_test.cc
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+namespace {
+
+class AltRefForcedKeyTestLarge
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int> {
+ protected:
+  AltRefForcedKeyTestLarge()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        cpu_used_(GET_PARAM(2)), forced_kf_frame_num_(1), frame_num_(0) {}
+  virtual ~AltRefForcedKeyTestLarge() {}
+  // Selects the test mode, VBR rate control and g_threads = 0.
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(encoding_mode_);
+    cfg_.g_threads = 0;
+    cfg_.rc_end_usage = AOM_VBR;
+  }
+  // Frame 0 configures the encoder; the forced frame gets AOM_EFLAG_FORCE_KF.
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+#if CONFIG_AV1_ENCODER
+      // override test default for tile columns if necessary.
+      if (GET_PARAM(0) == &libaom_test::kAV1) {
+        encoder->Control(AV1E_SET_TILE_COLUMNS, 6);
+      }
+#endif
+    }
+    const bool force_key = video->frame() == forced_kf_frame_num_;
+    frame_flags_ = force_key ? AOM_EFLAG_FORCE_KF : 0;
+  }
+  // Checks that the packet of the forced frame is flagged as a key frame.
+  virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
+    if (frame_num_ == forced_kf_frame_num_) {
+      ASSERT_TRUE(!!(pkt->data.frame.flags & AOM_FRAME_IS_KEY))
+          << "Frame #" << frame_num_ << " isn't a keyframe!";
+    }
+    ++frame_num_;  // not reached when the assertion above fails
+  }
+
+  ::libaom_test::TestMode encoding_mode_;
+  int cpu_used_;
+  unsigned int forced_kf_frame_num_;  // frame that is forced to be a key frame
+  unsigned int frame_num_;            // packets seen so far in this run
+};
+
+// Forces frame 1 to be a key frame at each lag-in-frames setting.
+TEST_P(AltRefForcedKeyTestLarge, Frame1IsKey) {
+  const int kLagValues[] = { 3, 15, 25 };
+  const aom_rational timebase = { 1, 30 };
+  forced_kf_frame_num_ = 1;
+  for (size_t n = 0; n < sizeof(kLagValues) / sizeof(kLagValues[0]); ++n) {
+    cfg_.g_lag_in_frames = kLagValues[n];
+    frame_num_ = 0;
+    libaom_test::I420VideoSource clip("hantro_collage_w352h288.yuv", 352, 288,
+                                      timebase.den, timebase.num, 0, 30);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&clip));
+  }
+}
+
+// Forces a key frame on frame (lag - 1) for each lag-in-frames setting.
+TEST_P(AltRefForcedKeyTestLarge, ForcedFrameIsKey) {
+  const int kLagValues[] = { 3, 15, 25 };
+  const aom_rational timebase = { 1, 30 };
+  for (size_t n = 0; n < sizeof(kLagValues) / sizeof(kLagValues[0]); ++n) {
+    cfg_.g_lag_in_frames = kLagValues[n];
+    forced_kf_frame_num_ = kLagValues[n] - 1;
+    frame_num_ = 0;
+    libaom_test::I420VideoSource clip("hantro_collage_w352h288.yuv", 352, 288,
+                                      timebase.den, timebase.num, 0, 30);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&clip));
+  }
+}
+
+AV1_INSTANTIATE_TEST_CASE(AltRefForcedKeyTestLarge,
+                          ::testing::Values(::libaom_test::kOnePassGood),
+                          ::testing::Range(0, 9));  // cpu-used 0..8
+
+}  // namespace
diff --git a/third_party/aom/test/android/Android.mk b/third_party/aom/test/android/Android.mk
new file mode 100644
index 000000000..74f9d7cba
--- /dev/null
+++ b/third_party/aom/test/android/Android.mk
@@ -0,0 +1,58 @@
+#
+# Copyright (c) 2016, Alliance for Open Media. All rights reserved
+#
+# This source code is subject to the terms of the BSD 2 Clause License and
+# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+# was not distributed with this source code in the LICENSE file, you can
+# obtain it at www.aomedia.org/license/software. If the Alliance for Open
+# Media Patent License 1.0 was not distributed with this source code in the
+# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+#
+# This makefile builds the aom_test app for Android.
+# The test app itself runs on the command line through adb shell.
+# The paths look convoluted because the libaom makefile expects to be
+# invoked from the parent directory of the libaom checkout.
+CUR_WD := $(call my-dir)
+BINDINGS_DIR := $(CUR_WD)/../../..
+LOCAL_PATH := $(CUR_WD)/../../..
+
+# libwebm (LOCAL_PATH is assigned again after the include).
+include $(CLEAR_VARS)
+include $(BINDINGS_DIR)/libaom/third_party/libwebm/Android.mk
+LOCAL_PATH := $(CUR_WD)/../../..
+
+# libaom, with libwebm listed as a static library dependency.
+include $(CLEAR_VARS)
+LOCAL_STATIC_LIBRARIES := libwebm
+include $(BINDINGS_DIR)/libaom/build/make/Android.mk
+LOCAL_PATH := $(CUR_WD)/../..
+
+# gtest static library, compiled from gtest-all.cc.
+include $(CLEAR_VARS)
+LOCAL_ARM_MODE := arm
+LOCAL_CPP_EXTENSION := .cc
+LOCAL_MODULE := gtest
+LOCAL_C_INCLUDES := $(LOCAL_PATH)/third_party/googletest/src/googletest/src
+LOCAL_C_INCLUDES += $(LOCAL_PATH)/third_party/googletest/src/googletest/include
+LOCAL_SRC_FILES := ./third_party/googletest/src/googletest/src/gtest-all.cc
+include $(BUILD_STATIC_LIBRARY)
+
+# libaom_test: the test executable; aom is linked shared if ENABLE_SHARED is 1.
+include $(CLEAR_VARS)
+LOCAL_ARM_MODE := arm
+LOCAL_MODULE := libaom_test
+LOCAL_STATIC_LIBRARIES := gtest libwebm
+
+ifeq ($(ENABLE_SHARED),1)
+  LOCAL_SHARED_LIBRARIES := aom
+else
+  LOCAL_STATIC_LIBRARIES += aom
+endif
+
+include $(LOCAL_PATH)/test/test.mk
+LOCAL_C_INCLUDES := $(BINDINGS_DIR)
+FILTERED_SRC := $(sort $(filter %.cc %.c, $(LIBAOM_TEST_SRCS-yes)))
+LOCAL_SRC_FILES := $(addprefix ./test/, $(FILTERED_SRC))
+# Some test files depend on *_rtcd.h; ensure they are generated first.
+$(eval $(call rtcd_dep_template))
+include $(BUILD_EXECUTABLE)
diff --git a/third_party/aom/test/android/README b/third_party/aom/test/android/README
new file mode 100644
index 000000000..35c829738
--- /dev/null
+++ b/third_party/aom/test/android/README
@@ -0,0 +1,32 @@
+Android.mk will build aom unittests on android.
+1) Configure libaom from the parent directory:
+./libaom/configure --target=armv7-android-gcc --enable-external-build \
+  --enable-postproc --disable-install-srcs --enable-multi-res-encoding \
+  --enable-temporal-denoising --disable-unit-tests --disable-install-docs \
+  --disable-examples --disable-runtime-cpu-detect --sdk-path=$NDK
+
+2) From the parent directory, invoke ndk-build:
+NDK_PROJECT_PATH=. ndk-build APP_BUILD_SCRIPT=./libaom/test/android/Android.mk \
+  APP_ABI=armeabi-v7a APP_PLATFORM=android-18 APP_OPTIM=release \
+  APP_STL=gnustl_static
+
+Note: Both adb and ndk-build are available prebuilt at:
+  https://chromium.googlesource.com/android_tools
+
+3) Run get_files.py to download the test files:
+python get_files.py -i /path/to/test-data.sha1 -o /path/to/put/files \
+  -u http://downloads.webmproject.org/test_data/libaom
+
+4) Transfer files to the device using adb. Ensure you have write permission
+for the target directory:
+
+adb push /path/to/test_files /data/local/tmp
+adb push /path/to/built_libs /data/local/tmp
+
+NOTE: /path/to/built_libs defaults to parent_dir/libs/armeabi-v7a
+
+5) Run tests:
+adb shell
+(on device)
+cd /data/local/tmp
+LD_LIBRARY_PATH=. ./aom_test
diff --git a/third_party/aom/test/android/get_files.py b/third_party/aom/test/android/get_files.py
new file mode 100644
index 000000000..bdae9a315
--- /dev/null
+++ b/third_party/aom/test/android/get_files.py
@@ -0,0 +1,120 @@
+#
+# Copyright (c) 2016, Alliance for Open Media. All rights reserved
+#
+# This source code is subject to the terms of the BSD 2 Clause License and
+# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+# was not distributed with this source code in the LICENSE file, you can
+# obtain it at www.aomedia.org/license/software. If the Alliance for Open
+# Media Patent License 1.0 was not distributed with this source code in the
+# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+#
+# This simple script pulls test files from the webm homepage
+# It is intelligent enough to only pull files if
+#   1) File / test_data folder does not exist
+#   2) SHA mismatch
+
+import pycurl
+import csv
+import hashlib
+import re
+import os.path
+import time
+import itertools
+import sys
+import getopt
+
+# Globals, filled in from the command line by the main script below.
+url = ''  # base url the files are downloaded from (-u)
+file_list_path = ''  # sha/name list to read (-i)
+local_resource_path = ''  # directory the files are stored in (-o)
+
+# Helper functions:
+# A simple function which returns the sha hash of a file in hex
+def get_file_sha(filename):
+  try:
+    sha_hash = hashlib.sha1()
+    with open(filename, 'rb') as file:
+      buf = file.read(HASH_CHUNK)
+      while len(buf) > 0:
+        sha_hash.update(buf)
+        buf = file.read(HASH_CHUNK)
+      return sha_hash.hexdigest()
+  except IOError:
+    print "Error reading " + filename
+
+# Downloads a file from a url, and then checks the sha against the passed
+# in sha
+def download_and_check_sha(url, filename, sha):
+  path = os.path.join(local_resource_path, filename)
+  fp = open(path, "wb")
+  curl = pycurl.Curl()
+  curl.setopt(pycurl.URL, url + "/" + filename)
+  curl.setopt(pycurl.WRITEDATA, fp)
+  curl.perform()
+  curl.close()
+  fp.close()
+  return get_file_sha(path) == sha
+
+# Constants
+ftp_retries = 3  # download attempts per file
+
+SHA_COL = 0  # column of the sha in each list row
+NAME_COL = 1  # column of the file name in each list row
+EXPECTED_COL = 2  # number of columns a valid row has
+HASH_CHUNK = 65536  # bytes read per hashing step
+
+# Main script
+try:
+  opts, args = \
+      getopt.getopt(sys.argv[1:], \
+                    "u:i:o:", ["url=", "input_csv=", "output_dir="])
+except:
+  print 'get_files.py -u <url> -i <input_csv> -o <output_dir>'
+  sys.exit(2)
+
+for opt, arg in opts:
+  if opt == '-u':
+    url = arg
+  elif opt in ("-i", "--input_csv"):
+    file_list_path = os.path.join(arg)
+  elif opt in ("-o", "--output_dir"):
+    local_resource_path = os.path.join(arg)
+
+if len(sys.argv) != 7:
+  print "Expects two paths and a url!"
+  exit(1)
+
+if not os.path.isdir(local_resource_path):
+  os.makedirs(local_resource_path)
+
+file_list_csv = open(file_list_path, "rb")
+
+# Our 'csv' file uses multiple spaces as a delimiter, python's
+# csv class only uses single character delimiters, so we convert them below
+file_list_reader = csv.reader((re.sub(' +', ' ', line) \
+    for line in file_list_csv), delimiter = ' ')
+
+file_shas = []
+file_names = []
+
+for row in file_list_reader:
+  if len(row) != EXPECTED_COL:
+      continue
+  file_shas.append(row[SHA_COL])
+  file_names.append(row[NAME_COL])
+
+file_list_csv.close()
+
+# Download files, only if they don't already exist and have correct shas
+for filename, sha in itertools.izip(file_names, file_shas):
+  path = os.path.join(local_resource_path, filename)
+  if os.path.isfile(path) \
+      and get_file_sha(path) == sha:
+    print path + ' exists, skipping'
+    continue
+  for retry in range(0, ftp_retries):
+    print "Downloading " + path
+    if not download_and_check_sha(url, filename, sha):
+      print "Sha does not match, retrying..."
+    else:
+      break
diff --git a/third_party/aom/test/android/scrape_gtest_log.py b/third_party/aom/test/android/scrape_gtest_log.py
new file mode 100644
index 000000000..e0c929a5d
--- /dev/null
+++ b/third_party/aom/test/android/scrape_gtest_log.py
@@ -0,0 +1,60 @@
+#
+# Copyright (c) 2016, Alliance for Open Media. All rights reserved
+#
+# This source code is subject to the terms of the BSD 2 Clause License and
+# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+# was not distributed with this source code in the LICENSE file, you can
+# obtain it at www.aomedia.org/license/software. If the Alliance for Open
+# Media Patent License 1.0 was not distributed with this source code in the
+# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+#
+
+"""Standalone script which parses a gtest log for json.
+
+The JSON is returned as an array.  This script is used by the libaom
+waterfall to gather JSON results mixed in with gtest logs.  This is
+dubious software engineering.
+"""
+
+import getopt
+import json
+import os
+import re
+import sys
+
+
+def main():
+  if len(sys.argv) != 3:
+    print "Expects a file to write json to!"
+    exit(1)
+
+  try:
+    opts, _ = \
+        getopt.getopt(sys.argv[1:], \
+                      'o:', ['output-json='])
+  except getopt.GetOptError:
+    print 'scrape_gtest_log.py -o <output_json>'
+    sys.exit(2)
+
+  output_json = ''
+  for opt, arg in opts:
+    if opt in ('-o', '--output-json'):
+      output_json = os.path.join(arg)
+
+  blob = sys.stdin.read()
+  json_string = '[' + ','.join('{' + x + '}' for x in
+                               re.findall(r'{([^}]*.?)}', blob)) + ']'
+  print blob
+
+  output = json.dumps(json.loads(json_string), indent=4, sort_keys=True)
+  print output
+
+  path = os.path.dirname(output_json)
+  if path and not os.path.exists(path):
+    os.makedirs(path)
+
+  outfile = open(output_json, 'w')
+  outfile.write(output)
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/third_party/aom/test/ans_codec_test.cc b/third_party/aom/test/ans_codec_test.cc
new file mode 100644
index 000000000..a1b25fbda
--- /dev/null
+++ b/third_party/aom/test/ans_codec_test.cc
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "aom_dsp/ans.h"
+#include "av1/av1_dx_iface.c"
+
+// A note on ANS_MAX_SYMBOLS == 0:
+// Fused gtest doesn't work with EXPECT_FATAL_FAILURE [1]. Just run with a
+// single iteration and don't try to check the window size if we are unwindowed.
+// [1] https://github.com/google/googletest/issues/356
+
+namespace {
+
+const char kTestVideoName[] = "niklas_1280_720_30.y4m";
+const int kTestVideoFrames = 10;
+
+class AnsCodecTest : public ::libaom_test::EncoderTest,
+                     public ::libaom_test::CodecTestWithParam<int> {
+ protected:
+  AnsCodecTest()
+      : EncoderTest(GET_PARAM(0)), ans_window_size_log2_(GET_PARAM(1)) {}
+
+  virtual ~AnsCodecTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(::libaom_test::kOnePassGood);
+    cfg_.g_lag_in_frames = 25;
+    cfg_.rc_end_usage = AOM_CQ;
+  }
+  // On frame 1, sets the window size (windowed builds) and encoder controls.
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+#if ANS_MAX_SYMBOLS
+      encoder->Control(AV1E_SET_ANS_WINDOW_SIZE_LOG2, ans_window_size_log2_);
+#endif
+      // Try to push a high symbol count through the codec
+      encoder->Control(AOME_SET_CQ_LEVEL, 8);
+      encoder->Control(AOME_SET_CPUUSED, 2);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, 0);
+      encoder->Control(AV1E_SET_TILE_ROWS, 0);
+    }
+  }
+  // Checks the decoder's window size (windowed builds) and the decode status.
+  virtual bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                                  libaom_test::Decoder *decoder) {
+#if ANS_MAX_SYMBOLS
+    // Kept inside the #if: it would be unused when ANS_MAX_SYMBOLS is 0.
+    aom_codec_ctx_t *const av1_decoder = decoder->GetDecoder();
+    aom_codec_alg_priv_t *const priv =
+        reinterpret_cast<aom_codec_alg_priv_t *>(av1_decoder->priv);
+    FrameWorkerData *const worker_data =
+        reinterpret_cast<FrameWorkerData *>(priv->frame_workers[0].data1);
+    AV1_COMMON *const common = &worker_data->pbi->common;
+    EXPECT_EQ(ans_window_size_log2_, common->ans_window_size_log2);
+#endif
+
+    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+    return AOM_CODEC_OK == res_dec;
+  }
+
+ private:
+  int ans_window_size_log2_;
+};
+
+// Runs the encoder test loop over kTestVideoName.
+TEST_P(AnsCodecTest, BitstreamParms) {
+  // A stack-allocated source needs no ownership handling and does not depend
+  // on gtest's internal scoped_ptr.
+  libaom_test::Y4mVideoSource video(kTestVideoName, 0, kTestVideoFrames);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+#if ANS_MAX_SYMBOLS  // windowed: window size log2 values 8..23
+AV1_INSTANTIATE_TEST_CASE(AnsCodecTest, ::testing::Range(8, 24));
+#else  // unwindowed: a single run with parameter 0
+AV1_INSTANTIATE_TEST_CASE(AnsCodecTest, ::testing::Range(0, 1));
+#endif  // ANS_MAX_SYMBOLS
+}  // namespace
diff --git a/third_party/aom/test/ans_test.cc b/third_party/aom/test/ans_test.cc
new file mode 100644
index 000000000..a553a9e84
--- /dev/null
+++ b/third_party/aom/test/ans_test.cc
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+#include <ctime>
+#include <utility>
+#include <vector>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "aom_dsp/ansreader.h"
+#include "aom_dsp/buf_ans.h"
+
+namespace {
+typedef std::vector<std::pair<uint8_t, bool> > PvVec;
+
+const int kPrintStats = 0;
+// Use a small buffer size to exercise ANS window spills or buffer growth
+const int kBufAnsSize = 1 << 8;
+
+PvVec abs_encode_build_vals(int iters) {
+  // Fixed seed, so every run produces the same (probability, bit) pairs.
+  libaom_test::ACMRandom rnd(0x30317076);
+  PvVec pairs;
+  double entropy = 0;
+  for (int n = 0; n < iters; ++n) {
+    // Redraw until nonzero: zero is not a valid coding probability.
+    uint8_t prob = rnd.Rand8();
+    while (prob == 0) prob = rnd.Rand8();
+    const bool bit = rnd.Rand8() < prob;
+    pairs.push_back(PvVec::value_type(prob, bit));
+    if (kPrintStats) {
+      const double d = prob / 256.;
+      entropy += -d * log2(d) - (1 - d) * log2(1 - d);
+    }
+  }
+  if (kPrintStats) printf("entropy %f\n", entropy);
+  return pairs;
+}
+
+bool check_rabs(const PvVec &pv_vec, uint8_t *buf) {
+  BufAnsCoder a;
+  aom_buf_ans_alloc(&a, NULL, kBufAnsSize);
+  buf_ans_write_init(&a, buf);
+  // Encode every (probability, bit) pair into buf, then decode and compare.
+  std::clock_t start = std::clock();
+  for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) {
+    buf_rabs_write(&a, it->second, 256 - it->first);
+  }
+  aom_buf_ans_flush(&a);
+  std::clock_t enc_time = std::clock() - start;
+  int offset = buf_ans_write_end(&a);  // handed to ans_read_init below
+  aom_buf_ans_free(&a);
+  bool okay = true;
+  AnsDecoder d;
+#if ANS_MAX_SYMBOLS
+  d.window_size = kBufAnsSize;
+#endif
+  if (ans_read_init(&d, buf, offset)) return false;  // nonzero: init failed
+  start = std::clock();
+  for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) {
+    okay = okay && (rabs_read(&d, 256 - it->first) != 0) == it->second;
+  }
+  std::clock_t dec_time = std::clock() - start;
+  if (!okay) return false;
+  if (kPrintStats)
+    printf("uABS size %d enc_time %f dec_time %f\n", offset,
+           static_cast<float>(enc_time) / CLOCKS_PER_SEC,
+           static_cast<float>(dec_time) / CLOCKS_PER_SEC);
+  return ans_read_end(&d) != 0;  // nonzero is the passing result
+}
+
+const aom_cdf_prob spareto65[] = { 8320, 6018, 4402, 3254, 4259,
+                                   3919, 2057, 492,  45,   2 };
+
+const int kRansSymbols =
+    static_cast<int>(sizeof(spareto65) / sizeof(spareto65[0]));
+
+struct rans_sym {
+  aom_cdf_prob prob;      // this symbol's probability
+  aom_cdf_prob cum_prob;  // sum of the preceding symbols' probs (not-inclusive)
+};
+
+std::vector<int> ans_encode_build_vals(rans_sym *const tab, int iters) {
+  // Fills tab from spareto65, then draws iters symbols with those frequencies.
+  aom_cdf_prob sum = 0;
+  for (int i = 0; i < kRansSymbols; ++i) {
+    tab[i].cum_prob = sum;
+    tab[i].prob = spareto65[i];
+    sum += spareto65[i];
+  }
+  // Symbol i appears prob[i] times, so a uniform index samples the table.
+  std::vector<int> p_to_sym;
+  for (int i = 0; i < kRansSymbols; ++i)
+    p_to_sym.insert(p_to_sym.end(), tab[i].prob, i);
+  assert(p_to_sym.size() == RANS_PRECISION);
+  std::vector<int> ret;
+  libaom_test::ACMRandom gen(18543637);
+  for (int i = 0; i < iters; ++i) {
+    const int hi = gen.Rand8() << 8;  // sequenced before the low byte
+    ret.push_back(p_to_sym[(hi + gen.Rand8()) & (RANS_PRECISION - 1)]);
+  }
+  return ret;
+}
+
+// dec_tab[i] = inclusive cumulative probability of symbols 0..i.
+void rans_build_dec_tab(const rans_sym sym_tab[], aom_cdf_prob *dec_tab) {
+  unsigned int sum = 0;
+  // Bounded by kRansSymbols so a table that sums short cannot run past the end.
+  for (int i = 0; i < kRansSymbols && sum < RANS_PRECISION; ++i)
+    dec_tab[i] = sum += sym_tab[i].prob;
+}
+
+bool check_rans(const std::vector<int> &sym_vec, const rans_sym *const tab,
+                uint8_t *buf) {
+  BufAnsCoder a;
+  aom_buf_ans_alloc(&a, NULL, kBufAnsSize);
+  buf_ans_write_init(&a, buf);
+  aom_cdf_prob dec_tab[kRansSymbols];
+  rans_build_dec_tab(tab, dec_tab);
+  // Encode every symbol of sym_vec into buf, then decode and compare.
+  std::clock_t start = std::clock();
+  for (std::vector<int>::const_iterator it = sym_vec.begin();
+       it != sym_vec.end(); ++it) {
+    buf_rans_write(&a, tab[*it].cum_prob, tab[*it].prob);
+  }
+  aom_buf_ans_flush(&a);
+  std::clock_t enc_time = std::clock() - start;
+  int offset = buf_ans_write_end(&a);  // handed to ans_read_init below
+  aom_buf_ans_free(&a);
+  bool okay = true;
+  AnsDecoder d;
+#if ANS_MAX_SYMBOLS
+  d.window_size = kBufAnsSize;
+#endif
+  if (ans_read_init(&d, buf, offset)) return false;  // nonzero: init failed
+  start = std::clock();
+  for (std::vector<int>::const_iterator it = sym_vec.begin();
+       it != sym_vec.end(); ++it) {
+    okay &= rans_read(&d, dec_tab) == *it;  // keeps reading after a mismatch
+  }
+  std::clock_t dec_time = std::clock() - start;
+  if (!okay) return false;
+  if (kPrintStats)
+    printf("rANS size %d enc_time %f dec_time %f\n", offset,
+           static_cast<float>(enc_time) / CLOCKS_PER_SEC,
+           static_cast<float>(dec_time) / CLOCKS_PER_SEC);
+  return ans_read_end(&d) != 0;  // nonzero is the passing result
+}
+
+class AbsTestFix : public ::testing::Test {
+ protected:
+  static void SetUpTestCase() { pv_vec_ = abs_encode_build_vals(kNumBools); }
+  virtual void SetUp() { buf_ = new uint8_t[kNumBools / 8]; }
+  virtual void TearDown() { delete[] buf_; }
+  static const int kNumBools = 100000000;
+  static PvVec pv_vec_;  // filled by SetUpTestCase
+  uint8_t *buf_;         // per-test output buffer, kNumBools / 8 bytes
+};
+PvVec AbsTestFix::pv_vec_;
+
+class AnsTestFix : public ::testing::Test {
+ protected:
+  static void SetUpTestCase() {
+    sym_vec_ = ans_encode_build_vals(rans_sym_tab_, kNumSyms);
+  }
+  virtual void SetUp() { buf_ = new uint8_t[kNumSyms / 2]; }
+  virtual void TearDown() { delete[] buf_; }
+  static const int kNumSyms = 25000000;
+  static std::vector<int> sym_vec_;             // filled by SetUpTestCase
+  static rans_sym rans_sym_tab_[kRansSymbols];  // filled by SetUpTestCase
+  uint8_t *buf_;  // per-test output buffer, kNumSyms / 2 bytes
+};
+std::vector<int> AnsTestFix::sym_vec_;
+rans_sym AnsTestFix::rans_sym_tab_[kRansSymbols];
+
+TEST_F(AbsTestFix, Rabs) { EXPECT_TRUE(check_rabs(pv_vec_, buf_)); }
+TEST_F(AnsTestFix, Rans) {
+  EXPECT_TRUE(check_rans(sym_vec_, rans_sym_tab_, buf_));
+}
+TEST(AnsTest, FinalStateSerialization) {
+  for (unsigned i = L_BASE; i < L_BASE * IO_BASE; ++i) {
+    uint8_t buf[8];
+    AnsCoder c;
+    ans_write_init(&c, buf);
+    c.state = i;  // inject a synthetic coder state; no symbols are written
+    const int written_size = ans_write_end(&c);
+    ASSERT_LT(static_cast<size_t>(written_size), sizeof(buf));
+    AnsDecoder d;
+#if ANS_MAX_SYMBOLS
+    // There is no real data window here because no symbols are sent through
+    // ans (only synthetic states), so use a dummy value
+    d.window_size = 1024;
+#endif
+    const int read_init_status = ans_read_init(&d, buf, written_size);
+    EXPECT_EQ(read_init_status, 0);
+    EXPECT_EQ(d.state, i);  // the state must survive the write/read round trip
+  }
+}
+}  // namespace
diff --git a/third_party/aom/test/aomcx_set_ref.sh b/third_party/aom/test/aomcx_set_ref.sh
new file mode 100755
index 000000000..f51b73c58
--- /dev/null
+++ b/third_party/aom/test/aomcx_set_ref.sh
@@ -0,0 +1,58 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file tests the libaom aom_cx_set_ref example. To add new tests to this
+## file, do the following:
+##   1. Write a shell function (this is your test).
+##   2. Add the function to aom_cx_set_ref_tests (on a new line).
+##
+. "$(dirname "$0")/tools_common.sh"
+
+# Environment check: $YUV_RAW_INPUT is required. Failure is reported via elog.
+aom_cx_set_ref_verify_environment() {
+  if [ ! -e "${YUV_RAW_INPUT}" ]; then
+    elog "Libaom test data must exist in LIBAOM_TEST_DATA_PATH."
+    return 1
+  fi
+}
+
+# Runs aom_cx_set_ref and updates the reference frame before encoding frame 4
+# (ref_frame_num), with a limit of 10. $1 is the codec name: it is passed to
+# the example and also names the output file.
+# NOTE(review): the old TODO(tomfinegan) to pass the codec param once the
+# example supports AV1 looks done -- the call below already passes it.
+aom_set_ref() {
+  local encoder="${LIBAOM_BIN_PATH}/aom_cx_set_ref${AOM_TEST_EXE_SUFFIX}"
+  local codec="$1"
+  local output_file="${AOM_TEST_OUTPUT_DIR}/aom_cx_set_ref_${codec}.ivf"
+  local ref_frame_num=4
+  local limit=10
+  if [ ! -x "${encoder}" ]; then
+    elog "${encoder} does not exist or is not executable."
+    return 1
+  fi
+
+  eval "${AOM_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
+      "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \
+      "${ref_frame_num}" "${limit}" ${devnull} || return 1
+
+  [ -e "${output_file}" ] || return 1
+}
+
+aom_cx_set_ref_av1() {
+  # Succeed without running anything unless av1_encode_available says yes.
+  [ "$(av1_encode_available)" = "yes" ] || return 0
+  aom_set_ref av1 || return 1
+}
+
+aom_cx_set_ref_tests="aom_cx_set_ref_av1"
+
+run_tests aom_cx_set_ref_verify_environment "${aom_cx_set_ref_tests}"
+
diff --git a/third_party/aom/test/aomdec.sh b/third_party/aom/test/aomdec.sh
new file mode 100755
index 000000000..28901ed1b
--- /dev/null
+++ b/third_party/aom/test/aomdec.sh
@@ -0,0 +1,124 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file tests aomdec. To add new tests to this file, do the following:
+##   1. Write a shell function (this is your test).
+##   2. Add the function to aomdec_tests (on a new line).
+##
+. "$(dirname "$0")/tools_common.sh"
+
+# Environment check: input clips (if we cannot encode them) and aomdec exist.
+aomdec_verify_environment() {
+  if [ "$(av1_encode_available)" != "yes" ] ; then
+    if [ ! -e "${AV1_WEBM_FILE}" ] || \
+      [ ! -e "${AV1_FPM_WEBM_FILE}" ] || \
+      [ ! -e "${AV1_LT_50_FRAMES_WEBM_FILE}" ] ; then
+      elog "Libaom test data must exist in LIBAOM_TEST_DATA_PATH."
+      return 1
+    fi
+  fi
+  if [ -z "$(aom_tool_path aomdec)" ]; then
+    elog "aomdec not found. It must exist in LIBAOM_BIN_PATH or its parent."
+    return 1
+  fi
+}
+
+# Wrapper function for running aomdec with pipe input. Requires that
+# LIBAOM_BIN_PATH points to the directory containing aomdec. $1 is used as the
+# input file path and shifted away; if that file does not exist, a file made by
+# encode_yuv_raw_input_av1 --ivf is piped in instead. All remaining parameters
+# are passed through to aomdec.
+aomdec_pipe() {
+  # No "readonly" after local: it would just declare a variable of that name.
+  local file="$1"
+  shift
+  if [ ! -e "${file}" ]; then
+    file="${AOM_TEST_OUTPUT_DIR}/test_encode.ivf"
+    encode_yuv_raw_input_av1 "${file}" --ivf
+  fi
+  cat "${file}" | aomdec - "$@" ${devnull}
+}
+
+
+# Wrapper function for running aomdec. Requires that LIBAOM_BIN_PATH points to
+# the directory containing aomdec. $1 is used as the input file path and
+# shifted away. All remaining parameters are passed through to aomdec.
+aomdec() {
+  local decoder="$(aom_tool_path aomdec)"
+  local input="$1"
+  shift
+  eval "${AOM_TEST_PREFIX}" "${decoder}" "$input" "$@" ${devnull}
+}
+
+aomdec_can_decode_av1() {
+  # Prints "yes" when av1_decode_available does, and nothing otherwise; the
+  # exit status is 0 either way.
+  [ "$(av1_decode_available)" != "yes" ] || echo yes
+}
+
+aomdec_aom_ivf_pipe_input() {
+  # Succeed without decoding unless aomdec_can_decode_av1 says yes.
+  [ "$(aomdec_can_decode_av1)" = "yes" ] || return 0
+  aomdec_pipe "${AOM_IVF_FILE}" --summary --noblit
+}
+
+aomdec_av1_webm() {
+  # Decode AV1_WEBM_FILE, or a freshly encoded WebM when that file is missing.
+  [ "$(aomdec_can_decode_av1)" = "yes" ] || return 0
+  [ "$(webm_io_available)" = "yes" ] || return 0
+  local file="${AV1_WEBM_FILE}"
+  if [ ! -e "${file}" ]; then
+    file="${AOM_TEST_OUTPUT_DIR}/test_encode.webm"
+    encode_yuv_raw_input_av1 "${file}"
+  fi
+  aomdec "${file}" --summary --noblit
+}
+
+aomdec_av1_webm_frame_parallel() {
+  # Decode the frame-parallel clip (or a fresh error-resilient encode when it
+  # is missing) with 2..8 threads; the first failing decode fails the test.
+  if [ "$(aomdec_can_decode_av1)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    local threads file="${AV1_FPM_WEBM_FILE}"
+    if [ ! -e "${file}" ]; then
+      file="${AOM_TEST_OUTPUT_DIR}/test_encode.webm"
+      encode_yuv_raw_input_av1 "${file}" "--ivf --error-resilient=1 "
+    fi
+    for threads in 2 3 4 5 6 7 8; do
+      aomdec "${file}" --summary --noblit --threads=$threads \
+        --frame-parallel || return 1
+    done
+  fi
+}
+
+# TODO(vigneshv): Enable or remove this test and associated code.
+DISABLED_aomdec_av1_webm_less_than_50_frames() {
+  # ensure that reaching eof in webm_guess_framerate doesn't result in invalid
+  # frames in actual webm_read_frame calls.
+  if [ "$(aomdec_can_decode_av1)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    local decoder="$(aom_tool_path aomdec)"
+    local expected=10
+    local num_frames="$(${AOM_TEST_PREFIX} "${decoder}" \
+      "${AV1_LT_50_FRAMES_WEBM_FILE}" --summary --noblit 2>&1 \
+      | awk '/^[0-9]+ decoded frames/ { print $1 }')"
+    if [ "${num_frames}" != "${expected}" ]; then  # also catches empty output
+      elog "Output frames ($num_frames) != expected ($expected)"
+      return 1
+    fi
+  fi
+}
+
+aomdec_tests="aomdec_av1_webm
+              aomdec_av1_webm_frame_parallel
+              aomdec_aom_ivf_pipe_input
+              DISABLED_aomdec_av1_webm_less_than_50_frames"
+
+run_tests aomdec_verify_environment "${aomdec_tests}"
diff --git a/third_party/aom/test/aomenc.sh b/third_party/aom/test/aomenc.sh
new file mode 100755
index 000000000..57a4c28a5
--- /dev/null
+++ b/third_party/aom/test/aomenc.sh
@@ -0,0 +1,241 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file tests aomenc using hantro_collage_w352h288.yuv as input. To add
+## new tests to this file, do the following:
+##   1. Write a shell function (this is your test).
+##   2. Add the function to aomenc_tests (on a new line).
+##
+. "$(dirname "$0")/tools_common.sh"
+
+readonly TEST_FRAMES=5
+
+# Environment check: raw YUV input, the Y4M clip (if encoding), and aomenc.
+aomenc_verify_environment() {
+  if [ ! -e "${YUV_RAW_INPUT}" ]; then
+    elog "The file ${YUV_RAW_INPUT##*/} must exist in LIBAOM_TEST_DATA_PATH."
+    return 1
+  fi
+  if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
+    if [ ! -e "${Y4M_NOSQ_PAR_INPUT}" ]; then
+      elog "The file ${Y4M_NOSQ_PAR_INPUT##*/} must exist in"
+      elog "LIBAOM_TEST_DATA_PATH."
+      return 1
+    fi
+  fi
+  if [ -z "$(aom_tool_path aomenc)" ]; then
+    elog "aomenc not found. It must exist in LIBAOM_BIN_PATH or its parent."
+    return 1
+  fi
+}
+
+aomenc_can_encode_av1() {
+  if [ "$(av1_encode_available)" = "yes" ]; then
+    echo yes
+  fi
+}
+# NOTE(review): exact duplicate of the definition above; redefining is harmless.
+aomenc_can_encode_av1() {
+  if [ "$(av1_encode_available)" = "yes" ]; then
+    echo yes
+  fi
+}
+
+# Utilities that echo aomenc input file parameters.
+y4m_input_non_square_par() {
+  echo "${Y4M_NOSQ_PAR_INPUT}"
+}
+
+y4m_input_720p() {
+  echo "${Y4M_720P_INPUT}"
+}
+
+# Echo default aomenc real time encoding params. $1 is the codec, which defaults
+# to av1 if unspecified.
+aomenc_rt_params() {
+  local codec="${1:-av1}"
+  echo "--codec=${codec}
+    --buf-initial-sz=500
+    --buf-optimal-sz=600
+    --buf-sz=1000
+    --cpu-used=-6
+    --end-usage=cbr
+    --error-resilient=1
+    --kf-max-dist=90000
+    --lag-in-frames=0
+    --max-intra-rate=300
+    --max-q=56
+    --min-q=2
+    --noise-sensitivity=0
+    --overshoot-pct=50
+    --passes=1
+    --profile=0
+    --resize-allowed=0
+    --rt
+    --static-thresh=0
+    --undershoot-pct=50"
+}
+
+# Wrapper function for running aomenc with pipe input. Requires that
+# LIBAOM_BIN_PATH points to the directory containing aomenc. $1 is used as the
+# input file path and shifted away. All remaining parameters are passed through
+# to aomenc.
+aomenc_pipe() {
+  local encoder="$(aom_tool_path aomenc)"
+  local input="$1"
+  shift
+  cat "${input}" | eval "${AOM_TEST_PREFIX}" "${encoder}" - \
+    --test-decode=fatal \
+    "$@" ${devnull}
+}
+
+# Wrapper function for running aomenc. Requires that LIBAOM_BIN_PATH points to
+# the directory containing aomenc. $1 is used as the input file path and
+# shifted away. All remaining parameters are passed through to aomenc.
+aomenc() {
+  local encoder="$(aom_tool_path aomenc)"
+  local input="$1"
+  shift
+  eval "${AOM_TEST_PREFIX}" "${encoder}" "${input}" \
+    --test-decode=fatal \
+    "$@" ${devnull}
+}
+
+aomenc_av1_ivf() {
+  # IVF encode; fails if aomenc fails or leaves no output file.
+  if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
+    local output="${AOM_TEST_OUTPUT_DIR}/av1.ivf"
+    aomenc $(yuv_raw_input) \
+      --codec=av1 \
+      --limit="${TEST_FRAMES}" \
+      --ivf \
+      --output="${output}" || return 1
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+aomenc_av1_webm() {
+  # WebM encode; fails if aomenc fails or leaves no output file.
+  if [ "$(aomenc_can_encode_av1)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    local output="${AOM_TEST_OUTPUT_DIR}/av1.webm"
+    aomenc $(yuv_raw_input) \
+      --codec=av1 \
+      --limit="${TEST_FRAMES}" \
+      --output="${output}" || return 1
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+aomenc_av1_webm_2pass() {
+  # Two-pass WebM encode into its own file, so the check is not satisfied by
+  # another test's av1.webm; fails if aomenc fails or leaves no output.
+  if [ "$(aomenc_can_encode_av1)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    local output="${AOM_TEST_OUTPUT_DIR}/av1_2pass.webm"
+    aomenc $(yuv_raw_input) \
+      --codec=av1 --limit="${TEST_FRAMES}" \
+      --output="${output}" \
+      --passes=2 || return 1
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+aomenc_av1_ivf_lossless() {
+  # IVF encode with --lossless=1; fails if aomenc fails or leaves no output.
+  if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
+    local output="${AOM_TEST_OUTPUT_DIR}/av1_lossless.ivf"
+    aomenc $(yuv_raw_input) \
+      --codec=av1 \
+      --limit="${TEST_FRAMES}" \
+      --ivf \
+      --output="${output}" \
+      --lossless=1 || return 1
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+aomenc_av1_ivf_minq0_maxq0() {
+  # IVF encode with min-q and max-q both 0; fails if aomenc fails or leaves no
+  # output file.
+  if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
+    local output="${AOM_TEST_OUTPUT_DIR}/av1_lossless_minq0_maxq0.ivf"
+    aomenc $(yuv_raw_input) \
+      --codec=av1 --limit="${TEST_FRAMES}" \
+      --ivf \
+      --output="${output}" \
+      --min-q=0 \
+      --max-q=0 || return 1
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+aomenc_av1_webm_lag5_frames10() {
+  # Two-pass WebM encode, limit 10, lag-in-frames 5, auto-alt-ref on; fails if
+  # aomenc fails or leaves no output file.
+  if [ "$(aomenc_can_encode_av1)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    local lag_total_frames=10
+    local lag_frames=5
+    local output="${AOM_TEST_OUTPUT_DIR}/av1_lag5_frames10.webm"
+    aomenc $(yuv_raw_input) \
+      --codec=av1 --limit="${lag_total_frames}" \
+      --lag-in-frames="${lag_frames}" \
+      --output="${output}" \
+      --passes=2 \
+      --auto-alt-ref=1 || return 1
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+# TODO(fgalligan): Test that DisplayWidth is different than video width.
+aomenc_av1_webm_non_square_par() {
+  # WebM encode of the non-square-PAR Y4M clip; fails if aomenc fails or leaves
+  # no output file.
+  if [ "$(aomenc_can_encode_av1)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    local output="${AOM_TEST_OUTPUT_DIR}/av1_non_square_par.webm"
+    aomenc $(y4m_input_non_square_par) \
+      --codec=av1 --limit="${TEST_FRAMES}" \
+      --output="${output}" || return 1
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+aomenc_tests="aomenc_av1_ivf
+              aomenc_av1_webm
+              aomenc_av1_webm_2pass
+              aomenc_av1_ivf_lossless
+              aomenc_av1_ivf_minq0_maxq0
+              aomenc_av1_webm_lag5_frames10
+              aomenc_av1_webm_non_square_par"
+
+run_tests aomenc_verify_environment "${aomenc_tests}"
diff --git a/third_party/aom/test/aq_segment_test.cc b/third_party/aom/test/aq_segment_test.cc
new file mode 100644
index 000000000..5dc93ec79
--- /dev/null
+++ b/third_party/aom/test/aq_segment_test.cc
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include "./aom_config.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+class AqSegmentTest
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int> {
+ protected:
+  AqSegmentTest() : EncoderTest(GET_PARAM(0)) {}
+  virtual ~AqSegmentTest() {}
+  // Takes the encoding mode from parameter 1 and cpu-used from parameter 2.
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+    set_cpu_used_ = GET_PARAM(2);
+    aq_mode_ = 0;
+  }
+  // Applies the encoder controls once, when the source reports frame 1.
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
+      encoder->Control(AV1E_SET_AQ_MODE, aq_mode_);
+#if CONFIG_EXT_DELTA_Q
+      encoder->Control(AV1E_SET_DELTAQ_MODE, deltaq_mode_);
+#endif
+      encoder->Control(AOME_SET_MAX_INTRA_BITRATE_PCT, 100);
+    }
+  }
+  // Configures a CBR encode with the given aq_mode and runs the encode loop.
+  void DoTest(int aq_mode) {
+    aq_mode_ = aq_mode;
+#if CONFIG_EXT_DELTA_Q
+    deltaq_mode_ = 0;
+#endif
+    cfg_.kf_max_dist = 12;
+    cfg_.rc_min_quantizer = 8;
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 6;
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_target_bitrate = 300;
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 15);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  }
+  // Values handed to the encoder controls in PreEncodeFrameHook.
+  int set_cpu_used_;
+  int aq_mode_;
+#if CONFIG_EXT_DELTA_Q
+  int deltaq_mode_;
+#endif
+};
+
+// Validate that this AQ segmentation mode (AQ=1, variance_aq)
+// encodes and decodes without a mismatch.
+TEST_P(AqSegmentTest, TestNoMisMatchAQ1) { DoTest(1); }
+
+// Validate that this AQ segmentation mode (AQ=2, complexity_aq)
+// encodes and decodes without a mismatch.
+TEST_P(AqSegmentTest, TestNoMisMatchAQ2) { DoTest(2); }
+
+// Validate that this AQ segmentation mode (AQ=3, cyclic_refresh_aq)
+// encodes and decodes without a mismatch.
+TEST_P(AqSegmentTest, TestNoMisMatchAQ3) { DoTest(3); }
+
+class AqSegmentTestLarge : public AqSegmentTest {};
+
+TEST_P(AqSegmentTestLarge, TestNoMisMatchAQ1) { DoTest(1); }
+
+TEST_P(AqSegmentTestLarge, TestNoMisMatchAQ2) { DoTest(2); }
+
+TEST_P(AqSegmentTestLarge, TestNoMisMatchAQ3) { DoTest(3); }
+
+#if CONFIG_DELTA_Q && !CONFIG_EXT_DELTA_Q
+// Validate that this AQ mode (AQ=4, delta q) encodes and decodes without a
+// mismatch. Only built when delta q is on and ext delta q is off.
+TEST_P(AqSegmentTest, TestNoMisMatchAQ4) {
+  cfg_.rc_end_usage = AOM_CQ;
+  aq_mode_ = 4;
+
+  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 100);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+#endif
+
+#if CONFIG_EXT_DELTA_Q
+// Validate that delta q mode 2 (with aq_mode 0, in CQ rate control)
+// encodes and decodes without a mismatch.
+TEST_P(AqSegmentTest, TestNoMisMatchExtDeltaQ) {
+  cfg_.rc_end_usage = AOM_CQ;
+  aq_mode_ = 0;
+  deltaq_mode_ = 2;
+  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 100);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+#endif
+
+AV1_INSTANTIATE_TEST_CASE(AqSegmentTest,
+                          ::testing::Values(::libaom_test::kRealTime,
+                                            ::libaom_test::kOnePassGood),
+                          ::testing::Range(5, 9));
+AV1_INSTANTIATE_TEST_CASE(AqSegmentTestLarge,
+                          ::testing::Values(::libaom_test::kRealTime,
+                                            ::libaom_test::kOnePassGood),
+                          ::testing::Range(3, 5));
+}  // namespace
diff --git a/third_party/aom/test/arf_freq_test.cc b/third_party/aom/test/arf_freq_test.cc
new file mode 100644
index 000000000..bef58b3e8
--- /dev/null
+++ b/third_party/aom/test/arf_freq_test.cc
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "test/yuv_video_source.h"
+#include "av1/encoder/ratectrl.h"
+
+namespace {
+
+const unsigned int kFrames = 100;
+const int kBitrate = 500;
+
+#define ARF_NOT_SEEN 1000001
+#define ARF_SEEN_ONCE 1000000
+
+typedef struct {
+  const char *filename;
+  unsigned int width;
+  unsigned int height;
+  unsigned int framerate_num;
+  unsigned int framerate_den;
+  unsigned int input_bit_depth;
+  aom_img_fmt fmt;
+  aom_bit_depth_t bit_depth;
+  unsigned int profile;
+} TestVideoParam;
+
+typedef struct {
+  libaom_test::TestMode mode;
+  int cpu_used;
+} TestEncodeParam;
+
+const TestVideoParam kTestVectors[] = {
+  // artificially increase framerate to trigger default check
+  { "hantro_collage_w352h288.yuv", 352, 288, 5000, 1, 8, AOM_IMG_FMT_I420,
+    AOM_BITS_8, 0 },
+  { "hantro_collage_w352h288.yuv", 352, 288, 30, 1, 8, AOM_IMG_FMT_I420,
+    AOM_BITS_8, 0 },
+  { "rush_hour_444.y4m", 352, 288, 30, 1, 8, AOM_IMG_FMT_I444, AOM_BITS_8, 1 },
+#if CONFIG_HIGHBITDEPTH
+// Add list of profile 2/3 test videos here ...
+#endif  // CONFIG_HIGHBITDEPTH
+};
+
+const TestEncodeParam kEncodeVectors[] = {
+  { ::libaom_test::kOnePassGood, 2 }, { ::libaom_test::kOnePassGood, 5 },
+  { ::libaom_test::kTwoPassGood, 1 }, { ::libaom_test::kTwoPassGood, 2 },
+  { ::libaom_test::kTwoPassGood, 5 }, { ::libaom_test::kRealTime, 5 },
+};
+
+const int kMinArfVectors[] = {
+  // NOTE: 0 refers to the default built-in logic in:
+  //       av1_rc_get_default_min_gf_interval(...)
+  0, 4, 8, 12, 15
+};
+
+int is_extension_y4m(const char *filename) {
+  // A name with no '.', or whose only '.' is its first character, has no
+  // extension to compare.
+  const char *const ext = strrchr(filename, '.');
+  if (ext == NULL || ext == filename) return 0;
+  return strcmp(ext, ".y4m") == 0;
+}
+
+class ArfFreqTestLarge
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWith3Params<TestVideoParam,
+                                                 TestEncodeParam, int> {
+ protected:
+  ArfFreqTestLarge()
+      : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(1)),
+        test_encode_param_(GET_PARAM(2)), min_arf_requested_(GET_PARAM(3)) {}
+
+  virtual ~ArfFreqTestLarge() {}
+  // Lag 25 with VBR for the non-real-time modes; lag 0 with CBR for real time.
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(test_encode_param_.mode);
+    if (test_encode_param_.mode != ::libaom_test::kRealTime) {
+      cfg_.g_lag_in_frames = 25;
+      cfg_.rc_end_usage = AOM_VBR;
+    } else {
+      cfg_.g_lag_in_frames = 0;
+      cfg_.rc_end_usage = AOM_CBR;
+      cfg_.rc_buf_sz = 1000;
+      cfg_.rc_buf_initial_sz = 500;
+      cfg_.rc_buf_optimal_sz = 600;
+    }
+    dec_cfg_.threads = 4;
+  }
+  // Resets the ARF run statistics when a pass begins.
+  virtual void BeginPassHook(unsigned int) {
+    min_run_ = ARF_NOT_SEEN;
+    run_of_visible_frames_ = 0;
+  }
+  // Returns the frame count from the packet's superframe index, or 1 if none.
+  int GetNumFramesInPkt(const aom_codec_cx_pkt_t *pkt) {
+    const uint8_t *buffer = reinterpret_cast<uint8_t *>(pkt->data.frame.buf);
+    const uint8_t marker = buffer[pkt->data.frame.sz - 1];
+    const int mag = ((marker >> 3) & 3) + 1;
+    int frames = (marker & 0x7) + 1;
+    const unsigned int index_sz = 2 + mag * frames;
+    // Check for superframe or not.
+    // Assume superframe has only one visible frame, the rest being
+    // invisible. If superframe index is not found, then there is only
+    // one frame.
+    if (!((marker & 0xe0) == 0xc0 && pkt->data.frame.sz >= index_sz &&
+          buffer[pkt->data.frame.sz - index_sz] == marker)) {
+      frames = 1;
+    }
+    return frames;
+  }
+  // Tracks the shortest run of single-frame packets between 2-frame packets.
+  virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
+    if (pkt->kind != AOM_CODEC_CX_FRAME_PKT) return;
+    const int frames = GetNumFramesInPkt(pkt);
+    if (frames == 1) {
+      run_of_visible_frames_++;
+    } else if (frames == 2) {
+      if (min_run_ == ARF_NOT_SEEN) {
+        min_run_ = ARF_SEEN_ONCE;  // first 2-frame packet: no run measured yet
+      } else if (min_run_ == ARF_SEEN_ONCE ||
+                 run_of_visible_frames_ < min_run_) {
+        min_run_ = run_of_visible_frames_;
+      }
+      run_of_visible_frames_ = 1;
+    } else {
+      min_run_ = 0;  // more than two frames in one packet
+      run_of_visible_frames_ = 1;
+    }
+  }
+  // Applies the encoder controls once, when the source is at frame 0.
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 0) {
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, 4);
+      encoder->Control(AOME_SET_CPUUSED, test_encode_param_.cpu_used);
+      encoder->Control(AV1E_SET_MIN_GF_INTERVAL, min_arf_requested_);
+      if (test_encode_param_.mode != ::libaom_test::kRealTime) {
+        encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+        encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+        encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+      }
+    }
+  }
+
+  int GetMinVisibleRun() const { return min_run_; }
+  // A request of 0 means the encoder default for this size and frame rate.
+  int GetMinArfDistanceRequested() const {
+    if (min_arf_requested_)
+      return min_arf_requested_;
+    else
+      return av1_rc_get_default_min_gf_interval(
+          test_video_param_.width, test_video_param_.height,
+          (double)test_video_param_.framerate_num /
+              test_video_param_.framerate_den);
+  }
+
+  TestVideoParam test_video_param_;
+  TestEncodeParam test_encode_param_;
+
+ private:
+  int min_arf_requested_;      // test parameter 3; 0 asks for the default
+  int min_run_;                // shortest run so far, or an ARF_* sentinel
+  int run_of_visible_frames_;  // length of the current run
+};
+
+TEST_P(ArfFreqTestLarge, MinArfFreqTest) {
+  cfg_.rc_target_bitrate = kBitrate;
+  cfg_.g_error_resilient = 0;
+  cfg_.g_profile = test_video_param_.profile;
+  cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
+  cfg_.g_bit_depth = test_video_param_.bit_depth;
+  init_flags_ = AOM_CODEC_USE_PSNR;
+  if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
+  // Pick the video source class from the file extension.
+  testing::internal::scoped_ptr<libaom_test::VideoSource> video;
+  if (is_extension_y4m(test_video_param_.filename)) {
+    video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
+                                                kFrames));
+  } else {
+    video.reset(new libaom_test::YUVVideoSource(
+        test_video_param_.filename, test_video_param_.fmt,
+        test_video_param_.width, test_video_param_.height,
+        test_video_param_.framerate_num, test_video_param_.framerate_den, 0,
+        kFrames));
+  }
+  // A measured minimum run only exists once two 2-frame packets were seen.
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+  const int min_run = GetMinVisibleRun();
+  const int min_arf_dist_requested = GetMinArfDistanceRequested();
+  if (min_run != ARF_NOT_SEEN && min_run != ARF_SEEN_ONCE) {
+    const int min_arf_dist = min_run + 1;
+    EXPECT_GE(min_arf_dist, min_arf_dist_requested);
+  }
+}
+
+#if CONFIG_HIGHBITDEPTH || CONFIG_EXT_REFS
+#if CONFIG_AV1_ENCODER
+// TODO(angiebird): 25-29 fail in high bitdepth mode.
+// TODO(zoeliu): This ArfFreqTest does not work with BWDREF_FRAME, as
+// BWDREF_FRAME is also a non-show frame, and the minimum run between two
+// consecutive BWDREF_FRAME's may vary between 1 and any arbitrary positive
+// number as long as it does not exceed the gf_group interval.
+INSTANTIATE_TEST_CASE_P(
+    DISABLED_AV1, ArfFreqTestLarge,
+    ::testing::Combine(
+        ::testing::Values(
+            static_cast<const libaom_test::CodecFactory *>(&libaom_test::kAV1)),
+        ::testing::ValuesIn(kTestVectors), ::testing::ValuesIn(kEncodeVectors),
+        ::testing::ValuesIn(kMinArfVectors)));
+#endif  // CONFIG_AV1_ENCODER
+#else
+AV1_INSTANTIATE_TEST_CASE(ArfFreqTestLarge, ::testing::ValuesIn(kTestVectors),
+                          ::testing::ValuesIn(kEncodeVectors),
+                          ::testing::ValuesIn(kMinArfVectors));
+#endif  // CONFIG_HIGHBITDEPTH || CONFIG_EXT_REFS
+}  // namespace
diff --git a/third_party/aom/test/av1_convolve_optimz_test.cc b/third_party/aom/test/av1_convolve_optimz_test.cc
new file mode 100644
index 000000000..fd0f6dbce
--- /dev/null
+++ b/third_party/aom/test/av1_convolve_optimz_test.cc
@@ -0,0 +1,405 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./av1_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+namespace {
+
+using std::tr1::tuple;
+using libaom_test::ACMRandom;
+
+typedef void (*ConvInit)();
+typedef void (*conv_filter_t)(const uint8_t *, int, uint8_t *, int, int, int,
+                              const InterpFilterParams, int, int,
+                              ConvolveParams *);
+#if CONFIG_HIGHBITDEPTH
+typedef void (*hbd_conv_filter_t)(const uint16_t *, int, uint16_t *, int, int,
+                                  int, const InterpFilterParams, int, int, int,
+                                  int);
+#endif
+
+// Test parameter list:
+//  <conv_init_func, convolve_horiz_func, convolve_vert_func,
+//  <width, height>, interp_filter, subpel_q4, avg>
+typedef tuple<int, int> BlockDimension;
+typedef tuple<ConvInit, conv_filter_t, conv_filter_t, BlockDimension,
+              InterpFilter, int, int>
+    ConvParams;
+#if CONFIG_HIGHBITDEPTH
+// Test parameter list:
+//  <conv_init_func, convolve_horiz_func, convolve_vert_func,
+//  <width, height>, interp_filter, subpel_q4, avg, bit_depth>
+typedef tuple<ConvInit, hbd_conv_filter_t, hbd_conv_filter_t, BlockDimension,
+              InterpFilter, int, int, int>
+    HbdConvParams;
+#endif
+
+// Note:
+//  src_ and src_ref_ have special boundary requirement
+//  dst_ and dst_ref_ don't
+const size_t maxWidth = 256;
+const size_t maxHeight = 256;
+const size_t maxBlockSize = maxWidth * maxHeight;
+const int horizOffset = 32;
+const int vertiOffset = 32;
+const int stride = 128;
+const int x_step_q4 = 16;
+
+class AV1ConvolveOptimzTest : public ::testing::TestWithParam<ConvParams> {
+ public:
+  virtual ~AV1ConvolveOptimzTest() {}
+  virtual void SetUp() {
+    ConvInit conv_init = GET_PARAM(0);
+    conv_init();
+    conv_horiz_ = GET_PARAM(1);
+    conv_vert_ = GET_PARAM(2);
+    BlockDimension block = GET_PARAM(3);
+    width_ = std::tr1::get<0>(block);
+    height_ = std::tr1::get<1>(block);
+    filter_ = GET_PARAM(4);
+    subpel_ = GET_PARAM(5);
+    int ref = GET_PARAM(6);
+    const int plane = 0;
+    conv_params_ = get_conv_params(ref, plane);
+
+    alloc_ = new uint8_t[maxBlockSize * 4];
+    src_ = alloc_ + (vertiOffset * maxWidth);
+    src_ += horizOffset;
+    src_ref_ = src_ + maxBlockSize;
+
+    dst_ = alloc_ + 2 * maxBlockSize;
+    dst_ref_ = alloc_ + 3 * maxBlockSize;
+  }
+
+  virtual void TearDown() {
+    delete[] alloc_;
+    libaom_test::ClearSystemState();
+  }
+
+ protected:
+  void RunHorizFilterBitExactCheck();
+  void RunVertFilterBitExactCheck();
+
+ private:
+  void PrepFilterBuffer();
+  void DiffFilterBuffer();
+  conv_filter_t conv_horiz_;
+  conv_filter_t conv_vert_;
+  uint8_t *alloc_;
+  uint8_t *src_;
+  uint8_t *dst_;
+  uint8_t *src_ref_;
+  uint8_t *dst_ref_;
+  int width_;
+  int height_;
+  InterpFilter filter_;
+  int subpel_;
+  ConvolveParams conv_params_;
+};
+
+void AV1ConvolveOptimzTest::PrepFilterBuffer() {
+  int r, c;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+  memset(alloc_, 0, 4 * maxBlockSize * sizeof(alloc_[0]));
+
+  uint8_t *src_ptr = src_;
+  uint8_t *dst_ptr = dst_;
+  uint8_t *src_ref_ptr = src_ref_;
+  uint8_t *dst_ref_ptr = dst_ref_;
+
+  for (r = 0; r < height_; ++r) {
+    for (c = 0; c < width_; ++c) {
+      src_ptr[c] = rnd.Rand8();
+      src_ref_ptr[c] = src_ptr[c];
+      dst_ptr[c] = rnd.Rand8();
+      dst_ref_ptr[c] = dst_ptr[c];
+    }
+    src_ptr += stride;
+    src_ref_ptr += stride;
+    dst_ptr += stride;
+    dst_ref_ptr += stride;
+  }
+}
+
+void AV1ConvolveOptimzTest::DiffFilterBuffer() {
+  int r, c;
+  const uint8_t *dst_ptr = dst_;
+  const uint8_t *dst_ref_ptr = dst_ref_;
+  for (r = 0; r < height_; ++r) {
+    for (c = 0; c < width_; ++c) {
+      EXPECT_EQ((uint8_t)dst_ref_ptr[c], (uint8_t)dst_ptr[c])
+          << "Error at row: " << r << " col: " << c << " "
+          << "w = " << width_ << " "
+          << "h = " << height_ << " "
+          << "filter group index = " << filter_ << " "
+          << "filter index = " << subpel_;
+    }
+    dst_ptr += stride;
+    dst_ref_ptr += stride;
+  }
+}
+
+void AV1ConvolveOptimzTest::RunHorizFilterBitExactCheck() {
+  PrepFilterBuffer();
+
+  InterpFilterParams filter_params = av1_get_interp_filter_params(filter_);
+
+  av1_convolve_horiz_c(src_ref_, stride, dst_ref_, stride, width_, height_,
+                       filter_params, subpel_, x_step_q4, &conv_params_);
+
+  conv_horiz_(src_, stride, dst_, stride, width_, height_, filter_params,
+              subpel_, x_step_q4, &conv_params_);
+
+  DiffFilterBuffer();
+
+  // Note:
+  // Here we need to calculate a height that differs from the specified one
+  // and test again.
+  int intermediate_height =
+      (((height_ - 1) * 16 + subpel_) >> SUBPEL_BITS) + filter_params.taps;
+  PrepFilterBuffer();
+
+  av1_convolve_horiz_c(src_ref_, stride, dst_ref_, stride, width_,
+                       intermediate_height, filter_params, subpel_, x_step_q4,
+                       &conv_params_);
+
+  conv_horiz_(src_, stride, dst_, stride, width_, intermediate_height,
+              filter_params, subpel_, x_step_q4, &conv_params_);
+
+  DiffFilterBuffer();
+}
+
+void AV1ConvolveOptimzTest::RunVertFilterBitExactCheck() {
+  PrepFilterBuffer();
+
+  InterpFilterParams filter_params = av1_get_interp_filter_params(filter_);
+
+  av1_convolve_vert_c(src_ref_, stride, dst_ref_, stride, width_, height_,
+                      filter_params, subpel_, x_step_q4, &conv_params_);
+
+  conv_vert_(src_, stride, dst_, stride, width_, height_, filter_params,
+             subpel_, x_step_q4, &conv_params_);
+
+  DiffFilterBuffer();
+}
+
+TEST_P(AV1ConvolveOptimzTest, HorizBitExactCheck) {
+  RunHorizFilterBitExactCheck();
+}
+TEST_P(AV1ConvolveOptimzTest, VerticalBitExactCheck) {
+  RunVertFilterBitExactCheck();
+}
+
+using std::tr1::make_tuple;
+
+#if (HAVE_SSSE3 || HAVE_SSE4_1) && CONFIG_DUAL_FILTER
+const BlockDimension kBlockDim[] = {
+  make_tuple(2, 2),    make_tuple(2, 4),    make_tuple(4, 4),
+  make_tuple(4, 8),    make_tuple(8, 4),    make_tuple(8, 8),
+  make_tuple(8, 16),   make_tuple(16, 8),   make_tuple(16, 16),
+  make_tuple(16, 32),  make_tuple(32, 16),  make_tuple(32, 32),
+  make_tuple(32, 64),  make_tuple(64, 32),  make_tuple(64, 64),
+  make_tuple(64, 128), make_tuple(128, 64), make_tuple(128, 128),
+};
+
+// 10/12-tap filters
+const InterpFilter kFilter[] = { FILTER_REGULAR_UV, BILINEAR, MULTITAP_SHARP };
+
+const int kSubpelQ4[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+
+const int kAvg[] = { 0, 1 };
+#endif
+
+#if HAVE_SSSE3 && CONFIG_DUAL_FILTER
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, AV1ConvolveOptimzTest,
+    ::testing::Combine(::testing::Values(av1_lowbd_convolve_init_ssse3),
+                       ::testing::Values(av1_convolve_horiz_ssse3),
+                       ::testing::Values(av1_convolve_vert_ssse3),
+                       ::testing::ValuesIn(kBlockDim),
+                       ::testing::ValuesIn(kFilter),
+                       ::testing::ValuesIn(kSubpelQ4),
+                       ::testing::ValuesIn(kAvg)));
+#endif  // HAVE_SSSE3 && CONFIG_DUAL_FILTER
+
+#if CONFIG_HIGHBITDEPTH
+typedef ::testing::TestWithParam<HbdConvParams> TestWithHbdConvParams;
+class AV1HbdConvolveOptimzTest : public TestWithHbdConvParams {
+ public:
+  virtual ~AV1HbdConvolveOptimzTest() {}
+  virtual void SetUp() {
+    ConvInit conv_init = GET_PARAM(0);
+    conv_init();
+    conv_horiz_ = GET_PARAM(1);
+    conv_vert_ = GET_PARAM(2);
+    BlockDimension block = GET_PARAM(3);
+    width_ = std::tr1::get<0>(block);
+    height_ = std::tr1::get<1>(block);
+    filter_ = GET_PARAM(4);
+    subpel_ = GET_PARAM(5);
+    avg_ = GET_PARAM(6);
+    bit_depth_ = GET_PARAM(7);
+
+    alloc_ = new uint16_t[maxBlockSize * 4];
+    src_ = alloc_ + (vertiOffset * maxWidth);
+    src_ += horizOffset;
+    src_ref_ = src_ + maxBlockSize;
+
+    dst_ = alloc_ + 2 * maxBlockSize;
+    dst_ref_ = alloc_ + 3 * maxBlockSize;
+  }
+
+  virtual void TearDown() {
+    delete[] alloc_;
+    libaom_test::ClearSystemState();
+  }
+
+ protected:
+  void RunHorizFilterBitExactCheck();
+  void RunVertFilterBitExactCheck();
+
+ private:
+  void PrepFilterBuffer();
+  void DiffFilterBuffer();
+  hbd_conv_filter_t conv_horiz_;
+  hbd_conv_filter_t conv_vert_;
+  uint16_t *alloc_;
+  uint16_t *src_;
+  uint16_t *dst_;
+  uint16_t *src_ref_;
+  uint16_t *dst_ref_;
+  int width_;
+  int height_;
+  InterpFilter filter_;
+  int subpel_;
+  int avg_;
+  int bit_depth_;
+};
+
+void AV1HbdConvolveOptimzTest::PrepFilterBuffer() {
+  int r, c;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+  memset(alloc_, 0, 4 * maxBlockSize * sizeof(alloc_[0]));
+
+  uint16_t *src_ptr = src_;
+  uint16_t *dst_ptr = dst_;
+  uint16_t *dst_ref_ptr = dst_ref_;
+  uint16_t hbd_mask = (1 << bit_depth_) - 1;
+
+  for (r = 0; r < height_; ++r) {
+    for (c = 0; c < width_; ++c) {
+      src_ptr[c] = rnd.Rand16() & hbd_mask;
+      dst_ptr[c] = rnd.Rand16() & hbd_mask;
+      dst_ref_ptr[c] = dst_ptr[c];
+    }
+    src_ptr += stride;
+    dst_ptr += stride;
+    dst_ref_ptr += stride;
+  }
+}
+
+void AV1HbdConvolveOptimzTest::DiffFilterBuffer() {
+  int r, c;
+  const uint16_t *dst_ptr = dst_;
+  const uint16_t *dst_ref_ptr = dst_ref_;
+  for (r = 0; r < height_; ++r) {
+    for (c = 0; c < width_; ++c) {
+      EXPECT_EQ((uint16_t)dst_ref_ptr[c], (uint16_t)dst_ptr[c])
+          << "Error at row: " << r << " col: " << c << " "
+          << "w = " << width_ << " "
+          << "h = " << height_ << " "
+          << "filter group index = " << filter_ << " "
+          << "filter index = " << subpel_ << " "
+          << "bit depth = " << bit_depth_;
+    }
+    dst_ptr += stride;
+    dst_ref_ptr += stride;
+  }
+}
+
+void AV1HbdConvolveOptimzTest::RunHorizFilterBitExactCheck() {
+  PrepFilterBuffer();
+
+  InterpFilterParams filter_params = av1_get_interp_filter_params(filter_);
+
+  av1_highbd_convolve_horiz_c(src_, stride, dst_ref_, stride, width_, height_,
+                              filter_params, subpel_, x_step_q4, avg_,
+                              bit_depth_);
+
+  conv_horiz_(src_, stride, dst_, stride, width_, height_, filter_params,
+              subpel_, x_step_q4, avg_, bit_depth_);
+
+  DiffFilterBuffer();
+
+  // Note:
+  // Here we need to calculate a height that differs from the specified one
+  // and test again.
+  int intermediate_height =
+      (((height_ - 1) * 16 + subpel_) >> SUBPEL_BITS) + filter_params.taps;
+  PrepFilterBuffer();
+
+  av1_highbd_convolve_horiz_c(src_, stride, dst_ref_, stride, width_,
+                              intermediate_height, filter_params, subpel_,
+                              x_step_q4, avg_, bit_depth_);
+
+  conv_horiz_(src_, stride, dst_, stride, width_, intermediate_height,
+              filter_params, subpel_, x_step_q4, avg_, bit_depth_);
+
+  DiffFilterBuffer();
+}
+
+void AV1HbdConvolveOptimzTest::RunVertFilterBitExactCheck() {
+  PrepFilterBuffer();
+
+  InterpFilterParams filter_params = av1_get_interp_filter_params(filter_);
+
+  av1_highbd_convolve_vert_c(src_, stride, dst_ref_, stride, width_, height_,
+                             filter_params, subpel_, x_step_q4, avg_,
+                             bit_depth_);
+
+  conv_vert_(src_, stride, dst_, stride, width_, height_, filter_params,
+             subpel_, x_step_q4, avg_, bit_depth_);
+
+  DiffFilterBuffer();
+}
+
+TEST_P(AV1HbdConvolveOptimzTest, HorizBitExactCheck) {
+  RunHorizFilterBitExactCheck();
+}
+TEST_P(AV1HbdConvolveOptimzTest, VertBitExactCheck) {
+  RunVertFilterBitExactCheck();
+}
+
+#if HAVE_SSE4_1 && CONFIG_DUAL_FILTER
+
+const int kBitdepth[] = { 10, 12 };
+
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, AV1HbdConvolveOptimzTest,
+    ::testing::Combine(::testing::Values(av1_highbd_convolve_init_sse4_1),
+                       ::testing::Values(av1_highbd_convolve_horiz_sse4_1),
+                       ::testing::Values(av1_highbd_convolve_vert_sse4_1),
+                       ::testing::ValuesIn(kBlockDim),
+                       ::testing::ValuesIn(kFilter),
+                       ::testing::ValuesIn(kSubpelQ4),
+                       ::testing::ValuesIn(kAvg),
+                       ::testing::ValuesIn(kBitdepth)));
+#endif  // HAVE_SSE4_1 && CONFIG_DUAL_FILTER
+#endif  // CONFIG_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/av1_convolve_test.cc b/third_party/aom/test/av1_convolve_test.cc
new file mode 100644
index 000000000..02ac8e7bb
--- /dev/null
+++ b/third_party/aom/test/av1_convolve_test.cc
@@ -0,0 +1,522 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <algorithm>
+#include <vector>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./av1_rtcd.h"
+#include "./aom_dsp_rtcd.h"
+#include "aom_dsp/aom_dsp_common.h"
+#include "aom_ports/mem.h"
+#include "av1/common/filter.h"
+#include "av1/common/convolve.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+using std::tr1::tuple;
+static void filter_block1d_horiz_c(const uint8_t *src_ptr, int src_stride,
+                                   const int16_t *filter, int tap,
+                                   uint8_t *dst_ptr, int dst_stride, int w,
+                                   int h) {
+  src_ptr -= tap / 2 - 1;
+  for (int r = 0; r < h; ++r) {
+    for (int c = 0; c < w; ++c) {
+      int sum = 0;
+      for (int i = 0; i < tap; ++i) {
+        sum += src_ptr[c + i] * filter[i];
+      }
+      dst_ptr[c] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
+    }
+    src_ptr += src_stride;
+    dst_ptr += dst_stride;
+  }
+}
+
+static void filter_block1d_vert_c(const uint8_t *src_ptr, int src_stride,
+                                  const int16_t *filter, int tap,
+                                  uint8_t *dst_ptr, int dst_stride, int w,
+                                  int h) {
+  src_ptr -= (tap / 2 - 1) * src_stride;
+  for (int r = 0; r < h; ++r) {
+    for (int c = 0; c < w; ++c) {
+      int sum = 0;
+      for (int i = 0; i < tap; ++i) {
+        sum += src_ptr[c + i * src_stride] * filter[i];
+      }
+      dst_ptr[c] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
+    }
+    src_ptr += src_stride;
+    dst_ptr += dst_stride;
+  }
+}
+
+static int match(const uint8_t *out, int out_stride, const uint8_t *ref_out,
+                 int ref_out_stride, int w, int h) {
+  for (int r = 0; r < h; ++r) {
+    for (int c = 0; c < w; ++c) {
+      if (out[r * out_stride + c] != ref_out[r * ref_out_stride + c]) return 0;
+    }
+  }
+  return 1;
+}
+
+typedef void (*ConvolveFunc)(const uint8_t *src, int src_stride, uint8_t *dst,
+                             int dst_stride, int w, int h,
+                             const InterpFilterParams filter_params,
+                             const int subpel_q4, int step_q4,
+                             ConvolveParams *conv_params);
+
+struct ConvolveFunctions {
+  ConvolveFunctions(ConvolveFunc hf, ConvolveFunc vf) : hf_(hf), vf_(vf) {}
+  ConvolveFunc hf_;
+  ConvolveFunc vf_;
+};
+
+typedef tuple<ConvolveFunctions *, InterpFilter /*filter_x*/,
+              InterpFilter /*filter_y*/>
+    ConvolveParam;
+
+// Fixture parameterized by <convolve function pair, filter_x, filter_y>.
+class Av1ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
+ public:
+  virtual void SetUp() {
+    // rnd_(seed) on a live object invokes operator(); Reset() reseeds it.
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    cfs_ = GET_PARAM(0);
+    interp_filter_ls_[0] = GET_PARAM(2);  // filter_y
+    interp_filter_ls_[2] = interp_filter_ls_[0];
+    interp_filter_ls_[1] = GET_PARAM(1);  // filter_x
+    interp_filter_ls_[3] = interp_filter_ls_[1];
+  }
+  virtual void TearDown() {
+    // Release every buffer handed out by add_input() / add_output().
+    for (size_t i = 0; i < buf_ls_.size(); ++i) aom_free(buf_ls_[i]);
+    buf_ls_.clear();
+  }
+  virtual uint8_t *add_input(int w, int h, int *stride) {
+    uint8_t *buf =
+        reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, kBufferSize));
+    buf_ls_.push_back(buf);
+    *stride = w + MAX_FILTER_TAP - 1;  // row pitch includes the filter border
+    int offset = MAX_FILTER_TAP / 2 - 1;
+    for (int r = 0; r < h + MAX_FILTER_TAP - 1; ++r) {
+      for (int c = 0; c < w + MAX_FILTER_TAP - 1; ++c) {
+        buf[r * (*stride) + c] = rnd_.Rand8();
+      }
+    }
+    return buf + offset * (*stride) + offset;  // skip the leading border
+  }
+  virtual uint8_t *add_output(int w, int /*h*/, int *stride) {
+    uint8_t *buf =
+        reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, kBufferSize));
+    buf_ls_.push_back(buf);
+    *stride = w;  // output rows are tightly packed
+    return buf;
+  }
+  virtual void random_init_buf(uint8_t *buf, int w, int h, int stride) {
+    for (int r = 0; r < h; ++r) {
+      for (int c = 0; c < w; ++c) {
+        buf[r * stride + c] = rnd_.Rand8();
+      }
+    }
+  }
+
+ protected:
+  static const int kDataAlignment = 16;
+  static const int kOuterBlockSize = MAX_SB_SIZE + MAX_FILTER_TAP - 1;
+  static const int kBufferSize = kOuterBlockSize * kOuterBlockSize;
+  std::vector<uint8_t *> buf_ls_;  // every allocation, freed in TearDown()
+  InterpFilter interp_filter_ls_[4];
+  ConvolveFunctions *cfs_;
+  ACMRandom rnd_;
+};
+
+int bsize_ls[] = { 1, 2, 4, 8, 16, 32, 64, 3, 7, 15, 31, 63 };
+int bsize_num = sizeof(bsize_ls) / sizeof(bsize_ls[0]);
+
+// Checks vf_ against filter_block1d_vert_c, with and without averaging.
+TEST_P(Av1ConvolveTest, av1_convolve_vert) {
+  const int y_step_q4 = 16;
+  ConvolveParams conv_params = get_conv_params(0, 0);
+  int in_stride, out_stride, ref_out_stride, avg_out_stride, ref_avg_out_stride;
+  uint8_t *in = add_input(MAX_SB_SIZE, MAX_SB_SIZE, &in_stride);
+  uint8_t *out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &out_stride);
+  uint8_t *ref_out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &ref_out_stride);
+  uint8_t *avg_out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &avg_out_stride);
+  uint8_t *ref_avg_out =
+      add_output(MAX_SB_SIZE, MAX_SB_SIZE, &ref_avg_out_stride);
+  for (int hb_idx = 0; hb_idx < bsize_num; ++hb_idx) {
+    for (int vb_idx = 0; vb_idx < bsize_num; ++vb_idx) {
+      int w = bsize_ls[hb_idx];
+      int h = bsize_ls[vb_idx];
+      for (int subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; ++subpel_y_q4) {
+        InterpFilter filter_y = interp_filter_ls_[0];
+        InterpFilterParams param_vert = av1_get_interp_filter_params(filter_y);
+        const int16_t *filter_vert =
+            av1_get_interp_filter_subpel_kernel(param_vert, subpel_y_q4);
+
+        filter_block1d_vert_c(in, in_stride, filter_vert, param_vert.taps,
+                              ref_out, ref_out_stride, w, h);
+
+        conv_params.ref = 0;  // dst should equal the reference filter output
+        cfs_->vf_(in, in_stride, out, out_stride, w, h, param_vert, subpel_y_q4,
+                  y_step_q4, &conv_params);
+        EXPECT_EQ(match(out, out_stride, ref_out, ref_out_stride, w, h), 1)
+            << " hb_idx " << hb_idx << " vb_idx " << vb_idx << " filter_y "
+            << filter_y << " subpel_y_q4 " << subpel_y_q4;
+
+        random_init_buf(avg_out, w, h, avg_out_stride);
+        for (int r = 0; r < h; ++r) {
+          for (int c = 0; c < w; ++c) {
+            ref_avg_out[r * ref_avg_out_stride + c] = ROUND_POWER_OF_TWO(
+                avg_out[r * avg_out_stride + c] + out[r * out_stride + c], 1);
+          }
+        }
+        conv_params.ref = 1;  // dst should become avg(old dst, filtered)
+        cfs_->vf_(in, in_stride, avg_out, avg_out_stride, w, h, param_vert,
+                  subpel_y_q4, y_step_q4, &conv_params);
+        EXPECT_EQ(match(avg_out, avg_out_stride, ref_avg_out,
+                        ref_avg_out_stride, w, h),
+                  1)
+            << " hb_idx " << hb_idx << " vb_idx " << vb_idx << " filter_y "
+            << filter_y << " subpel_y_q4 " << subpel_y_q4;
+      }
+    }
+  }
+}
+
+// Checks hf_ against filter_block1d_horiz_c, with and without averaging.
+TEST_P(Av1ConvolveTest, av1_convolve_horiz) {
+  const int x_step_q4 = 16;
+  ConvolveParams conv_params = get_conv_params(0, 0);
+  int in_stride, out_stride, ref_out_stride, avg_out_stride, ref_avg_out_stride;
+  uint8_t *in = add_input(MAX_SB_SIZE, MAX_SB_SIZE, &in_stride);
+  uint8_t *out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &out_stride);
+  uint8_t *ref_out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &ref_out_stride);
+  uint8_t *avg_out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &avg_out_stride);
+  uint8_t *ref_avg_out =
+      add_output(MAX_SB_SIZE, MAX_SB_SIZE, &ref_avg_out_stride);
+  for (int hb_idx = 0; hb_idx < bsize_num; ++hb_idx) {
+    for (int vb_idx = 0; vb_idx < bsize_num; ++vb_idx) {
+      int w = bsize_ls[hb_idx];
+      int h = bsize_ls[vb_idx];
+      for (int subpel_x_q4 = 0; subpel_x_q4 < SUBPEL_SHIFTS; ++subpel_x_q4) {
+        InterpFilter filter_x = interp_filter_ls_[1];
+        InterpFilterParams param_horiz = av1_get_interp_filter_params(filter_x);
+        const int16_t *filter_horiz =
+            av1_get_interp_filter_subpel_kernel(param_horiz, subpel_x_q4);
+
+        filter_block1d_horiz_c(in, in_stride, filter_horiz, param_horiz.taps,
+                               ref_out, ref_out_stride, w, h);
+
+        conv_params.ref = 0;  // dst should equal the reference filter output
+        cfs_->hf_(in, in_stride, out, out_stride, w, h, param_horiz,
+                  subpel_x_q4, x_step_q4, &conv_params);
+        EXPECT_EQ(match(out, out_stride, ref_out, ref_out_stride, w, h), 1)
+            << " hb_idx " << hb_idx << " vb_idx " << vb_idx << " filter_x "
+            << filter_x << " subpel_x_q4 " << subpel_x_q4;
+
+        random_init_buf(avg_out, w, h, avg_out_stride);
+        for (int r = 0; r < h; ++r) {
+          for (int c = 0; c < w; ++c) {
+            ref_avg_out[r * ref_avg_out_stride + c] = ROUND_POWER_OF_TWO(
+                avg_out[r * avg_out_stride + c] + out[r * out_stride + c], 1);
+          }
+        }
+        conv_params.ref = 1;  // dst should become avg(old dst, filtered)
+        cfs_->hf_(in, in_stride, avg_out, avg_out_stride, w, h, param_horiz,
+                  subpel_x_q4, x_step_q4, &conv_params);
+        EXPECT_EQ(match(avg_out, avg_out_stride, ref_avg_out,
+                        ref_avg_out_stride, w, h),
+                  1)
+            << " hb_idx " << hb_idx << " vb_idx " << vb_idx << " filter_x "
+            << filter_x << " subpel_x_q4 " << subpel_x_q4;
+      }
+    }
+  }
+}
+
+ConvolveFunctions convolve_functions_c(av1_convolve_horiz_c,
+                                       av1_convolve_vert_c);
+
+InterpFilter filter_ls[] = { EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH,
+                             MULTITAP_SHARP };
+
+INSTANTIATE_TEST_CASE_P(
+    C, Av1ConvolveTest,
+    ::testing::Combine(::testing::Values(&convolve_functions_c),
+                       ::testing::ValuesIn(filter_ls),
+                       ::testing::ValuesIn(filter_ls)));
+
+#if CONFIG_HIGHBITDEPTH
+TEST(AV1ConvolveTest, av1_highbd_convolve) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+#if CONFIG_DUAL_FILTER
+  InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
+                                    EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
+  InterpFilterParams filter_params =
+      av1_get_interp_filter_params(interp_filter[0]);
+#else
+  InterpFilter interp_filter = EIGHTTAP_REGULAR;
+  InterpFilterParams filter_params =
+      av1_get_interp_filter_params(interp_filter);
+#endif
+  int filter_size = filter_params.taps;
+  int filter_center = filter_size / 2 - 1;
+  uint16_t src[12 * 12];
+  int src_stride = filter_size;
+  uint16_t dst[1] = { 0 };
+  int dst_stride = 1;
+  int x_step_q4 = 16;
+  int y_step_q4 = 16;
+  int avg = 0;
+  int bd = 10;
+  int w = 1;
+  int h = 1;
+
+  int subpel_x_q4;
+  int subpel_y_q4;
+
+  for (int i = 0; i < filter_size * filter_size; i++) {
+    src[i] = rnd.Rand16() % (1 << bd);
+  }
+
+  for (subpel_x_q4 = 0; subpel_x_q4 < SUBPEL_SHIFTS; subpel_x_q4++) {
+    for (subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; subpel_y_q4++) {
+      av1_highbd_convolve(
+          CONVERT_TO_BYTEPTR(src + src_stride * filter_center + filter_center),
+          src_stride, CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, interp_filter,
+          subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
+
+      const int16_t *x_filter =
+          av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
+      const int16_t *y_filter =
+          av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
+
+      int temp[12];
+      int dst_ref = 0;
+      for (int r = 0; r < filter_size; r++) {
+        temp[r] = 0;
+        for (int c = 0; c < filter_size; c++) {
+          temp[r] += x_filter[c] * src[r * filter_size + c];
+        }
+        temp[r] =
+            clip_pixel_highbd(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS), bd);
+        dst_ref += temp[r] * y_filter[r];
+      }
+      dst_ref = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS), bd);
+      EXPECT_EQ(dst[0], dst_ref);
+    }
+  }
+}
+
+TEST(AV1ConvolveTest, av1_highbd_convolve_avg) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+#if CONFIG_DUAL_FILTER
+  InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
+                                    EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
+  InterpFilterParams filter_params =
+      av1_get_interp_filter_params(interp_filter[0]);
+#else
+  InterpFilter interp_filter = EIGHTTAP_REGULAR;
+  InterpFilterParams filter_params =
+      av1_get_interp_filter_params(interp_filter);
+#endif
+  int filter_size = filter_params.taps;
+  int filter_center = filter_size / 2 - 1;
+  uint16_t src0[12 * 12];
+  uint16_t src1[12 * 12];
+  int src_stride = filter_size;
+  uint16_t dst0[1] = { 0 };
+  uint16_t dst1[1] = { 0 };
+  uint16_t dst[1] = { 0 };
+  int dst_stride = 1;
+  int x_step_q4 = 16;
+  int y_step_q4 = 16;
+  int avg = 0;
+  int bd = 10;
+
+  int w = 1;
+  int h = 1;
+
+  int subpel_x_q4;
+  int subpel_y_q4;
+
+  for (int i = 0; i < filter_size * filter_size; i++) {
+    src0[i] = rnd.Rand16() % (1 << bd);
+    src1[i] = rnd.Rand16() % (1 << bd);
+  }
+
+  for (subpel_x_q4 = 0; subpel_x_q4 < SUBPEL_SHIFTS; subpel_x_q4++) {
+    for (subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; subpel_y_q4++) {
+      int offset = filter_size * filter_center + filter_center;
+
+      avg = 0;
+      av1_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
+                          CONVERT_TO_BYTEPTR(dst0), dst_stride, w, h,
+                          interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
+                          y_step_q4, avg, bd);
+      avg = 0;
+      av1_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
+                          CONVERT_TO_BYTEPTR(dst1), dst_stride, w, h,
+                          interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
+                          y_step_q4, avg, bd);
+
+      avg = 0;
+      av1_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
+                          CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
+                          interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
+                          y_step_q4, avg, bd);
+      avg = 1;
+      av1_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
+                          CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
+                          interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
+                          y_step_q4, avg, bd);
+
+      EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
+    }
+  }
+}
+#endif  // CONFIG_HIGHBITDEPTH
+
+#define CONVOLVE_SPEED_TEST 0
+#if CONVOLVE_SPEED_TEST
+#define highbd_convolve_speed(func, block_size, frame_size)                  \
+  TEST(AV1ConvolveTest, func##_speed_##block_size##_##frame_size) {          \
+    ACMRandom rnd(ACMRandom::DeterministicSeed());                           \
+    InterpFilter interp_filter = EIGHTTAP;                                   \
+    InterpFilterParams filter_params =                                       \
+        av1_get_interp_filter_params(interp_filter);                         \
+    int filter_size = filter_params.tap;                                     \
+    int filter_center = filter_size / 2 - 1;                                 \
+    DECLARE_ALIGNED(16, uint16_t,                                            \
+                    src[(frame_size + 7) * (frame_size + 7)]) = { 0 };       \
+    int src_stride = frame_size + 7;                                         \
+    DECLARE_ALIGNED(16, uint16_t, dst[frame_size * frame_size]) = { 0 };     \
+    int dst_stride = frame_size;                                             \
+    int x_step_q4 = 16;                                                      \
+    int y_step_q4 = 16;                                                      \
+    int subpel_x_q4 = 8;                                                     \
+    int subpel_y_q4 = 6;                                                     \
+    int bd = 10;                                                             \
+                                                                             \
+    int w = block_size;                                                      \
+    int h = block_size;                                                      \
+                                                                             \
+    const int16_t *filter_x =                                                \
+        av1_get_interp_filter_kernel(filter_params, subpel_x_q4);            \
+    const int16_t *filter_y =                                                \
+        av1_get_interp_filter_kernel(filter_params, subpel_y_q4);            \
+                                                                             \
+    for (int i = 0; i < src_stride * src_stride; i++) {                      \
+      src[i] = rnd.Rand16() % (1 << bd);                                     \
+    }                                                                        \
+                                                                             \
+    int offset = filter_center * src_stride + filter_center;                 \
+    int row_offset = 0;                                                      \
+    int col_offset = 0;                                                      \
+    for (int i = 0; i < 100000; i++) {                                       \
+      int src_total_offset = offset + col_offset * src_stride + row_offset;  \
+      int dst_total_offset = col_offset * dst_stride + row_offset;           \
+      func(CONVERT_TO_BYTEPTR(src + src_total_offset), src_stride,           \
+           CONVERT_TO_BYTEPTR(dst + dst_total_offset), dst_stride, filter_x, \
+           x_step_q4, filter_y, y_step_q4, w, h, bd);                        \
+      if (offset + w + w < frame_size) {                                     \
+        row_offset += w;                                                     \
+      } else {                                                               \
+        row_offset = 0;                                                      \
+        col_offset += h;                                                     \
+      }                                                                      \
+      if (col_offset + h >= frame_size) {                                    \
+        col_offset = 0;                                                      \
+      }                                                                      \
+    }                                                                        \
+  }
+
+// Low-bitdepth (bd = 8) convolve speed test. Every call targets the block at
+// (row_offset, col_offset), as highbd_convolve_speed does, so both timings
+// share one access pattern. NOTE(review): assuming tap == 8, "offset + w + w"
+// is never below frame_size, so row_offset stays 0; row_offset meant? Confirm.
+#define lowbd_convolve_speed(func, block_size, frame_size)                  \
+  TEST(AV1ConvolveTest, func##_speed_l_##block_size##_##frame_size) {       \
+    ACMRandom rnd(ACMRandom::DeterministicSeed());                          \
+    InterpFilterParams filter_params =                                      \
+        av1_get_interp_filter_params(EIGHTTAP);                             \
+    int filter_center = filter_params.tap / 2 - 1;                          \
+    DECLARE_ALIGNED(16, uint8_t, src[(frame_size + 7) * (frame_size + 7)]); \
+    int src_stride = frame_size + 7;                                        \
+    DECLARE_ALIGNED(16, uint8_t, dst[frame_size * frame_size]);             \
+    int dst_stride = frame_size;                                            \
+    int x_step_q4 = 16;                                                     \
+    int y_step_q4 = 16;                                                     \
+    int subpel_x_q4 = 8;                                                    \
+    int subpel_y_q4 = 6;                                                    \
+    int bd = 8;                                                             \
+    int w = block_size;                                                     \
+    int h = block_size;                                                     \
+                                                                            \
+    const int16_t *filter_x =                                               \
+        av1_get_interp_filter_kernel(filter_params, subpel_x_q4);           \
+    const int16_t *filter_y =                                               \
+        av1_get_interp_filter_kernel(filter_params, subpel_y_q4);           \
+                                                                            \
+    for (int i = 0; i < src_stride * src_stride; i++) {                     \
+      src[i] = rnd.Rand16() % (1 << bd);                                    \
+    }                                                                       \
+    int offset = filter_center * src_stride + filter_center;                \
+    int row_offset = 0;                                                     \
+    int col_offset = 0;                                                     \
+    for (int i = 0; i < 100000; i++) {                                      \
+      int src_total_offset = offset + col_offset * src_stride + row_offset; \
+      int dst_total_offset = col_offset * dst_stride + row_offset;          \
+      func(src + src_total_offset, src_stride, dst + dst_total_offset,      \
+           dst_stride, filter_x, x_step_q4, filter_y, y_step_q4, w, h);     \
+      if (offset + w + w < frame_size) {                                    \
+        row_offset += w;                                                    \
+      } else {                                                              \
+        row_offset = 0;                                                     \
+        col_offset += h;                                                    \
+      }                                                                     \
+      if (col_offset + h >= frame_size) col_offset = 0;                     \
+    }                                                                       \
+  }
+
+// This experiment shows that when the frame size is 64x64, the speeds of
+// aom_highbd_convolve8_sse2 and aom_convolve8_sse2 are similar.
+// However, when the frame size becomes 1024x1024,
+// aom_highbd_convolve8_sse2 is around 50% slower than aom_convolve8_sse2.
+// We think the bottleneck is memory I/O.
+highbd_convolve_speed(aom_highbd_convolve8_sse2, 8, 64);
+highbd_convolve_speed(aom_highbd_convolve8_sse2, 16, 64);
+highbd_convolve_speed(aom_highbd_convolve8_sse2, 32, 64);
+highbd_convolve_speed(aom_highbd_convolve8_sse2, 64, 64);
+
+lowbd_convolve_speed(aom_convolve8_sse2, 8, 64);
+lowbd_convolve_speed(aom_convolve8_sse2, 16, 64);
+lowbd_convolve_speed(aom_convolve8_sse2, 32, 64);
+lowbd_convolve_speed(aom_convolve8_sse2, 64, 64);
+
+highbd_convolve_speed(aom_highbd_convolve8_sse2, 8, 1024);
+highbd_convolve_speed(aom_highbd_convolve8_sse2, 16, 1024);
+highbd_convolve_speed(aom_highbd_convolve8_sse2, 32, 1024);
+highbd_convolve_speed(aom_highbd_convolve8_sse2, 64, 1024);
+
+lowbd_convolve_speed(aom_convolve8_sse2, 8, 1024);
+lowbd_convolve_speed(aom_convolve8_sse2, 16, 1024);
+lowbd_convolve_speed(aom_convolve8_sse2, 32, 1024);
+lowbd_convolve_speed(aom_convolve8_sse2, 64, 1024);
+#endif  // CONVOLVE_SPEED_TEST
+}  // namespace
diff --git a/third_party/aom/test/av1_dct_test.cc b/third_party/aom/test/av1_dct_test.cc
new file mode 100644
index 000000000..691cc8b79
--- /dev/null
+++ b/third_party/aom/test/av1_dct_test.cc
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <new>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+#include "./aom_config.h"
+#include "aom_ports/msvc.h"
+
+#undef CONFIG_COEFFICIENT_RANGE_CHECKING
+#define CONFIG_COEFFICIENT_RANGE_CHECKING 1
+#define AV1_DCT_GTEST
+#include "av1/encoder/dct.c"
+
+using libaom_test::ACMRandom;
+
+namespace {
+// Double-precision reference DCT; only the DC term is scaled by 1/sqrt(2).
+void reference_dct_1d(const double *in, double *out, int size) {
+  const double kInvSqrt2 = 0.707106781186547524400844362104;
+  for (int freq = 0; freq < size; ++freq) {
+    out[freq] = 0;
+    for (int pos = 0; pos < size; ++pos)
+      out[freq] += in[pos] * cos(PI * (2 * pos + 1) * freq / (2 * size));
+    if (freq == 0) out[freq] *= kInvSqrt2;
+  }
+}
+
+typedef void (*FdctFuncRef)(const double *in, double *out, int size);
+typedef void (*IdctFuncRef)(const double *in, double *out, int size);
+typedef void (*FdctFunc)(const tran_low_t *in, tran_low_t *out);
+typedef void (*IdctFunc)(const tran_low_t *in, tran_low_t *out);
+
+// Base fixture logic: checks an integer forward transform against a double
+// precision reference, element by element, within max_error_.
+class TransTestBase {
+ public:
+  virtual ~TransTestBase() {}
+
+ protected:
+  // Feeds 5000 blocks from a deterministically seeded RNG to both transforms.
+  void RunFwdAccuracyCheck() {
+    const int kNumBlocks = 5000;
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    tran_low_t *input = new tran_low_t[txfm_size_];
+    tran_low_t *output = new tran_low_t[txfm_size_];
+    double *ref_input = new double[txfm_size_];
+    double *ref_output = new double[txfm_size_];
+    for (int block = 0; block < kNumBlocks; ++block) {
+      for (int ni = 0; ni < txfm_size_; ++ni) {
+        input[ni] = rnd.Rand8() - rnd.Rand8();
+        ref_input[ni] = static_cast<double>(input[ni]);
+      }
+      fwd_txfm_(input, output);
+      fwd_txfm_ref_(ref_input, ref_output, txfm_size_);
+      for (int ni = 0; ni < txfm_size_; ++ni) {
+        EXPECT_LE(
+            abs(output[ni] - static_cast<tran_low_t>(round(ref_output[ni]))),
+            max_error_);
+      }
+    }
+
+    delete[] ref_output;
+    delete[] ref_input;
+    delete[] output;
+    delete[] input;
+  }
+
+  double max_error_;          // largest tolerated absolute difference
+  int txfm_size_;             // number of samples in the 1-D transform
+  FdctFunc fwd_txfm_;         // transform under test
+  FdctFuncRef fwd_txfm_ref_;  // double-precision reference
+};
+
+typedef std::tr1::tuple<FdctFunc, FdctFuncRef, int, int> FdctParam;
+class AV1FwdTxfm : public TransTestBase,
+                   public ::testing::TestWithParam<FdctParam> {
+ public:
+  virtual void SetUp() {  // unpacks the FdctParam tuple into the base fields
+    fwd_txfm_ = GET_PARAM(0);      // transform under test
+    fwd_txfm_ref_ = GET_PARAM(1);  // reference transform
+    txfm_size_ = GET_PARAM(2);     // transform length
+    max_error_ = GET_PARAM(3);     // allowed absolute error (int -> double)
+  }
+  virtual void TearDown() {}
+};
+
+TEST_P(AV1FwdTxfm, RunFwdAccuracyCheck) { RunFwdAccuracyCheck(); }
+
+INSTANTIATE_TEST_CASE_P(
+    C, AV1FwdTxfm,
+    ::testing::Values(FdctParam(&fdct4, &reference_dct_1d, 4, 1),
+                      FdctParam(&fdct8, &reference_dct_1d, 8, 1),
+                      FdctParam(&fdct16, &reference_dct_1d, 16, 2),
+                      FdctParam(&fdct32, &reference_dct_1d, 32, 3)));
+}  // namespace
diff --git a/third_party/aom/test/av1_ext_tile_test.cc b/third_party/aom/test/av1_ext_tile_test.cc
new file mode 100644
index 000000000..f96447965
--- /dev/null
+++ b/third_party/aom/test/av1_ext_tile_test.cc
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <assert.h>
+#include <string>
+#include <vector>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+
+namespace {
+// The number of frames to be encoded/decoded
+const int kLimit = 8;
+// Skip decoding one frame (pts == kSkip) to check frame decoding independence.
+const int kSkip = 5;
+const int kTileSize = 1;
+const int kTIleSizeInPixels = (kTileSize << 6);
+// Fake width and height so that they can be multiples of the tile size.
+const int kImgWidth = 704;
+const int kImgHeight = 576;
+
+// This test tests "tile_encoding_mode = TILE_VR" case. The TILE_NORMAL case is
+// tested by the tile_independence test.
+// Fixture for the TILE_VR tests. Each packet is also decoded by a private
+// decoder (the last frame one tile at a time); those MD5s go to tile_md5_ and
+// are later compared with md5_, which DecompressedFrameHook() fills.
+class AV1ExtTileTest
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int> {
+ protected:
+  AV1ExtTileTest()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        set_cpu_used_(GET_PARAM(2)) {
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
+    cfg.w = kImgWidth;
+    cfg.h = kImgHeight;
+
+    decoder_ = codec_->CreateDecoder(cfg, 0);
+    decoder_->Control(AV1_SET_DECODE_TILE_ROW, -1);
+    decoder_->Control(AV1_SET_DECODE_TILE_COL, -1);
+
+    // Allocate buffer to store tile image.
+    aom_img_alloc(&tile_img_, AOM_IMG_FMT_I420, kImgWidth, kImgHeight, 32);
+  }
+
+  virtual ~AV1ExtTileTest() {
+    aom_img_free(&tile_img_);
+    delete decoder_;
+  }
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(encoding_mode_);
+
+    cfg_.g_lag_in_frames = 0;
+    cfg_.rc_end_usage = AOM_VBR;
+    cfg_.g_error_resilient = 1;
+
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_min_quantizer = 0;
+  }
+
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 0) {
+      // Encode setting
+      encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0);
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+
+      // The tile size is 64x64.
+      encoder->Control(AV1E_SET_TILE_COLUMNS, kTileSize);
+      encoder->Control(AV1E_SET_TILE_ROWS, kTileSize);
+      encoder->Control(AV1E_SET_TILE_ENCODING_MODE, 1);  // TILE_VR
+#if CONFIG_EXT_PARTITION
+      // Always use 64x64 max partition.
+      encoder->Control(AV1E_SET_SUPERBLOCK_SIZE, AOM_SUPERBLOCK_SIZE_64X64);
+#endif
+    }
+
+    if (video->frame() == 1) {
+      frame_flags_ =
+          AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF;
+    }
+  }
+
+  virtual void DecompressedFrameHook(const aom_image_t &img,
+                                     aom_codec_pts_t pts) {
+    // Skip 1 already decoded frame to be consistent with the decoder in this
+    // test.
+    if (pts == (aom_codec_pts_t)kSkip) return;
+
+    // Calculate MD5 as the reference.
+    ::libaom_test::MD5 md5_res;
+    md5_res.Add(&img);
+    md5_.push_back(md5_res.Get());
+  }
+
+  virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
+    // Skip decoding 1 frame.
+    if (pkt->data.frame.pts == (aom_codec_pts_t)kSkip) return;
+
+    bool IsLastFrame = (pkt->data.frame.pts == (aom_codec_pts_t)(kLimit - 1));
+    // Decode the first (kLimit - 1) frames as whole frame, and decode the last
+    // frame in single tiles.
+    for (int r = 0; r < kImgHeight / kTIleSizeInPixels; ++r) {
+      for (int c = 0; c < kImgWidth / kTIleSizeInPixels; ++c) {
+        if (!IsLastFrame) {
+          decoder_->Control(AV1_SET_DECODE_TILE_ROW, -1);
+          decoder_->Control(AV1_SET_DECODE_TILE_COL, -1);
+        } else {
+          decoder_->Control(AV1_SET_DECODE_TILE_ROW, r);
+          decoder_->Control(AV1_SET_DECODE_TILE_COL, c);
+        }
+
+        const aom_codec_err_t res = decoder_->DecodeFrame(
+            reinterpret_cast<uint8_t *>(pkt->data.frame.buf),
+            pkt->data.frame.sz);
+        if (res != AOM_CODEC_OK) {
+          abort_ = true;
+          ASSERT_EQ(AOM_CODEC_OK, res);
+        }
+        const aom_image_t *img = decoder_->GetDxData().Next();
+
+        if (!IsLastFrame) {
+          if (img) {
+            ::libaom_test::MD5 md5_res;
+            md5_res.Add(img);
+            tile_md5_.push_back(md5_res.Get());
+          }
+          break;
+        }
+        if (!img) {
+          abort_ = true;
+          FAIL() << "No image decoded for tile row " << r << ", column " << c;
+        }
+
+        const int kMaxMBPlane = 3;
+        for (int plane = 0; plane < kMaxMBPlane; ++plane) {
+          const int shift = (plane == 0) ? 0 : 1;
+          const int tile_dim = kTIleSizeInPixels >> shift;  // tiles are square
+          for (int tr = 0; tr < tile_dim; ++tr) {
+            memcpy(tile_img_.planes[plane] +
+                       tile_img_.stride[plane] * (r * tile_dim + tr) +
+                       c * tile_dim,
+                   img->planes[plane] + img->stride[plane] * tr, tile_dim);
+          }
+        }
+      }
+      if (!IsLastFrame) break;
+    }
+
+    if (IsLastFrame) {
+      ::libaom_test::MD5 md5_res;
+      md5_res.Add(&tile_img_);
+      tile_md5_.push_back(md5_res.Get());
+    }
+  }
+
+  ::libaom_test::TestMode encoding_mode_;
+  int set_cpu_used_;
+  ::libaom_test::Decoder *decoder_;
+  aom_image_t tile_img_;
+  std::vector<std::string> md5_;
+  std::vector<std::string> tile_md5_;
+};
+
+TEST_P(AV1ExtTileTest, DecoderResultTest) {
+  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", kImgWidth,
+                                       kImgHeight, 30, 1, 0, kLimit);
+  cfg_.rc_target_bitrate = 500;
+  cfg_.g_error_resilient = AOM_ERROR_RESILIENT_DEFAULT;
+  cfg_.g_lag_in_frames = 0;
+  cfg_.g_threads = 1;
+
+  // Run the encode loop; presumably it drives the hooks filling both MD5 lists.
+  init_flags_ = AOM_CODEC_USE_PSNR;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  // Whole-frame reference MD5s must equal the fixture decoder's MD5s in order.
+  ASSERT_EQ(md5_, tile_md5_);
+}
+
+AV1_INSTANTIATE_TEST_CASE(
+    // Now only test 2-pass mode.
+    AV1ExtTileTest, ::testing::Values(::libaom_test::kTwoPassGood),
+    ::testing::Range(0, 4));
+}  // namespace
diff --git a/third_party/aom/test/av1_fht16x16_test.cc b/third_party/aom/test/av1_fht16x16_test.cc
new file mode 100644
index 000000000..e1032ef24
--- /dev/null
+++ b/third_party/aom/test/av1_fht16x16_test.cc
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./av1_rtcd.h"
+#include "./aom_dsp_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/transform_test_base.h"
+#include "test/util.h"
+#include "aom_ports/mem.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                        int tx_type);
+using std::tr1::tuple;
+using libaom_test::FhtFunc;
+typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht16x16Param;
+
+void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
+  av1_fht16x16_c(in, out, stride, tx_type);  // the _c version is the reference
+}
+
+void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
+                  int tx_type) {
+  av1_iht16x16_256_add_c(in, dest, stride, tx_type);  // _c version as reference
+}
+
+#if CONFIG_HIGHBITDEPTH
+typedef void (*IHbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                           int tx_type, int bd);
+typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
+                          int tx_type, int bd);
+
+// Target optimized function, tx_type, bit depth
+typedef tuple<HbdHtFunc, int, int> HighbdHt16x16Param;
+
+void highbd_fht16x16_ref(const int16_t *in, int32_t *out, int stride,
+                         int tx_type, int bd) {
+  av1_fwd_txfm2d_16x16_c(in, out, stride, tx_type, bd);  // _c reference
+}
+#endif  // CONFIG_HIGHBITDEPTH
+
+// 16x16 hybrid-transform fixture; the *_ref members point at the C wrappers
+// and the tuple supplies the functions under test, tx_type, depth and size.
+class AV1Trans16x16HT : public libaom_test::TransformTestBase,
+                        public ::testing::TestWithParam<Ht16x16Param> {
+ public:
+  virtual ~AV1Trans16x16HT() {}
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+  virtual void SetUp() {
+    fwd_txfm_ref = fht16x16_ref;
+    inv_txfm_ref = iht16x16_ref;
+    pitch_ = 16;
+    height_ = 16;
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    tx_type_ = GET_PARAM(2);
+    bit_depth_ = GET_PARAM(3);
+    num_coeffs_ = GET_PARAM(4);
+    mask_ = (1 << bit_depth_) - 1;
+  }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride, tx_type_);
+  }
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride, tx_type_);
+  }
+
+  FhtFunc fwd_txfm_;
+  IhtFunc inv_txfm_;
+};
+
+TEST_P(AV1Trans16x16HT, MemCheck) { RunMemCheck(); }
+TEST_P(AV1Trans16x16HT, AccuracyCheck) { RunAccuracyCheck(1, 0.001); }
+TEST_P(AV1Trans16x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
+TEST_P(AV1Trans16x16HT, CoeffCheck) { RunCoeffCheck(); }
+TEST_P(AV1Trans16x16HT, InvCoeffCheck) { RunInvCoeffCheck(); }
+
+#if CONFIG_HIGHBITDEPTH
+// Bit-exactness fixture for high-bitdepth 16x16 forward transforms.
+class AV1HighbdTrans16x16HT
+    : public ::testing::TestWithParam<HighbdHt16x16Param> {
+ public:
+  virtual ~AV1HighbdTrans16x16HT() {}
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    fwd_txfm_ref_ = highbd_fht16x16_ref;
+    tx_type_ = GET_PARAM(1);
+    bit_depth_ = GET_PARAM(2);
+    mask_ = (1 << bit_depth_) - 1;
+    num_coeffs_ = 256;
+    input_ = reinterpret_cast<int16_t *>(
+        aom_memalign(32, sizeof(int16_t) * num_coeffs_));
+    output_ = reinterpret_cast<int32_t *>(
+        aom_memalign(32, sizeof(int32_t) * num_coeffs_));
+    output_ref_ = reinterpret_cast<int32_t *>(
+        aom_memalign(32, sizeof(int32_t) * num_coeffs_));
+    // A failed allocation must stop the test before the buffers are used.
+    ASSERT_TRUE(input_ && output_ && output_ref_) << "aom_memalign failed";
+  }
+
+  virtual void TearDown() {
+    aom_free(input_);
+    aom_free(output_);
+    aom_free(output_ref_);
+    libaom_test::ClearSystemState();
+  }
+
+ protected:
+  void RunBitexactCheck();
+ private:
+  HbdHtFunc fwd_txfm_;
+  HbdHtFunc fwd_txfm_ref_;
+  int tx_type_;
+  int bit_depth_;
+  int mask_;
+  int num_coeffs_;
+  int16_t *input_;
+  int32_t *output_;
+  int32_t *output_ref_;
+};
+
+// Expects fwd_txfm_ to match fwd_txfm_ref_ exactly on 1000 random blocks.
+void AV1HighbdTrans16x16HT::RunBitexactCheck() {
+  const int kStride = 16;
+  const int kNumTests = 1000;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+  for (int block = 0; block < kNumTests; ++block) {
+    for (int k = 0; k < num_coeffs_; ++k) {
+      input_[k] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+    }
+    fwd_txfm_ref_(input_, output_ref_, kStride, tx_type_, bit_depth_);
+    ASM_REGISTER_STATE_CHECK(
+        fwd_txfm_(input_, output_, kStride, tx_type_, bit_depth_));
+
+    for (int j = 0; j < num_coeffs_; ++j) {
+      EXPECT_EQ(output_ref_[j], output_[j])
+          << "Not bit-exact result at index: " << j
+          << " at test block: " << block;
+    }
+  }
+}
+
+TEST_P(AV1HighbdTrans16x16HT, HighbdCoeffCheck) { RunBitexactCheck(); }
+#endif  // CONFIG_HIGHBITDEPTH
+
+using std::tr1::make_tuple;
+
+#if HAVE_SSE2
+const Ht16x16Param kArrayHt16x16Param_sse2[] = {
+  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 0, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 1, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 2, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 3, AOM_BITS_8,
+             256),
+#if CONFIG_EXT_TX
+  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 4, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 5, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 6, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 7, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 8, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 9, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 10, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 11, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 12, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 13, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 14, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 15, AOM_BITS_8,
+             256)
+#endif  // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans16x16HT,
+                        ::testing::ValuesIn(kArrayHt16x16Param_sse2));
+#endif  // HAVE_SSE2
+
+#if HAVE_AVX2
+const Ht16x16Param kArrayHt16x16Param_avx2[] = {
+  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 0, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 1, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 2, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 3, AOM_BITS_8,
+             256),
+#if CONFIG_EXT_TX
+  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 4, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 5, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 6, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 7, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 8, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 9, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 10, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 11, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 12, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 13, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 14, AOM_BITS_8,
+             256),
+  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 15, AOM_BITS_8,
+             256)
+#endif  // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(AVX2, AV1Trans16x16HT,
+                        ::testing::ValuesIn(kArrayHt16x16Param_avx2));
+#endif  // HAVE_AVX2
+
+#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+const HighbdHt16x16Param kArrayHBDHt16x16Param_sse4_1[] = {
+  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 0, 10),
+  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 0, 12),
+  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 1, 10),
+  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 1, 12),
+  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 2, 10),
+  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 2, 12),
+  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 3, 10),
+  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 3, 12),
+#if CONFIG_EXT_TX
+  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 4, 10),
+  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 4, 12),
+  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 5, 10),
+  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 5, 12),
+  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 6, 10),
+  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 6, 12),
+  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 7, 10),
+  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 7, 12),
+  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 8, 10),
+  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 8, 12),
+#endif  // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdTrans16x16HT,
+                        ::testing::ValuesIn(kArrayHBDHt16x16Param_sse4_1));
+#endif  // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+
+}  // namespace
diff --git a/third_party/aom/test/av1_fht16x32_test.cc b/third_party/aom/test/av1_fht16x32_test.cc
new file mode 100644
index 000000000..43d025327
--- /dev/null
+++ b/third_party/aom/test/av1_fht16x32_test.cc
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_dsp_rtcd.h"
+#include "./av1_rtcd.h"
+
+#include "aom_ports/mem.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/transform_test_base.h"
+#include "test/util.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                        int tx_type);
+using std::tr1::tuple;
+using libaom_test::FhtFunc;
+typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht16x32Param;
+
+void fht16x32_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
+  av1_fht16x32_c(in, out, stride, tx_type);  // the _c version is the reference
+}
+
+void iht16x32_ref(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  av1_iht16x32_512_add_c(in, out, stride, tx_type);  // _c version as reference
+}
+
+// 16x32 hybrid-transform fixture; the *_ref members point at the C wrappers
+// and the tuple supplies the functions under test, tx_type, depth and size.
+class AV1Trans16x32HT : public libaom_test::TransformTestBase,
+                        public ::testing::TestWithParam<Ht16x32Param> {
+ public:
+  virtual ~AV1Trans16x32HT() {}
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+  virtual void SetUp() {
+    fwd_txfm_ref = fht16x32_ref;
+    inv_txfm_ref = iht16x32_ref;
+    pitch_ = 16;
+    height_ = 32;
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    tx_type_ = GET_PARAM(2);
+    bit_depth_ = GET_PARAM(3);
+    num_coeffs_ = GET_PARAM(4);
+    mask_ = (1 << bit_depth_) - 1;
+  }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride, tx_type_);
+  }
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride, tx_type_);
+  }
+
+  FhtFunc fwd_txfm_;
+  IhtFunc inv_txfm_;
+};
+
+TEST_P(AV1Trans16x32HT, AccuracyCheck) { RunAccuracyCheck(4, 0.2); }
+TEST_P(AV1Trans16x32HT, CoeffCheck) { RunCoeffCheck(); }
+TEST_P(AV1Trans16x32HT, MemCheck) { RunMemCheck(); }
+TEST_P(AV1Trans16x32HT, InvCoeffCheck) { RunInvCoeffCheck(); }
+TEST_P(AV1Trans16x32HT, InvAccuracyCheck) { RunInvAccuracyCheck(4); }
+
+using std::tr1::make_tuple;
+const Ht16x32Param kArrayHt16x32Param_c[] = {
+  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 0, AOM_BITS_8, 512),
+  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 1, AOM_BITS_8, 512),
+  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 2, AOM_BITS_8, 512),
+  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 3, AOM_BITS_8, 512),
+#if CONFIG_EXT_TX
+  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 4, AOM_BITS_8, 512),
+  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 5, AOM_BITS_8, 512),
+  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 6, AOM_BITS_8, 512),
+  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 7, AOM_BITS_8, 512),
+  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 8, AOM_BITS_8, 512),
+  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 9, AOM_BITS_8, 512),
+  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 10, AOM_BITS_8, 512),
+  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 11, AOM_BITS_8, 512),
+  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 12, AOM_BITS_8, 512),
+  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 13, AOM_BITS_8, 512),
+  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 14, AOM_BITS_8, 512),
+  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 15, AOM_BITS_8, 512)
+#endif  // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(C, AV1Trans16x32HT,
+                        ::testing::ValuesIn(kArrayHt16x32Param_c));
+
+#if HAVE_SSE2
+const Ht16x32Param kArrayHt16x32Param_sse2[] = {
+  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 0, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 1, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 2, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 3, AOM_BITS_8,
+             512),
+#if CONFIG_EXT_TX
+  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 4, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 5, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 6, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 7, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 8, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 9, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 10, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 11, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 12, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 13, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 14, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 15, AOM_BITS_8,
+             512)
+#endif  // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans16x32HT,
+                        ::testing::ValuesIn(kArrayHt16x32Param_sse2));
+#endif  // HAVE_SSE2
+
+}  // namespace
diff --git a/third_party/aom/test/av1_fht16x8_test.cc b/third_party/aom/test/av1_fht16x8_test.cc
new file mode 100644
index 000000000..d99bec5eb
--- /dev/null
+++ b/third_party/aom/test/av1_fht16x8_test.cc
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_dsp_rtcd.h"
+#include "./av1_rtcd.h"
+
+#include "aom_ports/mem.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/transform_test_base.h"
+#include "test/util.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                        int tx_type);
+using std::tr1::tuple;
+using libaom_test::FhtFunc;
+typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht16x8Param;
+
+void fht16x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
+  av1_fht16x8_c(in, out, stride, tx_type);  // used as fwd_txfm_ref
+}
+
+void iht16x8_ref(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  av1_iht16x8_128_add_c(in, out, stride, tx_type);  // used as inv_txfm_ref
+}
+
+class AV1Trans16x8HT : public libaom_test::TransformTestBase,
+                       public ::testing::TestWithParam<Ht16x8Param> {
+ public:
+  virtual ~AV1Trans16x8HT() {}
+  // Param tuple: <fwd txfm, inv txfm, tx_type, bit depth, num coeffs>.
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    tx_type_ = GET_PARAM(2);
+    pitch_ = 16;  // presumably the block width; TODO confirm in base class
+    height_ = 8;
+    inv_txfm_ref = iht16x8_ref;  // wraps av1_iht16x8_128_add_c
+    fwd_txfm_ref = fht16x8_ref;  // wraps av1_fht16x8_c
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;  // low bit_depth_ bits set
+    num_coeffs_ = GET_PARAM(4);
+  }
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride, tx_type_);  // forward function under test
+  }
+
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride, tx_type_);  // inverse function under test
+  }
+
+  FhtFunc fwd_txfm_;
+  IhtFunc inv_txfm_;
+};
+
+TEST_P(AV1Trans16x8HT, AccuracyCheck) { RunAccuracyCheck(1, 0.001); }
+TEST_P(AV1Trans16x8HT, CoeffCheck) { RunCoeffCheck(); }
+TEST_P(AV1Trans16x8HT, MemCheck) { RunMemCheck(); }
+TEST_P(AV1Trans16x8HT, InvCoeffCheck) { RunInvCoeffCheck(); }
+TEST_P(AV1Trans16x8HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
+
+using std::tr1::make_tuple;
+
+const Ht16x8Param kArrayHt16x8Param_c[] = {
+  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 0, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 1, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 2, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 3, AOM_BITS_8, 128),
+#if CONFIG_EXT_TX
+  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 4, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 5, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 6, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 7, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 8, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 9, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 10, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 11, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 12, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 13, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 14, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 15, AOM_BITS_8, 128)
+#endif  // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(C, AV1Trans16x8HT,
+                        ::testing::ValuesIn(kArrayHt16x8Param_c));
+
+#if HAVE_SSE2
+const Ht16x8Param kArrayHt16x8Param_sse2[] = {
+  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 0, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 1, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 2, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 3, AOM_BITS_8, 128),
+#if CONFIG_EXT_TX
+  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 4, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 5, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 6, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 7, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 8, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 9, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 10, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 11, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 12, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 13, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 14, AOM_BITS_8, 128),
+  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 15, AOM_BITS_8, 128)
+#endif  // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans16x8HT,
+                        ::testing::ValuesIn(kArrayHt16x8Param_sse2));
+#endif  // HAVE_SSE2
+
+}  // namespace
diff --git a/third_party/aom/test/av1_fht32x16_test.cc b/third_party/aom/test/av1_fht32x16_test.cc
new file mode 100644
index 000000000..e38283f86
--- /dev/null
+++ b/third_party/aom/test/av1_fht32x16_test.cc
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_dsp_rtcd.h"
+#include "./av1_rtcd.h"
+
+#include "aom_ports/mem.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/transform_test_base.h"
+#include "test/util.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                        int tx_type);
+using std::tr1::tuple;
+using libaom_test::FhtFunc;
+typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht32x16Param;
+
+void fht32x16_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
+  av1_fht32x16_c(in, out, stride, tx_type);  // used as fwd_txfm_ref
+}
+
+void iht32x16_ref(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  av1_iht32x16_512_add_c(in, out, stride, tx_type);  // used as inv_txfm_ref
+}
+
+class AV1Trans32x16HT : public libaom_test::TransformTestBase,
+                        public ::testing::TestWithParam<Ht32x16Param> {
+ public:
+  virtual ~AV1Trans32x16HT() {}
+  // Param tuple: <fwd txfm, inv txfm, tx_type, bit depth, num coeffs>.
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    tx_type_ = GET_PARAM(2);
+    pitch_ = 32;  // presumably the block width; TODO confirm in base class
+    height_ = 16;
+    fwd_txfm_ref = fht32x16_ref;  // wraps av1_fht32x16_c
+    inv_txfm_ref = iht32x16_ref;  // wraps av1_iht32x16_512_add_c
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;  // low bit_depth_ bits set
+    num_coeffs_ = GET_PARAM(4);
+  }
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride, tx_type_);  // forward function under test
+  }
+
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride, tx_type_);  // inverse function under test
+  }
+
+  FhtFunc fwd_txfm_;
+  IhtFunc inv_txfm_;
+};
+
+TEST_P(AV1Trans32x16HT, MemCheck) { RunMemCheck(); }
+TEST_P(AV1Trans32x16HT, AccuracyCheck) { RunAccuracyCheck(4, 0.2); }
+TEST_P(AV1Trans32x16HT, CoeffCheck) { RunCoeffCheck(); }
+TEST_P(AV1Trans32x16HT, InvCoeffCheck) { RunInvCoeffCheck(); }
+TEST_P(AV1Trans32x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(4); }
+
+using std::tr1::make_tuple;
+const Ht32x16Param kArrayHt32x16Param_c[] = {
+  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 0, AOM_BITS_8, 512),
+  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 1, AOM_BITS_8, 512),
+  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 2, AOM_BITS_8, 512),
+  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 3, AOM_BITS_8, 512),
+#if CONFIG_EXT_TX
+  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 4, AOM_BITS_8, 512),
+  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 5, AOM_BITS_8, 512),
+  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 6, AOM_BITS_8, 512),
+  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 7, AOM_BITS_8, 512),
+  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 8, AOM_BITS_8, 512),
+  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 9, AOM_BITS_8, 512),
+  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 10, AOM_BITS_8, 512),
+  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 11, AOM_BITS_8, 512),
+  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 12, AOM_BITS_8, 512),
+  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 13, AOM_BITS_8, 512),
+  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 14, AOM_BITS_8, 512),
+  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 15, AOM_BITS_8, 512)
+#endif  // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(C, AV1Trans32x16HT,
+                        ::testing::ValuesIn(kArrayHt32x16Param_c));
+
+#if HAVE_SSE2
+const Ht32x16Param kArrayHt32x16Param_sse2[] = {
+  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 0, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 1, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 2, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 3, AOM_BITS_8,
+             512),
+#if CONFIG_EXT_TX
+  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 4, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 5, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 6, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 7, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 8, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 9, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 10, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 11, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 12, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 13, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 14, AOM_BITS_8,
+             512),
+  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 15, AOM_BITS_8,
+             512)
+#endif  // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans32x16HT,
+                        ::testing::ValuesIn(kArrayHt32x16Param_sse2));
+#endif  // HAVE_SSE2
+
+}  // namespace
diff --git a/third_party/aom/test/av1_fht4x4_test.cc b/third_party/aom/test/av1_fht4x4_test.cc
new file mode 100644
index 000000000..42837d3a4
--- /dev/null
+++ b/third_party/aom/test/av1_fht4x4_test.cc
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./av1_rtcd.h"
+#include "./aom_dsp_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/transform_test_base.h"
+#include "test/util.h"
+#include "aom_ports/mem.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                        int tx_type);
+using std::tr1::tuple;
+using libaom_test::FhtFunc;
+typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht4x4Param;
+
+void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
+  av1_fht4x4_c(in, out, stride, tx_type);  // used as fwd_txfm_ref
+}
+
+void iht4x4_ref(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  av1_iht4x4_16_add_c(in, out, stride, tx_type);  // used as inv_txfm_ref
+}
+
+#if CONFIG_HIGHBITDEPTH
+typedef void (*IhighbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                              int tx_type, int bd);
+typedef void (*HBDFhtFunc)(const int16_t *input, int32_t *output, int stride,
+                           int tx_type, int bd);
+
+// HighbdHt4x4Param argument list:
+// <Target optimized function, tx_type, bit depth>
+typedef tuple<HBDFhtFunc, int, int> HighbdHt4x4Param;
+
+void highbe_fht4x4_ref(const int16_t *in, int32_t *out, int stride, int tx_type,
+                       int bd) {
+  av1_fwd_txfm2d_4x4_c(in, out, stride, tx_type, bd);  // used as fwd_txfm_ref_
+}
+#endif  // CONFIG_HIGHBITDEPTH
+
+class AV1Trans4x4HT : public libaom_test::TransformTestBase,
+                      public ::testing::TestWithParam<Ht4x4Param> {
+ public:
+  virtual ~AV1Trans4x4HT() {}
+  // Param tuple: <fwd txfm, inv txfm, tx_type, bit depth, num coeffs>.
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    tx_type_ = GET_PARAM(2);
+    pitch_ = 4;  // presumably the block width; TODO confirm in base class
+    height_ = 4;
+    fwd_txfm_ref = fht4x4_ref;  // wraps av1_fht4x4_c
+    inv_txfm_ref = iht4x4_ref;  // wraps av1_iht4x4_16_add_c
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;  // low bit_depth_ bits set
+    num_coeffs_ = GET_PARAM(4);
+  }
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride, tx_type_);  // forward function under test
+  }
+
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride, tx_type_);  // inverse function under test
+  }
+
+  FhtFunc fwd_txfm_;
+  IhtFunc inv_txfm_;
+};
+
+TEST_P(AV1Trans4x4HT, MemCheck) { RunMemCheck(); }
+TEST_P(AV1Trans4x4HT, CoeffCheck) { RunCoeffCheck(); }
+// NOTE: the three checks below are currently disabled.
+//  TODO(luoyi): Add tx_type 9-15 for the inverse transform.
+//  Needs cleanup, since the same tests may be done in fdct4x4_test.cc.
+// TEST_P(AV1Trans4x4HT, AccuracyCheck) { RunAccuracyCheck(0); }
+// TEST_P(AV1Trans4x4HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
+// TEST_P(AV1Trans4x4HT, InvCoeffCheck) { RunInvCoeffCheck(); }
+
+#if CONFIG_HIGHBITDEPTH
+class AV1HighbdTrans4x4HT : public ::testing::TestWithParam<HighbdHt4x4Param> {
+ public:
+  virtual ~AV1HighbdTrans4x4HT() {}
+  // Param tuple: <forward transform under test, tx_type, bit depth>.
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    fwd_txfm_ref_ = highbe_fht4x4_ref;  // wraps av1_fwd_txfm2d_4x4_c
+    tx_type_ = GET_PARAM(1);
+    bit_depth_ = GET_PARAM(2);
+    mask_ = (1 << bit_depth_) - 1;  // low bit_depth_ bits set
+    num_coeffs_ = 16;
+    input_ = reinterpret_cast<int16_t *>(
+        aom_memalign(16, sizeof(int16_t) * num_coeffs_));
+    output_ = reinterpret_cast<int32_t *>(
+        aom_memalign(16, sizeof(int32_t) * num_coeffs_));
+    output_ref_ = reinterpret_cast<int32_t *>(
+        aom_memalign(16, sizeof(int32_t) * num_coeffs_));
+    ASSERT_TRUE(input_ != NULL && output_ != NULL && output_ref_ != NULL);
+  }
+
+  virtual void TearDown() {
+    aom_free(input_);  // releases the three buffers obtained in SetUp()
+    aom_free(output_);
+    aom_free(output_ref_);
+    libaom_test::ClearSystemState();
+  }
+
+ protected:
+  void RunBitexactCheck();
+
+ private:
+  HBDFhtFunc fwd_txfm_;      // transform under test
+  HBDFhtFunc fwd_txfm_ref_;  // transform whose output is the expected result
+  int tx_type_;
+  int bit_depth_;
+  int mask_;
+  int num_coeffs_;
+  int16_t *input_;
+  int32_t *output_;
+  int32_t *output_ref_;
+};
+
+void AV1HighbdTrans4x4HT::RunBitexactCheck() {
+  // Feeds identical random blocks to both transforms; outputs must match.
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int kStride = 4;
+  const int kNumTests = 1000;
+  const int kNumCoeffs = 16;
+
+  for (int t = 0; t < kNumTests; ++t) {
+    for (int k = 0; k < kNumCoeffs; ++k) {
+      input_[k] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+    }
+
+    fwd_txfm_ref_(input_, output_ref_, kStride, tx_type_, bit_depth_);
+    fwd_txfm_(input_, output_, kStride, tx_type_, bit_depth_);
+
+    for (int k = 0; k < kNumCoeffs; ++k) {
+      EXPECT_EQ(output_[k], output_ref_[k])
+          << "Not bit-exact result at index: " << k << " at test block: " << t;
+    }
+  }
+}
+
+TEST_P(AV1HighbdTrans4x4HT, HighbdCoeffCheck) { RunBitexactCheck(); }
+#endif  // CONFIG_HIGHBITDEPTH
+
+using std::tr1::make_tuple;
+
+#if HAVE_SSE2
+const Ht4x4Param kArrayHt4x4Param_sse2[] = {
+  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 0, AOM_BITS_8, 16),
+  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 1, AOM_BITS_8, 16),
+  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 2, AOM_BITS_8, 16),
+  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 3, AOM_BITS_8, 16),
+#if CONFIG_EXT_TX
+  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 4, AOM_BITS_8, 16),
+  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 5, AOM_BITS_8, 16),
+  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 6, AOM_BITS_8, 16),
+  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 7, AOM_BITS_8, 16),
+  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 8, AOM_BITS_8, 16),
+  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 9, AOM_BITS_8, 16),
+  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 10, AOM_BITS_8, 16),
+  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 11, AOM_BITS_8, 16),
+  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 12, AOM_BITS_8, 16),
+  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 13, AOM_BITS_8, 16),
+  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 14, AOM_BITS_8, 16),
+  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 15, AOM_BITS_8, 16)
+#endif  // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans4x4HT,
+                        ::testing::ValuesIn(kArrayHt4x4Param_sse2));
+#endif  // HAVE_SSE2
+
+#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+const HighbdHt4x4Param kArrayHighbdHt4x4Param[] = {
+  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 0, 10),
+  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 0, 12),
+  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 1, 10),
+  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 1, 12),
+  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 2, 10),
+  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 2, 12),
+  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 3, 10),
+  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 3, 12),
+#if CONFIG_EXT_TX
+  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 4, 10),
+  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 4, 12),
+  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 5, 10),
+  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 5, 12),
+  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 6, 10),
+  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 6, 12),
+  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 7, 10),
+  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 7, 12),
+  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 8, 10),
+  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 8, 12),
+#endif  // CONFIG_EXT_TX
+};
+
+INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdTrans4x4HT,
+                        ::testing::ValuesIn(kArrayHighbdHt4x4Param));
+
+#endif  // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+
+}  // namespace
diff --git a/third_party/aom/test/av1_fht4x8_test.cc b/third_party/aom/test/av1_fht4x8_test.cc
new file mode 100644
index 000000000..a899c8739
--- /dev/null
+++ b/third_party/aom/test/av1_fht4x8_test.cc
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_dsp_rtcd.h"
+#include "./av1_rtcd.h"
+
+#include "aom_ports/mem.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/transform_test_base.h"
+#include "test/util.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                        int tx_type);
+using std::tr1::tuple;
+using libaom_test::FhtFunc;
+typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht4x8Param;
+
+void fht4x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
+  av1_fht4x8_c(in, out, stride, tx_type);  // used as fwd_txfm_ref
+}
+
+void iht4x8_ref(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  av1_iht4x8_32_add_c(in, out, stride, tx_type);  // used as inv_txfm_ref
+}
+
+class AV1Trans4x8HT : public libaom_test::TransformTestBase,
+                      public ::testing::TestWithParam<Ht4x8Param> {
+ public:
+  virtual ~AV1Trans4x8HT() {}
+  // Param tuple: <fwd txfm, inv txfm, tx_type, bit depth, num coeffs>.
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    tx_type_ = GET_PARAM(2);
+    pitch_ = 4;  // presumably the block width; TODO confirm in base class
+    height_ = 8;
+    fwd_txfm_ref = fht4x8_ref;  // wraps av1_fht4x8_c
+    inv_txfm_ref = iht4x8_ref;  // wraps av1_iht4x8_32_add_c
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;  // low bit_depth_ bits set
+    num_coeffs_ = GET_PARAM(4);
+  }
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride, tx_type_);  // forward function under test
+  }
+
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride, tx_type_);  // inverse function under test
+  }
+
+  FhtFunc fwd_txfm_;
+  IhtFunc inv_txfm_;
+};
+
+TEST_P(AV1Trans4x8HT, AccuracyCheck) { RunAccuracyCheck(0, 0.00001); }
+TEST_P(AV1Trans4x8HT, CoeffCheck) { RunCoeffCheck(); }
+TEST_P(AV1Trans4x8HT, MemCheck) { RunMemCheck(); }
+TEST_P(AV1Trans4x8HT, InvCoeffCheck) { RunInvCoeffCheck(); }
+TEST_P(AV1Trans4x8HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
+
+using std::tr1::make_tuple;
+
+const Ht4x8Param kArrayHt4x8Param_c[] = {
+  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 0, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 1, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 2, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 3, AOM_BITS_8, 32),
+#if CONFIG_EXT_TX
+  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 4, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 5, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 6, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 7, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 8, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 9, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 10, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 11, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 12, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 13, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 14, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 15, AOM_BITS_8, 32)
+#endif  // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(C, AV1Trans4x8HT,
+                        ::testing::ValuesIn(kArrayHt4x8Param_c));
+
+#if HAVE_SSE2
+const Ht4x8Param kArrayHt4x8Param_sse2[] = {
+  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 0, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 1, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 2, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 3, AOM_BITS_8, 32),
+#if CONFIG_EXT_TX
+  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 4, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 5, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 6, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 7, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 8, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 9, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 10, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 11, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 12, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 13, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 14, AOM_BITS_8, 32),
+  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 15, AOM_BITS_8, 32)
+#endif  // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans4x8HT,
+                        ::testing::ValuesIn(kArrayHt4x8Param_sse2));
+#endif  // HAVE_SSE2
+
+}  // namespace
diff --git a/third_party/aom/test/av1_fht8x16_test.cc b/third_party/aom/test/av1_fht8x16_test.cc
new file mode 100644
index 000000000..ace9a8f47
--- /dev/null
+++ b/third_party/aom/test/av1_fht8x16_test.cc
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_dsp_rtcd.h"
+#include "./av1_rtcd.h"
+
+#include "aom_ports/mem.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/transform_test_base.h"
+#include "test/util.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                        int tx_type);
+using std::tr1::tuple;
+using libaom_test::FhtFunc;
+typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht8x16Param;
+
+void fht8x16_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
+  av1_fht8x16_c(in, out, stride, tx_type);  // used as fwd_txfm_ref
+}
+
+void iht8x16_ref(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  av1_iht8x16_128_add_c(in, out, stride, tx_type);  // used as inv_txfm_ref
+}
+
+class AV1Trans8x16HT : public libaom_test::TransformTestBase,
+                       public ::testing::TestWithParam<Ht8x16Param> {
+ public:
+  virtual ~AV1Trans8x16HT() {}
+  // Param tuple: <fwd txfm, inv txfm, tx_type, bit depth, num coeffs>.
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    tx_type_ = GET_PARAM(2);
+    pitch_ = 8;  // presumably the block width; TODO confirm in base class
+    height_ = 16;
+    inv_txfm_ref = iht8x16_ref;  // wraps av1_iht8x16_128_add_c
+    fwd_txfm_ref = fht8x16_ref;  // wraps av1_fht8x16_c
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;  // low bit_depth_ bits set
+    num_coeffs_ = GET_PARAM(4);
+  }
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride, tx_type_);  // forward function under test
+  }
+
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride, tx_type_);  // inverse function under test
+  }
+
+  FhtFunc fwd_txfm_;
+  IhtFunc inv_txfm_;
+};
+
+TEST_P(AV1Trans8x16HT, AccuracyCheck) { RunAccuracyCheck(1, 0.001); }
+TEST_P(AV1Trans8x16HT, MemCheck) { RunMemCheck(); }
+TEST_P(AV1Trans8x16HT, CoeffCheck) { RunCoeffCheck(); }
+TEST_P(AV1Trans8x16HT, InvCoeffCheck) { RunInvCoeffCheck(); }
+TEST_P(AV1Trans8x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
+
+using std::tr1::make_tuple;
+
+const Ht8x16Param kArrayHt8x16Param_c[] = {
+  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 0, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 1, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 2, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 3, AOM_BITS_8, 128),
+#if CONFIG_EXT_TX
+  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 4, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 5, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 6, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 7, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 8, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 9, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 10, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 11, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 12, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 13, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 14, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 15, AOM_BITS_8, 128)
+#endif  // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(C, AV1Trans8x16HT,
+                        ::testing::ValuesIn(kArrayHt8x16Param_c));
+
+#if HAVE_SSE2
+const Ht8x16Param kArrayHt8x16Param_sse2[] = {
+  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 0, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 1, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 2, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 3, AOM_BITS_8, 128),
+#if CONFIG_EXT_TX
+  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 4, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 5, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 6, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 7, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 8, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 9, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 10, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 11, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 12, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 13, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 14, AOM_BITS_8, 128),
+  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 15, AOM_BITS_8, 128)
+#endif  // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans8x16HT,
+                        ::testing::ValuesIn(kArrayHt8x16Param_sse2));
+#endif  // HAVE_SSE2
+
+}  // namespace
diff --git a/third_party/aom/test/av1_fht8x4_test.cc b/third_party/aom/test/av1_fht8x4_test.cc
new file mode 100644
index 000000000..9bf4ff647
--- /dev/null
+++ b/third_party/aom/test/av1_fht8x4_test.cc
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_dsp_rtcd.h"
+#include "./av1_rtcd.h"
+
+#include "aom_ports/mem.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/transform_test_base.h"
+#include "test/util.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                        int tx_type);
+using std::tr1::tuple;
+using libaom_test::FhtFunc;
+typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht8x4Param;
+
+void fht8x4_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
+  av1_fht8x4_c(in, out, stride, tx_type);  // C forward 8x4 is the reference
+}
+
+void iht8x4_ref(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  av1_iht8x4_32_add_c(in, out, stride, tx_type);  // C inverse is the reference
+}
+
+class AV1Trans8x4HT : public libaom_test::TransformTestBase,
+                      public ::testing::TestWithParam<Ht8x4Param> {
+ public:
+  virtual ~AV1Trans8x4HT() {}
+  // Fixes the 8x4 geometry and references, then reads the five parameters.
+  virtual void SetUp() {
+    pitch_ = 8;
+    height_ = 4;
+    fwd_txfm_ref = fht8x4_ref;
+    inv_txfm_ref = iht8x4_ref;
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    tx_type_ = GET_PARAM(2);
+    bit_depth_ = GET_PARAM(3);
+    num_coeffs_ = GET_PARAM(4);
+    mask_ = (1 << bit_depth_) - 1;  // derived from bit_depth_ set above
+  }
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride, tx_type_);  // forward function from GET_PARAM(0)
+  }
+
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride, tx_type_);  // inverse function from GET_PARAM(1)
+  }
+
+  FhtFunc fwd_txfm_;
+  IhtFunc inv_txfm_;
+};
+
+TEST_P(AV1Trans8x4HT, AccuracyCheck) { RunAccuracyCheck(0, 0.00001); }
+TEST_P(AV1Trans8x4HT, CoeffCheck) { RunCoeffCheck(); }
+TEST_P(AV1Trans8x4HT, MemCheck) { RunMemCheck(); }
+TEST_P(AV1Trans8x4HT, InvCoeffCheck) { RunInvCoeffCheck(); }
+TEST_P(AV1Trans8x4HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
+
+using std::tr1::make_tuple;
+
+const Ht8x4Param kArrayHt8x4Param_c[] = {
+  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 0, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 1, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 2, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 3, AOM_BITS_8, 32),
+#if CONFIG_EXT_TX
+  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 4, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 5, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 6, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 7, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 8, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 9, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 10, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 11, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 12, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 13, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 14, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 15, AOM_BITS_8, 32)
+#endif  // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(C, AV1Trans8x4HT,
+                        ::testing::ValuesIn(kArrayHt8x4Param_c));
+
+#if HAVE_SSE2
+const Ht8x4Param kArrayHt8x4Param_sse2[] = {
+  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 0, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 1, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 2, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 3, AOM_BITS_8, 32),
+#if CONFIG_EXT_TX
+  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 4, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 5, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 6, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 7, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 8, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 9, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 10, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 11, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 12, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 13, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 14, AOM_BITS_8, 32),
+  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 15, AOM_BITS_8, 32)
+#endif  // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans8x4HT,
+                        ::testing::ValuesIn(kArrayHt8x4Param_sse2));
+#endif  // HAVE_SSE2
+
+}  // namespace
diff --git a/third_party/aom/test/av1_fht8x8_test.cc b/third_party/aom/test/av1_fht8x8_test.cc
new file mode 100644
index 000000000..99cff1014
--- /dev/null
+++ b/third_party/aom/test/av1_fht8x8_test.cc
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./av1_rtcd.h"
+#include "./aom_dsp_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/transform_test_base.h"
+#include "test/util.h"
+#include "aom_ports/mem.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                        int tx_type);
+
+using libaom_test::FhtFunc;
+using std::tr1::tuple;
+typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht8x8Param;
+
+void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
+  av1_fht8x8_c(in, out, stride, tx_type);  // C forward 8x8 is the reference
+}
+
+void iht8x8_ref(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  av1_iht8x8_64_add_c(in, out, stride, tx_type);  // C inverse is the reference
+}
+
+#if CONFIG_HIGHBITDEPTH
+typedef void (*IHbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                           int tx_type, int bd);
+typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
+                          int tx_type, int bd);
+// Target optimized function, tx_type, bit depth
+typedef tuple<HbdHtFunc, int, int> HighbdHt8x8Param;
+
+void highbd_fht8x8_ref(const int16_t *in, int32_t *out, int stride, int tx_type,
+                       int bd) {
+  av1_fwd_txfm2d_8x8_c(in, out, stride, tx_type, bd);  // C reference
+}
+#endif  // CONFIG_HIGHBITDEPTH
+
+class AV1Trans8x8HT : public libaom_test::TransformTestBase,
+                      public ::testing::TestWithParam<Ht8x8Param> {
+ public:
+  virtual ~AV1Trans8x8HT() {}
+  // Fixes the 8x8 geometry and references, then reads the five parameters.
+  virtual void SetUp() {
+    pitch_ = 8;
+    height_ = 8;
+    fwd_txfm_ref = fht8x8_ref;
+    inv_txfm_ref = iht8x8_ref;
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    tx_type_ = GET_PARAM(2);
+    bit_depth_ = GET_PARAM(3);
+    num_coeffs_ = GET_PARAM(4);
+    mask_ = (1 << bit_depth_) - 1;  // derived from bit_depth_ set above
+  }
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride, tx_type_);  // forward function from GET_PARAM(0)
+  }
+
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride, tx_type_);  // inverse function from GET_PARAM(1)
+  }
+
+  FhtFunc fwd_txfm_;
+  IhtFunc inv_txfm_;
+};
+
+TEST_P(AV1Trans8x8HT, MemCheck) { RunMemCheck(); }
+TEST_P(AV1Trans8x8HT, CoeffCheck) { RunCoeffCheck(); }
+// Note: the three checks commented out below are currently disabled.
+//  TODO(luoyi): Add tx_type 9-15 for the inverse transform.
+//  Needs cleanup, since the same tests may be done in fdct8x8_test.cc.
+// TEST_P(AV1Trans8x8HT, AccuracyCheck) { RunAccuracyCheck(0); }
+// TEST_P(AV1Trans8x8HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
+// TEST_P(AV1Trans8x8HT, InvCoeffCheck) { RunInvCoeffCheck(); }
+
+#if CONFIG_HIGHBITDEPTH
+class AV1HighbdTrans8x8HT : public ::testing::TestWithParam<HighbdHt8x8Param> {
+ public:
+  virtual ~AV1HighbdTrans8x8HT() {}
+
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    fwd_txfm_ref_ = highbd_fht8x8_ref;
+    tx_type_ = GET_PARAM(1);
+    bit_depth_ = GET_PARAM(2);
+    mask_ = (1 << bit_depth_) - 1;
+    num_coeffs_ = 64;  // element count used for all three buffers below
+
+    input_ = reinterpret_cast<int16_t *>(
+        aom_memalign(16, sizeof(int16_t) * num_coeffs_));
+    output_ = reinterpret_cast<int32_t *>(
+        aom_memalign(16, sizeof(int32_t) * num_coeffs_));
+    output_ref_ = reinterpret_cast<int32_t *>(
+        aom_memalign(16, sizeof(int32_t) * num_coeffs_));
+  }
+
+  virtual void TearDown() {
+    aom_free(input_);
+    aom_free(output_);
+    aom_free(output_ref_);
+    libaom_test::ClearSystemState();
+  }
+
+ protected:
+  void RunBitexactCheck();
+
+ private:
+  HbdHtFunc fwd_txfm_;  // function under test, taken from GET_PARAM(0)
+  HbdHtFunc fwd_txfm_ref_;  // always highbd_fht8x8_ref
+  int tx_type_;
+  int bit_depth_;
+  int mask_;  // (1 << bit_depth_) - 1, applied to the random input samples
+  int num_coeffs_;
+  int16_t *input_;  // the three buffers are allocated in SetUp with
+  int32_t *output_;  // aom_memalign(16, ...) and released in TearDown
+  int32_t *output_ref_;
+};
+
+void AV1HighbdTrans8x8HT::RunBitexactCheck() {
+  // Runs the reference and the function under test on the same random blocks
+  // and expects identical output; bounds use num_coeffs_, the buffer size.
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int stride = 8;
+  const int num_tests = 1000;
+
+  for (int i = 0; i < num_tests; ++i) {
+    for (int j = 0; j < num_coeffs_; ++j) {
+      input_[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+    }
+
+    fwd_txfm_ref_(input_, output_ref_, stride, tx_type_, bit_depth_);
+    ASM_REGISTER_STATE_CHECK(
+        fwd_txfm_(input_, output_, stride, tx_type_, bit_depth_));
+
+    for (int j = 0; j < num_coeffs_; ++j) {
+      EXPECT_EQ(output_ref_[j], output_[j])
+          << "Not bit-exact result at index: " << j << " at test block: " << i;
+    }
+  }
+}
+
+TEST_P(AV1HighbdTrans8x8HT, HighbdCoeffCheck) { RunBitexactCheck(); }
+#endif  // CONFIG_HIGHBITDEPTH
+
+using std::tr1::make_tuple;
+
+#if HAVE_SSE2
+const Ht8x8Param kArrayHt8x8Param_sse2[] = {
+  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 0, AOM_BITS_8, 64),
+  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 1, AOM_BITS_8, 64),
+  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 2, AOM_BITS_8, 64),
+  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 3, AOM_BITS_8, 64),
+#if CONFIG_EXT_TX
+  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 4, AOM_BITS_8, 64),
+  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 5, AOM_BITS_8, 64),
+  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 6, AOM_BITS_8, 64),
+  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 7, AOM_BITS_8, 64),
+  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 8, AOM_BITS_8, 64),
+  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 9, AOM_BITS_8, 64),
+  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 10, AOM_BITS_8, 64),
+  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 11, AOM_BITS_8, 64),
+  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 12, AOM_BITS_8, 64),
+  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 13, AOM_BITS_8, 64),
+  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 14, AOM_BITS_8, 64),
+  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 15, AOM_BITS_8, 64)
+#endif  // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans8x8HT,
+                        ::testing::ValuesIn(kArrayHt8x8Param_sse2));
+#endif  // HAVE_SSE2
+
+#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+const HighbdHt8x8Param kArrayHBDHt8x8Param_sse4_1[] = {
+  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 0, 10),
+  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 0, 12),
+  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 1, 10),
+  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 1, 12),
+  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 2, 10),
+  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 2, 12),
+  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 3, 10),
+  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 3, 12),
+#if CONFIG_EXT_TX
+  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 4, 10),
+  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 4, 12),
+  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 5, 10),
+  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 5, 12),
+  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 6, 10),
+  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 6, 12),
+  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 7, 10),
+  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 7, 12),
+  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 8, 10),
+  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 8, 12),
+#endif  // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdTrans8x8HT,
+                        ::testing::ValuesIn(kArrayHBDHt8x8Param_sse4_1));
+#endif  // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+
+}  // namespace
diff --git a/third_party/aom/test/av1_fwd_txfm1d_test.cc b/third_party/aom/test/av1_fwd_txfm1d_test.cc
new file mode 100644
index 000000000..a9b3f8e40
--- /dev/null
+++ b/third_party/aom/test/av1_fwd_txfm1d_test.cc
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "av1/common/av1_fwd_txfm1d.h"
+#include "test/av1_txfm_test.h"
+
+using libaom_test::ACMRandom;
+using libaom_test::input_base;
+using libaom_test::reference_hybrid_1d;
+using libaom_test::TYPE_TXFM;
+using libaom_test::TYPE_DCT;
+using libaom_test::TYPE_ADST;
+
+namespace {
+const int txfm_type_num = 2;
+const TYPE_TXFM txfm_type_ls[2] = { TYPE_DCT, TYPE_ADST };
+
+const int txfm_size_num = 5;
+const int txfm_size_ls[5] = { 4, 8, 16, 32, 64 };
+
+const TxfmFunc fwd_txfm_func_ls[2][5] = {
+#if CONFIG_TX64X64  // rows follow txfm_type_ls, columns follow txfm_size_ls
+  { av1_fdct4_new, av1_fdct8_new, av1_fdct16_new, av1_fdct32_new,
+    av1_fdct64_new },
+#else
+  { av1_fdct4_new, av1_fdct8_new, av1_fdct16_new, av1_fdct32_new, NULL },
+#endif  // a NULL slot has no function; the accuracy test skips it
+  { av1_fadst4_new, av1_fadst8_new, av1_fadst16_new, av1_fadst32_new, NULL }
+};
+
+// the maximum stage number of fwd/inv 1d dct/adst txfm is 12
+const int8_t cos_bit[12] = { 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14 };
+const int8_t range_bit[12] = { 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 };
+
+TEST(av1_fwd_txfm1d, round_shift) {
+  EXPECT_EQ(round_shift(7, 1), 4);  // 7 / 2 = 3.5, expected 4
+  EXPECT_EQ(round_shift(-7, 1), -3);  // -7 / 2 = -3.5, expected -3
+
+  EXPECT_EQ(round_shift(7, 2), 2);  // 7 / 4 = 1.75, expected 2
+  EXPECT_EQ(round_shift(-7, 2), -2);  // -7 / 4 = -1.75, expected -2
+
+  EXPECT_EQ(round_shift(8, 2), 2);  // 8 / 4 is exact
+  EXPECT_EQ(round_shift(-8, 2), -2);  // -8 / 4 is exact
+}
+
+TEST(av1_fwd_txfm1d, get_max_bit) {
+  const int top_bit = get_max_bit(8);  // 8 == 1 << 3
+  EXPECT_EQ(top_bit, 3);
+}
+
+TEST(av1_fwd_txfm1d, cospi_arr) {
+  for (int bit = 0; bit < 7; ++bit) {  // row: scale is 2^(cos_bit_min + bit)
+    for (int k = 0; k < 64; ++k) {  // column: angle is pi * k / 128
+      const double ideal = cos(M_PI * k / 128) * (1 << (cos_bit_min + bit));
+      EXPECT_EQ(cospi_arr[bit][k], (int32_t)round(ideal));
+    }
+  }
+}
+
+TEST(av1_fwd_txfm1d, clamp_block) {
+  int16_t block[5][5] = { { 7, -5, 6, -3, 9 },
+                          { 7, -5, 6, -3, 9 },
+                          { 7, -5, 6, -3, 9 },
+                          { 7, -5, 6, -3, 9 },
+                          { 7, -5, 6, -3, 9 } };
+
+  int16_t ref_block[5][5] = { { 7, -5, 6, -3, 9 },
+                              { 7, -5, 6, -3, 9 },
+                              { 7, -4, 2, -3, 9 },
+                              { 7, -4, 2, -3, 9 },
+                              { 7, -4, 2, -3, 9 } };
+  // Clamp the 3x3 sub-block whose top-left corner is block[2][1] to [-4, 2].
+  const int stride = 5;
+  const int block_size = 3;
+  const int top = 2;
+  const int left = 1;
+  clamp_block(&block[top][left], block_size, stride, -4, 2);
+  for (int y = 0; y < stride; ++y) {
+    for (int x = 0; x < stride; ++x) {
+      EXPECT_EQ(block[y][x], ref_block[y][x]);
+    }
+  }
+}
+
+TEST(av1_fwd_txfm1d, accuracy) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  for (int si = 0; si < txfm_size_num; ++si) {
+    int txfm_size = txfm_size_ls[si];
+    int32_t *input = new int32_t[txfm_size];
+    int32_t *output = new int32_t[txfm_size];
+    double *ref_input = new double[txfm_size];
+    double *ref_output = new double[txfm_size];
+    // Compare each integer transform with the floating-point reference.
+    for (int ti = 0; ti < txfm_type_num; ++ti) {
+      TYPE_TXFM txfm_type = txfm_type_ls[ti];
+      TxfmFunc fwd_txfm_func = fwd_txfm_func_ls[ti][si];
+      const int max_error = 7;
+      // A NULL table entry means no function for this type/size: skip it.
+      const int count_test_block = 5000;
+      if (fwd_txfm_func != NULL) {
+        for (int bi = 0; bi < count_test_block; ++bi) {  // bi: test block index
+          for (int ni = 0; ni < txfm_size; ++ni) {
+            input[ni] = rnd.Rand16() % input_base - rnd.Rand16() % input_base;
+            ref_input[ni] = static_cast<double>(input[ni]);
+          }
+
+          fwd_txfm_func(input, output, cos_bit, range_bit);
+          reference_hybrid_1d(ref_input, ref_output, txfm_size, txfm_type);
+
+          for (int ni = 0; ni < txfm_size; ++ni) {
+            EXPECT_LE(
+                abs(output[ni] - static_cast<int32_t>(round(ref_output[ni]))),
+                max_error);
+          }
+        }
+      }
+    }
+
+    delete[] input;
+    delete[] output;
+    delete[] ref_input;
+    delete[] ref_output;
+  }
+}
+}  // namespace
diff --git a/third_party/aom/test/av1_fwd_txfm2d_test.cc b/third_party/aom/test/av1_fwd_txfm2d_test.cc
new file mode 100644
index 000000000..25cf5ad53
--- /dev/null
+++ b/third_party/aom/test/av1_fwd_txfm2d_test.cc
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "test/acm_random.h"
+#include "test/util.h"
+#include "test/av1_txfm_test.h"
+#include "av1/common/av1_txfm.h"
+#include "./av1_rtcd.h"
+
+using libaom_test::ACMRandom;
+using libaom_test::input_base;
+using libaom_test::bd;
+using libaom_test::compute_avg_abs_error;
+using libaom_test::Fwd_Txfm2d_Func;
+using libaom_test::TYPE_TXFM;
+
+namespace {
+#if CONFIG_HIGHBITDEPTH
+// tx_type_, tx_size_, max_error_, max_avg_error_
+typedef std::tr1::tuple<TX_TYPE, TX_SIZE, double, double> AV1FwdTxfm2dParam;
+
+class AV1FwdTxfm2d : public ::testing::TestWithParam<AV1FwdTxfm2dParam> {
+ public:
+  virtual void SetUp() {
+    tx_type_ = GET_PARAM(0);
+    tx_size_ = GET_PARAM(1);
+    max_error_ = GET_PARAM(2);
+    max_avg_error_ = GET_PARAM(3);
+    count_ = 500;  // number of random blocks run by RunFwdAccuracyCheck
+    TXFM_2D_FLIP_CFG fwd_txfm_flip_cfg =
+        av1_get_fwd_txfm_cfg(tx_type_, tx_size_);
+    const TXFM_2D_CFG *fwd_txfm_cfg = fwd_txfm_flip_cfg.cfg;
+    int amplify_bit = fwd_txfm_cfg->shift[0] + fwd_txfm_cfg->shift[1] +
+                      fwd_txfm_cfg->shift[2];
+    ud_flip_ = fwd_txfm_flip_cfg.ud_flip;
+    lr_flip_ = fwd_txfm_flip_cfg.lr_flip;
+    amplify_factor_ =
+        amplify_bit >= 0 ? (1 << amplify_bit) : (1.0 / (1 << -amplify_bit));
+
+    fwd_txfm_ = libaom_test::fwd_txfm_func_ls[tx_size_];
+    txfm1d_size_ = libaom_test::get_txfm1d_size(tx_size_);
+    txfm2d_size_ = txfm1d_size_ * txfm1d_size_;
+    get_txfm1d_type(tx_type_, &type0_, &type1_);
+    input_ = reinterpret_cast<int16_t *>(
+        aom_memalign(16, sizeof(input_[0]) * txfm2d_size_));
+    output_ = reinterpret_cast<int32_t *>(
+        aom_memalign(16, sizeof(output_[0]) * txfm2d_size_));
+    ref_input_ = reinterpret_cast<double *>(
+        aom_memalign(16, sizeof(ref_input_[0]) * txfm2d_size_));
+    ref_output_ = reinterpret_cast<double *>(
+        aom_memalign(16, sizeof(ref_output_[0]) * txfm2d_size_));
+  }
+
+  void RunFwdAccuracyCheck() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    double avg_abs_error = 0;
+    for (int ci = 0; ci < count_; ci++) {
+      for (int ni = 0; ni < txfm2d_size_; ++ni) {
+        input_[ni] = rnd.Rand16() % input_base;
+        ref_input_[ni] = static_cast<double>(input_[ni]);
+        output_[ni] = 0;
+        ref_output_[ni] = 0;
+      }
+
+      fwd_txfm_(input_, output_, txfm1d_size_, tx_type_, bd);
+
+      if (lr_flip_ && ud_flip_)
+        libaom_test::fliplrud(ref_input_, txfm1d_size_, txfm1d_size_);
+      else if (lr_flip_)
+        libaom_test::fliplr(ref_input_, txfm1d_size_, txfm1d_size_);
+      else if (ud_flip_)
+        libaom_test::flipud(ref_input_, txfm1d_size_, txfm1d_size_);
+
+      reference_hybrid_2d(ref_input_, ref_output_, txfm1d_size_, type0_,
+                          type1_);
+
+      for (int ni = 0; ni < txfm2d_size_; ++ni) {
+        ref_output_[ni] = round(ref_output_[ni] * amplify_factor_);
+        EXPECT_GE(max_error_,
+                  fabs(output_[ni] - ref_output_[ni]) / amplify_factor_);
+      }
+      avg_abs_error += compute_avg_abs_error<int32_t, double>(
+          output_, ref_output_, txfm2d_size_);
+    }
+
+    avg_abs_error /= amplify_factor_;
+    avg_abs_error /= count_;
+    // max_avg_error_ comes from the upper bound of avg_abs_error.
+    // printf("type0: %d type1: %d txfm_size: %d accuracy_avg_abs_error:
+    // %f\n", type0_, type1_, txfm1d_size_, avg_abs_error);
+    EXPECT_GE(max_avg_error_, avg_abs_error);
+  }
+
+  virtual void TearDown() {
+    aom_free(input_);
+    aom_free(output_);
+    aom_free(ref_input_);
+    aom_free(ref_output_);
+  }
+
+ private:
+  double max_error_;  // per-coefficient bound, from GET_PARAM(2)
+  double max_avg_error_;  // bound on the mean error, from GET_PARAM(3)
+  int count_;
+  double amplify_factor_;  // 2^(shift[0] + shift[1] + shift[2]) of the cfg
+  TX_TYPE tx_type_;
+  TX_SIZE tx_size_;
+  int txfm1d_size_;  // also passed to fwd_txfm_ as the stride
+  int txfm2d_size_;  // txfm1d_size_ squared: elements in each buffer
+  Fwd_Txfm2d_Func fwd_txfm_;
+  TYPE_TXFM type0_;
+  TYPE_TXFM type1_;
+  int16_t *input_;
+  int32_t *output_;
+  double *ref_input_;
+  double *ref_output_;
+  int ud_flip_;  // flip upside down
+  int lr_flip_;  // flip left to right
+};
+
+TEST_P(AV1FwdTxfm2d, RunFwdAccuracyCheck) { RunFwdAccuracyCheck(); }
+const AV1FwdTxfm2dParam av1_fwd_txfm2d_param_c[] = {
+#if CONFIG_EXT_TX
+  AV1FwdTxfm2dParam(FLIPADST_DCT, TX_4X4, 2, 0.2),
+  AV1FwdTxfm2dParam(DCT_FLIPADST, TX_4X4, 2, 0.2),
+  AV1FwdTxfm2dParam(FLIPADST_FLIPADST, TX_4X4, 2, 0.2),
+  AV1FwdTxfm2dParam(ADST_FLIPADST, TX_4X4, 2, 0.2),
+  AV1FwdTxfm2dParam(FLIPADST_ADST, TX_4X4, 2, 0.2),
+  AV1FwdTxfm2dParam(FLIPADST_DCT, TX_8X8, 5, 0.6),
+  AV1FwdTxfm2dParam(DCT_FLIPADST, TX_8X8, 5, 0.6),
+  AV1FwdTxfm2dParam(FLIPADST_FLIPADST, TX_8X8, 5, 0.6),
+  AV1FwdTxfm2dParam(ADST_FLIPADST, TX_8X8, 5, 0.6),
+  AV1FwdTxfm2dParam(FLIPADST_ADST, TX_8X8, 5, 0.6),
+  AV1FwdTxfm2dParam(FLIPADST_DCT, TX_16X16, 11, 1.5),
+  AV1FwdTxfm2dParam(DCT_FLIPADST, TX_16X16, 11, 1.5),
+  AV1FwdTxfm2dParam(FLIPADST_FLIPADST, TX_16X16, 11, 1.5),
+  AV1FwdTxfm2dParam(ADST_FLIPADST, TX_16X16, 11, 1.5),
+  AV1FwdTxfm2dParam(FLIPADST_ADST, TX_16X16, 11, 1.5),
+  AV1FwdTxfm2dParam(FLIPADST_DCT, TX_32X32, 70, 7),
+  AV1FwdTxfm2dParam(DCT_FLIPADST, TX_32X32, 70, 7),
+  AV1FwdTxfm2dParam(FLIPADST_FLIPADST, TX_32X32, 70, 7),
+  AV1FwdTxfm2dParam(ADST_FLIPADST, TX_32X32, 70, 7),
+  AV1FwdTxfm2dParam(FLIPADST_ADST, TX_32X32, 70, 7),
+#endif
+  AV1FwdTxfm2dParam(DCT_DCT, TX_4X4, 2, 0.2),
+  AV1FwdTxfm2dParam(ADST_DCT, TX_4X4, 2, 0.2),
+  AV1FwdTxfm2dParam(DCT_ADST, TX_4X4, 2, 0.2),
+  AV1FwdTxfm2dParam(ADST_ADST, TX_4X4, 2, 0.2),
+  AV1FwdTxfm2dParam(DCT_DCT, TX_8X8, 5, 0.6),
+  AV1FwdTxfm2dParam(ADST_DCT, TX_8X8, 5, 0.6),
+  AV1FwdTxfm2dParam(DCT_ADST, TX_8X8, 5, 0.6),
+  AV1FwdTxfm2dParam(ADST_ADST, TX_8X8, 5, 0.6),
+  AV1FwdTxfm2dParam(DCT_DCT, TX_16X16, 11, 1.5),
+  AV1FwdTxfm2dParam(ADST_DCT, TX_16X16, 11, 1.5),
+  AV1FwdTxfm2dParam(DCT_ADST, TX_16X16, 11, 1.5),
+  AV1FwdTxfm2dParam(ADST_ADST, TX_16X16, 11, 1.5),
+  AV1FwdTxfm2dParam(DCT_DCT, TX_32X32, 70, 7),
+  AV1FwdTxfm2dParam(ADST_DCT, TX_32X32, 70, 7),
+  AV1FwdTxfm2dParam(DCT_ADST, TX_32X32, 70, 7),
+  AV1FwdTxfm2dParam(ADST_ADST, TX_32X32, 70, 7)
+};
+
+INSTANTIATE_TEST_CASE_P(C, AV1FwdTxfm2d,
+                        ::testing::ValuesIn(av1_fwd_txfm2d_param_c));
+
+#endif  // CONFIG_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/av1_highbd_iht_test.cc b/third_party/aom/test/av1_highbd_iht_test.cc
new file mode 100644
index 000000000..3b263638f
--- /dev/null
+++ b/third_party/aom/test/av1_highbd_iht_test.cc
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./av1_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "av1/common/enums.h"
+#include "aom_dsp/aom_dsp_common.h"
+#include "aom_ports/mem.h"
+
+namespace {
+
+using std::tr1::tuple;
+using libaom_test::ACMRandom;
+
+typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
+                          int tx_type, int bd);
+
+typedef void (*IHbdHtFunc)(const int32_t *coeff, uint16_t *output, int stride,
+                           int tx_type, int bd);
+
+// Test parameter argument list:
+//   <forward transform reference function,
+//    optimized inverse transform function,
+//    inverse transform reference function,
+//    num_coeffs,
+//    tx_type,
+//    bit_depth>
+typedef tuple<HbdHtFunc, IHbdHtFunc, IHbdHtFunc, int, int, int> IHbdHtParam;
+
+class AV1HighbdInvHTNxN : public ::testing::TestWithParam<IHbdHtParam> {
+ public:
+  virtual ~AV1HighbdInvHTNxN() {}
+
+  virtual void SetUp() {
+    txfm_ref_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    inv_txfm_ref_ = GET_PARAM(2);
+    num_coeffs_ = GET_PARAM(3);
+    tx_type_ = GET_PARAM(4);
+    bit_depth_ = GET_PARAM(5);
+
+    input_ = reinterpret_cast<int16_t *>(
+        aom_memalign(16, sizeof(input_[0]) * num_coeffs_));
+
+    // Note:
+    // The inverse transform input buffer is 32-byte aligned; refer to
+    // void alloc_mode_context() in <root>/av1/encoder/context_tree.c.
+    // Both output buffers below are allocated with the same alignment.
+    coeffs_ = reinterpret_cast<int32_t *>(
+        aom_memalign(32, sizeof(coeffs_[0]) * num_coeffs_));
+    output_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(32, sizeof(output_[0]) * num_coeffs_));
+    output_ref_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(32, sizeof(output_ref_[0]) * num_coeffs_));
+  }
+
+  virtual void TearDown() {
+    aom_free(input_);
+    aom_free(coeffs_);
+    aom_free(output_);
+    aom_free(output_ref_);
+    libaom_test::ClearSystemState();
+  }
+
+ protected:
+  void RunBitexactCheck();
+
+ private:
+  int GetStride() const {
+    // Maps the coefficient count of the block to the stride handed to the
+    // transforms: 16 -> 4, 64 -> 8, 256 -> 16 and 1024 -> 32.
+    // Any other count yields 0.
+    switch (num_coeffs_) {
+      case 16: return 4;
+      case 64: return 8;
+      case 256: return 16;
+      case 1024: return 32;
+      default: return 0;
+    }
+  }
+
+  HbdHtFunc txfm_ref_;
+  IHbdHtFunc inv_txfm_;
+  IHbdHtFunc inv_txfm_ref_;
+  int num_coeffs_;
+  int tx_type_;
+  int bit_depth_;
+
+  int16_t *input_;
+  int32_t *coeffs_;
+  uint16_t *output_;
+  uint16_t *output_ref_;
+};
+
+void AV1HighbdInvHTNxN::RunBitexactCheck() {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const uint16_t mask = (1 << bit_depth_) - 1;
+  const int num_tests = 20000;
+  const int stride = GetStride();
+
+  for (int blk = 0; blk < num_tests; ++blk) {
+    for (int k = 0; k < num_coeffs_; ++k) {
+      input_[k] = (rnd.Rand16() & mask) - (rnd.Rand16() & mask);
+      // Both destination buffers start from the same random samples.
+      output_[k] = output_ref_[k] = rnd.Rand16() & mask;
+    }
+    // One forward transform feeds both inverse implementations.
+    txfm_ref_(input_, coeffs_, stride, tx_type_, bit_depth_);
+    inv_txfm_ref_(coeffs_, output_ref_, stride, tx_type_, bit_depth_);
+    ASM_REGISTER_STATE_CHECK(
+        inv_txfm_(coeffs_, output_, stride, tx_type_, bit_depth_));
+
+    for (int k = 0; k < num_coeffs_; ++k) {
+      EXPECT_EQ(output_ref_[k], output_[k])
+          << "Not bit-exact result at index: " << k << " At test block: " << blk;
+    }
+  }
+}
+
+TEST_P(AV1HighbdInvHTNxN, InvTransResultCheck) { RunBitexactCheck(); }
+
+using std::tr1::make_tuple;
+
+#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+#define PARAM_LIST_4X4                                   \
+  &av1_fwd_txfm2d_4x4_c, &av1_inv_txfm2d_add_4x4_sse4_1, \
+      &av1_inv_txfm2d_add_4x4_c, 16
+
+#define PARAM_LIST_8X8                                   \
+  &av1_fwd_txfm2d_8x8_c, &av1_inv_txfm2d_add_8x8_sse4_1, \
+      &av1_inv_txfm2d_add_8x8_c, 64
+
+#define PARAM_LIST_16X16                                     \
+  &av1_fwd_txfm2d_16x16_c, &av1_inv_txfm2d_add_16x16_sse4_1, \
+      &av1_inv_txfm2d_add_16x16_c, 256
+
+const IHbdHtParam kArrayIhtParam[] = {
+  // 16x16
+  make_tuple(PARAM_LIST_16X16, DCT_DCT, 10),
+  make_tuple(PARAM_LIST_16X16, DCT_DCT, 12),
+  make_tuple(PARAM_LIST_16X16, ADST_DCT, 10),
+  make_tuple(PARAM_LIST_16X16, ADST_DCT, 12),
+  make_tuple(PARAM_LIST_16X16, DCT_ADST, 10),
+  make_tuple(PARAM_LIST_16X16, DCT_ADST, 12),
+  make_tuple(PARAM_LIST_16X16, ADST_ADST, 10),
+  make_tuple(PARAM_LIST_16X16, ADST_ADST, 12),
+#if CONFIG_EXT_TX
+  make_tuple(PARAM_LIST_16X16, FLIPADST_DCT, 10),
+  make_tuple(PARAM_LIST_16X16, FLIPADST_DCT, 12),
+  make_tuple(PARAM_LIST_16X16, DCT_FLIPADST, 10),
+  make_tuple(PARAM_LIST_16X16, DCT_FLIPADST, 12),
+  make_tuple(PARAM_LIST_16X16, FLIPADST_FLIPADST, 10),
+  make_tuple(PARAM_LIST_16X16, FLIPADST_FLIPADST, 12),
+  make_tuple(PARAM_LIST_16X16, ADST_FLIPADST, 10),
+  make_tuple(PARAM_LIST_16X16, ADST_FLIPADST, 12),
+  make_tuple(PARAM_LIST_16X16, FLIPADST_ADST, 10),
+  make_tuple(PARAM_LIST_16X16, FLIPADST_ADST, 12),
+#endif
+  // 8x8
+  make_tuple(PARAM_LIST_8X8, DCT_DCT, 10),
+  make_tuple(PARAM_LIST_8X8, DCT_DCT, 12),
+  make_tuple(PARAM_LIST_8X8, ADST_DCT, 10),
+  make_tuple(PARAM_LIST_8X8, ADST_DCT, 12),
+  make_tuple(PARAM_LIST_8X8, DCT_ADST, 10),
+  make_tuple(PARAM_LIST_8X8, DCT_ADST, 12),
+  make_tuple(PARAM_LIST_8X8, ADST_ADST, 10),
+  make_tuple(PARAM_LIST_8X8, ADST_ADST, 12),
+#if CONFIG_EXT_TX
+  make_tuple(PARAM_LIST_8X8, FLIPADST_DCT, 10),
+  make_tuple(PARAM_LIST_8X8, FLIPADST_DCT, 12),
+  make_tuple(PARAM_LIST_8X8, DCT_FLIPADST, 10),
+  make_tuple(PARAM_LIST_8X8, DCT_FLIPADST, 12),
+  make_tuple(PARAM_LIST_8X8, FLIPADST_FLIPADST, 10),
+  make_tuple(PARAM_LIST_8X8, FLIPADST_FLIPADST, 12),
+  make_tuple(PARAM_LIST_8X8, ADST_FLIPADST, 10),
+  make_tuple(PARAM_LIST_8X8, ADST_FLIPADST, 12),
+  make_tuple(PARAM_LIST_8X8, FLIPADST_ADST, 10),
+  make_tuple(PARAM_LIST_8X8, FLIPADST_ADST, 12),
+#endif
+  // 4x4
+  make_tuple(PARAM_LIST_4X4, DCT_DCT, 10),
+  make_tuple(PARAM_LIST_4X4, DCT_DCT, 12),
+  make_tuple(PARAM_LIST_4X4, ADST_DCT, 10),
+  make_tuple(PARAM_LIST_4X4, ADST_DCT, 12),
+  make_tuple(PARAM_LIST_4X4, DCT_ADST, 10),
+  make_tuple(PARAM_LIST_4X4, DCT_ADST, 12),
+  make_tuple(PARAM_LIST_4X4, ADST_ADST, 10),
+  make_tuple(PARAM_LIST_4X4, ADST_ADST, 12),
+#if CONFIG_EXT_TX
+  make_tuple(PARAM_LIST_4X4, FLIPADST_DCT, 10),
+  make_tuple(PARAM_LIST_4X4, FLIPADST_DCT, 12),
+  make_tuple(PARAM_LIST_4X4, DCT_FLIPADST, 10),
+  make_tuple(PARAM_LIST_4X4, DCT_FLIPADST, 12),
+  make_tuple(PARAM_LIST_4X4, FLIPADST_FLIPADST, 10),
+  make_tuple(PARAM_LIST_4X4, FLIPADST_FLIPADST, 12),
+  make_tuple(PARAM_LIST_4X4, ADST_FLIPADST, 10),
+  make_tuple(PARAM_LIST_4X4, ADST_FLIPADST, 12),
+  make_tuple(PARAM_LIST_4X4, FLIPADST_ADST, 10),
+  make_tuple(PARAM_LIST_4X4, FLIPADST_ADST, 12),
+#endif
+};
+
+INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdInvHTNxN,
+                        ::testing::ValuesIn(kArrayIhtParam));
+#endif  // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+
+#if HAVE_AVX2 && CONFIG_HIGHBITDEPTH
+#define PARAM_LIST_32X32                                   \
+  &av1_fwd_txfm2d_32x32_c, &av1_inv_txfm2d_add_32x32_avx2, \
+      &av1_inv_txfm2d_add_32x32_c, 1024
+
+const IHbdHtParam kArrayIhtParam32x32[] = {
+  // 32x32
+  make_tuple(PARAM_LIST_32X32, DCT_DCT, 10),
+  make_tuple(PARAM_LIST_32X32, DCT_DCT, 12),
+};
+
+INSTANTIATE_TEST_CASE_P(AVX2, AV1HighbdInvHTNxN,
+                        ::testing::ValuesIn(kArrayIhtParam32x32));
+
+#endif  // HAVE_AVX2 && CONFIG_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/av1_inv_txfm1d_test.cc b/third_party/aom/test/av1_inv_txfm1d_test.cc
new file mode 100644
index 000000000..9cf33a2fd
--- /dev/null
+++ b/third_party/aom/test/av1_inv_txfm1d_test.cc
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "test/av1_txfm_test.h"
+#include "av1/common/av1_fwd_txfm1d.h"
+#include "av1/common/av1_inv_txfm1d.h"
+
+using libaom_test::ACMRandom;
+using libaom_test::input_base;
+
+namespace {
+const int txfm_type_num = 2;  // columns per table row below: { DCT, ADST }
+const int txfm_size_ls[5] = { 4, 8, 16, 32, 64 };  // 1D length for row si
+// Row si holds the forward { DCT, ADST } of length txfm_size_ls[si].
+const TxfmFunc fwd_txfm_func_ls[][2] = {
+  { av1_fdct4_new, av1_fadst4_new },
+  { av1_fdct8_new, av1_fadst8_new },
+  { av1_fdct16_new, av1_fadst16_new },
+  { av1_fdct32_new, av1_fadst32_new },
+#if CONFIG_TX64X64
+  { av1_fdct64_new, NULL },  // NULL: no 64-point ADST; round_trip skips it
+#endif
+};
+// Inverse transforms, in the same row/column layout as fwd_txfm_func_ls.
+const TxfmFunc inv_txfm_func_ls[][2] = {
+  { av1_idct4_new, av1_iadst4_new },
+  { av1_idct8_new, av1_iadst8_new },
+  { av1_idct16_new, av1_iadst16_new },
+  { av1_idct32_new, av1_iadst32_new },
+#if CONFIG_TX64X64
+  { av1_idct64_new, NULL },
+#endif
+};
+
+// Per-stage settings; 12 is the maximum stage count of the 1D transforms.
+const int8_t cos_bit[12] = { 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14 };
+const int8_t range_bit[12] = { 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 };
+
+#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof(x[0]))  // element count
+
+TEST(av1_inv_txfm1d, round_trip) {  // inverse(forward(x)) must restore x
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int max_error = 2;            // largest tolerated per-sample error
+  const int count_test_block = 5000;  // random vectors per transform pair
+  for (int si = 0; si < ARRAY_SIZE(fwd_txfm_func_ls); ++si) {
+    const int txfm_size = txfm_size_ls[si];
+    // Shift passed to round_shift() before the output is compared with input.
+    const int shift = get_max_bit(txfm_size) - 1;
+    for (int ti = 0; ti < txfm_type_num; ++ti) {
+      const TxfmFunc fwd_txfm_func = fwd_txfm_func_ls[si][ti];
+      const TxfmFunc inv_txfm_func = inv_txfm_func_ls[si][ti];
+      // A NULL slot means this size/type pair has no implementation.
+      if (!fwd_txfm_func || !inv_txfm_func) continue;
+      for (int ci = 0; ci < count_test_block; ++ci) {
+        int32_t input[64];
+        int32_t output[64];
+        int32_t round_trip_output[64];
+        assert(txfm_size <= ARRAY_SIZE(input));
+
+        for (int ni = 0; ni < txfm_size; ++ni) {
+          // Sequence the two draws explicitly: the operand order of '-' is
+          // unspecified, so one expression gives compiler-dependent data.
+          const int minuend = rnd.Rand16() % input_base;
+          input[ni] = minuend - rnd.Rand16() % input_base;
+        }
+
+        fwd_txfm_func(input, output, cos_bit, range_bit);
+        inv_txfm_func(output, round_trip_output, cos_bit, range_bit);
+
+        for (int ni = 0; ni < txfm_size; ++ni) {
+          const int restored = round_shift(round_trip_output[ni], shift);
+          EXPECT_LE(abs(input[ni] - restored), max_error);
+        }
+      }
+    }
+  }
+}
+
+}  // namespace
diff --git a/third_party/aom/test/av1_inv_txfm2d_test.cc b/third_party/aom/test/av1_inv_txfm2d_test.cc
new file mode 100644
index 000000000..bb2743af1
--- /dev/null
+++ b/third_party/aom/test/av1_inv_txfm2d_test.cc
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "./av1_rtcd.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+#include "test/av1_txfm_test.h"
+#include "av1/common/av1_inv_txfm2d_cfg.h"
+
+using libaom_test::ACMRandom;
+using libaom_test::input_base;
+using libaom_test::bd;
+using libaom_test::compute_avg_abs_error;
+using libaom_test::Fwd_Txfm2d_Func;
+using libaom_test::Inv_Txfm2d_Func;
+
+namespace {
+
+#if CONFIG_HIGHBITDEPTH
+// AV1InvTxfm2dParam argument list:
+// tx_type_, tx_size_, max_error_, max_avg_error_
+typedef std::tr1::tuple<TX_TYPE, TX_SIZE, int, double> AV1InvTxfm2dParam;
+
+class AV1InvTxfm2d : public ::testing::TestWithParam<AV1InvTxfm2dParam> {
+ public:
+  virtual void SetUp() {  // unpack the parameter tuple and allocate buffers
+    tx_type_ = GET_PARAM(0);
+    tx_size_ = GET_PARAM(1);
+    max_error_ = GET_PARAM(2);
+    max_avg_error_ = GET_PARAM(3);
+    txfm1d_size_ = libaom_test::get_txfm1d_size(tx_size_);
+    txfm2d_size_ = txfm1d_size_ * txfm1d_size_;
+    count_ = 500;
+    // Three 16-byte-aligned buffers of txfm2d_size_ elements each.
+    input_ = reinterpret_cast<int16_t *>(
+        aom_memalign(16, sizeof(int16_t) * txfm2d_size_));
+    ref_input_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, sizeof(uint16_t) * txfm2d_size_));
+    output_ = reinterpret_cast<int32_t *>(
+        aom_memalign(16, sizeof(int32_t) * txfm2d_size_));
+  }
+  // Runs count_ forward + inverse round trips; checks max and mean error.
+  void RunRoundtripCheck() {
+    const Fwd_Txfm2d_Func fwd_txfm_func =
+        libaom_test::fwd_txfm_func_ls[tx_size_];
+    const Inv_Txfm2d_Func inv_txfm_func =
+        libaom_test::inv_txfm_func_ls[tx_size_];
+    double avg_abs_error = 0;
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    for (int ci = 0; ci < count_; ci++) {
+      for (int ni = 0; ni < txfm2d_size_; ++ni) {
+        if (ci == 0) {
+          int extreme_input = input_base - 1;
+          input_[ni] = extreme_input;  // extreme case
+          ref_input_[ni] = 0;
+        } else {
+          input_[ni] = rnd.Rand16() % input_base;
+          ref_input_[ni] = 0;
+        }
+      }
+      // Forward into output_, then inverse from output_ into ref_input_.
+      fwd_txfm_func(input_, output_, txfm1d_size_, tx_type_, bd);
+      inv_txfm_func(output_, ref_input_, txfm1d_size_, tx_type_, bd);
+      // Every sample must round-trip to within max_error_.
+      for (int ni = 0; ni < txfm2d_size_; ++ni) {
+        EXPECT_GE(max_error_, abs(input_[ni] - ref_input_[ni]));
+      }
+      avg_abs_error += compute_avg_abs_error<int16_t, uint16_t>(
+          input_, ref_input_, txfm2d_size_);
+    }
+    // Mean of the per-iteration average errors over all count_ iterations.
+    avg_abs_error /= count_;
+    // max_avg_error_ presumably bounds the value this disabled printf shows:
+    // printf("txfm1d_size: %d accuracy_avg_abs_error: %f\n",
+    // txfm1d_size_, avg_abs_error);
+    EXPECT_GE(max_avg_error_, avg_abs_error);
+  }
+  // Frees the buffers allocated in SetUp().
+  virtual void TearDown() {
+    aom_free(input_);
+    aom_free(output_);
+    aom_free(ref_input_);
+  }
+
+ private:
+  int count_;             // round trips per test case (500)
+  int max_error_;         // bound on each |input - round trip| sample
+  double max_avg_error_;  // bound on the mean absolute error
+  TX_TYPE tx_type_;
+  TX_SIZE tx_size_;
+  int txfm1d_size_;      // get_txfm1d_size(tx_size_)
+  int txfm2d_size_;      // txfm1d_size_ squared: samples per block
+  int16_t *input_;       // forward transform input
+  uint16_t *ref_input_;  // inverse transform output
+  int32_t *output_;      // forward transform output
+};
+
+TEST_P(AV1InvTxfm2d, RunRoundtripCheck) { RunRoundtripCheck(); }
+
+const AV1InvTxfm2dParam av1_inv_txfm2d_param[] = {  // type, size, max, avg
+#if CONFIG_EXT_TX
+  AV1InvTxfm2dParam(FLIPADST_DCT, TX_4X4, 2, 0.002),
+  AV1InvTxfm2dParam(DCT_FLIPADST, TX_4X4, 2, 0.002),
+  AV1InvTxfm2dParam(FLIPADST_FLIPADST, TX_4X4, 2, 0.002),
+  AV1InvTxfm2dParam(ADST_FLIPADST, TX_4X4, 2, 0.002),
+  AV1InvTxfm2dParam(FLIPADST_ADST, TX_4X4, 2, 0.002),
+  AV1InvTxfm2dParam(FLIPADST_DCT, TX_8X8, 2, 0.02),
+  AV1InvTxfm2dParam(DCT_FLIPADST, TX_8X8, 2, 0.02),
+  AV1InvTxfm2dParam(FLIPADST_FLIPADST, TX_8X8, 2, 0.02),
+  AV1InvTxfm2dParam(ADST_FLIPADST, TX_8X8, 2, 0.02),
+  AV1InvTxfm2dParam(FLIPADST_ADST, TX_8X8, 2, 0.02),
+  AV1InvTxfm2dParam(FLIPADST_DCT, TX_16X16, 2, 0.04),
+  AV1InvTxfm2dParam(DCT_FLIPADST, TX_16X16, 2, 0.04),
+  AV1InvTxfm2dParam(FLIPADST_FLIPADST, TX_16X16, 11, 0.04),  // NOTE: 11, not 2?
+  AV1InvTxfm2dParam(ADST_FLIPADST, TX_16X16, 2, 0.04),
+  AV1InvTxfm2dParam(FLIPADST_ADST, TX_16X16, 2, 0.04),
+  AV1InvTxfm2dParam(FLIPADST_DCT, TX_32X32, 4, 0.4),
+  AV1InvTxfm2dParam(DCT_FLIPADST, TX_32X32, 4, 0.4),
+  AV1InvTxfm2dParam(FLIPADST_FLIPADST, TX_32X32, 4, 0.4),
+  AV1InvTxfm2dParam(ADST_FLIPADST, TX_32X32, 4, 0.4),
+  AV1InvTxfm2dParam(FLIPADST_ADST, TX_32X32, 4, 0.4),
+#endif
+  AV1InvTxfm2dParam(DCT_DCT, TX_4X4, 2, 0.002),
+  AV1InvTxfm2dParam(ADST_DCT, TX_4X4, 2, 0.002),
+  AV1InvTxfm2dParam(DCT_ADST, TX_4X4, 2, 0.002),
+  AV1InvTxfm2dParam(ADST_ADST, TX_4X4, 2, 0.002),
+  AV1InvTxfm2dParam(DCT_DCT, TX_8X8, 2, 0.02),
+  AV1InvTxfm2dParam(ADST_DCT, TX_8X8, 2, 0.02),
+  AV1InvTxfm2dParam(DCT_ADST, TX_8X8, 2, 0.02),
+  AV1InvTxfm2dParam(ADST_ADST, TX_8X8, 2, 0.02),
+  AV1InvTxfm2dParam(DCT_DCT, TX_16X16, 2, 0.04),
+  AV1InvTxfm2dParam(ADST_DCT, TX_16X16, 2, 0.04),
+  AV1InvTxfm2dParam(DCT_ADST, TX_16X16, 2, 0.04),
+  AV1InvTxfm2dParam(ADST_ADST, TX_16X16, 2, 0.04),
+  AV1InvTxfm2dParam(DCT_DCT, TX_32X32, 4, 0.4),
+  AV1InvTxfm2dParam(ADST_DCT, TX_32X32, 4, 0.4),
+  AV1InvTxfm2dParam(DCT_ADST, TX_32X32, 4, 0.4),
+  AV1InvTxfm2dParam(ADST_ADST, TX_32X32, 4, 0.4)
+};
+
+INSTANTIATE_TEST_CASE_P(C, AV1InvTxfm2d,
+                        ::testing::ValuesIn(av1_inv_txfm2d_param));
+
+#endif  // CONFIG_HIGHBITDEPTH
+
+}  // namespace
diff --git a/third_party/aom/test/av1_inv_txfm_test.cc b/third_party/aom/test/av1_inv_txfm_test.cc
new file mode 100644
index 000000000..af3fee872
--- /dev/null
+++ b/third_party/aom/test/av1_inv_txfm_test.cc
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./av1_rtcd.h"
+#include "./aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "av1/common/blockd.h"
+#include "av1/common/scan.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/inv_txfm.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+const double kInvSqrt2 = 0.707106781186547524400844362104;
+
+void reference_idct_1d(const double *in, double *out, int size) {
+  // Floating-point reference: out[n] is the sum over k of weight(k) * in[k] *
+  // cos(PI * (2n + 1) * k / (2 * size)); weight(0) is kInvSqrt2, else 1.
+  for (int n = 0; n < size; ++n) {
+    out[n] = 0;
+    for (int k = 0; k < size; ++k) {
+      const double weight = (k == 0) ? kInvSqrt2 : 1.0;
+      out[n] += weight * in[k] * cos(PI * (2 * n + 1) * k / (2 * size));
+    }
+  }
+}
+
+typedef void (*IdctFuncRef)(const double *in, double *out, int size);
+typedef void (*IdctFunc)(const tran_low_t *in, tran_low_t *out);
+
+class TransTestBase {
+ public:
+  virtual ~TransTestBase() {}
+
+ protected:
+  void RunInvAccuracyCheck() {  // 5000 random vectors against the reference
+    tran_low_t *input = new tran_low_t[txfm_size_];
+    tran_low_t *output = new tran_low_t[txfm_size_];
+    double *ref_input = new double[txfm_size_];
+    double *ref_output = new double[txfm_size_];
+
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 5000;
+    for (int ti = 0; ti < count_test_block; ++ti) {
+      for (int ni = 0; ni < txfm_size_; ++ni) {
+        input[ni] = rnd.Rand8() - rnd.Rand8();
+        ref_input[ni] = static_cast<double>(input[ni]);
+      }
+      // Same data through the transform under test and the reference.
+      fwd_txfm_(input, output);
+      fwd_txfm_ref_(ref_input, ref_output, txfm_size_);
+      // Each output must be within max_error_ of the rounded reference.
+      for (int ni = 0; ni < txfm_size_; ++ni) {
+        EXPECT_LE(
+            abs(output[ni] - static_cast<tran_low_t>(round(ref_output[ni]))),
+            max_error_);
+      }
+    }
+
+    delete[] input;
+    delete[] output;
+    delete[] ref_input;
+    delete[] ref_output;
+  }
+  // Set by the derived fixture (see AV1InvTxfm::SetUp) before the check runs.
+  double max_error_;          // largest allowed |output - reference|
+  int txfm_size_;             // samples per transform
+  IdctFunc fwd_txfm_;         // transform under test (an IDCT in this file)
+  IdctFuncRef fwd_txfm_ref_;  // double-precision reference for it
+};
+
+typedef std::tr1::tuple<IdctFunc, IdctFuncRef, int, int> IdctParam;
+class AV1InvTxfm : public TransTestBase,
+                   public ::testing::TestWithParam<IdctParam> {
+ public:
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);      // transform under test
+    fwd_txfm_ref_ = GET_PARAM(1);  // floating-point reference
+    txfm_size_ = GET_PARAM(2);     // transform length
+    max_error_ = GET_PARAM(3);     // int parameter stored as double
+  }
+  virtual void TearDown() {}
+};
+
+TEST_P(AV1InvTxfm, RunInvAccuracyCheck) { RunInvAccuracyCheck(); }
+
+INSTANTIATE_TEST_CASE_P(
+    C, AV1InvTxfm,
+    ::testing::Values(IdctParam(&aom_idct4_c, &reference_idct_1d, 4, 1),
+                      IdctParam(&aom_idct8_c, &reference_idct_1d, 8, 2),
+                      IdctParam(&aom_idct16_c, &reference_idct_1d, 16, 4),
+                      IdctParam(&aom_idct32_c, &reference_idct_1d, 32, 6)));
+
+#if CONFIG_AV1_ENCODER
+typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
+typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
+typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, InvTxfmFunc, TX_SIZE, int>
+    PartialInvTxfmParam;
+#if !CONFIG_ADAPT_SCAN
+const int kMaxNumCoeffs = 1024;
+#endif
+class AV1PartialIDctTest
+    : public ::testing::TestWithParam<PartialInvTxfmParam> {
+ public:
+  virtual ~AV1PartialIDctTest() {}
+  virtual void SetUp() {  // unpack the five tuple fields in order
+    ftxfm_ = GET_PARAM(0);
+    full_itxfm_ = GET_PARAM(1);
+    partial_itxfm_ = GET_PARAM(2);
+    tx_size_ = GET_PARAM(3);
+    last_nonzero_ = GET_PARAM(4);
+  }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  int last_nonzero_;           // tests fill scan positions [0, last_nonzero_)
+  TX_SIZE tx_size_;            // selects the block width and the scan table
+  FwdTxfmFunc ftxfm_;          // forward transform used to produce test data
+  InvTxfmFunc full_itxfm_;     // full inverse transform (the reference)
+  InvTxfmFunc partial_itxfm_;  // partial inverse transform under test
+};
+
+#if !CONFIG_ADAPT_SCAN
+TEST_P(AV1PartialIDctTest, RunQuantCheck) {
+  int size;  // transform width for tx_size_
+  switch (tx_size_) {
+    case TX_4X4: size = 4; break;
+    case TX_8X8: size = 8; break;
+    case TX_16X16: size = 16; break;
+    case TX_32X32: size = 32; break;
+    default: FAIL() << "Wrong Size!"; break;
+  }
+  DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
+
+  const int count_test_block = 1000;
+  const int block_size = size * size;
+
+  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);
+  // Feed the same coefficients to the full and the partial inverse transform.
+  int max_error = 0;
+  for (int m = 0; m < count_test_block; ++m) {
+    // Clear the destination and coefficient buffers.
+    memset(dst1, 0, sizeof(*dst1) * block_size);
+    memset(dst2, 0, sizeof(*dst2) * block_size);
+    memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size);
+    memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size);
+    // NOTE(review): rnd is re-seeded per m, so iterations presumably repeat.
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    // NOTE(review): every pass rewrites the same entries; the last one wins.
+    for (int n = 0; n < count_test_block; ++n) {
+      // Initialize a test block with input range [-255, 255].
+      if (n == 0) {
+        for (int j = 0; j < block_size; ++j) input_extreme_block[j] = 255;
+      } else if (n == 1) {
+        for (int j = 0; j < block_size; ++j) input_extreme_block[j] = -255;
+      } else {
+        for (int j = 0; j < block_size; ++j) {
+          input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
+        }
+      }
+
+      ftxfm_(input_extreme_block, output_ref_block, size);
+      // NOTE(review): values are read at [j] but stored at scan[j] -- intended?
+      // quantization with maximum allowed step sizes
+      test_coef_block1[0] = (output_ref_block[0] / 1336) * 1336;
+      for (int j = 1; j < last_nonzero_; ++j)
+        test_coef_block1[get_scan((const AV1_COMMON *)NULL, tx_size_, DCT_DCT,
+                                  0)
+                             ->scan[j]] = (output_ref_block[j] / 1828) * 1828;
+    }
+    // Both calls read test_coef_block1; test_coef_block2 is only zeroed here.
+    ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
+    ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block1, dst2, size));
+    // Keep the largest squared difference between the two reconstructions.
+    for (int j = 0; j < block_size; ++j) {
+      const int diff = dst1[j] - dst2[j];
+      const int error = diff * diff;
+      if (max_error < error) max_error = error;
+    }
+  }
+
+  EXPECT_EQ(0, max_error)
+      << "Error: partial inverse transform produces different results";
+}
+
+TEST_P(AV1PartialIDctTest, ResultsMatch) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int size;  // transform width for tx_size_
+  switch (tx_size_) {
+    case TX_4X4: size = 4; break;
+    case TX_8X8: size = 8; break;
+    case TX_16X16: size = 16; break;
+    case TX_32X32: size = 32; break;
+    default: FAIL() << "Wrong Size!"; break;
+  }
+  DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
+  const int count_test_block = 1000;  // random coefficient blocks to compare
+  const int max_coeff = 32766 / 4;
+  const int block_size = size * size;
+  int max_error = 0;
+  for (int i = 0; i < count_test_block; ++i) {
+    // Clear the destination and coefficient buffers.
+    memset(dst1, 0, sizeof(*dst1) * block_size);
+    memset(dst2, 0, sizeof(*dst2) * block_size);
+    memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size);
+    memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size);
+    int max_energy_leftover = max_coeff * max_coeff;  // energy budget
+    for (int j = 0; j < last_nonzero_; ++j) {  // in scan order
+      int16_t coef = static_cast<int16_t>(sqrt(1.0 * max_energy_leftover) *
+                                          (rnd.Rand16() - 32768) / 65536);
+      max_energy_leftover -= coef * coef;
+      if (max_energy_leftover < 0) {  // budget exceeded: drop this coef
+        max_energy_leftover = 0;
+        coef = 0;
+      }
+      test_coef_block1[get_scan((const AV1_COMMON *)NULL, tx_size_, DCT_DCT, 0)
+                           ->scan[j]] = coef;
+    }
+    // Give the partial transform its own copy of the same coefficients.
+    memcpy(test_coef_block2, test_coef_block1,
+           sizeof(*test_coef_block2) * block_size);
+
+    ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
+    ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block2, dst2, size));
+    // Keep the largest squared difference between the two reconstructions.
+    for (int j = 0; j < block_size; ++j) {
+      const int diff = dst1[j] - dst2[j];
+      const int error = diff * diff;
+      if (max_error < error) max_error = error;
+    }
+  }
+
+  EXPECT_EQ(0, max_error)
+      << "Error: partial inverse transform produces different results";
+}
+#endif
+using std::tr1::make_tuple;
+
+INSTANTIATE_TEST_CASE_P(
+    C, AV1PartialIDctTest,
+    ::testing::Values(make_tuple(&aom_fdct32x32_c, &aom_idct32x32_1024_add_c,
+                                 &aom_idct32x32_34_add_c, TX_32X32, 34),
+                      make_tuple(&aom_fdct32x32_c, &aom_idct32x32_1024_add_c,
+                                 &aom_idct32x32_1_add_c, TX_32X32, 1),
+                      make_tuple(&aom_fdct16x16_c, &aom_idct16x16_256_add_c,
+                                 &aom_idct16x16_10_add_c, TX_16X16, 10),
+                      make_tuple(&aom_fdct16x16_c, &aom_idct16x16_256_add_c,
+                                 &aom_idct16x16_1_add_c, TX_16X16, 1),
+                      make_tuple(&aom_fdct8x8_c, &aom_idct8x8_64_add_c,
+                                 &aom_idct8x8_12_add_c, TX_8X8, 12),
+                      make_tuple(&aom_fdct8x8_c, &aom_idct8x8_64_add_c,
+                                 &aom_idct8x8_1_add_c, TX_8X8, 1),
+                      make_tuple(&aom_fdct4x4_c, &aom_idct4x4_16_add_c,
+                                 &aom_idct4x4_1_add_c, TX_4X4, 1)));
+#endif  // CONFIG_AV1_ENCODER
+}  // namespace
diff --git a/third_party/aom/test/av1_quantize_test.cc b/third_party/aom/test/av1_quantize_test.cc
new file mode 100644
index 000000000..b5d1531f5
--- /dev/null
+++ b/third_party/aom/test/av1_quantize_test.cc
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include <stdlib.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "./av1_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "av1/common/scan.h"
+
+namespace {
+
+typedef void (*QuantizeFpFunc)(
+    const tran_low_t *coeff_ptr, intptr_t count, int skip_block,
+    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+    const int16_t *scan, const int16_t *iscan, int log_scale);
+
+struct QuantizeFuncParams {
+  QuantizeFuncParams(QuantizeFpFunc qF = NULL, QuantizeFpFunc qRefF = NULL,
+                     int count = 16)
+      : qFunc(qF), qFuncRef(qRefF), coeffCount(count) {}
+  QuantizeFpFunc qFunc;     // quantizer under test
+  QuantizeFpFunc qFuncRef;  // reference quantizer it must match
+  int coeffCount;           // coefficients per block (defaults to 16)
+};
+
+using libaom_test::ACMRandom;
+
+const int numTests = 1000;
+const int maxSize = 1024;
+const int roundFactorRange = 127;
+const int dequantRange = 32768;
+const int coeffRange = (1 << 20) - 1;
+
+class AV1QuantizeTest : public ::testing::TestWithParam<QuantizeFuncParams> {
+ public:
+  void RunQuantizeTest() {  // bit-exact check against the reference
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[maxSize]);
+    DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
+    DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
+    DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
+    DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
+    DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[maxSize]);
+    DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[maxSize]);
+    DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[maxSize]);
+    DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[maxSize]);
+    DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
+    uint16_t eob;
+    uint16_t ref_eob;
+    int err_count_total = 0;
+    int first_failure = -1;
+    int skip_block = 0;
+    int count = params_.coeffCount;
+    const TX_SIZE txSize = getTxSize(count);
+    int log_scale = (txSize == TX_32X32);
+    QuantizeFpFunc quanFunc = params_.qFunc;
+    QuantizeFpFunc quanFuncRef = params_.qFuncRef;
+
+    const SCAN_ORDER scanOrder = av1_default_scan_orders[txSize];
+    for (int i = 0; i < numTests; i++) {
+      int err_count = 0;
+      ref_eob = eob = -1;  // wraps to 65535 in uint16_t
+      for (int j = 0; j < count; j++) {
+        coeff_ptr[j] = rnd(coeffRange);
+      }
+      // Random quantizer parameters for both entries of each table.
+      for (int j = 0; j < 2; j++) {
+        zbin_ptr[j] = rnd.Rand16();
+        quant_shift_ptr[j] = rnd.Rand16();
+        dequant_ptr[j] = abs(rnd(dequantRange));  // fits a positive int16_t
+        if (dequant_ptr[j] == 0) dequant_ptr[j] = 1;  // divisor on next line
+        quant_ptr[j] = (1 << 16) / dequant_ptr[j];
+        round_ptr[j] = (abs(rnd(roundFactorRange)) * dequant_ptr[j]) >> 7;
+      }
+      // Reference first, then the function under test on identical input.
+      quanFuncRef(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
+                  quant_shift_ptr, ref_qcoeff_ptr, ref_dqcoeff_ptr, dequant_ptr,
+                  &ref_eob, scanOrder.scan, scanOrder.iscan, log_scale);
+
+      ASM_REGISTER_STATE_CHECK(
+          quanFunc(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
+                   quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, &eob,
+                   scanOrder.scan, scanOrder.iscan, log_scale));
+      // Compare coefficient by coefficient, then the end-of-block value.
+      for (int j = 0; j < count; ++j) {
+        err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
+                     (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
+        EXPECT_EQ(ref_qcoeff_ptr[j], qcoeff_ptr[j]) << "qcoeff error: i = " << i
+                                                    << " j = " << j << "\n";
+        EXPECT_EQ(ref_dqcoeff_ptr[j], dqcoeff_ptr[j])
+            << "dqcoeff error: i = " << i << " j = " << j << "\n";
+      }
+      EXPECT_EQ(ref_eob, eob) << "eob error: "
+                              << "i = " << i << "\n";
+      err_count += (ref_eob != eob);
+      if (err_count && !err_count_total) {
+        first_failure = i;
+      }
+      err_count_total += err_count;
+    }
+    EXPECT_EQ(0, err_count_total)
+        << "Error: Quantization Test, C output doesn't match SIMD output. "
+        << "First failed at test case " << first_failure;
+  }
+  // Checks eob only, on blocks with at most three nonzero coefficients.
+  void RunEobTest() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[maxSize]);
+    DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
+    DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
+    DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
+    DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
+    DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[maxSize]);
+    DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[maxSize]);
+    DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[maxSize]);
+    DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[maxSize]);
+    DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
+    uint16_t eob;
+    uint16_t ref_eob;
+    int skip_block = 0;
+    int count = params_.coeffCount;
+    const TX_SIZE txSize = getTxSize(count);
+    int log_scale = (txSize == TX_32X32);
+    QuantizeFpFunc quanFunc = params_.qFunc;
+    QuantizeFpFunc quanFuncRef = params_.qFuncRef;
+    const SCAN_ORDER scanOrder = av1_default_scan_orders[txSize];
+
+    for (int i = 0; i < numTests; i++) {
+      ref_eob = eob = -1;  // wraps to 65535 in uint16_t
+      for (int j = 0; j < count; j++) {
+        coeff_ptr[j] = 0;
+      }
+      // Up to three random positions receive a random value.
+      coeff_ptr[rnd(count)] = rnd(coeffRange);
+      coeff_ptr[rnd(count)] = rnd(coeffRange);
+      coeff_ptr[rnd(count)] = rnd(coeffRange);
+
+      for (int j = 0; j < 2; j++) {
+        zbin_ptr[j] = rnd.Rand16();
+        quant_shift_ptr[j] = rnd.Rand16();
+        dequant_ptr[j] = abs(rnd(dequantRange));  // fits a positive int16_t
+        if (dequant_ptr[j] == 0) dequant_ptr[j] = 1;  // divisor on next line
+        quant_ptr[j] = (1 << 16) / dequant_ptr[j];
+        round_ptr[j] = (abs(rnd(roundFactorRange)) * dequant_ptr[j]) >> 7;
+      }
+
+      quanFuncRef(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
+                  quant_shift_ptr, ref_qcoeff_ptr, ref_dqcoeff_ptr, dequant_ptr,
+                  &ref_eob, scanOrder.scan, scanOrder.iscan, log_scale);
+
+      ASM_REGISTER_STATE_CHECK(
+          quanFunc(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
+                   quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, &eob,
+                   scanOrder.scan, scanOrder.iscan, log_scale));
+      EXPECT_EQ(ref_eob, eob) << "eob error: "
+                              << "i = " << i << "\n";
+    }
+  }
+
+  virtual void SetUp() { params_ = GetParam(); }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+  virtual ~AV1QuantizeTest() {}
+
+ private:
+  TX_SIZE getTxSize(int count) {  // coefficient count -> square TX_SIZE
+    switch (count) {
+      case 16: return TX_4X4;
+      case 64: return TX_8X8;
+      case 256: return TX_16X16;
+      case 1024: return TX_32X32;
+      default: return TX_4X4;  // unknown counts fall back to 4x4
+    }
+  }
+
+  QuantizeFuncParams params_;  // copied from GetParam() in SetUp()
+};
+
+TEST_P(AV1QuantizeTest, BitExactCheck) { RunQuantizeTest(); }
+TEST_P(AV1QuantizeTest, EobVerify) { RunEobTest(); }
+
+#if HAVE_SSE4_1
+#if !CONFIG_AOM_QM
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, AV1QuantizeTest,
+    ::testing::Values(QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1,
+                                         &av1_highbd_quantize_fp_c, 16),
+                      QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1,
+                                         &av1_highbd_quantize_fp_c, 64),
+                      QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1,
+                                         &av1_highbd_quantize_fp_c, 256),
+                      QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1,
+                                         &av1_highbd_quantize_fp_c, 1024)));
+#endif  // !CONFIG_AOM_QM
+#endif  // HAVE_SSE4_1
+}  // namespace
diff --git a/third_party/aom/test/av1_txfm_test.cc b/third_party/aom/test/av1_txfm_test.cc
new file mode 100644
index 000000000..1e473b304
--- /dev/null
+++ b/third_party/aom/test/av1_txfm_test.cc
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <stdio.h>
+#include "test/av1_txfm_test.h"
+
+namespace libaom_test {
+// Returns tx_size_wide[tx_size], which callers use as the 1D transform size.
+int get_txfm1d_size(TX_SIZE tx_size) { return tx_size_wide[tx_size]; }
+
+void get_txfm1d_type(TX_TYPE txfm2d_type, TYPE_TXFM *type0, TYPE_TXFM *type1) {
+  // Splits a 2D transform type into its two 1D kernels. The first half of
+  // the TX_TYPE name decides *type0 and the second half decides *type1;
+  // FLIPADST halves map to TYPE_ADST as well, i.e. the flip is not
+  // represented in the result. An unknown type yields DCT/DCT and asserts.
+  TYPE_TXFM first = TYPE_DCT;
+  TYPE_TXFM second = TYPE_DCT;
+  bool recognized = true;
+  // Both kernels default to DCT; the cases below switch a half to ADST.
+  switch (txfm2d_type) {
+    case DCT_DCT:
+      break;
+    case ADST_DCT:
+      first = TYPE_ADST;
+      break;
+    case DCT_ADST:
+      second = TYPE_ADST;
+      break;
+    case ADST_ADST:
+      first = TYPE_ADST;
+      second = TYPE_ADST;
+      break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+      first = TYPE_ADST;
+      break;
+    case DCT_FLIPADST:
+      second = TYPE_ADST;
+      break;
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST:
+      first = TYPE_ADST;
+      second = TYPE_ADST;
+      break;
+#endif  // CONFIG_EXT_TX
+    // Any other value is reported through the assert below.
+    default:
+      recognized = false;
+      break;
+  }
+
+  // Store after the switch, in the original order: *type0, then *type1.
+  *type0 = first;
+  *type1 = second;
+  if (!recognized) assert(0);
+}
+
+double invSqrt2 = 1 / pow(2, 0.5);  // 1/sqrt(2), the scale of out[0]
+// Reference DCT in doubles: out[f] = sum over t of in[t] * cos(angle(t, f)).
+void reference_dct_1d(const double *in, double *out, int size) {
+  for (int f = 0; f < size; ++f) {
+    double &bin = out[f];
+    bin = 0;
+    for (int t = 0; t < size; ++t)
+      bin += in[t] * cos(M_PI * (2 * t + 1) * f / (2 * size));
+    if (f == 0) bin *= invSqrt2;
+  }
+}
+
+void reference_adst_1d(const double *in, double *out, int size) {
+  for (int f = 0; f < size; ++f) {  // one sine-weighted sum of in[] per output
+    double &bin = out[f];
+    bin = 0;
+    for (int t = 0; t < size; ++t)
+      bin += in[t] * sin(M_PI * (2 * t + 1) * (2 * f + 1) / (4 * size));
+  }
+}
+
+void reference_hybrid_1d(double *in, double *out, int size, int type) {
+  // TYPE_DCT selects the reference DCT; every other value selects the ADST.
+  void (*const transform)(const double *, double *, int) =
+      (type == TYPE_DCT) ? reference_dct_1d : reference_adst_1d;
+  transform(in, out, size);
+}
+
+void reference_hybrid_2d(double *in, double *out, int size, int type0,
+                         int type1) {
+  // Separable 2D reference transform on a size x size block, in two passes.
+  // Each pass transposes its source into a scratch buffer and then runs the
+  // 1D transform over every scratch row into out:
+  //   pass 0: source is in,  kernel is type0 (so type0 runs down columns);
+  //   pass 1: source is out, kernel is type1 (the second transpose puts the
+  //           block back into its original orientation).
+  double *const scratch = new double[size * size];
+  const double *source = in;
+
+  for (int pass = 0; pass < 2; ++pass) {
+    const int kernel = (pass == 0) ? type0 : type1;
+
+    // scratch = transpose(source)
+    for (int r = 0; r < size; ++r)
+      for (int c = 0; c < size; ++c)
+        scratch[r * size + c] = source[c * size + r];
+
+    // out row r = 1D transform of scratch row r
+    for (int r = 0; r < size; ++r)
+      reference_hybrid_1d(scratch + r * size, out + r * size, size, kernel);
+
+    source = out;
+  }
+
+  delete[] scratch;
+}
+
+template <typename Type>
+void fliplr(Type *dest, int stride, int length) {
+  for (int row = 0; row < length; ++row) {  // mirror every row in place
+    Type *lo = dest + row * stride;
+    for (Type *hi = lo + length - 1; lo < hi; ++lo, --hi) {
+      const Type tmp = *lo;
+      *lo = *hi;
+      *hi = tmp;
+    }
+  }
+}
+
+template <typename Type>
+void flipud(Type *dest, int stride, int length) {
+  // Column by column, swap the rows pairwise from the outside inwards.
+  for (int col = 0; col < length; ++col) {
+    for (int top = 0, bottom = length - 1; top < bottom; ++top, --bottom) {
+      const Type tmp = dest[top * stride + col];
+      dest[top * stride + col] = dest[bottom * stride + col];
+      dest[bottom * stride + col] = tmp;
+    }
+  }
+}
+
+template <typename Type>
+void fliplrud(Type *dest, int stride, int length) {
+  // 180-degree rotation: swap raster cell k with its mirror m (odd sizes too).
+  for (int k = 0, m = length * length - 1; k < m; ++k, --m) {
+    Type *const a = dest + (k / length) * stride + k % length;
+    Type *const b = dest + (m / length) * stride + m % length;
+    const Type tmp = *a;
+    *a = *b;
+    *b = tmp;
+  }
+}
+
+template void fliplr<double>(double *dest, int stride, int length);
+template void flipud<double>(double *dest, int stride, int length);
+template void fliplrud<double>(double *dest, int stride, int length);
+
+}  // namespace libaom_test
diff --git a/third_party/aom/test/av1_txfm_test.h b/third_party/aom/test/av1_txfm_test.h
new file mode 100644
index 000000000..70f971d09
--- /dev/null
+++ b/third_party/aom/test/av1_txfm_test.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AV1_TXFM_TEST_H_
+#define AV1_TXFM_TEST_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef _MSC_VER
+#define _USE_MATH_DEFINES
+#endif
+#include <math.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "av1/common/enums.h"
+#include "av1/common/av1_txfm.h"
+#include "./av1_rtcd.h"
+
+namespace libaom_test {
+typedef enum {
+  TYPE_DCT = 0,
+  TYPE_ADST,
+  TYPE_IDCT,
+  TYPE_IADST,
+  TYPE_LAST
+} TYPE_TXFM;
+
+int get_txfm1d_size(TX_SIZE tx_size);
+
+void get_txfm1d_type(TX_TYPE txfm2d_type, TYPE_TXFM *type0, TYPE_TXFM *type1);
+
+void reference_dct_1d(const double *in, double *out, int size);
+
+void reference_adst_1d(const double *in, double *out, int size);
+
+void reference_hybrid_1d(double *in, double *out, int size, int type);
+
+void reference_hybrid_2d(double *in, double *out, int size, int type0,
+                         int type1);
+template <typename Type1, typename Type2>
+static double compute_avg_abs_error(const Type1 *a, const Type2 *b,
+                                    const int size) {
+  double total = 0;  // running sum of |a[k] - b[k]|, accumulated in double
+  for (int k = 0; k < size; ++k) {
+    const double delta = static_cast<double>(a[k]) - static_cast<double>(b[k]);
+    total += fabs(delta);
+  }
+  return total / size;  // mean absolute difference
+}
+
+template <typename Type>
+void fliplr(Type *dest, int stride, int length);
+
+template <typename Type>
+void flipud(Type *dest, int stride, int length);
+
+template <typename Type>
+void fliplrud(Type *dest, int stride, int length);
+
+typedef void (*TxfmFunc)(const int32_t *in, int32_t *out, const int8_t *cos_bit,
+                         const int8_t *range_bit);
+
+typedef void (*Fwd_Txfm2d_Func)(const int16_t *, int32_t *, int, int, int);
+typedef void (*Inv_Txfm2d_Func)(const int32_t *, uint16_t *, int, int, int);
+
+static const int bd = 10;
+static const int input_base = (1 << bd);
+
+#if CONFIG_HIGHBITDEPTH
+#if CONFIG_AV1_ENCODER
+static const Fwd_Txfm2d_Func fwd_txfm_func_ls[TX_SIZES] = {
+#if CONFIG_CB4X4
+  NULL,
+#endif
+  av1_fwd_txfm2d_4x4_c, av1_fwd_txfm2d_8x8_c, av1_fwd_txfm2d_16x16_c,
+  av1_fwd_txfm2d_32x32_c
+};
+#endif
+
+static const Inv_Txfm2d_Func inv_txfm_func_ls[TX_SIZES] = {
+#if CONFIG_CB4X4
+  NULL,
+#endif
+  av1_inv_txfm2d_add_4x4_c, av1_inv_txfm2d_add_8x8_c,
+  av1_inv_txfm2d_add_16x16_c, av1_inv_txfm2d_add_32x32_c
+};
+#endif  // CONFIG_HIGHBITDEPTH
+
+}  // namespace libaom_test
+#endif  // AV1_TXFM_TEST_H_
diff --git a/third_party/aom/test/av1_wedge_utils_test.cc b/third_party/aom/test/av1_wedge_utils_test.cc
new file mode 100644
index 000000000..d4b560fc1
--- /dev/null
+++ b/third_party/aom/test/av1_wedge_utils_test.cc
@@ -0,0 +1,383 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+
+#include "./aom_dsp_rtcd.h"
+#include "./av1_rtcd.h"
+
+#include "aom_dsp/aom_dsp_common.h"
+
+#include "av1/common/enums.h"
+
+#include "test/acm_random.h"
+#include "test/function_equivalence_test.h"
+#include "test/register_state_check.h"
+
+#define WEDGE_WEIGHT_BITS 6
+#define MAX_MASK_VALUE (1 << (WEDGE_WEIGHT_BITS))
+
+using libaom_test::ACMRandom;
+using libaom_test::FunctionEquivalenceTest;
+
+namespace {
+
+static const int16_t kInt13Max = (1 << 12) - 1;
+
+//////////////////////////////////////////////////////////////////////////////
+// av1_wedge_sse_from_residuals - functionality
+//////////////////////////////////////////////////////////////////////////////
+
+class WedgeUtilsSSEFuncTest : public testing::Test {
+ protected:
+  WedgeUtilsSSEFuncTest() : rng_(ACMRandom::DeterministicSeed()) {}
+
+  static const int kIterations = 1000;  // random trials per test body
+
+  ACMRandom rng_;  // seeded with ACMRandom::DeterministicSeed()
+};
+
+static void equiv_blend_residuals(int16_t *r, const int16_t *r0,
+                                  const int16_t *r1, const uint8_t *m, int N) {
+  // Weighted sum of the two residuals, rounded with a -1 bias so the result
+  // equals the residual of the actually blended predictors.
+  for (int idx = 0; idx < N; ++idx) {
+    const int32_t w0 = m[idx];
+    const int32_t w1 = MAX_MASK_VALUE - w0;
+    // The weighted sum is narrowed to int16_t before the rounding shift.
+    const int16_t blended = w0 * r0[idx] + w1 * r1[idx];
+    r[idx] = ROUND_POWER_OF_TWO(blended - 1, WEDGE_WEIGHT_BITS);
+  }
+}
+
+static uint64_t equiv_sse_from_residuals(const int16_t *r0, const int16_t *r1,
+                                         const uint8_t *m, int N) {
+  uint64_t sse = 0;  // sum of the squared, rounded blended residuals
+  for (int idx = 0; idx < N; ++idx) {
+    const int32_t w0 = m[idx];
+    const int32_t w1 = MAX_MASK_VALUE - w0;
+    const int16_t blended = w0 * r0[idx] + w1 * r1[idx];
+    const int32_t res = ROUND_POWER_OF_TWO(blended - 1, WEDGE_WEIGHT_BITS);
+    sse += res * res;
+  }
+  return sse;
+}
+
+TEST_F(WedgeUtilsSSEFuncTest, ResidualBlendingEquiv) {
+  DECLARE_ALIGNED(32, uint8_t, s[MAX_SB_SQUARE]);   // source pixels
+  DECLARE_ALIGNED(32, uint8_t, p0[MAX_SB_SQUARE]);  // first predictor
+  DECLARE_ALIGNED(32, uint8_t, p1[MAX_SB_SQUARE]);  // second predictor
+  DECLARE_ALIGNED(32, uint8_t, p[MAX_SB_SQUARE]);   // mask blend of p0 and p1
+
+  DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);     // residual of s vs p0
+  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);     // residual of s vs p1
+  DECLARE_ALIGNED(32, int16_t, r_ref[MAX_SB_SQUARE]);  // residual of s vs p
+  DECLARE_ALIGNED(32, int16_t, r_tst[MAX_SB_SQUARE]);  // blended from r0, r1
+  DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]);      // blend mask
+  // Blending the residuals must equal the residual of the blended predictor.
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      s[i] = rng_.Rand8();
+      m[i] = rng_(MAX_MASK_VALUE + 1);
+    }
+    // Block dimensions are powers of two; the exponent is a random draw plus 3.
+    const int w = 1 << (rng_(MAX_SB_SIZE_LOG2 + 1 - 3) + 3);
+    const int h = 1 << (rng_(MAX_SB_SIZE_LOG2 + 1 - 3) + 3);
+    const int N = w * h;
+
+    for (int j = 0; j < N; j++) {
+      p0[j] = clamp(s[j] + rng_(33) - 16, 0, UINT8_MAX);
+      p1[j] = clamp(s[j] + rng_(33) - 16, 0, UINT8_MAX);
+    }
+
+    aom_blend_a64_mask(p, w, p0, w, p1, w, m, w, h, w, 0, 0);
+
+    aom_subtract_block(h, w, r0, w, s, w, p0, w);
+    aom_subtract_block(h, w, r1, w, s, w, p1, w);
+    // Reference: residual of the blended predictor. Test: blended residuals.
+    aom_subtract_block(h, w, r_ref, w, s, w, p, w);
+    equiv_blend_residuals(r_tst, r0, r1, m, N);
+
+    for (int i = 0; i < N; ++i) ASSERT_EQ(r_ref[i], r_tst[i]);
+
+    uint64_t ref_sse = aom_sum_squares_i16(r_ref, N);
+    uint64_t tst_sse = equiv_sse_from_residuals(r0, r1, m, N);
+
+    ASSERT_EQ(ref_sse, tst_sse);
+  }
+}
+
+static uint64_t sse_from_residuals(const int16_t *r0, const int16_t *r1,
+                                   const uint8_t *m, int N) {
+  uint64_t sum = 0;  // unrounded squares; the rounding shift happens once below
+  for (int idx = 0; idx < N; ++idx) {
+    const int32_t w0 = m[idx];
+    const int32_t w1 = MAX_MASK_VALUE - w0;
+    const int32_t weighted = w0 * r0[idx] + w1 * r1[idx];
+    sum += weighted * weighted;
+  }
+  return ROUND_POWER_OF_TWO(sum, 2 * WEDGE_WEIGHT_BITS);
+}
+
+TEST_F(WedgeUtilsSSEFuncTest, ResidualBlendingMethod) {
+  DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, d[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]);
+  // av1_wedge_sse_from_residuals(r1, d) must match sse_from_residuals(r0, r1).
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      r1[i] = rng_(2 * INT8_MAX - 2 * INT8_MIN + 1) + 2 * INT8_MIN;
+      d[i] = rng_(2 * INT8_MAX - 2 * INT8_MIN + 1) + 2 * INT8_MIN;
+      m[i] = rng_(MAX_MASK_VALUE + 1);
+    }
+    // N is a multiple of 64.
+    const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1);
+    // d plays the role of r0 - r1, so r0 is rebuilt as r1 + d.
+    for (int i = 0; i < N; i++) r0[i] = r1[i] + d[i];
+
+    const uint64_t ref_res = sse_from_residuals(r0, r1, m, N);
+    const uint64_t tst_res = av1_wedge_sse_from_residuals(r1, d, m, N);
+
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// av1_wedge_sse_from_residuals - optimizations
+//////////////////////////////////////////////////////////////////////////////
+
+typedef uint64_t (*FSSE)(const int16_t *r1, const int16_t *d, const uint8_t *m,
+                         int N);
+typedef libaom_test::FuncParam<FSSE> TestFuncsFSSE;
+
+class WedgeUtilsSSEOptTest : public FunctionEquivalenceTest<FSSE> {
+ protected:
+  static const int kIterations = 10000;  // random trials per test body
+};
+
+TEST_P(WedgeUtilsSSEOptTest, RandomValues) {
+  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, d[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]);
+  // Reference and tested implementations must agree on random inputs.
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      r1[i] = rng_(2 * kInt13Max + 1) - kInt13Max;
+      d[i] = rng_(2 * kInt13Max + 1) - kInt13Max;
+      m[i] = rng_(MAX_MASK_VALUE + 1);
+    }
+    // N is a multiple of 64.
+    const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1);
+    // Only the tested function runs under the register-state check.
+    const uint64_t ref_res = params_.ref_func(r1, d, m, N);
+    uint64_t tst_res;
+    ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(r1, d, m, N));
+
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+TEST_P(WedgeUtilsSSEOptTest, ExtremeValues) {
+  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, d[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]);
+  // Every element of r1 and d is +/-kInt13Max; the mask is MAX_MASK_VALUE.
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    if (rng_(2)) {
+      for (int i = 0; i < MAX_SB_SQUARE; ++i) r1[i] = kInt13Max;
+    } else {
+      for (int i = 0; i < MAX_SB_SQUARE; ++i) r1[i] = -kInt13Max;
+    }
+
+    if (rng_(2)) {
+      for (int i = 0; i < MAX_SB_SQUARE; ++i) d[i] = kInt13Max;
+    } else {
+      for (int i = 0; i < MAX_SB_SQUARE; ++i) d[i] = -kInt13Max;
+    }
+
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) m[i] = MAX_MASK_VALUE;
+    // N is a multiple of 64.
+    const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1);
+
+    const uint64_t ref_res = params_.ref_func(r1, d, m, N);
+    uint64_t tst_res;
+    ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(r1, d, m, N));
+
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, WedgeUtilsSSEOptTest,
+    ::testing::Values(TestFuncsFSSE(av1_wedge_sse_from_residuals_c,
+                                    av1_wedge_sse_from_residuals_sse2)));
+
+#endif  // HAVE_SSE2
+
+//////////////////////////////////////////////////////////////////////////////
+// av1_wedge_sign_from_residuals
+//////////////////////////////////////////////////////////////////////////////
+
+typedef int (*FSign)(const int16_t *ds, const uint8_t *m, int N, int64_t limit);
+typedef libaom_test::FuncParam<FSign> TestFuncsFSign;
+
+class WedgeUtilsSignOptTest : public FunctionEquivalenceTest<FSign> {
+ protected:
+  static const int kIterations = 10000;  // random trials per test body
+  static const int kMaxSize = 8196;  // SIMD size limit; 8192 intended? (review)
+};
+
+TEST_P(WedgeUtilsSignOptTest, RandomValues) {
+  DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, ds[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]);
+  // Reference and tested implementations must agree on random inputs.
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      r0[i] = rng_(2 * kInt13Max + 1) - kInt13Max;
+      r1[i] = rng_(2 * kInt13Max + 1) - kInt13Max;
+      m[i] = rng_(MAX_MASK_VALUE + 1);
+    }
+    // N is a multiple of 64 derived from min(kMaxSize, MAX_SB_SQUARE).
+    const int maxN = AOMMIN(kMaxSize, MAX_SB_SQUARE);
+    const int N = 64 * (rng_(maxN / 64 - 1) + 1);
+    // limit = (sum of r0^2 - sum of r1^2) * half of (1 << WEDGE_WEIGHT_BITS).
+    int64_t limit;
+    limit = (int64_t)aom_sum_squares_i16(r0, N);
+    limit -= (int64_t)aom_sum_squares_i16(r1, N);
+    limit *= (1 << WEDGE_WEIGHT_BITS) / 2;
+    // ds holds r0^2 - r1^2 per element, clamped to the int16_t range.
+    for (int i = 0; i < N; i++)
+      ds[i] = clamp(r0[i] * r0[i] - r1[i] * r1[i], INT16_MIN, INT16_MAX);
+
+    const int ref_res = params_.ref_func(ds, m, N, limit);
+    int tst_res;
+    ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(ds, m, N, limit));
+
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+TEST_P(WedgeUtilsSignOptTest, ExtremeValues) {
+  DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, ds[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]);
+  // One of r0/r1 is all zeros, the other all +/-kInt13Max; four cases in total.
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    switch (rng_(4)) {
+      case 0:
+        for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+          r0[i] = 0;
+          r1[i] = kInt13Max;
+        }
+        break;
+      case 1:
+        for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+          r0[i] = kInt13Max;
+          r1[i] = 0;
+        }
+        break;
+      case 2:
+        for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+          r0[i] = 0;
+          r1[i] = -kInt13Max;
+        }
+        break;
+      default:
+        for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+          r0[i] = -kInt13Max;
+          r1[i] = 0;
+        }
+        break;
+    }
+    // The mask is held at its maximum value for every element.
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) m[i] = MAX_MASK_VALUE;
+
+    const int maxN = AOMMIN(kMaxSize, MAX_SB_SQUARE);
+    const int N = 64 * (rng_(maxN / 64 - 1) + 1);
+    // limit = (sum of r0^2 - sum of r1^2) * half of (1 << WEDGE_WEIGHT_BITS).
+    int64_t limit;
+    limit = (int64_t)aom_sum_squares_i16(r0, N);
+    limit -= (int64_t)aom_sum_squares_i16(r1, N);
+    limit *= (1 << WEDGE_WEIGHT_BITS) / 2;
+    // ds holds r0^2 - r1^2 per element, clamped to the int16_t range.
+    for (int i = 0; i < N; i++)
+      ds[i] = clamp(r0[i] * r0[i] - r1[i] * r1[i], INT16_MIN, INT16_MAX);
+
+    const int ref_res = params_.ref_func(ds, m, N, limit);
+    int tst_res;
+    ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(ds, m, N, limit));
+
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+#if HAVE_SSE2
+
+INSTANTIATE_TEST_CASE_P(
+    SSE2, WedgeUtilsSignOptTest,
+    ::testing::Values(TestFuncsFSign(av1_wedge_sign_from_residuals_c,
+                                     av1_wedge_sign_from_residuals_sse2)));
+
+#endif  // HAVE_SSE2
+
+//////////////////////////////////////////////////////////////////////////////
+// av1_wedge_compute_delta_squares
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*FDS)(int16_t *d, const int16_t *a, const int16_t *b, int N);
+typedef libaom_test::FuncParam<FDS> TestFuncsFDS;
+
+class WedgeUtilsDeltaSquaresOptTest : public FunctionEquivalenceTest<FDS> {
+ protected:
+  static const int kIterations = 10000;
+};
+
+TEST_P(WedgeUtilsDeltaSquaresOptTest, RandomValues) {
+  DECLARE_ALIGNED(32, int16_t, a[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, b[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, d_ref[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, d_tst[MAX_SB_SQUARE]);
+
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      a[i] = rng_.Rand16();
+      b[i] = rng_(2 * INT16_MAX + 1) - INT16_MAX;
+    }
+
+    const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1);
+    // memset() stores only the low byte of INT16_MAX, so every byte is 0xFF.
+    memset(&d_ref, INT16_MAX, sizeof(d_ref));
+    memset(&d_tst, INT16_MAX, sizeof(d_tst));
+
+    params_.ref_func(d_ref, a, b, N);
+    ASM_REGISTER_STATE_CHECK(params_.tst_func(d_tst, a, b, N));
+    // Whole buffers are compared, so differing writes past N are caught too.
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) ASSERT_EQ(d_ref[i], d_tst[i]);
+  }
+}
+
+#if HAVE_SSE2
+
+INSTANTIATE_TEST_CASE_P(
+    SSE2, WedgeUtilsDeltaSquaresOptTest,
+    ::testing::Values(TestFuncsFDS(av1_wedge_compute_delta_squares_c,
+                                   av1_wedge_compute_delta_squares_sse2)));
+
+#endif  // HAVE_SSE2
+
+}  // namespace
diff --git a/third_party/aom/test/avg_test.cc b/third_party/aom/test/avg_test.cc
new file mode 100644
index 000000000..b040f6a34
--- /dev/null
+++ b/third_party/aom/test/avg_test.cc
@@ -0,0 +1,396 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "./aom_dsp_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "aom_mem/aom_mem.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+class AverageTestBase : public ::testing::Test {
+ public:
+  AverageTestBase(int width, int height) : width_(width), height_(height) {}
+  // Allocates the source buffer shared by every test of the test case.
+  static void SetUpTestCase() {
+    source_data_ = reinterpret_cast<uint8_t *>(
+        aom_memalign(kDataAlignment, kDataBlockSize));
+  }
+
+  static void TearDownTestCase() {
+    aom_free(source_data_);
+    source_data_ = NULL;
+  }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  // The shared buffer is 64 * 128 bytes, requested with 16-byte alignment.
+  static const int kDataAlignment = 16;
+  static const int kDataBlockSize = 64 * 128;
+
+  virtual void SetUp() {
+    source_stride_ = (width_ + 31) & ~31;  // width_ rounded up to 32
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+  }
+
+  // Rounded mean of the 8x8 block at `source`; rows are `pitch` bytes apart.
+  static unsigned int ReferenceAverage8x8(const uint8_t *source, int pitch) {
+    unsigned int average = 0;
+    for (int h = 0; h < 8; ++h)
+      for (int w = 0; w < 8; ++w) average += source[h * pitch + w];
+    return ((average + 32) >> 6);
+  }
+  // Rounded mean of the 4x4 block at `source`; rows are `pitch` bytes apart.
+  static unsigned int ReferenceAverage4x4(const uint8_t *source, int pitch) {
+    unsigned int average = 0;
+    for (int h = 0; h < 4; ++h)
+      for (int w = 0; w < 4; ++w) average += source[h * pitch + w];
+    return ((average + 8) >> 4);
+  }
+  // The fill helpers write only the first width_ * height_ bytes.
+  void FillConstant(uint8_t fill_constant) {
+    for (int i = 0; i < width_ * height_; ++i) {
+      source_data_[i] = fill_constant;
+    }
+  }
+
+  void FillRandom() {
+    for (int i = 0; i < width_ * height_; ++i) {
+      source_data_[i] = rnd_.Rand8();
+    }
+  }
+
+  int width_, height_;
+  static uint8_t *source_data_;  // shared by all tests of the test case
+  int source_stride_;
+
+  ACMRandom rnd_;
+};
+typedef unsigned int (*AverageFunction)(const uint8_t *s, int pitch);
+
+typedef std::tr1::tuple<int, int, int, int, AverageFunction> AvgFunc;
+
+class AverageTest : public AverageTestBase,
+                    public ::testing::WithParamInterface<AvgFunc> {
+ public:
+  AverageTest() : AverageTestBase(GET_PARAM(0), GET_PARAM(1)) {}
+
+ protected:
+  // Compares the function under test, GET_PARAM(4), with the C reference for
+  // the GET_PARAM(3)-sized block at byte offset GET_PARAM(2) of source_data_.
+  void CheckAverages() {
+    const uint8_t *const block = source_data_ + GET_PARAM(2);
+    const int block_size = GET_PARAM(3);
+    unsigned int expected = 0;  // stays 0 for sizes other than 8 and 4
+    if (block_size == 8) {
+      expected = ReferenceAverage8x8(block, source_stride_);
+    } else if (block_size == 4) {
+      expected = ReferenceAverage4x4(block, source_stride_);
+    }
+
+    // Call once, so the compared value comes from the register-checked call.
+    unsigned int actual = 0;
+    ASM_REGISTER_STATE_CHECK(actual = GET_PARAM(4)(block, source_stride_));
+
+    EXPECT_EQ(expected, actual);
+  }
+};
+
+typedef void (*IntProRowFunc)(int16_t hbuf[16], uint8_t const *ref,
+                              const int ref_stride, const int height);
+
+typedef std::tr1::tuple<int, IntProRowFunc, IntProRowFunc> IntProRowParam;
+
+class IntProRowTest : public AverageTestBase,
+                      public ::testing::WithParamInterface<IntProRowParam> {
+ public:
+  IntProRowTest()
+      : AverageTestBase(16, GET_PARAM(0)), hbuf_asm_(NULL), hbuf_c_(NULL) {
+    asm_func_ = GET_PARAM(1);
+    c_func_ = GET_PARAM(2);
+  }
+  // NOTE(review): SetUp()/TearDown() below replace the base-class versions.
+ protected:
+  virtual void SetUp() {
+    hbuf_asm_ = reinterpret_cast<int16_t *>(
+        aom_memalign(kDataAlignment, sizeof(*hbuf_asm_) * 16));
+    hbuf_c_ = reinterpret_cast<int16_t *>(
+        aom_memalign(kDataAlignment, sizeof(*hbuf_c_) * 16));
+  }
+
+  virtual void TearDown() {
+    aom_free(hbuf_c_);
+    hbuf_c_ = NULL;
+    aom_free(hbuf_asm_);
+    hbuf_asm_ = NULL;
+  }
+  // Runs both functions on source_data_ with a ref_stride argument of 0.
+  void RunComparison() {
+    ASM_REGISTER_STATE_CHECK(c_func_(hbuf_c_, source_data_, 0, height_));
+    ASM_REGISTER_STATE_CHECK(asm_func_(hbuf_asm_, source_data_, 0, height_));
+    EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * 16))
+        << "Output mismatch";
+  }
+
+ private:
+  IntProRowFunc asm_func_;
+  IntProRowFunc c_func_;
+  int16_t *hbuf_asm_;  // 16-entry output buffer passed to asm_func_
+  int16_t *hbuf_c_;    // 16-entry output buffer passed to c_func_
+};
+
+typedef int16_t (*IntProColFunc)(uint8_t const *ref, const int width);
+
+typedef std::tr1::tuple<int, IntProColFunc, IntProColFunc> IntProColParam;
+
+class IntProColTest : public AverageTestBase,
+                      public ::testing::WithParamInterface<IntProColParam> {
+ public:
+  IntProColTest() : AverageTestBase(GET_PARAM(0), 1), sum_asm_(0), sum_c_(0) {
+    asm_func_ = GET_PARAM(1);
+    c_func_ = GET_PARAM(2);
+  }
+  // Both functions must return the same value for (source_data_, width_).
+ protected:
+  void RunComparison() {
+    ASM_REGISTER_STATE_CHECK(sum_c_ = c_func_(source_data_, width_));
+    ASM_REGISTER_STATE_CHECK(sum_asm_ = asm_func_(source_data_, width_));
+    EXPECT_EQ(sum_c_, sum_asm_) << "Output mismatch";
+  }
+
+ private:
+  IntProColFunc asm_func_;
+  IntProColFunc c_func_;
+  int16_t sum_asm_;  // result of asm_func_
+  int16_t sum_c_;    // result of c_func_
+};
+
+typedef int (*SatdFunc)(const int16_t *coeffs, int length);
+typedef std::tr1::tuple<int, SatdFunc> SatdTestParam;
+
+class SatdTest : public ::testing::Test,
+                 public ::testing::WithParamInterface<SatdTestParam> {
+ protected:
+  virtual void SetUp() {
+    satd_size_ = GET_PARAM(0);  // number of coefficients
+    satd_func_ = GET_PARAM(1);  // function under test
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    src_ = reinterpret_cast<int16_t *>(
+        aom_memalign(16, sizeof(*src_) * satd_size_));
+    ASSERT_TRUE(src_ != NULL);
+  }
+
+  virtual void TearDown() {
+    libaom_test::ClearSystemState();
+    aom_free(src_);
+  }
+
+  void FillConstant(const int16_t val) {
+    for (int i = 0; i < satd_size_; ++i) src_[i] = val;
+  }
+
+  void FillRandom() {
+    for (int i = 0; i < satd_size_; ++i) src_[i] = rnd_.Rand16();
+  }
+  // Runs satd_func_ over src_ and expects exactly `expected`.
+  void Check(int expected) {
+    int total;
+    ASM_REGISTER_STATE_CHECK(total = satd_func_(src_, satd_size_));
+    EXPECT_EQ(expected, total);
+  }
+
+  int satd_size_;
+
+ private:
+  int16_t *src_;  // satd_size_ coefficients, allocated in SetUp()
+  SatdFunc satd_func_;
+  ACMRandom rnd_;
+};
+
+uint8_t *AverageTestBase::source_data_ = NULL;
+
+TEST_P(AverageTest, MinValue) {
+  FillConstant(0);
+  CheckAverages();
+}
+
+TEST_P(AverageTest, MaxValue) {
+  FillConstant(255);
+  CheckAverages();
+}
+
+TEST_P(AverageTest, Random) {
+  // 1000 rounds of fresh random pixels; CheckAverages() reads the block at
+  // source_data_ + GET_PARAM(2), so non-zero offsets give unaligned input.
+  for (int i = 0; i < 1000; i++) {
+    FillRandom();
+    CheckAverages();
+  }
+}
+
+TEST_P(IntProRowTest, MinValue) {
+  FillConstant(0);
+  RunComparison();
+}
+
+TEST_P(IntProRowTest, MaxValue) {
+  FillConstant(255);
+  RunComparison();
+}
+
+TEST_P(IntProRowTest, Random) {
+  FillRandom();
+  RunComparison();
+}
+
+TEST_P(IntProColTest, MinValue) {
+  FillConstant(0);
+  RunComparison();
+}
+
+TEST_P(IntProColTest, MaxValue) {
+  FillConstant(255);
+  RunComparison();
+}
+
+TEST_P(IntProColTest, Random) {
+  FillRandom();
+  RunComparison();
+}
+
+TEST_P(SatdTest, MinValue) {
+  const int kMin = -32640;
+  const int expected = -kMin * satd_size_;
+  FillConstant(kMin);
+  Check(expected);
+}
+
+TEST_P(SatdTest, MaxValue) {
+  const int kMax = 32640;
+  const int expected = kMax * satd_size_;
+  FillConstant(kMax);
+  Check(expected);
+}
+
+TEST_P(SatdTest, Random) {
+  int expected;  // golden value for the seeded random fill of each size
+  switch (satd_size_) {
+    case 16: expected = 205298; break;
+    case 64: expected = 1113950; break;
+    case 256: expected = 4268415; break;
+    case 1024: expected = 16954082; break;
+    default:
+      FAIL() << "Invalid satd size (" << satd_size_
+             << ") valid: 16/64/256/1024";
+  }
+  FillRandom();
+  Check(expected);
+}
+
+using std::tr1::make_tuple;
+
+INSTANTIATE_TEST_CASE_P(
+    C, AverageTest,
+    ::testing::Values(make_tuple(16, 16, 1, 8, &aom_avg_8x8_c),
+                      make_tuple(16, 16, 1, 4, &aom_avg_4x4_c)));
+
+INSTANTIATE_TEST_CASE_P(C, SatdTest,
+                        ::testing::Values(make_tuple(16, &aom_satd_c),
+                                          make_tuple(64, &aom_satd_c),
+                                          make_tuple(256, &aom_satd_c),
+                                          make_tuple(1024, &aom_satd_c)));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, AverageTest,
+    ::testing::Values(make_tuple(16, 16, 0, 8, &aom_avg_8x8_sse2),
+                      make_tuple(16, 16, 5, 8, &aom_avg_8x8_sse2),
+                      make_tuple(32, 32, 15, 8, &aom_avg_8x8_sse2),
+                      make_tuple(16, 16, 0, 4, &aom_avg_4x4_sse2),
+                      make_tuple(16, 16, 5, 4, &aom_avg_4x4_sse2),
+                      make_tuple(32, 32, 15, 4, &aom_avg_4x4_sse2)));
+
+INSTANTIATE_TEST_CASE_P(
+    SSE2, IntProRowTest,
+    ::testing::Values(make_tuple(16, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
+                      make_tuple(32, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
+                      make_tuple(64, &aom_int_pro_row_sse2,
+                                 &aom_int_pro_row_c)));
+
+INSTANTIATE_TEST_CASE_P(
+    SSE2, IntProColTest,
+    ::testing::Values(make_tuple(16, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
+                      make_tuple(32, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
+                      make_tuple(64, &aom_int_pro_col_sse2,
+                                 &aom_int_pro_col_c)));
+
+INSTANTIATE_TEST_CASE_P(SSE2, SatdTest,
+                        ::testing::Values(make_tuple(16, &aom_satd_sse2),
+                                          make_tuple(64, &aom_satd_sse2),
+                                          make_tuple(256, &aom_satd_sse2),
+                                          make_tuple(1024, &aom_satd_sse2)));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, AverageTest,
+    ::testing::Values(make_tuple(16, 16, 0, 8, &aom_avg_8x8_neon),
+                      make_tuple(16, 16, 5, 8, &aom_avg_8x8_neon),
+                      make_tuple(32, 32, 15, 8, &aom_avg_8x8_neon),
+                      make_tuple(16, 16, 0, 4, &aom_avg_4x4_neon),
+                      make_tuple(16, 16, 5, 4, &aom_avg_4x4_neon),
+                      make_tuple(32, 32, 15, 4, &aom_avg_4x4_neon)));
+
+INSTANTIATE_TEST_CASE_P(
+    NEON, IntProRowTest,
+    ::testing::Values(make_tuple(16, &aom_int_pro_row_neon, &aom_int_pro_row_c),
+                      make_tuple(32, &aom_int_pro_row_neon, &aom_int_pro_row_c),
+                      make_tuple(64, &aom_int_pro_row_neon,
+                                 &aom_int_pro_row_c)));
+
+INSTANTIATE_TEST_CASE_P(
+    NEON, IntProColTest,
+    ::testing::Values(make_tuple(16, &aom_int_pro_col_neon, &aom_int_pro_col_c),
+                      make_tuple(32, &aom_int_pro_col_neon, &aom_int_pro_col_c),
+                      make_tuple(64, &aom_int_pro_col_neon,
+                                 &aom_int_pro_col_c)));
+
+INSTANTIATE_TEST_CASE_P(NEON, SatdTest,
+                        ::testing::Values(make_tuple(16, &aom_satd_neon),
+                                          make_tuple(64, &aom_satd_neon),
+                                          make_tuple(256, &aom_satd_neon),
+                                          make_tuple(1024, &aom_satd_neon)));
+#endif
+
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(
+    MSA, AverageTest,
+    ::testing::Values(make_tuple(16, 16, 0, 8, &aom_avg_8x8_msa),
+                      make_tuple(16, 16, 5, 8, &aom_avg_8x8_msa),
+                      make_tuple(32, 32, 15, 8, &aom_avg_8x8_msa),
+                      make_tuple(16, 16, 0, 4, &aom_avg_4x4_msa),
+                      make_tuple(16, 16, 5, 4, &aom_avg_4x4_msa),
+                      make_tuple(32, 32, 15, 4, &aom_avg_4x4_msa)));
+#endif
+
+}  // namespace
diff --git a/third_party/aom/test/binary_codes_test.cc b/third_party/aom/test/binary_codes_test.cc
new file mode 100644
index 000000000..385ec7687
--- /dev/null
+++ b/third_party/aom/test/binary_codes_test.cc
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/bitreader.h"
+#include "aom_dsp/bitwriter.h"
+#include "aom_dsp/binary_codes_reader.h"
+#include "aom_dsp/binary_codes_writer.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+
+// Round trip: values written with the ref-bilevel code must read back equal.
+TEST(AV1, TestPrimitiveRefbilivel) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int kBufferSize = 65536;
+  aom_writer bw;
+  uint8_t bw_buffer[kBufferSize];
+  const uint16_t kRanges = 8;      // entries in range_vals
+  const uint16_t kNearRanges = 8;  // near_range draws per range
+  const uint16_t kReferences = 8;  // ref draws per near_range
+  const uint16_t kValues = 16;     // values coded per ref
+  const uint16_t range_vals[kRanges] = { 1, 13, 64, 120, 230, 420, 1100, 8000 };
+  uint16_t enc_values[kRanges][kNearRanges][kReferences][kValues][4];
+  aom_start_encode(&bw, bw_buffer);
+  for (int n = 0; n < kRanges; ++n) {
+    const uint16_t range = range_vals[n];
+    for (int p = 0; p < kNearRanges; ++p) {
+      const uint16_t near_range = 1 + rnd(range);
+      for (int r = 0; r < kReferences; ++r) {
+        const uint16_t ref = rnd(range);
+        for (int v = 0; v < kValues; ++v) {
+          const uint16_t value = rnd(range);
+          enc_values[n][p][r][v][0] = range;
+          enc_values[n][p][r][v][1] = near_range;
+          enc_values[n][p][r][v][2] = ref;
+          enc_values[n][p][r][v][3] = value;
+          aom_write_primitive_refbilevel(&bw, range, near_range, ref, value);
+        }
+      }
+    }
+  }
+  aom_stop_encode(&bw);
+  aom_reader br;
+  aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);
+  GTEST_ASSERT_GE(aom_reader_tell(&br), 0u);
+  GTEST_ASSERT_LE(aom_reader_tell(&br), 1u);
+  for (int n = 0; n < kRanges; ++n) {  // decode in the order of encoding
+    for (int p = 0; p < kNearRanges; ++p) {
+      for (int r = 0; r < kReferences; ++r) {
+        for (int v = 0; v < kValues; ++v) {
+          const uint16_t range = enc_values[n][p][r][v][0];
+          const uint16_t near_range = enc_values[n][p][r][v][1];
+          const uint16_t ref = enc_values[n][p][r][v][2];
+          const uint16_t value =
+              aom_read_primitive_refbilevel(&br, range, near_range, ref);
+          GTEST_ASSERT_EQ(value, enc_values[n][p][r][v][3]);
+        }
+      }
+    }
+  }
+}
+
+// Round trip: values written with the ref-subexpfin code must read back equal.
+TEST(AV1, TestPrimitiveRefsubexpfin) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int kBufferSize = 65536;
+  aom_writer bw;
+  uint8_t bw_buffer[kBufferSize];
+  const uint16_t kRanges = 8;        // entries in range_vals
+  const uint16_t kSubexpParams = 6;  // k takes the values 0..5
+  const uint16_t kReferences = 8;    // ref draws per (range, k)
+  const uint16_t kValues = 16;       // values coded per ref
+  uint16_t enc_values[kRanges][kSubexpParams][kReferences][kValues][4];
+  const uint16_t range_vals[kRanges] = { 1, 13, 64, 120, 230, 420, 1100, 8000 };
+  aom_start_encode(&bw, bw_buffer);
+  for (int n = 0; n < kRanges; ++n) {
+    const uint16_t range = range_vals[n];
+    for (int k = 0; k < kSubexpParams; ++k) {
+      for (int r = 0; r < kReferences; ++r) {
+        const uint16_t ref = rnd(range);
+        for (int v = 0; v < kValues; ++v) {
+          const uint16_t value = rnd(range);
+          enc_values[n][k][r][v][0] = range;
+          enc_values[n][k][r][v][1] = k;
+          enc_values[n][k][r][v][2] = ref;
+          enc_values[n][k][r][v][3] = value;
+          aom_write_primitive_refsubexpfin(&bw, range, k, ref, value);
+        }
+      }
+    }
+  }
+  aom_stop_encode(&bw);
+  aom_reader br;
+  aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);
+  GTEST_ASSERT_GE(aom_reader_tell(&br), 0u);
+  GTEST_ASSERT_LE(aom_reader_tell(&br), 1u);
+  for (int n = 0; n < kRanges; ++n) {  // decode in the order of encoding
+    for (int k = 0; k < kSubexpParams; ++k) {
+      for (int r = 0; r < kReferences; ++r) {
+        for (int v = 0; v < kValues; ++v) {
+          const uint16_t range = enc_values[n][k][r][v][0];
+          GTEST_ASSERT_EQ(k, enc_values[n][k][r][v][1]);  // runs with NDEBUG too
+          const uint16_t ref = enc_values[n][k][r][v][2];
+          const uint16_t value =
+              aom_read_primitive_refsubexpfin(&br, range, k, ref);
+          GTEST_ASSERT_EQ(value, enc_values[n][k][r][v][3]);
+        }
+      }
+    }
+  }
+}
+// TODO(debargha): Adds tests for other primitives
+}  // namespace
diff --git a/third_party/aom/test/blend_a64_mask_1d_test.cc b/third_party/aom/test/blend_a64_mask_1d_test.cc
new file mode 100644
index 000000000..66e741a74
--- /dev/null
+++ b/third_party/aom/test/blend_a64_mask_1d_test.cc
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/register_state_check.h"
+#include "test/function_equivalence_test.h"
+
+#include "./aom_config.h"
+#include "./aom_dsp_rtcd.h"
+#include "aom/aom_integer.h"
+
+#include "./av1_rtcd.h"
+
+#include "av1/common/enums.h"
+
+#include "aom_dsp/blend.h"
+
+using libaom_test::FunctionEquivalenceTest;
+
+namespace {
+
+template <typename F, typename T>
+class BlendA64Mask1DTest : public FunctionEquivalenceTest<F> {
+ public:
+  static const int kIterations = 10000;
+  static const int kMaxWidth = MAX_SB_SIZE * 5;  // * 5 to cover longer strides
+  static const int kMaxHeight = MAX_SB_SIZE;
+  static const int kBufSize = kMaxWidth * kMaxHeight;
+  static const int kMaxMaskWidth = 2 * MAX_SB_SIZE;
+  static const int kMaxMaskSize = kMaxMaskWidth;
+
+  virtual ~BlendA64Mask1DTest() {}
+
+  virtual void Execute(const T *p_src0, const T *p_src1) = 0;
+
+  void Common() {
+    w_ = 1 << this->rng_(MAX_SB_SIZE_LOG2 + 1);
+    h_ = 1 << this->rng_(MAX_SB_SIZE_LOG2 + 1);
+
+    dst_offset_ = this->rng_(33);
+    dst_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
+
+    src0_offset_ = this->rng_(33);
+    src0_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
+
+    src1_offset_ = this->rng_(33);
+    src1_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
+
+    T *p_src0;
+    T *p_src1;
+
+    switch (this->rng_(3)) {
+      case 0:  // Separate sources
+        p_src0 = src0_;
+        p_src1 = src1_;
+        break;
+      case 1:  // src0 == dst
+        p_src0 = dst_tst_;
+        src0_stride_ = dst_stride_;
+        src0_offset_ = dst_offset_;
+        p_src1 = src1_;
+        break;
+      case 2:  // src1 == dst
+        p_src0 = src0_;
+        p_src1 = dst_tst_;
+        src1_stride_ = dst_stride_;
+        src1_offset_ = dst_offset_;
+        break;
+      default: FAIL();
+    }
+
+    Execute(p_src0, p_src1);
+
+    for (int r = 0; r < h_; ++r) {
+      for (int c = 0; c < w_; ++c) {
+        ASSERT_EQ(dst_ref_[dst_offset_ + r * dst_stride_ + c],
+                  dst_tst_[dst_offset_ + r * dst_stride_ + c]);
+      }
+    }
+  }
+
+  T dst_ref_[kBufSize];
+  T dst_tst_[kBufSize];
+  uint32_t dst_stride_;
+  uint32_t dst_offset_;
+
+  T src0_[kBufSize];
+  uint32_t src0_stride_;
+  uint32_t src0_offset_;
+
+  T src1_[kBufSize];
+  uint32_t src1_stride_;
+  uint32_t src1_offset_;
+
+  uint8_t mask_[kMaxMaskSize];
+
+  int w_;
+  int h_;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// 8 bit version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*F8B)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
+                    uint32_t src0_stride, const uint8_t *src1,
+                    uint32_t src1_stride, const uint8_t *mask, int h, int w);
+typedef libaom_test::FuncParam<F8B> TestFuncs;
+
+class BlendA64Mask1DTest8B : public BlendA64Mask1DTest<F8B, uint8_t> {
+ protected:
+  void Execute(const uint8_t *p_src0, const uint8_t *p_src1) {
+    params_.ref_func(dst_ref_ + dst_offset_, dst_stride_, p_src0 + src0_offset_,
+                     src0_stride_, p_src1 + src1_offset_, src1_stride_, mask_,
+                     h_, w_);
+    ASM_REGISTER_STATE_CHECK(params_.tst_func(
+        dst_tst_ + dst_offset_, dst_stride_, p_src0 + src0_offset_,
+        src0_stride_, p_src1 + src1_offset_, src1_stride_, mask_, h_, w_));
+  }
+};
+
+TEST_P(BlendA64Mask1DTest8B, RandomValues) {
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_.Rand8();
+      dst_tst_[i] = rng_.Rand8();
+
+      src0_[i] = rng_.Rand8();
+      src1_[i] = rng_.Rand8();
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+    Common();
+  }
+}
+
+TEST_P(BlendA64Mask1DTest8B, ExtremeValues) {
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_(2) + 254;
+      dst_tst_[i] = rng_(2) + 254;
+      src0_[i] = rng_(2) + 254;
+      src1_[i] = rng_(2) + 254;
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
+
+    Common();
+  }
+}
+
+static void blend_a64_hmask_ref(uint8_t *dst, uint32_t dst_stride,
+                                const uint8_t *src0, uint32_t src0_stride,
+                                const uint8_t *src1, uint32_t src1_stride,
+                                const uint8_t *mask, int h, int w) {
+  uint8_t mask2d[BlendA64Mask1DTest8B::kMaxMaskSize]
+                [BlendA64Mask1DTest8B::kMaxMaskSize];
+  // Reference: expand the 1D mask so each row of mask2d repeats mask[0..w).
+  for (int row = 0; row < h; ++row)
+    for (int col = 0; col < w; ++col) mask2d[row][col] = mask[col];
+  // Blend via aom_blend_a64_mask_c with suby = 0 and subx = 0.
+  aom_blend_a64_mask_c(dst, dst_stride, src0, src0_stride, src1, src1_stride,
+                       &mask2d[0][0], BlendA64Mask1DTest8B::kMaxMaskSize, h, w,
+                       0, 0);
+}
+
+static void blend_a64_vmask_ref(uint8_t *dst, uint32_t dst_stride,
+                                const uint8_t *src0, uint32_t src0_stride,
+                                const uint8_t *src1, uint32_t src1_stride,
+                                const uint8_t *mask, int h, int w) {
+  uint8_t mask2d[BlendA64Mask1DTest8B::kMaxMaskSize]
+                [BlendA64Mask1DTest8B::kMaxMaskSize];
+  // Reference: expand the 1D mask so row r of mask2d is filled with mask[r].
+  for (int row = 0; row < h; ++row)
+    for (int col = 0; col < w; ++col) mask2d[row][col] = mask[row];
+  // Blend via aom_blend_a64_mask_c with suby = 0 and subx = 0.
+  aom_blend_a64_mask_c(dst, dst_stride, src0, src0_stride, src1, src1_stride,
+                       &mask2d[0][0], BlendA64Mask1DTest8B::kMaxMaskSize, h, w,
+                       0, 0);
+}
+
+INSTANTIATE_TEST_CASE_P(
+    C, BlendA64Mask1DTest8B,
+    ::testing::Values(TestFuncs(blend_a64_hmask_ref, aom_blend_a64_hmask_c),
+                      TestFuncs(blend_a64_vmask_ref, aom_blend_a64_vmask_c)));
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, BlendA64Mask1DTest8B,
+    ::testing::Values(
+        TestFuncs(blend_a64_hmask_ref, aom_blend_a64_hmask_sse4_1),
+        TestFuncs(blend_a64_vmask_ref, aom_blend_a64_vmask_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#if CONFIG_HIGHBITDEPTH
+//////////////////////////////////////////////////////////////////////////////
+// High bit-depth version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*FHBD)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
+                     uint32_t src0_stride, const uint8_t *src1,
+                     uint32_t src1_stride, const uint8_t *mask, int h, int w,
+                     int bd);
+typedef libaom_test::FuncParam<FHBD> TestFuncsHBD;
+
+class BlendA64Mask1DTestHBD : public BlendA64Mask1DTest<FHBD, uint16_t> {
+ protected:
+  void Execute(const uint16_t *p_src0, const uint16_t *p_src1) {
+    params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
+                     CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
+                     CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_,
+                     mask_, h_, w_, bit_depth_);
+    ASM_REGISTER_STATE_CHECK(params_.tst_func(
+        CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_), dst_stride_,
+        CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
+        CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_, mask_, h_, w_,
+        bit_depth_));
+  }
+
+  int bit_depth_;
+};
+
+TEST_P(BlendA64Mask1DTestHBD, RandomValues) {
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    switch (rng_(3)) {
+      case 0: bit_depth_ = 8; break;
+      case 1: bit_depth_ = 10; break;
+      default: bit_depth_ = 12; break;
+    }
+
+    const int hi = 1 << bit_depth_;
+
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_(hi);
+      dst_tst_[i] = rng_(hi);
+      src0_[i] = rng_(hi);
+      src1_[i] = rng_(hi);
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+    Common();
+  }
+}
+
+TEST_P(BlendA64Mask1DTestHBD, ExtremeValues) {
+  for (int iter = 0; iter < 1000 && !HasFatalFailure(); ++iter) {
+    switch (rng_(3)) {
+      case 0: bit_depth_ = 8; break;
+      case 1: bit_depth_ = 10; break;
+      default: bit_depth_ = 12; break;
+    }
+
+    const int hi = 1 << bit_depth_;
+    const int lo = hi - 2;
+
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_(hi - lo) + lo;
+      dst_tst_[i] = rng_(hi - lo) + lo;
+      src0_[i] = rng_(hi - lo) + lo;
+      src1_[i] = rng_(hi - lo) + lo;
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
+
+    Common();
+  }
+}
+
+static void highbd_blend_a64_hmask_ref(
+    uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
+    uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
+    const uint8_t *mask, int h, int w, int bd) {
+  uint8_t mask2d[BlendA64Mask1DTestHBD::kMaxMaskSize]
+                [BlendA64Mask1DTestHBD::kMaxMaskSize];
+  // Reference: expand the 1D mask so each row of mask2d repeats mask[0..w).
+  for (int row = 0; row < h; ++row)
+    for (int col = 0; col < w; ++col) mask2d[row][col] = mask[col];
+  // Blend via aom_highbd_blend_a64_mask_c with suby = 0 and subx = 0.
+  aom_highbd_blend_a64_mask_c(
+      dst, dst_stride, src0, src0_stride, src1, src1_stride, &mask2d[0][0],
+      BlendA64Mask1DTestHBD::kMaxMaskSize, h, w, 0, 0, bd);
+}
+
+static void highbd_blend_a64_vmask_ref(
+    uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
+    uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
+    const uint8_t *mask, int h, int w, int bd) {
+  uint8_t mask2d[BlendA64Mask1DTestHBD::kMaxMaskSize]
+                [BlendA64Mask1DTestHBD::kMaxMaskSize];
+  // Reference: expand the 1D mask so row r of mask2d is filled with mask[r].
+  for (int row = 0; row < h; ++row)
+    for (int col = 0; col < w; ++col) mask2d[row][col] = mask[row];
+  // Blend via aom_highbd_blend_a64_mask_c with suby = 0 and subx = 0.
+  aom_highbd_blend_a64_mask_c(
+      dst, dst_stride, src0, src0_stride, src1, src1_stride, &mask2d[0][0],
+      BlendA64Mask1DTestHBD::kMaxMaskSize, h, w, 0, 0, bd);
+}
+
+INSTANTIATE_TEST_CASE_P(
+    C, BlendA64Mask1DTestHBD,
+    ::testing::Values(TestFuncsHBD(highbd_blend_a64_hmask_ref,
+                                   aom_highbd_blend_a64_hmask_c),
+                      TestFuncsHBD(highbd_blend_a64_vmask_ref,
+                                   aom_highbd_blend_a64_vmask_c)));
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, BlendA64Mask1DTestHBD,
+    ::testing::Values(TestFuncsHBD(highbd_blend_a64_hmask_ref,
+                                   aom_highbd_blend_a64_hmask_sse4_1),
+                      TestFuncsHBD(highbd_blend_a64_vmask_ref,
+                                   aom_highbd_blend_a64_vmask_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#endif  // CONFIG_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/blend_a64_mask_test.cc b/third_party/aom/test/blend_a64_mask_test.cc
new file mode 100644
index 000000000..fef124d34
--- /dev/null
+++ b/third_party/aom/test/blend_a64_mask_test.cc
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/register_state_check.h"
+#include "test/function_equivalence_test.h"
+
+#include "./aom_config.h"
+#include "./aom_dsp_rtcd.h"
+#include "aom/aom_integer.h"
+
+#include "./av1_rtcd.h"
+
+#include "av1/common/enums.h"
+
+#include "aom_dsp/blend.h"
+
+using libaom_test::FunctionEquivalenceTest;
+
+namespace {
+
+template <typename F, typename T>
+class BlendA64MaskTest : public FunctionEquivalenceTest<F> {
+ protected:
+  static const int kIterations = 10000;
+  static const int kMaxWidth = MAX_SB_SIZE * 5;  // * 5 to cover longer strides
+  static const int kMaxHeight = MAX_SB_SIZE;
+  static const int kBufSize = kMaxWidth * kMaxHeight;
+  static const int kMaxMaskWidth = 2 * MAX_SB_SIZE;
+  static const int kMaxMaskSize = kMaxMaskWidth * kMaxMaskWidth;
+
+  virtual ~BlendA64MaskTest() {}
+
+  virtual void Execute(const T *p_src0, const T *p_src1) = 0;
+  // Randomizes block geometry, runs Execute(), then compares the outputs.
+  void Common() {
+    w_ = 1 << this->rng_(MAX_SB_SIZE_LOG2 + 1);
+    h_ = 1 << this->rng_(MAX_SB_SIZE_LOG2 + 1);
+
+    subx_ = this->rng_(2);
+    suby_ = this->rng_(2);
+
+    dst_offset_ = this->rng_(33);
+    dst_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
+
+    src0_offset_ = this->rng_(33);
+    src0_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
+
+    src1_offset_ = this->rng_(33);
+    src1_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
+
+    mask_stride_ =
+        this->rng_(kMaxWidth + 1 - w_ * (subx_ ? 2 : 1)) + w_ * (subx_ ? 2 : 1);
+
+    T *p_src0;
+    T *p_src1;
+
+    switch (this->rng_(3)) {
+      case 0:  // Separate sources
+        p_src0 = src0_;
+        p_src1 = src1_;
+        break;
+      case 1:  // src0 == dst
+        p_src0 = dst_tst_;
+        src0_stride_ = dst_stride_;
+        src0_offset_ = dst_offset_;
+        p_src1 = src1_;
+        break;
+      case 2:  // src1 == dst
+        p_src0 = src0_;
+        p_src1 = dst_tst_;
+        src1_stride_ = dst_stride_;
+        src1_offset_ = dst_offset_;
+        break;
+      default: FAIL();
+    }
+
+    Execute(p_src0, p_src1);
+
+    for (int r = 0; r < h_; ++r) {
+      for (int c = 0; c < w_; ++c) {
+        ASSERT_EQ(dst_ref_[dst_offset_ + r * dst_stride_ + c],
+                  dst_tst_[dst_offset_ + r * dst_stride_ + c]);
+      }
+    }
+  }
+
+  T dst_ref_[kBufSize];
+  T dst_tst_[kBufSize];
+  uint32_t dst_stride_;
+  uint32_t dst_offset_;
+
+  T src0_[kBufSize];
+  uint32_t src0_stride_;
+  uint32_t src0_offset_;
+
+  T src1_[kBufSize];
+  uint32_t src1_stride_;
+  uint32_t src1_offset_;
+
+  uint8_t mask_[kMaxMaskSize];
+  size_t mask_stride_;  // Set in Common(); unused by Execute().
+
+  int w_;
+  int h_;
+
+  int suby_;
+  int subx_;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// 8 bit version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*F8B)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
+                    uint32_t src0_stride, const uint8_t *src1,
+                    uint32_t src1_stride, const uint8_t *mask,
+                    uint32_t mask_stride, int h, int w, int suby, int subx);
+typedef libaom_test::FuncParam<F8B> TestFuncs;
+
+class BlendA64MaskTest8B : public BlendA64MaskTest<F8B, uint8_t> {
+ protected:
+  void Execute(const uint8_t *p_src0, const uint8_t *p_src1) {
+    params_.ref_func(dst_ref_ + dst_offset_, dst_stride_, p_src0 + src0_offset_,
+                     src0_stride_, p_src1 + src1_offset_, src1_stride_, mask_,
+                     kMaxMaskWidth, h_, w_, suby_, subx_);
+    ASM_REGISTER_STATE_CHECK(params_.tst_func(
+        dst_tst_ + dst_offset_, dst_stride_, p_src0 + src0_offset_,
+        src0_stride_, p_src1 + src1_offset_, src1_stride_, mask_, kMaxMaskWidth,
+        h_, w_, suby_, subx_));
+  }
+};
+
+TEST_P(BlendA64MaskTest8B, RandomValues) {
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_.Rand8();
+      dst_tst_[i] = rng_.Rand8();
+
+      src0_[i] = rng_.Rand8();
+      src1_[i] = rng_.Rand8();
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+    Common();
+  }
+}
+
+TEST_P(BlendA64MaskTest8B, ExtremeValues) {
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_(2) + 254;
+      dst_tst_[i] = rng_(2) + 254;
+      src0_[i] = rng_(2) + 254;
+      src1_[i] = rng_(2) + 254;
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
+
+    Common();
+  }
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(SSE4_1, BlendA64MaskTest8B,
+                        ::testing::Values(TestFuncs(
+                            aom_blend_a64_mask_c, aom_blend_a64_mask_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#if CONFIG_HIGHBITDEPTH
+//////////////////////////////////////////////////////////////////////////////
+// High bit-depth version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*FHBD)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
+                     uint32_t src0_stride, const uint8_t *src1,
+                     uint32_t src1_stride, const uint8_t *mask,
+                     uint32_t mask_stride, int h, int w, int suby, int subx,
+                     int bd);
+typedef libaom_test::FuncParam<FHBD> TestFuncsHBD;
+
+class BlendA64MaskTestHBD : public BlendA64MaskTest<FHBD, uint16_t> {
+ protected:
+  void Execute(const uint16_t *p_src0, const uint16_t *p_src1) {
+    params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
+                     CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
+                     CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_,
+                     mask_, kMaxMaskWidth, h_, w_, suby_, subx_, bit_depth_);
+    ASM_REGISTER_STATE_CHECK(params_.tst_func(
+        CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_), dst_stride_,
+        CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
+        CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_, mask_,
+        kMaxMaskWidth, h_, w_, suby_, subx_, bit_depth_));
+  }
+
+  int bit_depth_;
+};
+
+TEST_P(BlendA64MaskTestHBD, RandomValues) {
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    switch (rng_(3)) {
+      case 0: bit_depth_ = 8; break;
+      case 1: bit_depth_ = 10; break;
+      default: bit_depth_ = 12; break;
+    }
+
+    const int hi = 1 << bit_depth_;
+
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_(hi);
+      dst_tst_[i] = rng_(hi);
+      src0_[i] = rng_(hi);
+      src1_[i] = rng_(hi);
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+    Common();
+  }
+}
+
+TEST_P(BlendA64MaskTestHBD, ExtremeValues) {
+  for (int iter = 0; iter < 1000 && !HasFatalFailure(); ++iter) {
+    switch (rng_(3)) {
+      case 0: bit_depth_ = 8; break;
+      case 1: bit_depth_ = 10; break;
+      default: bit_depth_ = 12; break;
+    }
+
+    const int hi = 1 << bit_depth_;
+    const int lo = hi - 2;
+
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_(hi - lo) + lo;
+      dst_tst_[i] = rng_(hi - lo) + lo;
+      src0_[i] = rng_(hi - lo) + lo;
+      src1_[i] = rng_(hi - lo) + lo;
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
+
+    Common();
+  }
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, BlendA64MaskTestHBD,
+    ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
+                                   aom_highbd_blend_a64_mask_sse4_1)));
+#endif  // HAVE_SSE4_1
+#endif  // CONFIG_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/boolcoder_test.cc b/third_party/aom/test/boolcoder_test.cc
new file mode 100644
index 000000000..4d9d7aaf4
--- /dev/null
+++ b/third_party/aom/test/boolcoder_test.cc
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/bitreader.h"
+#include "aom_dsp/bitwriter.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+const int num_tests = 10;
+}  // namespace
+
+TEST(AV1, TestBitIO) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  for (int n = 0; n < num_tests; ++n) {
+    for (int method = 0; method <= 7; ++method) {  // we generate various proba
+      const int kBitsToTest = 1000;
+      uint8_t probas[kBitsToTest];
+
+      for (int i = 0; i < kBitsToTest; ++i) {
+        const int parity = i & 1;
+        /* clang-format off */
+        probas[i] =
+          (method == 0) ? 0 : (method == 1) ? 255 :
+          (method == 2) ? 128 :
+          (method == 3) ? rnd.Rand8() :
+          (method == 4) ? (parity ? 0 : 255) :
+            // alternate between low and high proba:
+            (method == 5) ? (parity ? rnd(128) : 255 - rnd(128)) :
+            (method == 6) ?
+            (parity ? rnd(64) : 255 - rnd(64)) :
+            (parity ? rnd(32) : 255 - rnd(32));
+        /* clang-format on */
+      }
+      for (int bit_method = 0; bit_method <= 3; ++bit_method) {
+        const int random_seed = 6432;
+        const int kBufferSize = 10000;
+        ACMRandom bit_rnd(random_seed);
+        aom_writer bw;
+        uint8_t bw_buffer[kBufferSize];
+        aom_start_encode(&bw, bw_buffer);
+
+        int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;
+        for (int i = 0; i < kBitsToTest; ++i) {
+          if (bit_method == 2) {
+            bit = (i & 1);
+          } else if (bit_method == 3) {
+            bit = bit_rnd(2);
+          }
+          aom_write(&bw, bit, static_cast<int>(probas[i]));
+        }
+
+        aom_stop_encode(&bw);
+
+#if !CONFIG_DAALA_EC
+        // First bit should be zero
+        GTEST_ASSERT_EQ(bw_buffer[0] & 0x80, 0);
+#endif
+
+        aom_reader br;
+        aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);
+        bit_rnd.Reset(random_seed);
+        for (int i = 0; i < kBitsToTest; ++i) {
+          if (bit_method == 2) {
+            bit = (i & 1);
+          } else if (bit_method == 3) {
+            bit = bit_rnd(2);
+          }
+          GTEST_ASSERT_EQ(aom_read(&br, probas[i], NULL), bit)
+              << "pos: " << i << " / " << kBitsToTest
+              << " bit_method: " << bit_method << " method: " << method;
+        }
+      }
+    }
+  }
+}
+
+#if CONFIG_DAALA_EC
+#define FRAC_DIFF_TOTAL_ERROR 0.07
+#else
+#define FRAC_DIFF_TOTAL_ERROR 0.2
+#endif
+
+TEST(AV1, TestTell) {
+  const int kBufferSize = 10000;
+  aom_writer bw;
+  uint8_t bw_buffer[kBufferSize];
+  const int kSymbols = 1024;
+  // Coders are noisier at low probabilities, so we start at p = 4.
+  for (int p = 4; p < 256; p++) {
+    double probability = p / 256.;
+    aom_start_encode(&bw, bw_buffer);
+    for (int i = 0; i < kSymbols; i++) {
+      aom_write(&bw, 0, p);
+    }
+    aom_stop_encode(&bw);
+    aom_reader br;
+    aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);
+    uint32_t last_tell = aom_reader_tell(&br);
+    uint32_t last_tell_frac = aom_reader_tell_frac(&br);
+    double frac_diff_total = 0;
+    GTEST_ASSERT_GE(aom_reader_tell(&br), 0u);
+    GTEST_ASSERT_LE(aom_reader_tell(&br), 1u);
+    for (int i = 0; i < kSymbols; i++) {
+      aom_read(&br, p, NULL);
+      uint32_t tell = aom_reader_tell(&br);
+      uint32_t tell_frac = aom_reader_tell_frac(&br);
+      GTEST_ASSERT_GE(tell, last_tell) << "tell: " << tell
+                                       << ", last_tell: " << last_tell;
+      GTEST_ASSERT_GE(tell_frac, last_tell_frac)
+          << "tell_frac: " << tell_frac
+          << ", last_tell_frac: " << last_tell_frac;
+      // Frac tell should round up to tell.
+      GTEST_ASSERT_EQ(tell, (tell_frac + 7) >> 3);
+      last_tell = tell;
+      frac_diff_total +=
+          fabs(((tell_frac - last_tell_frac) / 8.0) + log2(probability));
+      last_tell_frac = tell_frac;
+    }
+    const uint32_t expected = (uint32_t)(-kSymbols * log2(probability));
+    // Last tell should be close to the expected value.
+    GTEST_ASSERT_LE(last_tell, expected + 20) << " last_tell: " << last_tell;
+    // The average frac_diff error should be pretty small.
+    GTEST_ASSERT_LE(frac_diff_total / kSymbols, FRAC_DIFF_TOTAL_ERROR)
+        << " frac_diff_total: " << frac_diff_total;
+  }
+}
diff --git a/third_party/aom/test/borders_test.cc b/third_party/aom/test/borders_test.cc
new file mode 100644
index 000000000..076f91404
--- /dev/null
+++ b/third_party/aom/test/borders_test.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <climits>
+#include <vector>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+class BordersTest
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWithParam<libaom_test::TestMode> {
+ protected:
+  BordersTest() : EncoderTest(GET_PARAM(0)) {}
+  virtual ~BordersTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+  }
+  // When video->frame() == 1, applies the encoder controls listed below.
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      encoder->Control(AOME_SET_CPUUSED, 1);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+    }
+  }
+  // Checks the key-frame flag but takes no action either way.
+  virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
+    if (pkt->data.frame.flags & AOM_FRAME_IS_KEY) {
+    }
+  }
+};
+
+TEST_P(BordersTest, TestEncodeHighBitrate) {
+  // Validate that this non multiple of 64 wide clip encodes and decodes
+  // without a mismatch when passing in a very low max q.  This pushes
+  // the encoder to producing lots of big partitions which will likely
+  // extend into the border and test the border condition.
+  cfg_.g_lag_in_frames = 25;
+  cfg_.rc_2pass_vbr_minsection_pct = 5;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_target_bitrate = 2000;
+  cfg_.rc_max_quantizer = 10;
+
+  ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+                                       40);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+TEST_P(BordersTest, TestLowBitrate) {
+  // Validate that this clip encodes and decodes without a mismatch
+  // when passing in a very high min q.  This pushes the encoder to producing
+  // lots of small partitions, which should test the other condition.
+
+  cfg_.g_lag_in_frames = 25;
+  cfg_.rc_2pass_vbr_minsection_pct = 5;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_target_bitrate = 200;
+  cfg_.rc_min_quantizer = 40;
+
+  ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+                                       40);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+AV1_INSTANTIATE_TEST_CASE(BordersTest,
+                          ::testing::Values(::libaom_test::kTwoPassGood));
+}  // namespace
diff --git a/third_party/aom/test/clear_system_state.h b/third_party/aom/test/clear_system_state.h
new file mode 100644
index 000000000..4f3c1eed0
--- /dev/null
+++ b/third_party/aom/test/clear_system_state.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef TEST_CLEAR_SYSTEM_STATE_H_
+#define TEST_CLEAR_SYSTEM_STATE_H_
+
+#include "./aom_config.h"
+#if ARCH_X86 || ARCH_X86_64
+#include "aom_ports/x86.h"
+#endif
+
+namespace libaom_test {
+
+// Reset system to a known state. This function should be used for all non-API
+// test cases.
+inline void ClearSystemState() {
+#if ARCH_X86 || ARCH_X86_64
+  aom_reset_mmx_state();
+#endif
+}
+
+}  // namespace libaom_test
+#endif  // TEST_CLEAR_SYSTEM_STATE_H_
diff --git a/third_party/aom/test/clpf_test.cc b/third_party/aom/test/clpf_test.cc
new file mode 100644
index 000000000..2c0f8cf7f
--- /dev/null
+++ b/third_party/aom/test/clpf_test.cc
@@ -0,0 +1,437 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <cstdlib>
+#include <string>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "./av1_rtcd.h"
+#include "aom_ports/aom_timer.h"
+#include "av1/common/od_dering.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+
+typedef void (*clpf_block_t)(uint8_t *dst, const uint16_t *src, int dstride,
+                             int sstride, int sizex, int sizey,
+                             unsigned int strength, unsigned int bitdepth);
+// NOTE(review): test_clpf passes its damping value in the last argument slot.
+typedef std::tr1::tuple<clpf_block_t, clpf_block_t, int, int>
+    clpf_block_param_t;  // (function under test, reference, sizex, sizey)
+
+class CDEFClpfBlockTest : public ::testing::TestWithParam<clpf_block_param_t> {
+ public:
+  virtual ~CDEFClpfBlockTest() {}
+  virtual void SetUp() {  // Unpack the parameter tuple into the members.
+    clpf = GET_PARAM(0);
+    ref_clpf = GET_PARAM(1);
+    sizex = GET_PARAM(2);
+    sizey = GET_PARAM(3);
+  }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  int sizex;              // Tuple element 2; handed to test_clpf as w.
+  int sizey;              // Tuple element 3; handed to test_clpf as h.
+  clpf_block_t clpf;      // Tuple element 0: function under test.
+  clpf_block_t ref_clpf;  // Tuple element 1: reference it is compared with.
+};
+
+typedef CDEFClpfBlockTest CDEFClpfSpeedTest;
+
+#if CONFIG_HIGHBITDEPTH
+typedef void (*clpf_block_hbd_t)(uint16_t *dst, const uint16_t *src,
+                                 int dstride, int sstride, int sizex, int sizey,
+                                 unsigned int strength, unsigned int bitdepth);
+// Same shape as the 8-bit filter type, but dst is uint16_t.
+typedef std::tr1::tuple<clpf_block_hbd_t, clpf_block_hbd_t, int, int>
+    clpf_block_hbd_param_t;  // (function under test, reference, sizex, sizey)
+
+class CDEFClpfBlockHbdTest
+    : public ::testing::TestWithParam<clpf_block_hbd_param_t> {
+ public:
+  virtual ~CDEFClpfBlockHbdTest() {}
+  virtual void SetUp() {  // Unpack the parameter tuple into the members.
+    clpf = GET_PARAM(0);
+    ref_clpf = GET_PARAM(1);
+    sizex = GET_PARAM(2);
+    sizey = GET_PARAM(3);
+  }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  int sizex;                  // Tuple element 2; handed to test_clpf as w.
+  int sizey;                  // Tuple element 3; handed to test_clpf as h.
+  clpf_block_hbd_t clpf;      // Tuple element 0: function under test.
+  clpf_block_hbd_t ref_clpf;  // Tuple element 1: reference implementation.
+};
+
+typedef CDEFClpfBlockHbdTest ClpfHbdSpeedTest;
+#endif
+
+template <typename pixel>
+void test_clpf(int w, int h, unsigned int depth, unsigned int iterations,
+               void (*clpf)(pixel *dst, const uint16_t *src, int dstride,
+                            int sstride, int sizex, int sizey,
+                            unsigned int strength, unsigned int bitdepth),
+               void (*ref_clpf)(pixel *dst, const uint16_t *src, int dstride,
+                                int sstride, int sizex, int sizey,
+                                unsigned int strength, unsigned int bitdepth)) {
+  const int size = 24;  // Side of the square source and destination buffers.
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, uint16_t, s[size * size]);
+  DECLARE_ALIGNED(16, pixel, d[size * size]);
+  DECLARE_ALIGNED(16, pixel, ref_d[size * size]);
+  memset(ref_d, 0, size * size * sizeof(*ref_d));
+  memset(d, 0, size * size * sizeof(*d));
+
+  int error = 0, pos = 0, xpos = 8, ypos = 8;
+  unsigned int strength = 0, bits, level, count, damp = 0, boundary = 0;
+  assert(size >= w + 16 && size >= h + 16);
+  assert(depth >= 8);
+
+  // Filter a w x h block at (xpos, ypos) of a size x size buffer with ref_clpf
+  // and (when different) clpf, then compare the whole output buffers. Sweeps
+  // noise of 1..<depth> bits, noise levels from 0 to (1<<depth)-1, all
+  // strengths, all dampings and all 16 boundary combinations.
+  // boundary, damp and strength stop advancing ("+= !error") at the first
+  // mismatch so that the failure message reports the failing combination.
+  // If clpf and ref_clpf are the same, we're just testing speed.
+  for (boundary = 0; boundary < 16 && !error; boundary += !error) {
+    for (count = 0; count < iterations && !error; count++) {
+      for (level = 0; level < (1U << depth) && !error;
+           level += (1 + 4 * !!boundary) << (depth - 8)) {
+        for (bits = 1; bits <= depth && !error; bits++) {
+          for (damp = 4 + depth - 8; damp < depth - 1 && !error;
+               damp += !error) {
+            for (int i = 0; i < size * size; i++)
+              s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
+                           (1 << depth) - 1);
+            if (boundary) {
+              if (boundary & 1) {  // Left
+                for (int i = 0; i < size; i++)
+                  for (int j = 0; j < xpos; j++)
+                    s[i * size + j] = OD_DERING_VERY_LARGE;
+              }
+              if (boundary & 2) {  // Right
+                for (int i = 0; i < size; i++)
+                  for (int j = xpos + w; j < size; j++)
+                    s[i * size + j] = OD_DERING_VERY_LARGE;
+              }
+              if (boundary & 4) {  // Above
+                for (int i = 0; i < ypos; i++)
+                  for (int j = 0; j < size; j++)
+                    s[i * size + j] = OD_DERING_VERY_LARGE;
+              }
+              if (boundary & 8) {  // Below
+                for (int i = ypos + h; i < size; i++)
+                  for (int j = 0; j < size; j++)
+                    s[i * size + j] = OD_DERING_VERY_LARGE;
+              }
+            }
+            for (strength = depth - 8; strength < depth - 5 && !error;
+                 strength += !error) {
+              ref_clpf(ref_d + ypos * size + xpos, s + ypos * size + xpos, size,
+                       size, w, h, 1 << strength, damp);
+              if (clpf != ref_clpf)
+                ASM_REGISTER_STATE_CHECK(clpf(d + ypos * size + xpos,
+                                              s + ypos * size + xpos, size,
+                                              size, w, h, 1 << strength, damp));
+              if (ref_clpf != clpf) {
+                for (pos = 0; pos < size * size && !error; pos++) {
+                  error = ref_d[pos] != d[pos];
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  pos--;  // The compare loop stepped once past the mismatching index.
+  EXPECT_EQ(0, error)
+      << "Error: CDEFClpfBlockTest, SIMD and C mismatch." << std::endl
+      << "First error at " << pos % size << "," << pos / size << " ("
+      << (int16_t)ref_d[pos] << " != " << (int16_t)d[pos] << ") " << std::endl
+      << "strength: " << (1 << strength) << std::endl
+      << "damping: " << damp << std::endl
+      << "depth: " << depth << std::endl
+      << "boundary: " << boundary << std::endl
+      << "w: " << w << std::endl
+      << "h: " << h << std::endl
+      << "A=" << (pos > 2 * size ? (int16_t)s[pos - 2 * size] : -1) << std::endl
+      << "B=" << (pos > size ? (int16_t)s[pos - size] : -1) << std::endl
+      << "C=" << (pos % size - 2 >= 0 ? (int16_t)s[pos - 2] : -1) << std::endl
+      << "D=" << (pos % size - 1 >= 0 ? (int16_t)s[pos - 1] : -1) << std::endl
+      << "X=" << (int16_t)s[pos] << std::endl
+      << "E=" << (pos % size + 1 < size ? (int16_t)s[pos + 1] : -1) << std::endl
+      << "F=" << (pos % size + 2 < size ? (int16_t)s[pos + 2] : -1) << std::endl
+      << "G=" << (pos + size < size * size ? (int16_t)s[pos + size] : -1)
+      << std::endl
+      << "H="
+      << (pos + 2 * size < size * size ? (int16_t)s[pos + 2 * size] : -1)
+      << std::endl;
+}
+
+template <typename pixel>
+void test_clpf_speed(int w, int h, unsigned int depth, unsigned int iterations,
+                     void (*clpf)(pixel *dst, const uint16_t *src, int dstride,
+                                  int sstride, int sizex, int sizey,
+                                  unsigned int strength, unsigned int bitdepth),
+                     void (*ref_clpf)(pixel *dst, const uint16_t *src,
+                                      int dstride, int sstride, int sizex,
+                                      int sizey, unsigned int strength,
+                                      unsigned int bitdepth)) {
+  aom_usec_timer ref_timer;
+  aom_usec_timer timer;
+  // Time ref_clpf first, then clpf, each through an identical test_clpf sweep.
+  aom_usec_timer_start(&ref_timer);
+  test_clpf(w, h, depth, iterations, ref_clpf, ref_clpf);  // Same fn twice:
+  aom_usec_timer_mark(&ref_timer);                         // no comparison.
+  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
+
+  aom_usec_timer_start(&timer);
+  test_clpf(w, h, depth, iterations, clpf, clpf);
+  aom_usec_timer_mark(&timer);
+  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
+
+#if 0  // Optional timing printout, compiled out.
+  std::cout << "[          ] C time = " << ref_elapsed_time / 1000
+            << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl;
+#endif
+  // Fails unless the reference took strictly longer than the tested function.
+  EXPECT_GT(ref_elapsed_time, elapsed_time)
+      << "Error: CDEFClpfSpeedTest, SIMD slower than C." << std::endl
+      << "C time: " << ref_elapsed_time << " us" << std::endl
+      << "SIMD time: " << elapsed_time << " us" << std::endl;
+}
+
+TEST_P(CDEFClpfBlockTest, TestSIMDNoMismatch) {
+  test_clpf(sizex, sizey, 8, 1, clpf, ref_clpf);  // depth 8, 1 iteration
+}
+
+TEST_P(CDEFClpfSpeedTest, DISABLED_TestSpeed) {
+  test_clpf_speed(sizex, sizey, 8, 16, clpf, ref_clpf);  // depth 8, 16 iterations
+}
+
+#if CONFIG_HIGHBITDEPTH
+TEST_P(CDEFClpfBlockHbdTest, TestSIMDNoMismatch) {
+  test_clpf(sizex, sizey, 12, 1, clpf, ref_clpf);  // depth 12, 1 iteration
+}
+
+TEST_P(ClpfHbdSpeedTest, DISABLED_TestSpeed) {
+  test_clpf_speed(sizex, sizey, 12, 4, clpf, ref_clpf);  // depth 12, 4 iterations
+}
+#endif
+
+using std::tr1::make_tuple;
+
+// VS compiling for 32 bit targets does not support vector types in
+// structs as arguments, which makes the v256 type of the intrinsics
+// hard to support, so optimizations for this target are disabled.
+#if defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
+// Test all supported architectures and block sizes
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, CDEFClpfBlockTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 8, 8),
+        make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 8, 4),
+        make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 4, 8),
+        make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 4, 4),
+        make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 8, 4),
+        make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 4, 8),
+        make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 4, 4)));
+#endif
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, CDEFClpfBlockTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 8, 8),
+        make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 8, 4),
+        make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 4, 8),
+        make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 4, 4),
+        make_tuple(&aom_clpf_hblock_ssse3, &aom_clpf_hblock_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_ssse3, &aom_clpf_hblock_c, 8, 4),
+        make_tuple(&aom_clpf_hblock_ssse3, &aom_clpf_hblock_c, 4, 8),
+        make_tuple(&aom_clpf_hblock_ssse3, &aom_clpf_hblock_c, 4, 4)));
+#endif
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, CDEFClpfBlockTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 8, 8),
+        make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 8, 4),
+        make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 4, 8),
+        make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 4, 4),
+        make_tuple(&aom_clpf_hblock_sse4_1, &aom_clpf_hblock_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_sse4_1, &aom_clpf_hblock_c, 8, 4),
+        make_tuple(&aom_clpf_hblock_sse4_1, &aom_clpf_hblock_c, 4, 8),
+        make_tuple(&aom_clpf_hblock_sse4_1, &aom_clpf_hblock_c, 4, 4)));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, CDEFClpfBlockTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 8, 8),
+        make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 8, 4),
+        make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 4, 8),
+        make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 4, 4),
+        make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 8, 4),
+        make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 4, 8),
+        make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 4, 4)));
+#endif
+
+#if CONFIG_HIGHBITDEPTH
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, CDEFClpfBlockHbdTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 8, 4),
+        make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 4, 8),
+        make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 4, 4),
+        make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 8, 4),
+        make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 4, 8),
+        make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 4, 4)));
+#endif
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, CDEFClpfBlockHbdTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 8, 4),
+        make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 4, 8),
+        make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 4, 4),
+        make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 8, 4),
+        make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 4, 8),
+        make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 4, 4)));
+#endif
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, CDEFClpfBlockHbdTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 8, 4),
+        make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 4, 8),
+        make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 4, 4),
+        make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 8, 4),
+        make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 4, 8),
+        make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 4, 4)));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, CDEFClpfBlockHbdTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 8, 4),
+        make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 4, 8),
+        make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 4, 4),
+        make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 8, 4),
+        make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 4, 8),
+        make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 4, 4)));
+#endif
+#endif  // CONFIG_HIGHBITDEPTH
+
+// Test speed for all supported architectures
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, CDEFClpfSpeedTest,
+    ::testing::Values(make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 8, 8),
+                      make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 8,
+                                 8)));
+#endif
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(SSSE3, CDEFClpfSpeedTest,
+                        ::testing::Values(make_tuple(&aom_clpf_block_ssse3,
+                                                     &aom_clpf_block_c, 8, 8),
+                                          make_tuple(&aom_clpf_hblock_ssse3,
+                                                     &aom_clpf_hblock_c, 8,
+                                                     8)));
+#endif
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFClpfSpeedTest,
+                        ::testing::Values(make_tuple(&aom_clpf_block_sse4_1,
+                                                     &aom_clpf_block_c, 8, 8),
+                                          make_tuple(&aom_clpf_hblock_sse4_1,
+                                                     &aom_clpf_hblock_c, 8,
+                                                     8)));
+
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, CDEFClpfSpeedTest,
+    ::testing::Values(make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 8, 8),
+                      make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 8,
+                                 8)));
+#endif
+
+#if CONFIG_HIGHBITDEPTH
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, ClpfHbdSpeedTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 8, 8)));
+#endif
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, ClpfHbdSpeedTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 8, 8)));
+#endif
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, ClpfHbdSpeedTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 8, 8)));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, ClpfHbdSpeedTest,
+    ::testing::Values(
+        make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 8, 8),
+        make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 8, 8)));
+#endif
+#endif  // CONFIG_HIGHBITDEPTH
+#endif  // defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
+
+}  // namespace
diff --git a/third_party/aom/test/codec_factory.h b/third_party/aom/test/codec_factory.h
new file mode 100644
index 000000000..d2f20b832
--- /dev/null
+++ b/third_party/aom/test/codec_factory.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef TEST_CODEC_FACTORY_H_
+#define TEST_CODEC_FACTORY_H_
+
+#include "./aom_config.h"
+#include "aom/aom_decoder.h"
+#include "aom/aom_encoder.h"
+#if CONFIG_AV1_ENCODER
+#include "aom/aomcx.h"
+#endif
+#if CONFIG_AV1_DECODER
+#include "aom/aomdx.h"
+#endif
+
+#include "test/decode_test_driver.h"
+#include "test/encode_test_driver.h"
+namespace libaom_test {
+
+const int kCodecFactoryParam = 0;
+
+class CodecFactory {  // Pure-virtual interface for creating test codec objects.
+ public:
+  CodecFactory() {}
+
+  virtual ~CodecFactory() {}
+  // Decoder creation, without and with aom_codec_flags_t flags.
+  virtual Decoder *CreateDecoder(aom_codec_dec_cfg_t cfg) const = 0;
+
+  virtual Decoder *CreateDecoder(aom_codec_dec_cfg_t cfg,
+                                 const aom_codec_flags_t flags) const = 0;
+  // Encoder creation from a config, deadline, init flags and stats store.
+  virtual Encoder *CreateEncoder(aom_codec_enc_cfg_t cfg,
+                                 unsigned long deadline,
+                                 const unsigned long init_flags,
+                                 TwopassStatsStore *stats) const = 0;
+  // Writes a default encoder configuration into *cfg; returns a status code.
+  virtual aom_codec_err_t DefaultEncoderConfig(aom_codec_enc_cfg_t *cfg,
+                                               int usage) const = 0;
+};
+
+/* Provide CodecTestWith<n>Params classes for a variable number of parameters
+ * to avoid having to include a pointer to the CodecFactory in every test
+ * definition. The factory pointer is always the first tuple element.
+ */
+template <class T1>
+class CodecTestWithParam
+    : public ::testing::TestWithParam<
+          std::tr1::tuple<const libaom_test::CodecFactory *, T1> > {};
+
+template <class T1, class T2>
+class CodecTestWith2Params
+    : public ::testing::TestWithParam<
+          std::tr1::tuple<const libaom_test::CodecFactory *, T1, T2> > {};
+
+template <class T1, class T2, class T3>
+class CodecTestWith3Params
+    : public ::testing::TestWithParam<
+          std::tr1::tuple<const libaom_test::CodecFactory *, T1, T2, T3> > {};
+
+/*
+ * AV1 Codec Definitions
+ */
+#if CONFIG_AV1
+class AV1Decoder : public Decoder {  // Decoder bound to the AV1 dx interface.
+ public:
+  explicit AV1Decoder(aom_codec_dec_cfg_t cfg) : Decoder(cfg) {}
+
+  AV1Decoder(aom_codec_dec_cfg_t cfg, const aom_codec_flags_t flag)
+      : Decoder(cfg, flag) {}
+
+ protected:
+  virtual aom_codec_iface_t *CodecInterface() const {
+#if CONFIG_AV1_DECODER
+    return &aom_codec_av1_dx_algo;
+#else
+    return NULL;  // Built without CONFIG_AV1_DECODER: no interface available.
+#endif
+  }
+};
+
+class AV1Encoder : public Encoder {  // Encoder bound to the AV1 cx interface.
+ public:
+  AV1Encoder(aom_codec_enc_cfg_t cfg, unsigned long deadline,
+             const unsigned long init_flags, TwopassStatsStore *stats)
+      : Encoder(cfg, deadline, init_flags, stats) {}
+
+ protected:
+  virtual aom_codec_iface_t *CodecInterface() const {
+#if CONFIG_AV1_ENCODER
+    return &aom_codec_av1_cx_algo;
+#else
+    return NULL;  // Built without CONFIG_AV1_ENCODER: no interface available.
+#endif
+  }
+};
+
+class AV1CodecFactory : public CodecFactory {
+ public:
+  AV1CodecFactory() : CodecFactory() {}
+  // Forwards to the two-argument overload with flags == 0.
+  virtual Decoder *CreateDecoder(aom_codec_dec_cfg_t cfg) const {
+    return CreateDecoder(cfg, 0);
+  }
+  // Returns a new AV1Decoder, or NULL when CONFIG_AV1_DECODER is off.
+  virtual Decoder *CreateDecoder(aom_codec_dec_cfg_t cfg,
+                                 const aom_codec_flags_t flags) const {
+#if CONFIG_AV1_DECODER
+    return new AV1Decoder(cfg, flags);  // presumably deleted by the caller
+#else
+    (void)cfg;
+    (void)flags;
+    return NULL;
+#endif
+  }
+  // Returns a new AV1Encoder, or NULL when CONFIG_AV1_ENCODER is off.
+  virtual Encoder *CreateEncoder(aom_codec_enc_cfg_t cfg,
+                                 unsigned long deadline,
+                                 const unsigned long init_flags,
+                                 TwopassStatsStore *stats) const {
+#if CONFIG_AV1_ENCODER
+    return new AV1Encoder(cfg, deadline, init_flags, stats);
+#else
+    (void)cfg;
+    (void)deadline;
+    (void)init_flags;
+    (void)stats;
+    return NULL;
+#endif
+  }
+  // Fills *cfg via aom_codec_enc_config_default for the AV1 cx interface.
+  virtual aom_codec_err_t DefaultEncoderConfig(aom_codec_enc_cfg_t *cfg,
+                                               int usage) const {
+#if CONFIG_AV1_ENCODER
+    return aom_codec_enc_config_default(&aom_codec_av1_cx_algo, cfg, usage);
+#else
+    (void)cfg;
+    (void)usage;
+    return AOM_CODEC_INCAPABLE;  // No encoder compiled in.
+#endif
+  }
+};
+
+const libaom_test::AV1CodecFactory kAV1;  // Factory instance the macro below uses.
+// Instantiates `test` with prefix AV1: Combine(Values(&kAV1), __VA_ARGS__).
+#define AV1_INSTANTIATE_TEST_CASE(test, ...)                                \
+  INSTANTIATE_TEST_CASE_P(                                                  \
+      AV1, test,                                                            \
+      ::testing::Combine(                                                   \
+          ::testing::Values(static_cast<const libaom_test::CodecFactory *>( \
+              &libaom_test::kAV1)),                                         \
+          __VA_ARGS__))
+#else
+#define AV1_INSTANTIATE_TEST_CASE(test, ...)  // Expands to nothing.
+#endif  // CONFIG_AV1
+
+}  // namespace libaom_test
+#endif  // TEST_CODEC_FACTORY_H_
diff --git a/third_party/aom/test/convolve_test.cc b/third_party/aom/test/convolve_test.cc
new file mode 100644
index 000000000..a84ef4ec8
--- /dev/null
+++ b/third_party/aom/test/convolve_test.cc
@@ -0,0 +1,1345 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "./aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "aom_dsp/aom_dsp_common.h"
+#include "aom_dsp/aom_filter.h"
+#include "aom_mem/aom_mem.h"
+#include "aom_ports/mem.h"
+#include "aom_ports/aom_timer.h"
+#include "av1/common/filter.h"
+
+namespace {
+
+static const unsigned int kMaxDimension = MAX_SB_SIZE;
+
+typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
+                             uint8_t *dst, ptrdiff_t dst_stride,
+                             const int16_t *filter_x, int filter_x_stride,
+                             const int16_t *filter_y, int filter_y_stride,
+                             int w, int h);
+
+struct ConvolveFunctions {  // Bundle of ConvolveFunc pointers plus a bit depth.
+  ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg, ConvolveFunc h8,
+                    ConvolveFunc h8_avg, ConvolveFunc v8, ConvolveFunc v8_avg,
+                    ConvolveFunc hv8, ConvolveFunc hv8_avg, ConvolveFunc sh8,
+                    ConvolveFunc sh8_avg, ConvolveFunc sv8,
+                    ConvolveFunc sv8_avg, ConvolveFunc shv8,
+                    ConvolveFunc shv8_avg, int bd)
+      : copy_(copy), avg_(avg), h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg),
+        v8_avg_(v8_avg), hv8_avg_(hv8_avg), sh8_(sh8), sv8_(sv8), shv8_(shv8),
+        sh8_avg_(sh8_avg), sv8_avg_(sv8_avg), shv8_avg_(shv8_avg),
+        use_highbd_(bd) {}
+  // Members list the plain variants before the *_avg ones, unlike the ctor.
+  ConvolveFunc copy_;
+  ConvolveFunc avg_;
+  ConvolveFunc h8_;
+  ConvolveFunc v8_;
+  ConvolveFunc hv8_;
+  ConvolveFunc h8_avg_;
+  ConvolveFunc v8_avg_;
+  ConvolveFunc hv8_avg_;
+  ConvolveFunc sh8_;       // scaled horiz
+  ConvolveFunc sv8_;       // scaled vert
+  ConvolveFunc shv8_;      // scaled horiz/vert
+  ConvolveFunc sh8_avg_;   // scaled avg horiz
+  ConvolveFunc sv8_avg_;   // scaled avg vert
+  ConvolveFunc shv8_avg_;  // scaled avg horiz/vert
+  int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
+};
+
+typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
+
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+#define ALL_SIZES(convolve_fn)                                            \
+  make_tuple(128, 64, &convolve_fn), make_tuple(64, 128, &convolve_fn),   \
+      make_tuple(128, 128, &convolve_fn), make_tuple(4, 4, &convolve_fn), \
+      make_tuple(8, 4, &convolve_fn), make_tuple(4, 8, &convolve_fn),     \
+      make_tuple(8, 8, &convolve_fn), make_tuple(16, 8, &convolve_fn),    \
+      make_tuple(8, 16, &convolve_fn), make_tuple(16, 16, &convolve_fn),  \
+      make_tuple(32, 16, &convolve_fn), make_tuple(16, 32, &convolve_fn), \
+      make_tuple(32, 32, &convolve_fn), make_tuple(64, 32, &convolve_fn), \
+      make_tuple(32, 64, &convolve_fn), make_tuple(64, 64, &convolve_fn)
+#else
+#define ALL_SIZES(convolve_fn)                                            \
+  make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn),         \
+      make_tuple(4, 8, &convolve_fn), make_tuple(8, 8, &convolve_fn),     \
+      make_tuple(16, 8, &convolve_fn), make_tuple(8, 16, &convolve_fn),   \
+      make_tuple(16, 16, &convolve_fn), make_tuple(32, 16, &convolve_fn), \
+      make_tuple(16, 32, &convolve_fn), make_tuple(32, 32, &convolve_fn), \
+      make_tuple(64, 32, &convolve_fn), make_tuple(32, 64, &convolve_fn), \
+      make_tuple(64, 64, &convolve_fn)
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+
+// Reference 8-tap subpixel filter, slightly modified to fit into this test.
+#define AV1_FILTER_WEIGHT 128
+#define AV1_FILTER_SHIFT 7
+uint8_t clip_pixel(int x) { return x < 0 ? 0 : x > 255 ? 255 : x; }  // Clamp to [0, 255].
+
+void filter_block2d_8_c(const uint8_t *src_ptr, unsigned int src_stride,
+                        const int16_t *HFilter, const int16_t *VFilter,
+                        uint8_t *dst_ptr, unsigned int dst_stride,
+                        unsigned int output_width, unsigned int output_height) {
+  // Between passes, we use an intermediate buffer whose height is extended to
+  // have enough horizontally filtered values as input for the vertical pass.
+  // This buffer is allocated to be big enough for the largest block type we
+  // support.
+  const int kInterp_Extend = 4;
+  const unsigned int intermediate_height =
+      (kInterp_Extend - 1) + output_height + kInterp_Extend;
+  unsigned int i, j;
+
+  assert(intermediate_height > 7);
+
+  // intermediate_buffer holds the transposed output of the horizontal pass.
+  // Assuming the output is at most kMaxDimension x kMaxDimension:
+  //   max intermediate height = (kInterp_Extend - 1) + kMaxDimension
+  //                             + kInterp_Extend
+  //                           = kMaxDimension + 7 (the array reserves + 8)
+  //   max width               = kMaxDimension
+  //
+  uint8_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension];
+  const int intermediate_next_stride =
+      1 - static_cast<int>(intermediate_height * output_width);
+
+  // Horizontal pass (src -> transposed intermediate).
+  uint8_t *output_ptr = intermediate_buffer;
+  const int src_next_row_stride = src_stride - output_width;
+  src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
+  for (i = 0; i < intermediate_height; ++i) {
+    for (j = 0; j < output_width; ++j) {
+      // Apply filter...
+      const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
+                       (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
+                       (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
+                       (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
+                       (AV1_FILTER_WEIGHT >> 1);  // Rounding
+
+      // Normalize back to 0-255...
+      *output_ptr = clip_pixel(temp >> AV1_FILTER_SHIFT);
+      ++src_ptr;
+      output_ptr += intermediate_height;
+    }
+    src_ptr += src_next_row_stride;
+    output_ptr += intermediate_next_stride;
+  }
+
+  // Vertical pass (transposed intermediate -> dst).
+  src_ptr = intermediate_buffer;
+  const int dst_next_row_stride = dst_stride - output_width;
+  for (i = 0; i < output_height; ++i) {
+    for (j = 0; j < output_width; ++j) {
+      // Apply filter...
+      const int temp = (src_ptr[0] * VFilter[0]) + (src_ptr[1] * VFilter[1]) +
+                       (src_ptr[2] * VFilter[2]) + (src_ptr[3] * VFilter[3]) +
+                       (src_ptr[4] * VFilter[4]) + (src_ptr[5] * VFilter[5]) +
+                       (src_ptr[6] * VFilter[6]) + (src_ptr[7] * VFilter[7]) +
+                       (AV1_FILTER_WEIGHT >> 1);  // Rounding
+
+      // Normalize back to 0-255...
+      *dst_ptr++ = clip_pixel(temp >> AV1_FILTER_SHIFT);
+      src_ptr += intermediate_height;
+    }
+    src_ptr += intermediate_next_stride;
+    dst_ptr += dst_next_row_stride;
+  }
+}
+
+void block2d_average_c(uint8_t *src, unsigned int src_stride,
+                       uint8_t *output_ptr, unsigned int output_stride,
+                       unsigned int output_width, unsigned int output_height) {
+  unsigned int i, j;
+  for (i = 0; i < output_height; ++i) {
+    for (j = 0; j < output_width; ++j) {  // In place: out = (out + src + 1) / 2
+      output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
+    }
+    output_ptr += output_stride;
+  }
+}
+
+void filter_average_block2d_8_c(const uint8_t *src_ptr,
+                                const unsigned int src_stride,
+                                const int16_t *HFilter, const int16_t *VFilter,
+                                uint8_t *dst_ptr, unsigned int dst_stride,
+                                unsigned int output_width,
+                                unsigned int output_height) {
+  uint8_t tmp[kMaxDimension * kMaxDimension];
+  // tmp is addressed with a row stride of kMaxDimension, hence the size checks.
+  assert(output_width <= kMaxDimension);
+  assert(output_height <= kMaxDimension);
+  filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, kMaxDimension,
+                     output_width, output_height);  // Filter into tmp first,
+  block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride, output_width,
+                    output_height);  // then average tmp into dst.
+}
+
+#if CONFIG_HIGHBITDEPTH
+// Reference (plain C) separable 8-tap filter for 16-bit samples.
+//
+// A horizontal pass filters the source with HFilter into a transposed scratch
+// buffer; a vertical pass then filters that buffer with VFilter into dst_ptr.
+// After each pass the sum is rounded, shifted right by AV1_FILTER_SHIFT and
+// passed through clip_pixel_highbd() together with the bit depth |bd|.
+//
+// src_ptr addresses the sample that lines up with the top-left output sample.
+// The horizontal pass starts reading 3 rows above and 3 columns to the left of
+// it and reads 8 consecutive samples per tap window, so the caller has to
+// provide that margin around the block. The function itself does not check
+// that the block fits the scratch buffer.
+void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
+                               const unsigned int src_stride,
+                               const int16_t *HFilter, const int16_t *VFilter,
+                               uint16_t *dst_ptr, unsigned int dst_stride,
+                               unsigned int output_width,
+                               unsigned int output_height, int bd) {
+  // Between passes, we use an intermediate buffer whose height is extended to
+  // have enough horizontally filtered values as input for the vertical pass.
+  // This buffer is allocated to be big enough for the largest block type we
+  // support.
+  const int kInterp_Extend = 4;
+  const unsigned int intermediate_height =
+      (kInterp_Extend - 1) + output_height + kInterp_Extend;
+
+  /* The scratch buffer needs intermediate_height * output_width entries, i.e.
+   * ((kInterp_Extend - 1) + output_height + kInterp_Extend) * output_width.
+   * It is declared with (kMaxDimension + 8) * kMaxDimension entries, which is
+   * enough for any block whose width and height are both at most
+   * kMaxDimension.
+   */
+  uint16_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension];
+  // Added at the end of every row: rewinds the pointer over the whole
+  // transposed row just written/read and advances it by one entry.
+  const int intermediate_next_stride =
+      1 - static_cast<int>(intermediate_height * output_width);
+
+  // Horizontal pass (src -> transposed intermediate).
+  {
+    uint16_t *output_ptr = intermediate_buffer;
+    const int src_next_row_stride = src_stride - output_width;
+    unsigned int i, j;
+    // Back up to the first row/column the 8-tap windows need.
+    src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
+    for (i = 0; i < intermediate_height; ++i) {
+      for (j = 0; j < output_width; ++j) {
+        // Apply filter...
+        const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
+                         (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
+                         (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
+                         (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
+                         (AV1_FILTER_WEIGHT >> 1);  // Rounding
+
+        // Scale back down and clip for bit depth |bd|.
+        *output_ptr = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
+        ++src_ptr;
+        // Transposed store: consecutive columns are intermediate_height apart.
+        output_ptr += intermediate_height;
+      }
+      src_ptr += src_next_row_stride;
+      output_ptr += intermediate_next_stride;
+    }
+  }
+
+  // Vertical pass (transposed intermediate -> dst).
+  {
+    const uint16_t *interm_ptr = intermediate_buffer;
+    const int dst_next_row_stride = dst_stride - output_width;
+    unsigned int i, j;
+    for (i = 0; i < output_height; ++i) {
+      for (j = 0; j < output_width; ++j) {
+        // Apply filter...
+        // Because of the transposed layout, interm_ptr[0..7] are eight
+        // vertically adjacent samples of one column.
+        const int temp =
+            (interm_ptr[0] * VFilter[0]) + (interm_ptr[1] * VFilter[1]) +
+            (interm_ptr[2] * VFilter[2]) + (interm_ptr[3] * VFilter[3]) +
+            (interm_ptr[4] * VFilter[4]) + (interm_ptr[5] * VFilter[5]) +
+            (interm_ptr[6] * VFilter[6]) + (interm_ptr[7] * VFilter[7]) +
+            (AV1_FILTER_WEIGHT >> 1);  // Rounding
+
+        // Scale back down and clip for bit depth |bd|.
+        *dst_ptr++ = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
+        interm_ptr += intermediate_height;
+      }
+      interm_ptr += intermediate_next_stride;
+      dst_ptr += dst_next_row_stride;
+    }
+  }
+}
+
+// 16-bit counterpart of block2d_average_c(): each sample of the
+// output_width x output_height block in |output_ptr| is replaced by the
+// rounded-up mean of itself and the co-located sample of |src|.
+void highbd_block2d_average_c(uint16_t *src, unsigned int src_stride,
+                              uint16_t *output_ptr, unsigned int output_stride,
+                              unsigned int output_width,
+                              unsigned int output_height) {
+  uint16_t *dst_row = output_ptr;
+  for (unsigned int row = 0; row < output_height; ++row) {
+    const uint16_t *const src_row = src + row * src_stride;
+    for (unsigned int col = 0; col < output_width; ++col) {
+      // "+ 1" before the shift rounds halves upwards.
+      const int sum = dst_row[col] + src_row[col] + 1;
+      dst_row[col] = static_cast<uint16_t>(sum >> 1);
+    }
+    dst_row += output_stride;
+  }
+}
+
+// Reference "filter then average" for 16-bit samples: the source block is
+// filtered into a scratch buffer with highbd_filter_block2d_8_c() (bit depth
+// |bd|) and the result is averaged into |dst_ptr| with
+// highbd_block2d_average_c().
+void highbd_filter_average_block2d_8_c(
+    const uint16_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
+    const int16_t *VFilter, uint16_t *dst_ptr, unsigned int dst_stride,
+    unsigned int output_width, unsigned int output_height, int bd) {
+  // The scratch area is a kMaxDimension x kMaxDimension square, so the
+  // requested block has to fit inside it.
+  assert(output_width <= kMaxDimension);
+  assert(output_height <= kMaxDimension);
+
+  uint16_t filtered[kMaxDimension * kMaxDimension];
+  const unsigned int filtered_stride = kMaxDimension;
+
+  highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, filtered,
+                            filtered_stride, output_width, output_height, bd);
+  highbd_block2d_average_c(filtered, filtered_stride, dst_ptr, dst_stride,
+                           output_width, output_height);
+}
+#endif  // CONFIG_HIGHBITDEPTH
+
+// Value-parameterized fixture for the convolve tests. The test parameter
+// supplies the block width (0), the block height (1) and a pointer to the
+// ConvolveFunctions table under test (2).
+//
+// All tests share one set of statically allocated buffers, each holding
+// kOuterBlockSize x kOuterBlockSize samples. The block being processed sits
+// inside that area (see BorderLeft()/BorderTop()); everything around it is
+// treated as a guard region.
+class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
+ public:
+  // Allocates the buffers shared by every test of this test case.
+  static void SetUpTestCase() {
+    // Force input_ to be unaligned, output to be 16 byte aligned.
+    input_ = reinterpret_cast<uint8_t *>(
+                 aom_memalign(kDataAlignment, kInputBufferSize + 1)) +
+             1;
+    output_ = reinterpret_cast<uint8_t *>(
+        aom_memalign(kDataAlignment, kOutputBufferSize));
+    output_ref_ = reinterpret_cast<uint8_t *>(
+        aom_memalign(kDataAlignment, kOutputBufferSize));
+#if CONFIG_HIGHBITDEPTH
+    input16_ = reinterpret_cast<uint16_t *>(aom_memalign(
+                   kDataAlignment, (kInputBufferSize + 1) * sizeof(uint16_t))) +
+               1;
+    output16_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
+    output16_ref_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
+#endif
+  }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+  // Releases the shared buffers. The input pointers were advanced by one
+  // element after allocation, so they are moved back before being freed.
+  static void TearDownTestCase() {
+    aom_free(input_ - 1);
+    input_ = NULL;
+    aom_free(output_);
+    output_ = NULL;
+    aom_free(output_ref_);
+    output_ref_ = NULL;
+#if CONFIG_HIGHBITDEPTH
+    aom_free(input16_ - 1);
+    input16_ = NULL;
+    aom_free(output16_);
+    output16_ = NULL;
+    aom_free(output16_ref_);
+    output16_ref_ = NULL;
+#endif
+  }
+
+ protected:
+  static const int kDataAlignment = 16;
+  static const int kOuterBlockSize = 4 * kMaxDimension;
+  static const int kInputStride = kOuterBlockSize;
+  static const int kOutputStride = kOuterBlockSize;
+  static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
+  static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;
+
+  // Block dimensions taken from the test parameter.
+  int Width() const { return GET_PARAM(0); }
+  int Height() const { return GET_PARAM(1); }
+  // Column of the block's left edge: the centred position rounded up to a
+  // multiple of kDataAlignment.
+  int BorderLeft() const {
+    const int center = (kOuterBlockSize - Width()) / 2;
+    return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
+  }
+  // Row of the block's top edge (vertically centred, no alignment applied).
+  int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
+
+  // Returns true when linear buffer index |i| lies outside the inner block,
+  // i.e. in the guard region.
+  bool IsIndexInBorder(int i) {
+    return (i < BorderTop() * kOuterBlockSize ||
+            i >= (BorderTop() + Height()) * kOuterBlockSize ||
+            i % kOuterBlockSize < BorderLeft() ||
+            i % kOuterBlockSize >= (BorderLeft() + Width()));
+  }
+
+  // Per-test initialisation: picks up the function table, derives the sample
+  // mask for the bit depth, resets the output buffers and refills the input.
+  virtual void SetUp() {
+    UUT_ = GET_PARAM(2);
+#if CONFIG_HIGHBITDEPTH
+    if (UUT_->use_highbd_ != 0)
+      mask_ = (1 << UUT_->use_highbd_) - 1;
+    else
+      mask_ = 255;
+#endif
+    /* Set up guard blocks for an inner block centered in the outer block */
+    for (int i = 0; i < kOutputBufferSize; ++i) {
+      if (IsIndexInBorder(i)) {
+        output_[i] = 255;
+#if CONFIG_HIGHBITDEPTH
+        // The 16-bit output comes from aom_memalign() and is otherwise never
+        // initialised; tests that read the destination before writing it
+        // would otherwise consume indeterminate values.
+        output16_[i] = mask_;
+#endif
+      } else {
+        output_[i] = 0;
+#if CONFIG_HIGHBITDEPTH
+        output16_[i] = 0;
+#endif
+      }
+    }
+
+    // Odd indices get the maximum sample value, even indices random data.
+    ::libaom_test::ACMRandom prng;
+    for (int i = 0; i < kInputBufferSize; ++i) {
+      if (i & 1) {
+        input_[i] = 255;
+#if CONFIG_HIGHBITDEPTH
+        input16_[i] = mask_;
+#endif
+      } else {
+        input_[i] = prng.Rand8Extremes();
+#if CONFIG_HIGHBITDEPTH
+        input16_[i] = prng.Rand16() & mask_;
+#endif
+      }
+    }
+  }
+
+  // Fills every input sample (8-bit and, when built, 16-bit) with |value|.
+  void SetConstantInput(int value) {
+    memset(input_, value, kInputBufferSize);
+#if CONFIG_HIGHBITDEPTH
+    aom_memset16(input16_, value, kInputBufferSize);
+#endif
+  }
+
+  // Snapshots the current output buffers into the reference buffers.
+  void CopyOutputToRef() {
+    memcpy(output_ref_, output_, kOutputBufferSize);
+#if CONFIG_HIGHBITDEPTH
+    // Copy 16-bit pixels values. The effective number of bytes is double.
+    memcpy(output16_ref_, output16_, sizeof(output16_[0]) * kOutputBufferSize);
+#endif
+  }
+
+  // Expects every guard sample of the 8-bit output buffer to still hold the
+  // value written by SetUp(). Only output_ is inspected here.
+  void CheckGuardBlocks() {
+    for (int i = 0; i < kOutputBufferSize; ++i) {
+      if (IsIndexInBorder(i)) EXPECT_EQ(255, output_[i]);
+    }
+  }
+
+  // Pointer to the top-left sample of the inner block in the input buffer.
+  // For a high-bitdepth function table the 16-bit buffer is used and handed
+  // out through CONVERT_TO_BYTEPTR().
+  uint8_t *input() const {
+    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
+#if CONFIG_HIGHBITDEPTH
+    if (UUT_->use_highbd_ == 0) {
+      return input_ + offset;
+    } else {
+      return CONVERT_TO_BYTEPTR(input16_) + offset;
+    }
+#else
+    return input_ + offset;
+#endif
+  }
+
+  // Same as input(), for the output buffer.
+  uint8_t *output() const {
+    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
+#if CONFIG_HIGHBITDEPTH
+    if (UUT_->use_highbd_ == 0) {
+      return output_ + offset;
+    } else {
+      return CONVERT_TO_BYTEPTR(output16_) + offset;
+    }
+#else
+    return output_ + offset;
+#endif
+  }
+
+  // Same as input(), for the reference output buffer.
+  uint8_t *output_ref() const {
+    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
+#if CONFIG_HIGHBITDEPTH
+    if (UUT_->use_highbd_ == 0) {
+      return output_ref_ + offset;
+    } else {
+      return CONVERT_TO_BYTEPTR(output16_ref_) + offset;
+    }
+#else
+    return output_ref_ + offset;
+#endif
+  }
+
+  // Reads sample |index| from |list|, treating |list| as 8-bit or (via
+  // CONVERT_TO_SHORTPTR) 16-bit data depending on the function table.
+  uint16_t lookup(uint8_t *list, int index) const {
+#if CONFIG_HIGHBITDEPTH
+    if (UUT_->use_highbd_ == 0) {
+      return list[index];
+    } else {
+      return CONVERT_TO_SHORTPTR(list)[index];
+    }
+#else
+    return list[index];
+#endif
+  }
+
+  // Writes |val| to sample |index| of |list|; the 8-bit path truncates the
+  // value to uint8_t.
+  void assign_val(uint8_t *list, int index, uint16_t val) const {
+#if CONFIG_HIGHBITDEPTH
+    if (UUT_->use_highbd_ == 0) {
+      list[index] = (uint8_t)val;
+    } else {
+      CONVERT_TO_SHORTPTR(list)[index] = val;
+    }
+#else
+    list[index] = (uint8_t)val;
+#endif
+  }
+
+  // Runs the C reference "filter + average" that matches the function table:
+  // the 8-bit version, or the high-bitdepth version with use_highbd_ passed
+  // as the bit depth.
+  void wrapper_filter_average_block2d_8_c(
+      const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
+      const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
+      unsigned int output_width, unsigned int output_height) {
+#if CONFIG_HIGHBITDEPTH
+    if (UUT_->use_highbd_ == 0) {
+      filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
+                                 dst_stride, output_width, output_height);
+    } else {
+      highbd_filter_average_block2d_8_c(
+          CONVERT_TO_SHORTPTR(src_ptr), src_stride, HFilter, VFilter,
+          CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, output_width, output_height,
+          UUT_->use_highbd_);
+    }
+#else
+    filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
+                               dst_stride, output_width, output_height);
+#endif
+  }
+
+  // Runs the C reference filter that matches the function table: the 8-bit
+  // version, or the high-bitdepth version with use_highbd_ passed as the bit
+  // depth.
+  void wrapper_filter_block2d_8_c(
+      const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
+      const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
+      unsigned int output_width, unsigned int output_height) {
+#if CONFIG_HIGHBITDEPTH
+    if (UUT_->use_highbd_ == 0) {
+      filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
+                         dst_stride, output_width, output_height);
+    } else {
+      highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
+                                HFilter, VFilter, CONVERT_TO_SHORTPTR(dst_ptr),
+                                dst_stride, output_width, output_height,
+                                UUT_->use_highbd_);
+    }
+#else
+    filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
+                       dst_stride, output_width, output_height);
+#endif
+  }
+
+  // Function table under test (test parameter 2).
+  const ConvolveFunctions *UUT_;
+  // Shared 8-bit buffers; input_ is deliberately offset by one byte from its
+  // aligned allocation.
+  static uint8_t *input_;
+  static uint8_t *output_;
+  static uint8_t *output_ref_;
+#if CONFIG_HIGHBITDEPTH
+  // Shared 16-bit buffers used when the function table is high-bitdepth.
+  static uint16_t *input16_;
+  static uint16_t *output16_;
+  static uint16_t *output16_ref_;
+  // (1 << bit depth) - 1 for high-bitdepth tables, 255 otherwise.
+  int mask_;
+#endif
+};
+
+// Out-of-class definitions of the fixture's shared buffers. They start out
+// NULL, are allocated in SetUpTestCase() and are reset to NULL again in
+// TearDownTestCase().
+uint8_t *ConvolveTest::input_ = NULL;
+uint8_t *ConvolveTest::output_ = NULL;
+uint8_t *ConvolveTest::output_ref_ = NULL;
+#if CONFIG_HIGHBITDEPTH
+uint16_t *ConvolveTest::input16_ = NULL;
+uint16_t *ConvolveTest::output16_ = NULL;
+uint16_t *ConvolveTest::output16_ref_ = NULL;
+#endif
+
+// Sanity check of the fixture itself: with no convolve call made, the guard
+// region written by SetUp() must read back intact.
+TEST_P(ConvolveTest, GuardBlocks) {
+  CheckGuardBlocks();
+}
+
+// copy_ must reproduce the input block sample for sample and leave the guard
+// region untouched.
+TEST_P(ConvolveTest, Copy) {
+  uint8_t *const in = input();
+  uint8_t *const out = output();
+  const int block_w = Width();
+  const int block_h = Height();
+
+  ASM_REGISTER_STATE_CHECK(UUT_->copy_(in, kInputStride, out, kOutputStride,
+                                       NULL, 0, NULL, 0, block_w, block_h));
+
+  CheckGuardBlocks();
+
+  for (int y = 0; y < block_h; ++y) {
+    for (int x = 0; x < block_w; ++x) {
+      ASSERT_EQ(lookup(out, y * kOutputStride + x),
+                lookup(in, y * kInputStride + x))
+          << "(" << x << "," << y << ")";
+    }
+  }
+}
+
+// avg_ must leave, in every output sample, the rounded mean of the input
+// sample and the value the output held before the call (snapshotted into the
+// reference buffer first).
+TEST_P(ConvolveTest, Avg) {
+  uint8_t *const in = input();
+  uint8_t *const out = output();
+  uint8_t *const out_ref = output_ref();
+  const int block_w = Width();
+  const int block_h = Height();
+
+  // Remember the destination contents before they are averaged over.
+  CopyOutputToRef();
+
+  ASM_REGISTER_STATE_CHECK(UUT_->avg_(in, kInputStride, out, kOutputStride,
+                                      NULL, 0, NULL, 0, block_w, block_h));
+
+  CheckGuardBlocks();
+
+  for (int y = 0; y < block_h; ++y) {
+    for (int x = 0; x < block_w; ++x) {
+      ASSERT_EQ(lookup(out, y * kOutputStride + x),
+                ROUND_POWER_OF_TWO(lookup(in, y * kInputStride + x) +
+                                       lookup(out_ref, y * kOutputStride + x),
+                                   1))
+          << "(" << x << "," << y << ")";
+    }
+  }
+}
+
+// With a kernel whose only non-zero tap is 128 at index 3, sh8_ must return
+// the input block unchanged.
+TEST_P(ConvolveTest, CopyHoriz) {
+  uint8_t *const in = input();
+  uint8_t *const out = output();
+  const int block_w = Width();
+  const int block_h = Height();
+  DECLARE_ALIGNED(256, const int16_t,
+                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };
+
+  ASM_REGISTER_STATE_CHECK(UUT_->sh8_(in, kInputStride, out, kOutputStride,
+                                      filter8, 16, filter8, 16, block_w,
+                                      block_h));
+
+  CheckGuardBlocks();
+
+  for (int y = 0; y < block_h; ++y) {
+    for (int x = 0; x < block_w; ++x) {
+      ASSERT_EQ(lookup(out, y * kOutputStride + x),
+                lookup(in, y * kInputStride + x))
+          << "(" << x << "," << y << ")";
+    }
+  }
+}
+
+// With a kernel whose only non-zero tap is 128 at index 3, sv8_ must return
+// the input block unchanged.
+TEST_P(ConvolveTest, CopyVert) {
+  uint8_t *const in = input();
+  uint8_t *const out = output();
+  const int block_w = Width();
+  const int block_h = Height();
+  DECLARE_ALIGNED(256, const int16_t,
+                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };
+
+  ASM_REGISTER_STATE_CHECK(UUT_->sv8_(in, kInputStride, out, kOutputStride,
+                                      filter8, 16, filter8, 16, block_w,
+                                      block_h));
+
+  CheckGuardBlocks();
+
+  for (int y = 0; y < block_h; ++y) {
+    for (int x = 0; x < block_w; ++x) {
+      ASSERT_EQ(lookup(out, y * kOutputStride + x),
+                lookup(in, y * kInputStride + x))
+          << "(" << x << "," << y << ")";
+    }
+  }
+}
+
+// With a kernel whose only non-zero tap is 128 at index 3 on both axes, shv8_
+// must return the input block unchanged.
+TEST_P(ConvolveTest, Copy2D) {
+  uint8_t *const in = input();
+  uint8_t *const out = output();
+  const int block_w = Width();
+  const int block_h = Height();
+  DECLARE_ALIGNED(256, const int16_t,
+                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };
+
+  ASM_REGISTER_STATE_CHECK(UUT_->shv8_(in, kInputStride, out, kOutputStride,
+                                       filter8, 16, filter8, 16, block_w,
+                                       block_h));
+
+  CheckGuardBlocks();
+
+  for (int y = 0; y < block_h; ++y) {
+    for (int x = 0; x < block_w; ++x) {
+      ASSERT_EQ(lookup(out, y * kOutputStride + x),
+                lookup(in, y * kInputStride + x))
+          << "(" << x << "," << y << ")";
+    }
+  }
+}
+
+// Number of filter banks the tests below iterate over; each bank index is
+// cast to InterpFilter and looked up with av1_get_interp_filter_kernel().
+const int kNumFilterBanks = SWITCHABLE_FILTERS;
+// Number of kernels visited per bank (indices 0..15 of the kernel table).
+const int kNumFilters = 16;
+
+// For every 8-tap kernel of every bank: the sums of adjacent tap pairs, and
+// the partial sums of those pairs taken in the order p0, p3, p1, p2, must
+// never exceed 128, and all eight taps together must add up to exactly 128.
+TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
+  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
+    const InterpFilter filter = (InterpFilter)filter_bank;
+    const InterpKernel *filters =
+        (const InterpKernel *)av1_get_interp_filter_kernel(filter);
+#if CONFIG_DUAL_FILTER
+    // Banks whose kernels are not SUBPEL_TAPS long are not covered here.
+    const InterpFilterParams filter_params =
+        av1_get_interp_filter_params(filter);
+    if (filter_params.taps != SUBPEL_TAPS) continue;
+#endif
+    for (int kernel_idx = 0; kernel_idx < kNumFilters; ++kernel_idx) {
+      const int16_t *const taps = filters[kernel_idx];
+      // Sums of neighbouring tap pairs.
+      const int p0 = taps[0] + taps[1];
+      const int p1 = taps[2] + taps[3];
+      const int p2 = taps[4] + taps[5];
+      const int p3 = taps[6] + taps[7];
+
+      // Each pair on its own.
+      EXPECT_LE(p0, 128);
+      EXPECT_LE(p1, 128);
+      EXPECT_LE(p2, 128);
+      EXPECT_LE(p3, 128);
+      // Running totals in accumulation order p0, p3, p1, p2.
+      EXPECT_LE(p0 + p3, 128);
+      EXPECT_LE(p0 + p3 + p1, 128);
+      EXPECT_LE(p0 + p3 + p1 + p2, 128);
+      // The whole kernel sums to 128.
+      EXPECT_EQ(p0 + p1 + p2 + p3, 128);
+    }
+  }
+}
+
+// All-zero kernel. The tests pass it for the axis that a one-dimensional (or
+// copy/avg) call is not expected to use; presumably so that an implementation
+// which wrongly applies it produces a visible mismatch.
+const int16_t kInvalidFilter[8] = { 0 };
+
+// For every filter bank and every (filter_x, filter_y) kernel pair, the
+// function under test must produce exactly the same block as the C reference
+// filter, without touching the guard region.
+TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
+  uint8_t *const in = input();
+  uint8_t *const out = output();
+  // Reference output: an 8-bit or 16-bit local buffer, chosen to match the
+  // function table under test.
+#if CONFIG_HIGHBITDEPTH
+  uint8_t ref8[kOutputStride * kMaxDimension];
+  uint16_t ref16[kOutputStride * kMaxDimension];
+  uint8_t *ref;
+  if (UUT_->use_highbd_ == 0) {
+    ref = ref8;
+  } else {
+    ref = CONVERT_TO_BYTEPTR(ref16);
+  }
+#else
+  uint8_t ref[kOutputStride * kMaxDimension];
+#endif
+
+  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
+    const InterpFilter filter = (InterpFilter)filter_bank;
+    const InterpKernel *filters =
+        (const InterpKernel *)av1_get_interp_filter_kernel(filter);
+#if CONFIG_DUAL_FILTER
+    // Only banks with SUBPEL_TAPS-long kernels are exercised.
+    const InterpFilterParams filter_params =
+        av1_get_interp_filter_params(filter);
+    if (filter_params.taps != SUBPEL_TAPS) continue;
+#endif
+
+    for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
+      for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
+        // Expected result from the C reference implementation.
+        wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
+                                   filters[filter_y], ref, kOutputStride,
+                                   Width(), Height());
+
+        // Kernel index 0 on an axis selects the variant that does not filter
+        // along that axis: 2-D, vertical-only, horizontal-only or plain copy.
+        // The unused axis is given kInvalidFilter.
+        if (filter_x && filter_y)
+          ASM_REGISTER_STATE_CHECK(UUT_->hv8_(
+              in, kInputStride, out, kOutputStride, filters[filter_x], 16,
+              filters[filter_y], 16, Width(), Height()));
+        else if (filter_y)
+          ASM_REGISTER_STATE_CHECK(
+              UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
+                        16, filters[filter_y], 16, Width(), Height()));
+        else if (filter_x)
+          ASM_REGISTER_STATE_CHECK(
+              UUT_->h8_(in, kInputStride, out, kOutputStride, filters[filter_x],
+                        16, kInvalidFilter, 16, Width(), Height()));
+        else
+          ASM_REGISTER_STATE_CHECK(
+              UUT_->copy_(in, kInputStride, out, kOutputStride, kInvalidFilter,
+                          0, kInvalidFilter, 0, Width(), Height()));
+
+        CheckGuardBlocks();
+
+        for (int y = 0; y < Height(); ++y)
+          for (int x = 0; x < Width(); ++x)
+            ASSERT_EQ(lookup(ref, y * kOutputStride + x),
+                      lookup(out, y * kOutputStride + x))
+                << "mismatch at (" << x << "," << y << "), "
+                << "filters (" << filter_bank << "," << filter_x << ","
+                << filter_y << ")";
+      }
+    }
+  }
+}
+
+// Averaging counterpart of MatchesReferenceSubpixelFilter: for every filter
+// bank and kernel pair, the averaging function under test must match the C
+// reference "filter + average". |ref| and |out| are seeded with identical
+// random data once and are then updated in lock-step by every iteration, so
+// the destination contents carry over from one kernel pair to the next.
+TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
+  uint8_t *const in = input();
+  uint8_t *const out = output();
+  // Reference output: an 8-bit or 16-bit local buffer, chosen to match the
+  // function table under test.
+#if CONFIG_HIGHBITDEPTH
+  uint8_t ref8[kOutputStride * kMaxDimension];
+  uint16_t ref16[kOutputStride * kMaxDimension];
+  uint8_t *ref;
+  if (UUT_->use_highbd_ == 0) {
+    ref = ref8;
+  } else {
+    ref = CONVERT_TO_BYTEPTR(ref16);
+  }
+#else
+  uint8_t ref[kOutputStride * kMaxDimension];
+#endif
+
+  // Populate ref and out with some random data
+  ::libaom_test::ACMRandom prng;
+  for (int y = 0; y < Height(); ++y) {
+    for (int x = 0; x < Width(); ++x) {
+      uint16_t r;
+#if CONFIG_HIGHBITDEPTH
+      // 8-bit content (low-bitdepth table or bit depth 8) uses the 8-bit
+      // generator; deeper content uses 16 random bits masked to the depth.
+      if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
+        r = prng.Rand8Extremes();
+      } else {
+        r = prng.Rand16() & mask_;
+      }
+#else
+      r = prng.Rand8Extremes();
+#endif
+
+      assign_val(out, y * kOutputStride + x, r);
+      assign_val(ref, y * kOutputStride + x, r);
+    }
+  }
+
+  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
+    const InterpFilter filter = (InterpFilter)filter_bank;
+    const InterpKernel *filters =
+        (const InterpKernel *)av1_get_interp_filter_kernel(filter);
+#if CONFIG_DUAL_FILTER
+    // Only banks with SUBPEL_TAPS-long kernels are exercised.
+    const InterpFilterParams filter_params =
+        av1_get_interp_filter_params(filter);
+    if (filter_params.taps != SUBPEL_TAPS) continue;
+#endif
+
+    for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
+      for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
+        // Expected result: the C reference averages into |ref|.
+        wrapper_filter_average_block2d_8_c(in, kInputStride, filters[filter_x],
+                                           filters[filter_y], ref,
+                                           kOutputStride, Width(), Height());
+
+        // Kernel index 0 on an axis selects the variant that does not filter
+        // along that axis; the unused axis is given kInvalidFilter.
+        if (filter_x && filter_y)
+          ASM_REGISTER_STATE_CHECK(UUT_->hv8_avg_(
+              in, kInputStride, out, kOutputStride, filters[filter_x], 16,
+              filters[filter_y], 16, Width(), Height()));
+        else if (filter_y)
+          ASM_REGISTER_STATE_CHECK(UUT_->v8_avg_(
+              in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
+              filters[filter_y], 16, Width(), Height()));
+        else if (filter_x)
+          ASM_REGISTER_STATE_CHECK(UUT_->h8_avg_(
+              in, kInputStride, out, kOutputStride, filters[filter_x], 16,
+              kInvalidFilter, 16, Width(), Height()));
+        else
+          ASM_REGISTER_STATE_CHECK(
+              UUT_->avg_(in, kInputStride, out, kOutputStride, kInvalidFilter,
+                         0, kInvalidFilter, 0, Width(), Height()));
+
+        CheckGuardBlocks();
+
+        for (int y = 0; y < Height(); ++y)
+          for (int x = 0; x < Width(); ++x)
+            ASSERT_EQ(lookup(ref, y * kOutputStride + x),
+                      lookup(out, y * kOutputStride + x))
+                << "mismatch at (" << x << "," << y << "), "
+                << "filters (" << filter_bank << "," << filter_x << ","
+                << filter_y << ")";
+      }
+    }
+  }
+}
+
+// Drives the filters with extreme inputs. An 8x8 patch of the input is
+// repeatedly overwritten with samples that are either 0 or the maximum sample
+// value, following the bits of a running counter (along x for axis 0, along y
+// for axis 1). After each rewrite every kernel pair of every bank is checked
+// against the C reference.
+TEST_P(ConvolveTest, FilterExtremes) {
+  uint8_t *const in = input();
+  uint8_t *const out = output();
+#if CONFIG_HIGHBITDEPTH
+  uint8_t ref8[kOutputStride * kMaxDimension];
+  uint16_t ref16[kOutputStride * kMaxDimension];
+  uint8_t *ref;
+  if (UUT_->use_highbd_ == 0) {
+    ref = ref8;
+  } else {
+    ref = CONVERT_TO_BYTEPTR(ref16);
+  }
+  // Largest sample value for the bit depth under test.
+  const int max_pixel = mask_;
+#else
+  uint8_t ref[kOutputStride * kMaxDimension];
+  const int max_pixel = 255;
+#endif
+
+  // Populate ref and out with some random data
+  ::libaom_test::ACMRandom prng;
+  for (int y = 0; y < Height(); ++y) {
+    for (int x = 0; x < Width(); ++x) {
+      uint16_t r;
+#if CONFIG_HIGHBITDEPTH
+      if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
+        r = prng.Rand8Extremes();
+      } else {
+        r = prng.Rand16() & mask_;
+      }
+#else
+      r = prng.Rand8Extremes();
+#endif
+      assign_val(out, y * kOutputStride + x, r);
+      assign_val(ref, y * kOutputStride + x, r);
+    }
+  }
+
+  for (int axis = 0; axis < 2; axis++) {
+    int seed_val = 0;
+    while (seed_val < 256) {
+      // Rewrite the 8x8 patch. For axis 0, row y holds the bits of
+      // (seed_val + y) along x; for axis 1, column x holds the bits of
+      // (seed_val + x) along y. Either way seed_val ends up 8 higher.
+      for (int y = 0; y < 8; ++y) {
+        for (int x = 0; x < 8; ++x) {
+          // |in| is laid out with the input stride, so index it with
+          // kInputStride. The patch starts SUBPEL_TAPS / 2 - 1 samples to the
+          // left of the block origin.
+          assign_val(in, y * kInputStride + x - SUBPEL_TAPS / 2 + 1,
+                     ((seed_val >> (axis ? y : x)) & 1) * max_pixel);
+          if (axis) seed_val++;
+        }
+        if (axis)
+          seed_val -= 8;
+        else
+          seed_val++;
+      }
+      if (axis) seed_val += 8;
+
+      for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
+        const InterpFilter filter = (InterpFilter)filter_bank;
+        const InterpKernel *filters =
+            (const InterpKernel *)av1_get_interp_filter_kernel(filter);
+#if CONFIG_DUAL_FILTER
+        // Only banks with SUBPEL_TAPS-long kernels are exercised.
+        const InterpFilterParams filter_params =
+            av1_get_interp_filter_params(filter);
+        if (filter_params.taps != SUBPEL_TAPS) continue;
+#endif
+        for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
+          for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
+            // Expected result from the C reference implementation.
+            wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
+                                       filters[filter_y], ref, kOutputStride,
+                                       Width(), Height());
+            // Kernel index 0 on an axis selects the variant that does not
+            // filter along that axis; the unused axis gets kInvalidFilter.
+            if (filter_x && filter_y)
+              ASM_REGISTER_STATE_CHECK(UUT_->hv8_(
+                  in, kInputStride, out, kOutputStride, filters[filter_x], 16,
+                  filters[filter_y], 16, Width(), Height()));
+            else if (filter_y)
+              ASM_REGISTER_STATE_CHECK(UUT_->v8_(
+                  in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
+                  filters[filter_y], 16, Width(), Height()));
+            else if (filter_x)
+              ASM_REGISTER_STATE_CHECK(UUT_->h8_(
+                  in, kInputStride, out, kOutputStride, filters[filter_x], 16,
+                  kInvalidFilter, 16, Width(), Height()));
+            else
+              ASM_REGISTER_STATE_CHECK(UUT_->copy_(
+                  in, kInputStride, out, kOutputStride, kInvalidFilter, 0,
+                  kInvalidFilter, 0, Width(), Height()));
+
+            for (int y = 0; y < Height(); ++y)
+              for (int x = 0; x < Width(); ++x)
+                ASSERT_EQ(lookup(ref, y * kOutputStride + x),
+                          lookup(out, y * kOutputStride + x))
+                    << "mismatch at (" << x << "," << y << "), "
+                    << "filters (" << filter_bank << "," << filter_x << ","
+                    << filter_y << ")";
+          }
+        }
+      }
+    }
+  }
+}
+
+/* Checks that enough rows and columns get filtered for every initial
+   fractional position (0..15) combined with every scaling step (1..32). The
+   input is constant, so each output sample is expected to equal the input
+   sample at the same position. */
+TEST_P(ConvolveTest, CheckScalingFiltering) {
+  uint8_t *const in = input();
+  uint8_t *const out = output();
+  const InterpKernel *const eighttap =
+      (const InterpKernel *)av1_get_interp_filter_kernel(EIGHTTAP_REGULAR);
+  const int block_w = Width();
+  const int block_h = Height();
+
+  SetConstantInput(127);
+
+  for (int frac = 0; frac < 16; ++frac) {
+    const int16_t *const kernel = eighttap[frac];
+    for (int step = 1; step <= 32; ++step) {
+      /* Same kernel and step on both axes, applied in combination. */
+      ASM_REGISTER_STATE_CHECK(UUT_->shv8_(in, kInputStride, out, kOutputStride,
+                                           kernel, step, kernel, step, block_w,
+                                           block_h));
+
+      CheckGuardBlocks();
+
+      for (int y = 0; y < block_h; ++y) {
+        for (int x = 0; x < block_w; ++x) {
+          ASSERT_EQ(lookup(in, y * kInputStride + x),
+                    lookup(out, y * kOutputStride + x))
+              << "x == " << x << ", y == " << y << ", frac == " << frac
+              << ", step == " << step;
+        }
+      }
+    }
+  }
+}
+
+// Benchmark (disabled by default): times a fixed number of back-to-back
+// copy_ calls on the current block size and prints the elapsed microseconds.
+TEST_P(ConvolveTest, DISABLED_Copy_Speed) {
+  const int kIterations = 5000000;
+  const uint8_t *const in = input();
+  uint8_t *const out = output();
+  const int width = Width();
+  const int height = Height();
+
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
+  for (int remaining = kIterations; remaining > 0; --remaining) {
+    UUT_->copy_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0, width,
+                height);
+  }
+  aom_usec_timer_mark(&timer);
+
+  const int elapsed_us = static_cast<int>(aom_usec_timer_elapsed(&timer));
+  // A low-bitdepth table (use_highbd_ == 0) is reported as 8 bits.
+  const int bit_depth = UUT_->use_highbd_ ? UUT_->use_highbd_ : 8;
+  printf("convolve_copy_%dx%d_%d: %d us\n", width, height, bit_depth,
+         elapsed_us);
+}
+
+// Benchmark (disabled by default): times a fixed number of back-to-back avg_
+// calls on the current block size and prints the elapsed microseconds.
+TEST_P(ConvolveTest, DISABLED_Avg_Speed) {
+  const int kIterations = 5000000;
+  const uint8_t *const in = input();
+  uint8_t *const out = output();
+  const int width = Width();
+  const int height = Height();
+
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
+  for (int remaining = kIterations; remaining > 0; --remaining) {
+    UUT_->avg_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0, width,
+               height);
+  }
+  aom_usec_timer_mark(&timer);
+
+  const int elapsed_us = static_cast<int>(aom_usec_timer_elapsed(&timer));
+  // A low-bitdepth table (use_highbd_ == 0) is reported as 8 bits.
+  const int bit_depth = UUT_->use_highbd_ ? UUT_->use_highbd_ : 8;
+  printf("convolve_avg_%dx%d_%d: %d us\n", width, height, bit_depth,
+         elapsed_us);
+}
+
+// Benchmark (disabled by default): runs 1000 rounds over every filter bank
+// and kernel pair, calling the hv8_/v8_/h8_ functions under test, and prints
+// the elapsed time in milliseconds. No results are verified.
+TEST_P(ConvolveTest, DISABLED_Speed) {
+  uint8_t *const in = input();
+  uint8_t *const out = output();
+#if CONFIG_HIGHBITDEPTH
+  uint8_t ref8[kOutputStride * kMaxDimension];
+  uint16_t ref16[kOutputStride * kMaxDimension];
+  uint8_t *ref;
+  if (UUT_->use_highbd_ == 0) {
+    ref = ref8;
+  } else {
+    ref = CONVERT_TO_BYTEPTR(ref16);
+  }
+#else
+  uint8_t ref[kOutputStride * kMaxDimension];
+#endif
+
+  // Populate ref and out with some random data
+  // NOTE(review): |ref| is written here but never read afterwards in this
+  // test.
+  ::libaom_test::ACMRandom prng;
+  for (int y = 0; y < Height(); ++y) {
+    for (int x = 0; x < Width(); ++x) {
+      uint16_t r;
+#if CONFIG_HIGHBITDEPTH
+      if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
+        r = prng.Rand8Extremes();
+      } else {
+        r = prng.Rand16() & mask_;
+      }
+#else
+      r = prng.Rand8Extremes();
+#endif
+
+      assign_val(out, y * kOutputStride + x, r);
+      assign_val(ref, y * kOutputStride + x, r);
+    }
+  }
+
+  // One untimed pass of the C reference into |out| before the timer starts.
+  const InterpFilter filter = (InterpFilter)1;
+  const InterpKernel *filters =
+      (const InterpKernel *)av1_get_interp_filter_kernel(filter);
+  wrapper_filter_average_block2d_8_c(in, kInputStride, filters[1], filters[1],
+                                     out, kOutputStride, Width(), Height());
+
+  aom_usec_timer timer;
+  int tests_num = 1000;
+
+  aom_usec_timer_start(&timer);
+  while (tests_num > 0) {
+    for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
+      // These two locals shadow the |filter| / |filters| declared above.
+      const InterpFilter filter = (InterpFilter)filter_bank;
+      const InterpKernel *filters =
+          (const InterpKernel *)av1_get_interp_filter_kernel(filter);
+#if CONFIG_DUAL_FILTER
+      const InterpFilterParams filter_params =
+          av1_get_interp_filter_params(filter);
+      if (filter_params.taps != SUBPEL_TAPS) continue;
+#endif
+
+      for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
+        for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
+          if (filter_x && filter_y)
+            ASM_REGISTER_STATE_CHECK(UUT_->hv8_(
+                in, kInputStride, out, kOutputStride, filters[filter_x], 16,
+                filters[filter_y], 16, Width(), Height()));
+          // NOTE(review): unlike the correctness tests, this is a plain "if",
+          // not "else if", so v8_ also runs after hv8_ whenever both indices
+          // are non-zero. Confirm whether that is intended for the benchmark.
+          if (filter_y)
+            ASM_REGISTER_STATE_CHECK(
+                UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
+                          16, filters[filter_y], 16, Width(), Height()));
+          else if (filter_x)
+            ASM_REGISTER_STATE_CHECK(UUT_->h8_(
+                in, kInputStride, out, kOutputStride, filters[filter_x], 16,
+                kInvalidFilter, 16, Width(), Height()));
+        }
+      }
+    }
+    tests_num--;
+  }
+  aom_usec_timer_mark(&timer);
+
+  // Elapsed microseconds converted to milliseconds. use_highbd_ is printed
+  // as-is, so a low-bitdepth table shows up as bitdepth 0.
+  const int elapsed_time =
+      static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
+  printf("%dx%d (bitdepth %d) time: %5d ms\n", Width(), Height(),
+         UUT_->use_highbd_, elapsed_time);
+}
+
+using std::tr1::make_tuple;
+
+#if CONFIG_HIGHBITDEPTH
+// WRAP(func, bd) defines a function named wrap_<func>_<bd> that takes the
+// ten-argument convolve signature (without a bit-depth argument) and forwards
+// every argument to aom_highbd_<func>, appending |bd| as the final argument.
+#define WRAP(func, bd)                                                       \
+  void wrap_##func##_##bd(                                                   \
+      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                \
+      ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride,    \
+      const int16_t *filter_y, int filter_y_stride, int w, int h) {          \
+    aom_highbd_##func(src, src_stride, dst, dst_stride, filter_x,            \
+                      filter_x_stride, filter_y, filter_y_stride, w, h, bd); \
+  }
+#if HAVE_SSE2 && ARCH_X86_64
+WRAP(convolve_copy_sse2, 8)
+WRAP(convolve_avg_sse2, 8)
+WRAP(convolve_copy_sse2, 10)
+WRAP(convolve_avg_sse2, 10)
+WRAP(convolve_copy_sse2, 12)
+WRAP(convolve_avg_sse2, 12)
+WRAP(convolve8_horiz_sse2, 8)
+WRAP(convolve8_avg_horiz_sse2, 8)
+WRAP(convolve8_vert_sse2, 8)
+WRAP(convolve8_avg_vert_sse2, 8)
+WRAP(convolve8_sse2, 8)
+WRAP(convolve8_avg_sse2, 8)
+WRAP(convolve8_horiz_sse2, 10)
+WRAP(convolve8_avg_horiz_sse2, 10)
+WRAP(convolve8_vert_sse2, 10)
+WRAP(convolve8_avg_vert_sse2, 10)
+WRAP(convolve8_sse2, 10)
+WRAP(convolve8_avg_sse2, 10)
+WRAP(convolve8_horiz_sse2, 12)
+WRAP(convolve8_avg_horiz_sse2, 12)
+WRAP(convolve8_vert_sse2, 12)
+WRAP(convolve8_avg_vert_sse2, 12)
+WRAP(convolve8_sse2, 12)
+WRAP(convolve8_avg_sse2, 12)
+#endif  // HAVE_SSE2 && ARCH_X86_64
+
+WRAP(convolve_copy_c, 8)
+WRAP(convolve_avg_c, 8)
+WRAP(convolve8_horiz_c, 8)
+WRAP(convolve8_avg_horiz_c, 8)
+WRAP(convolve8_vert_c, 8)
+WRAP(convolve8_avg_vert_c, 8)
+WRAP(convolve8_c, 8)
+WRAP(convolve8_avg_c, 8)
+WRAP(convolve_copy_c, 10)
+WRAP(convolve_avg_c, 10)
+WRAP(convolve8_horiz_c, 10)
+WRAP(convolve8_avg_horiz_c, 10)
+WRAP(convolve8_vert_c, 10)
+WRAP(convolve8_avg_vert_c, 10)
+WRAP(convolve8_c, 10)
+WRAP(convolve8_avg_c, 10)
+WRAP(convolve_copy_c, 12)
+WRAP(convolve_avg_c, 12)
+WRAP(convolve8_horiz_c, 12)
+WRAP(convolve8_avg_horiz_c, 12)
+WRAP(convolve8_vert_c, 12)
+WRAP(convolve8_avg_vert_c, 12)
+WRAP(convolve8_c, 12)
+WRAP(convolve8_avg_c, 12)
+
+#if HAVE_AVX2
+WRAP(convolve_copy_avx2, 8)
+WRAP(convolve_avg_avx2, 8)
+WRAP(convolve8_horiz_avx2, 8)
+WRAP(convolve8_avg_horiz_avx2, 8)
+WRAP(convolve8_vert_avx2, 8)
+WRAP(convolve8_avg_vert_avx2, 8)
+WRAP(convolve8_avx2, 8)
+WRAP(convolve8_avg_avx2, 8)
+
+WRAP(convolve_copy_avx2, 10)
+WRAP(convolve_avg_avx2, 10)
+WRAP(convolve8_avx2, 10)
+WRAP(convolve8_horiz_avx2, 10)
+WRAP(convolve8_vert_avx2, 10)
+WRAP(convolve8_avg_avx2, 10)
+WRAP(convolve8_avg_horiz_avx2, 10)
+WRAP(convolve8_avg_vert_avx2, 10)
+
+WRAP(convolve_copy_avx2, 12)
+WRAP(convolve_avg_avx2, 12)
+WRAP(convolve8_avx2, 12)
+WRAP(convolve8_horiz_avx2, 12)
+WRAP(convolve8_vert_avx2, 12)
+WRAP(convolve8_avg_avx2, 12)
+WRAP(convolve8_avg_horiz_avx2, 12)
+WRAP(convolve8_avg_vert_avx2, 12)
+#endif  // HAVE_AVX2
+
+#undef WRAP
+
+const ConvolveFunctions convolve8_c(
+    wrap_convolve_copy_c_8, wrap_convolve_avg_c_8, wrap_convolve8_horiz_c_8,
+    wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8,
+    wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8,
+    wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
+    wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8,
+    wrap_convolve8_avg_c_8, 8);
+const ConvolveFunctions convolve10_c(
+    wrap_convolve_copy_c_10, wrap_convolve_avg_c_10, wrap_convolve8_horiz_c_10,
+    wrap_convolve8_avg_horiz_c_10, wrap_convolve8_vert_c_10,
+    wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10, wrap_convolve8_avg_c_10,
+    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
+    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10,
+    wrap_convolve8_avg_c_10, 10);
+const ConvolveFunctions convolve12_c(
+    wrap_convolve_copy_c_12, wrap_convolve_avg_c_12, wrap_convolve8_horiz_c_12,
+    wrap_convolve8_avg_horiz_c_12, wrap_convolve8_vert_c_12,
+    wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12, wrap_convolve8_avg_c_12,
+    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
+    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12,
+    wrap_convolve8_avg_c_12, 12);
+const ConvolveParam kArrayConvolve_c[] = {
+  ALL_SIZES(convolve8_c), ALL_SIZES(convolve10_c), ALL_SIZES(convolve12_c)
+};
+
+#else
+const ConvolveFunctions convolve8_c(
+    aom_convolve_copy_c, aom_convolve_avg_c, aom_convolve8_horiz_c,
+    aom_convolve8_avg_horiz_c, aom_convolve8_vert_c, aom_convolve8_avg_vert_c,
+    aom_convolve8_c, aom_convolve8_avg_c, aom_scaled_horiz_c,
+    aom_scaled_avg_horiz_c, aom_scaled_vert_c, aom_scaled_avg_vert_c,
+    aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
+const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) };
+#endif
+INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_c));
+
+#if HAVE_SSE2 && ARCH_X86_64
+#if CONFIG_HIGHBITDEPTH  // args 9-14 of each table repeat args 3-8
+const ConvolveFunctions convolve8_sse2(
+    wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8,
+    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
+    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
+    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8,
+    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
+    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
+    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
+const ConvolveFunctions convolve10_sse2(
+    wrap_convolve_copy_sse2_10, wrap_convolve_avg_sse2_10,
+    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
+    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
+    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10,
+    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
+    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
+    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
+const ConvolveFunctions convolve12_sse2(
+    wrap_convolve_copy_sse2_12, wrap_convolve_avg_sse2_12,
+    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
+    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
+    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12,
+    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
+    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
+    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
+const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2),
+                                              ALL_SIZES(convolve10_sse2),
+                                              ALL_SIZES(convolve12_sse2) };
+#else  // !CONFIG_HIGHBITDEPTH: the aom_scaled_* entries are the C versions
+const ConvolveFunctions convolve8_sse2(
+    aom_convolve_copy_sse2, aom_convolve_avg_sse2, aom_convolve8_horiz_sse2,
+    aom_convolve8_avg_horiz_sse2, aom_convolve8_vert_sse2,
+    aom_convolve8_avg_vert_sse2, aom_convolve8_sse2, aom_convolve8_avg_sse2,
+    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
+    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
+
+const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2) };
+#endif  // CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest,
+                        ::testing::ValuesIn(kArrayConvolve_sse2));
+#endif  // HAVE_SSE2 && ARCH_X86_64
+
+#if HAVE_SSSE3  // copy/avg use the C versions; only aom_scaled_2d is SSSE3
+const ConvolveFunctions convolve8_ssse3(
+    aom_convolve_copy_c, aom_convolve_avg_c, aom_convolve8_horiz_ssse3,
+    aom_convolve8_avg_horiz_ssse3, aom_convolve8_vert_ssse3,
+    aom_convolve8_avg_vert_ssse3, aom_convolve8_ssse3, aom_convolve8_avg_ssse3,
+    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
+    aom_scaled_avg_vert_c, aom_scaled_2d_ssse3, aom_scaled_avg_2d_c, 0);
+
+const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) };
+INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest,
+                        ::testing::ValuesIn(kArrayConvolve8_ssse3));
+#endif  // HAVE_SSSE3
+
+#if HAVE_AVX2
+#if CONFIG_HIGHBITDEPTH  // args 9-14 of each table are the plain-C wrappers
+const ConvolveFunctions convolve8_avx2(
+    wrap_convolve_copy_avx2_8, wrap_convolve_avg_avx2_8,
+    wrap_convolve8_horiz_avx2_8, wrap_convolve8_avg_horiz_avx2_8,
+    wrap_convolve8_vert_avx2_8, wrap_convolve8_avg_vert_avx2_8,
+    wrap_convolve8_avx2_8, wrap_convolve8_avg_avx2_8, wrap_convolve8_horiz_c_8,
+    wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8,
+    wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
+const ConvolveFunctions convolve10_avx2(
+    wrap_convolve_copy_avx2_10, wrap_convolve_avg_avx2_10,
+    wrap_convolve8_horiz_avx2_10, wrap_convolve8_avg_horiz_avx2_10,
+    wrap_convolve8_vert_avx2_10, wrap_convolve8_avg_vert_avx2_10,
+    wrap_convolve8_avx2_10, wrap_convolve8_avg_avx2_10,
+    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
+    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10,
+    wrap_convolve8_avg_c_10, 10);
+const ConvolveFunctions convolve12_avx2(
+    wrap_convolve_copy_avx2_12, wrap_convolve_avg_avx2_12,
+    wrap_convolve8_horiz_avx2_12, wrap_convolve8_avg_horiz_avx2_12,
+    wrap_convolve8_vert_avx2_12, wrap_convolve8_avg_vert_avx2_12,
+    wrap_convolve8_avx2_12, wrap_convolve8_avg_avx2_12,
+    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
+    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12,
+    wrap_convolve8_avg_c_12, 12);
+const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2),
+                                               ALL_SIZES(convolve10_avx2),
+                                               ALL_SIZES(convolve12_avx2) };
+#else  // !CONFIG_HIGHBITDEPTH: the avg variants below are the SSSE3 ones
+const ConvolveFunctions convolve8_avx2(
+    aom_convolve_copy_c, aom_convolve_avg_c, aom_convolve8_horiz_avx2,
+    aom_convolve8_avg_horiz_ssse3, aom_convolve8_vert_avx2,
+    aom_convolve8_avg_vert_ssse3, aom_convolve8_avx2, aom_convolve8_avg_ssse3,
+    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
+    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
+
+const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2) };
+#endif  // CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
+                        ::testing::ValuesIn(kArrayConvolve8_avx2));
+#endif  // HAVE_AVX2
+
+// TODO(any): Make NEON versions support 128x128 128x64 64x128 block sizes
+#if HAVE_NEON && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
+#if HAVE_NEON_ASM
+const ConvolveFunctions convolve8_neon(
+    aom_convolve_copy_neon, aom_convolve_avg_neon, aom_convolve8_horiz_neon,
+    aom_convolve8_avg_horiz_neon, aom_convolve8_vert_neon,
+    aom_convolve8_avg_vert_neon, aom_convolve8_neon, aom_convolve8_avg_neon,
+    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
+    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
+#else   // !HAVE_NEON_ASM (table is identical to the branch above)
+const ConvolveFunctions convolve8_neon(
+    aom_convolve_copy_neon, aom_convolve_avg_neon, aom_convolve8_horiz_neon,
+    aom_convolve8_avg_horiz_neon, aom_convolve8_vert_neon,
+    aom_convolve8_avg_vert_neon, aom_convolve8_neon, aom_convolve8_avg_neon,
+    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
+    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
+#endif  // HAVE_NEON_ASM
+
+const ConvolveParam kArrayConvolve8_neon[] = { ALL_SIZES(convolve8_neon) };
+INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest,
+                        ::testing::ValuesIn(kArrayConvolve8_neon));
+#endif  // HAVE_NEON && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
+
+// TODO(any): Make DSPR2 versions support 128x128 128x64 64x128 block sizes
+#if HAVE_DSPR2 && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
+const ConvolveFunctions convolve8_dspr2(
+    aom_convolve_copy_dspr2, aom_convolve_avg_dspr2, aom_convolve8_horiz_dspr2,
+    aom_convolve8_avg_horiz_dspr2, aom_convolve8_vert_dspr2,
+    aom_convolve8_avg_vert_dspr2, aom_convolve8_dspr2, aom_convolve8_avg_dspr2,
+    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
+    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
+
+const ConvolveParam kArrayConvolve8_dspr2[] = { ALL_SIZES(convolve8_dspr2) };
+INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest,
+                        ::testing::ValuesIn(kArrayConvolve8_dspr2));
+#endif  // HAVE_DSPR2 && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
+
+// TODO(any): Make MSA versions support 128x128 128x64 64x128 block sizes
+#if HAVE_MSA && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
+const ConvolveFunctions convolve8_msa(
+    aom_convolve_copy_msa, aom_convolve_avg_msa, aom_convolve8_horiz_msa,
+    aom_convolve8_avg_horiz_msa, aom_convolve8_vert_msa,
+    aom_convolve8_avg_vert_msa, aom_convolve8_msa, aom_convolve8_avg_msa,
+    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
+    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
+
+const ConvolveParam kArrayConvolve8_msa[] = { ALL_SIZES(convolve8_msa) };
+INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest,
+                        ::testing::ValuesIn(kArrayConvolve8_msa));
+#endif  // HAVE_MSA && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
+}  // namespace
diff --git a/third_party/aom/test/cpu_speed_test.cc b/third_party/aom/test/cpu_speed_test.cc
new file mode 100644
index 000000000..9b7966462
--- /dev/null
+++ b/third_party/aom/test/cpu_speed_test.cc
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+
+namespace {
+
+const int kMaxPSNR = 100;
+
+class CpuSpeedTest  // Test params: codec factory, TestMode, cpu-used value.
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int> {
+ protected:
+  CpuSpeedTest()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        set_cpu_used_(GET_PARAM(2)), min_psnr_(kMaxPSNR),
+        tune_content_(AOM_CONTENT_DEFAULT) {}
+  virtual ~CpuSpeedTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(encoding_mode_);
+    if (encoding_mode_ != ::libaom_test::kRealTime) {
+      cfg_.g_lag_in_frames = 25;  // non-realtime modes: lagged VBR
+      cfg_.rc_end_usage = AOM_VBR;
+    } else {
+      cfg_.g_lag_in_frames = 0;  // realtime mode: zero-lag CBR
+      cfg_.rc_end_usage = AOM_CBR;
+    }
+  }
+
+  virtual void BeginPassHook(unsigned int /*pass*/) { min_psnr_ = kMaxPSNR; }
+
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
+      encoder->Control(AV1E_SET_TUNE_CONTENT, tune_content_);
+      if (encoding_mode_ != ::libaom_test::kRealTime) {
+        encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+        encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+        encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+      }
+    }
+  }
+
+  virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
+    if (pkt->data.psnr.psnr[0] < min_psnr_) min_psnr_ = pkt->data.psnr.psnr[0];
+  }
+
+  void TestQ0();
+  void TestScreencastQ0();
+  void TestTuneScreen();
+  void TestEncodeHighBitrate();
+  void TestLowBitrate();
+
+  ::libaom_test::TestMode encoding_mode_;
+  int set_cpu_used_;  // value sent with AOME_SET_CPUUSED
+  double min_psnr_;   // lowest psnr[0] seen by PSNRPktHook in this pass
+  int tune_content_;  // value sent with AV1E_SET_TUNE_CONTENT
+};
+
+void CpuSpeedTest::TestQ0() {
+  // Validate that this non multiple of 64 wide clip encodes and decodes
+  // without a mismatch when passing in a very low max q.  This pushes
+  // the encoder to producing lots of big partitions which will likely
+  // extend into the border and test the border condition.
+  cfg_.rc_2pass_vbr_minsection_pct = 5;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_target_bitrate = 400;
+  cfg_.rc_max_quantizer = 0;  // min == max == 0 pins the quantizer range
+  cfg_.rc_min_quantizer = 0;
+
+  ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+                                       10);
+
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  EXPECT_GE(min_psnr_, kMaxPSNR);  // no packet may report psnr[0] < kMaxPSNR
+}
+
+void CpuSpeedTest::TestScreencastQ0() {  // TestQ0 settings on screendata.y4m
+  ::libaom_test::Y4mVideoSource video("screendata.y4m", 0, 10);
+  cfg_.g_timebase = video.timebase();
+  cfg_.rc_2pass_vbr_minsection_pct = 5;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_target_bitrate = 400;
+  cfg_.rc_max_quantizer = 0;  // min == max == 0 pins the quantizer range
+  cfg_.rc_min_quantizer = 0;
+
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  EXPECT_GE(min_psnr_, kMaxPSNR);  // no packet may report psnr[0] < kMaxPSNR
+}
+
+void CpuSpeedTest::TestTuneScreen() {  // encode with screen-content tuning
+  ::libaom_test::Y4mVideoSource video("screendata.y4m", 0, 10);
+  cfg_.g_timebase = video.timebase();
+  cfg_.rc_2pass_vbr_minsection_pct = 5;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;  // same 5/2000 pair as other tests
+  cfg_.rc_target_bitrate = 2000;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.rc_min_quantizer = 0;
+  tune_content_ = AOM_CONTENT_SCREEN;  // sent via AV1E_SET_TUNE_CONTENT
+
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));  // only fatal failures are checked
+}
+
+void CpuSpeedTest::TestEncodeHighBitrate() {
+  // Validate that this non multiple of 64 wide clip encodes and decodes
+  // without a mismatch at a high target bitrate (12000) and a low max q
+  // (10).  This pushes the encoder to producing lots of big partitions which
+  // will likely extend into the border and test the border condition.
+  cfg_.rc_2pass_vbr_minsection_pct = 5;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_target_bitrate = 12000;
+  cfg_.rc_max_quantizer = 10;
+  cfg_.rc_min_quantizer = 0;
+
+  ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+                                       10);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));  // no PSNR expectation here
+}
+
+void CpuSpeedTest::TestLowBitrate() {
+  // Validate that this clip encodes and decodes without a mismatch
+  // when passing in a very high min q.  This pushes the encoder to producing
+  // lots of small partitions, which will likely test the other condition.
+  cfg_.rc_2pass_vbr_minsection_pct = 5;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_target_bitrate = 200;
+  cfg_.rc_min_quantizer = 40;  // rc_max_quantizer is left at its prior value
+
+  ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+                                       10);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));  // no PSNR expectation here
+}
+
+TEST_P(CpuSpeedTest, TestQ0) { TestQ0(); }
+TEST_P(CpuSpeedTest, TestScreencastQ0) { TestScreencastQ0(); }
+TEST_P(CpuSpeedTest, TestTuneScreen) { TestTuneScreen(); }
+TEST_P(CpuSpeedTest, TestEncodeHighBitrate) { TestEncodeHighBitrate(); }
+TEST_P(CpuSpeedTest, TestLowBitrate) { TestLowBitrate(); }
+
+class CpuSpeedTestLarge : public CpuSpeedTest {};  // same tests, other params
+
+TEST_P(CpuSpeedTestLarge, TestQ0) { TestQ0(); }
+TEST_P(CpuSpeedTestLarge, TestScreencastQ0) { TestScreencastQ0(); }
+TEST_P(CpuSpeedTestLarge, TestTuneScreen) { TestTuneScreen(); }
+TEST_P(CpuSpeedTestLarge, TestEncodeHighBitrate) { TestEncodeHighBitrate(); }
+TEST_P(CpuSpeedTestLarge, TestLowBitrate) { TestLowBitrate(); }
+
+AV1_INSTANTIATE_TEST_CASE(CpuSpeedTest,
+                          ::testing::Values(::libaom_test::kTwoPassGood,
+                                            ::libaom_test::kOnePassGood),
+                          ::testing::Range(1, 3));  // cpu-used 1 and 2
+AV1_INSTANTIATE_TEST_CASE(CpuSpeedTestLarge,
+                          ::testing::Values(::libaom_test::kTwoPassGood,
+                                            ::libaom_test::kOnePassGood),
+                          ::testing::Range(0, 1));  // cpu-used 0 only
+}  // namespace
diff --git a/third_party/aom/test/datarate_test.cc b/third_party/aom/test/datarate_test.cc
new file mode 100644
index 000000000..48be4a46d
--- /dev/null
+++ b/third_party/aom/test/datarate_test.cc
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include "./aom_config.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "aom/aom_codec.h"
+
+namespace {
+
+class DatarateTestLarge  // Test params: codec factory, TestMode, cpu-used.
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int> {
+ public:
+  DatarateTestLarge() : EncoderTest(GET_PARAM(0)) {}
+
+ protected:
+  virtual ~DatarateTestLarge() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+    set_cpu_used_ = GET_PARAM(2);
+    ResetModel();
+  }
+
+  virtual void ResetModel() {  // called by SetUp() and before each RunLoop()
+    last_pts_ = 0;
+    bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz;
+    frame_number_ = 0;
+    tot_frame_number_ = 0;
+    first_drop_ = 0;
+    num_drops_ = 0;
+    // Denoiser is off by default.
+    denoiser_on_ = 0;
+    bits_total_ = 0;
+    denoiser_offon_test_ = 0;
+    denoiser_offon_period_ = -1;
+  }
+
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 0) encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
+
+    if (denoiser_offon_test_) {
+      ASSERT_GT(denoiser_offon_period_, 0)
+          << "denoiser_offon_period_ is not positive.";
+      if ((video->frame() + 1) % denoiser_offon_period_ == 0) {
+        // Flip denoiser_on_ periodically
+        denoiser_on_ ^= 1;
+      }
+    }
+
+    encoder->Control(AV1E_SET_NOISE_SENSITIVITY, denoiser_on_);
+
+    const aom_rational_t tb = video->timebase();
+    timebase_ = static_cast<double>(tb.num) / tb.den;
+    duration_ = 0;
+  }
+
+  virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
+    // Time since last timestamp = duration.
+    aom_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
+
+    if (duration > 1) {
+      // If first drop not set and we have a drop set it to this time.
+      if (!first_drop_) first_drop_ = last_pts_ + 1;
+      // Update the number of frame drops.
+      num_drops_ += static_cast<int>(duration - 1);
+      // Update counter for total number of frames (#frames input to encoder).
+      // Dropped frames still count as input frames.
+      tot_frame_number_ += static_cast<int>(duration - 1);
+    }
+
+    // Add to the buffer the bits we'd expect from a constant bitrate server.
+    bits_in_buffer_model_ += static_cast<int64_t>(
+        duration * timebase_ * cfg_.rc_target_bitrate * 1000);
+
+    // Buffer should not go negative (NOTE(review): nothing drains it here).
+    ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame "
+                                        << pkt->data.frame.pts;
+
+    const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
+
+    // Update the total encoded bits.
+    bits_total_ += frame_size_in_bits;
+
+    // Update the most recent pts.
+    last_pts_ = pkt->data.frame.pts;
+    ++frame_number_;
+    ++tot_frame_number_;
+  }
+
+  virtual void EndPassHook(void) {
+    duration_ = (last_pts_ + 1) * timebase_;
+    // Effective file datarate:
+    effective_datarate_ = (bits_total_ / 1000.0) / duration_;
+  }
+
+  aom_codec_pts_t last_pts_;
+  double timebase_;
+  int frame_number_;      // Counter for number of non-dropped/encoded frames.
+  int tot_frame_number_;  // Counter for total number of input frames.
+  int64_t bits_total_;    // Sum of encoded frame sizes, in bits.
+  double duration_;       // (last_pts_ + 1) * timebase_, set in EndPassHook.
+  double effective_datarate_;  // bits_total_ / 1000 / duration_.
+  int set_cpu_used_;
+  int64_t bits_in_buffer_model_;
+  aom_codec_pts_t first_drop_;  // pts after the first gap seen; 0 if none.
+  int num_drops_;
+  int denoiser_on_;
+  int denoiser_offon_test_;
+  int denoiser_offon_period_;
+};
+
+// Check basic rate targeting for VBR mode (within 25% of the target).
+TEST_P(DatarateTestLarge, BasicRateTargetingVBR) {
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.g_error_resilient = 0;
+  cfg_.rc_end_usage = AOM_VBR;
+  cfg_.g_lag_in_frames = 0;
+
+  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 140);
+  for (int i = 400; i <= 800; i += 400) {  // targets: 400 and 800
+    cfg_.rc_target_bitrate = i;
+    ResetModel();
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.75)
+        << " The datarate for the file is lower than target by too much!";
+    ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.25)
+        << " The datarate for the file is greater than target by too much!";
+  }
+}
+
+// Check basic rate targeting for CBR (within 15% of the target).
+TEST_P(DatarateTestLarge, BasicRateTargeting) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_dropframe_thresh = 1;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.rc_end_usage = AOM_CBR;
+  cfg_.g_lag_in_frames = 0;
+
+  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 140);
+  for (int i = 150; i < 800; i += 400) {  // targets: 150 and 550
+    cfg_.rc_target_bitrate = i;
+    ResetModel();
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85)
+        << " The datarate for the file is lower than target by too much!";
+    ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.15)
+        << " The datarate for the file is greater than target by too much!";
+  }
+}
+
+// Check basic rate targeting for CBR using rush_hour_444.y4m and g_profile 1.
+TEST_P(DatarateTestLarge, BasicRateTargeting444) {
+  ::libaom_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140);
+
+  cfg_.g_profile = 1;
+  cfg_.g_timebase = video.timebase();
+
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_dropframe_thresh = 1;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.rc_end_usage = AOM_CBR;
+
+  for (int i = 250; i < 900; i += 400) {  // targets: 250 and 650
+    cfg_.rc_target_bitrate = i;
+    ResetModel();
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_GE(static_cast<double>(cfg_.rc_target_bitrate),  // bounds scale the
+              effective_datarate_ * 0.85)                   // measured rate
+        << " The datarate for the file exceeds the target by too much!";
+    ASSERT_LE(static_cast<double>(cfg_.rc_target_bitrate),
+              effective_datarate_ * 1.15)
+        << " The datarate for the file missed the target!"
+        << cfg_.rc_target_bitrate << " " << effective_datarate_;
+  }
+}
+
+// Check that (1) the first dropped frame gets earlier and earlier
+// as the drop frame threshold is increased, and (2) that the total number of
+// frame drops does not decrease as we increase frame drop threshold.
+// Use a lower qp-max to force some frame drops.
+TEST_P(DatarateTestLarge, ChangingDropFrameThresh) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_undershoot_pct = 20;
+  cfg_.rc_overshoot_pct = 20;
+  cfg_.rc_dropframe_thresh = 10;  // overwritten by the loop below
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 50;
+  cfg_.rc_end_usage = AOM_CBR;
+  cfg_.rc_target_bitrate = 200;
+  cfg_.g_lag_in_frames = 0;
+  // TODO(marpan): Investigate datarate target failures with a smaller keyframe
+  // interval (128).
+  cfg_.kf_max_dist = 9999;
+
+  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 100);
+
+  const int kDropFrameThreshTestStep = 30;
+  aom_codec_pts_t last_drop = 140;  // initial upper bound for first_drop_
+  int last_num_drops = 0;
+  for (int i = 40; i < 100; i += kDropFrameThreshTestStep) {  // 40 and 70
+    cfg_.rc_dropframe_thresh = i;
+    ResetModel();
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85)
+        << " The datarate for the file is lower than target by too much!";
+    ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.15)
+        << " The datarate for the file is greater than target by too much!";
+    ASSERT_LE(first_drop_, last_drop)
+        << " The first dropped frame for drop_thresh " << i
+        << " > first dropped frame for drop_thresh "
+        << i - kDropFrameThreshTestStep;
+    ASSERT_GE(num_drops_, last_num_drops * 0.85)  // allows a 15% decrease
+        << " The number of dropped frames for drop_thresh " << i
+        << " < number of dropped frames for drop_thresh "
+        << i - kDropFrameThreshTestStep;
+    last_drop = first_drop_;
+    last_num_drops = num_drops_;
+  }
+}
+
+AV1_INSTANTIATE_TEST_CASE(DatarateTestLarge,
+                          ::testing::Values(::libaom_test::kOnePassGood,
+                                            ::libaom_test::kRealTime),
+                          ::testing::Range(2, 9, 2));  // cpu-used 2, 4, 6, 8
+}  // namespace
diff --git a/third_party/aom/test/dct16x16_test.cc b/third_party/aom/test/dct16x16_test.cc
new file mode 100644
index 000000000..89263ce89
--- /dev/null
+++ b/third_party/aom/test/dct16x16_test.cc
@@ -0,0 +1,876 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./av1_rtcd.h"
+#include "./aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "av1/common/entropy.h"
+#include "av1/common/scan.h"
+#include "aom/aom_codec.h"
+#include "aom/aom_integer.h"
+#include "aom_ports/mem.h"
+#include "aom_ports/msvc.h"  // for round()
+
+using libaom_test::ACMRandom;
+
+namespace {
+
+const int kNumCoeffs = 256;  // 16 * 16 entries in one transform block
+const double C1 = 0.995184726672197;  // Ck == cos(k * pi / 32), k = 1..15
+const double C2 = 0.98078528040323;
+const double C3 = 0.956940335732209;
+const double C4 = 0.923879532511287;
+const double C5 = 0.881921264348355;
+const double C6 = 0.831469612302545;
+const double C7 = 0.773010453362737;
+const double C8 = 0.707106781186548;
+const double C9 = 0.634393284163646;
+const double C10 = 0.555570233019602;
+const double C11 = 0.471396736825998;
+const double C12 = 0.38268343236509;
+const double C13 = 0.290284677254462;
+const double C14 = 0.195090322016128;
+const double C15 = 0.098017140329561;
+
+void butterfly_16x16_dct_1d(double input[16], double output[16]) {
+  double step[16];
+  double intermediate[16];
+  double temp1, temp2;
+  // Double-precision 16-point 1-D reference transform: input[] -> output[].
+  // step 1: mirrored sums -> step[0..7], mirrored differences -> step[8..15]
+  step[0] = input[0] + input[15];
+  step[1] = input[1] + input[14];
+  step[2] = input[2] + input[13];
+  step[3] = input[3] + input[12];
+  step[4] = input[4] + input[11];
+  step[5] = input[5] + input[10];
+  step[6] = input[6] + input[9];
+  step[7] = input[7] + input[8];
+  step[8] = input[7] - input[8];
+  step[9] = input[6] - input[9];
+  step[10] = input[5] - input[10];
+  step[11] = input[4] - input[11];
+  step[12] = input[3] - input[12];
+  step[13] = input[2] - input[13];
+  step[14] = input[1] - input[14];
+  step[15] = input[0] - input[15];
+
+  // step 2: output[] doubles as scratch; 8..15 mix step[k] with step[23 - k]
+  output[0] = step[0] + step[7];
+  output[1] = step[1] + step[6];
+  output[2] = step[2] + step[5];
+  output[3] = step[3] + step[4];
+  output[4] = step[3] - step[4];
+  output[5] = step[2] - step[5];
+  output[6] = step[1] - step[6];
+  output[7] = step[0] - step[7];
+
+  temp1 = step[8] * C7;
+  temp2 = step[15] * C9;
+  output[8] = temp1 + temp2;
+
+  temp1 = step[9] * C11;
+  temp2 = step[14] * C5;
+  output[9] = temp1 - temp2;
+
+  temp1 = step[10] * C3;
+  temp2 = step[13] * C13;
+  output[10] = temp1 + temp2;
+
+  temp1 = step[11] * C15;
+  temp2 = step[12] * C1;
+  output[11] = temp1 - temp2;
+
+  temp1 = step[11] * C1;
+  temp2 = step[12] * C15;
+  output[12] = temp2 + temp1;
+
+  temp1 = step[10] * C13;
+  temp2 = step[13] * C3;
+  output[13] = temp2 - temp1;
+
+  temp1 = step[9] * C5;
+  temp2 = step[14] * C11;
+  output[14] = temp2 + temp1;
+
+  temp1 = step[8] * C9;
+  temp2 = step[15] * C7;
+  output[15] = temp2 - temp1;
+
+  // step 3: the step-2 values in output[] are combined back into step[]
+  step[0] = output[0] + output[3];
+  step[1] = output[1] + output[2];
+  step[2] = output[1] - output[2];
+  step[3] = output[0] - output[3];
+
+  temp1 = output[4] * C14;
+  temp2 = output[7] * C2;
+  step[4] = temp1 + temp2;
+
+  temp1 = output[5] * C10;
+  temp2 = output[6] * C6;
+  step[5] = temp1 + temp2;
+
+  temp1 = output[5] * C6;
+  temp2 = output[6] * C10;
+  step[6] = temp2 - temp1;
+
+  temp1 = output[4] * C2;
+  temp2 = output[7] * C14;
+  step[7] = temp2 - temp1;
+
+  step[8] = output[8] + output[11];
+  step[9] = output[9] + output[10];
+  step[10] = output[9] - output[10];
+  step[11] = output[8] - output[11];
+
+  step[12] = output[12] + output[15];
+  step[13] = output[13] + output[14];
+  step[14] = output[13] - output[14];
+  step[15] = output[12] - output[15];
+
+  // step 4: final coefficients; intermediate[8..15] holds partial terms
+  output[0] = (step[0] + step[1]);
+  output[8] = (step[0] - step[1]);
+
+  temp1 = step[2] * C12;
+  temp2 = step[3] * C4;
+  temp1 = temp1 + temp2;
+  output[4] = 2 * (temp1 * C8);
+
+  temp1 = step[2] * C4;
+  temp2 = step[3] * C12;
+  temp1 = temp2 - temp1;
+  output[12] = 2 * (temp1 * C8);
+
+  output[2] = 2 * ((step[4] + step[5]) * C8);
+  output[14] = 2 * ((step[7] - step[6]) * C8);
+
+  temp1 = step[4] - step[5];
+  temp2 = step[6] + step[7];
+  output[6] = (temp1 + temp2);
+  output[10] = (temp1 - temp2);
+
+  intermediate[8] = step[8] + step[14];
+  intermediate[9] = step[9] + step[15];
+
+  temp1 = intermediate[8] * C12;
+  temp2 = intermediate[9] * C4;
+  temp1 = temp1 - temp2;
+  output[3] = 2 * (temp1 * C8);
+
+  temp1 = intermediate[8] * C4;
+  temp2 = intermediate[9] * C12;
+  temp1 = temp2 + temp1;
+  output[13] = 2 * (temp1 * C8);
+
+  output[9] = 2 * ((step[10] + step[11]) * C8);
+
+  intermediate[11] = step[10] - step[11];
+  intermediate[12] = step[12] + step[13];
+  intermediate[13] = step[12] - step[13];
+  intermediate[14] = step[8] - step[14];
+  intermediate[15] = step[9] - step[15];
+
+  output[15] = (intermediate[11] + intermediate[12]);
+  output[1] = -(intermediate[11] - intermediate[12]);
+
+  output[7] = 2 * (intermediate[13] * C8);
+
+  temp1 = intermediate[14] * C12;
+  temp2 = intermediate[15] * C4;
+  temp1 = temp1 - temp2;
+  output[11] = -2 * (temp1 * C8);
+
+  temp1 = intermediate[14] * C4;
+  temp2 = intermediate[15] * C12;
+  temp1 = temp2 + temp1;
+  output[5] = 2 * (temp1 * C8);
+}
+
+void reference_16x16_dct_2d(int16_t input[256], double output[256]) {
+  // Pass 1: apply the 1-D transform to each of the 16 columns of input.
+  for (int c = 0; c < 16; ++c) {
+    double line_in[16], line_out[16];
+    for (int r = 0; r < 16; ++r) line_in[r] = input[r * 16 + c];
+    butterfly_16x16_dct_1d(line_in, line_out);
+    for (int r = 0; r < 16; ++r) output[r * 16 + c] = line_out[r];
+  }
+  // Pass 2: apply it to each of the 16 rows of the pass-1 result, in place.
+  for (int r = 0; r < 16; ++r) {
+    double line_in[16], line_out[16];
+    for (int c = 0; c < 16; ++c) line_in[c] = output[r * 16 + c];
+    butterfly_16x16_dct_1d(line_in, line_out);
+    // Every coefficient is halved when it is stored back.
+    for (int c = 0; c < 16; ++c) output[r * 16 + c] = line_out[c] / 2;
+  }
+}
+
+typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
+typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
+typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
+                        int tx_type);
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                        int tx_type);
+// Test parameter tuples; the fixtures' SetUp() methods unpack them in order.
+typedef std::tr1::tuple<FdctFunc, IdctFunc, int, aom_bit_depth_t> Dct16x16Param;
+typedef std::tr1::tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t> Ht16x16Param;
+typedef std::tr1::tuple<IdctFunc, IdctFunc, int, aom_bit_depth_t>
+    Idct16x16Param;
+
+void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride, int) {
+  // Adapter with the FhtFunc signature; the transform-type argument is unused.
+  aom_fdct16x16_c(in, out, stride);
+}
+
+void idct16x16_ref(const tran_low_t *in, uint8_t *dest, int stride, int) {
+  // Adapter with the IhtFunc signature; the transform-type argument is unused.
+  aom_idct16x16_256_add_c(in, dest, stride);
+}
+
+void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
+  av1_fht16x16_c(in, out, stride, tx_type);  // forwards to the C version
+}
+
+void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
+                  int tx_type) {
+  av1_iht16x16_256_add_c(in, dest, stride, tx_type);  // forwards to the C version
+}
+
+#if CONFIG_HIGHBITDEPTH
+void iht16x16_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  av1_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 10);  // presumably bd
+}
+
+void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  av1_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 12);  // presumably bd
+}
+#endif  // CONFIG_HIGHBITDEPTH
+
+class Trans16x16TestBase {  // checks shared by the 16x16 transform fixtures
+ public:
+  virtual ~Trans16x16TestBase() {}
+
+ protected:
+  virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
+
+  virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;
+  // Forward + inverse round trip; bounds the max and total squared error.
+  void RunAccuracyCheck() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    uint32_t max_error = 0;
+    int64_t total_error = 0;
+    const int count_test_block = 10000;
+    for (int i = 0; i < count_test_block; ++i) {
+      DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
+      DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
+      DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+      DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
+#if CONFIG_HIGHBITDEPTH
+      DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+      DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
+#endif
+
+      // Initialize a test block with input range [-mask_, mask_].
+      for (int j = 0; j < kNumCoeffs; ++j) {
+        if (bit_depth_ == AOM_BITS_8) {
+          src[j] = rnd.Rand8();
+          dst[j] = rnd.Rand8();
+          test_input_block[j] = src[j] - dst[j];
+#if CONFIG_HIGHBITDEPTH
+        } else {
+          src16[j] = rnd.Rand16() & mask_;
+          dst16[j] = rnd.Rand16() & mask_;
+          test_input_block[j] = src16[j] - dst16[j];
+#endif
+        }
+      }
+
+      ASM_REGISTER_STATE_CHECK(
+          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
+      if (bit_depth_ == AOM_BITS_8) {
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
+#if CONFIG_HIGHBITDEPTH
+      } else {
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
+#endif
+      }
+
+      for (int j = 0; j < kNumCoeffs; ++j) {
+#if CONFIG_HIGHBITDEPTH
+        const int32_t diff =
+            bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+#else
+        const int32_t diff = dst[j] - src[j];
+#endif
+        const uint32_t error = diff * diff;
+        if (max_error < error) max_error = error;
+        total_error += error;
+      }
+    }
+
+    EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error)
+        << "Error: 16x16 FHT/IHT has an individual round trip error > 1";
+
+    EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
+        << "Error: 16x16 FHT/IHT has average round trip error > 1 per block";
+  }
+  // RunFwdTxfm must reproduce fwd_txfm_ref exactly on random input.
+  void RunCoeffCheck() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 1000;
+    DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
+
+    for (int i = 0; i < count_test_block; ++i) {
+      // Initialize a test block with input range [-mask_, mask_].
+      for (int j = 0; j < kNumCoeffs; ++j)
+        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+
+      fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
+      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
+
+      // Every coefficient must equal the reference output.
+      for (int j = 0; j < kNumCoeffs; ++j)
+        EXPECT_EQ(output_block[j], output_ref_block[j]);
+    }
+  }
+  // As RunCoeffCheck, but on extreme inputs and with a magnitude bound.
+  void RunMemCheck() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 1000;
+    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
+
+    for (int i = 0; i < count_test_block; ++i) {
+      // Fill the block with extremes: each entry is either mask_ or -mask_.
+      for (int j = 0; j < kNumCoeffs; ++j) {
+        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
+      }
+      if (i == 0) {
+        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_;
+      } else if (i == 1) {
+        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_;
+      }
+
+      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
+      ASM_REGISTER_STATE_CHECK(
+          RunFwdTxfm(input_extreme_block, output_block, pitch_));
+
+      // Match the reference and stay within 4 * DCT_MAX_VALUE, depth-scaled.
+      for (int j = 0; j < kNumCoeffs; ++j) {
+        EXPECT_EQ(output_block[j], output_ref_block[j]);
+        EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
+            << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
+      }
+    }
+  }
+  // Quantizes reference coefficients, then compares RunInvTxfm to inv_txfm_ref.
+  void RunQuantCheck(int dc_thred, int ac_thred) {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 100000;
+    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
+
+    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
+#if CONFIG_HIGHBITDEPTH
+    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
+#endif
+
+    for (int i = 0; i < count_test_block; ++i) {
+      // Fill the block with extremes: each entry is either mask_ or -mask_.
+      for (int j = 0; j < kNumCoeffs; ++j) {
+        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
+      }
+      if (i == 0)
+        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_;
+      if (i == 1)
+        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_;
+
+      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
+
+      // clear reconstructed pixel buffers
+      memset(dst, 0, kNumCoeffs * sizeof(uint8_t));
+      memset(ref, 0, kNumCoeffs * sizeof(uint8_t));
+#if CONFIG_HIGHBITDEPTH
+      memset(dst16, 0, kNumCoeffs * sizeof(uint16_t));
+      memset(ref16, 0, kNumCoeffs * sizeof(uint16_t));
+#endif
+
+      // quantization with maximum allowed step sizes
+      output_ref_block[0] = (output_ref_block[0] / dc_thred) * dc_thred;
+      for (int j = 1; j < kNumCoeffs; ++j)
+        output_ref_block[j] = (output_ref_block[j] / ac_thred) * ac_thred;
+      if (bit_depth_ == AOM_BITS_8) {
+        inv_txfm_ref(output_ref_block, ref, pitch_, tx_type_);
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
+#if CONFIG_HIGHBITDEPTH
+      } else {
+        inv_txfm_ref(output_ref_block, CONVERT_TO_BYTEPTR(ref16), pitch_,
+                     tx_type_);
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(output_ref_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
+#endif
+      }
+      if (bit_depth_ == AOM_BITS_8) {
+        for (int j = 0; j < kNumCoeffs; ++j) EXPECT_EQ(ref[j], dst[j]);
+#if CONFIG_HIGHBITDEPTH
+      } else {
+        for (int j = 0; j < kNumCoeffs; ++j) EXPECT_EQ(ref16[j], dst16[j]);
+#endif
+      }
+    }
+  }
+  // Inverse-transforms the double-precision reference DCT; error must be <= 1.
+  void RunInvAccuracyCheck() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 1000;
+    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
+#if CONFIG_HIGHBITDEPTH
+    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
+#endif  // CONFIG_HIGHBITDEPTH
+
+    for (int i = 0; i < count_test_block; ++i) {
+      double out_r[kNumCoeffs];
+
+      // Residual range: [-255, 255] for 8 bit, [-mask_, mask_] otherwise.
+      for (int j = 0; j < kNumCoeffs; ++j) {
+        if (bit_depth_ == AOM_BITS_8) {
+          src[j] = rnd.Rand8();
+          dst[j] = rnd.Rand8();
+          in[j] = src[j] - dst[j];
+#if CONFIG_HIGHBITDEPTH
+        } else {
+          src16[j] = rnd.Rand16() & mask_;
+          dst16[j] = rnd.Rand16() & mask_;
+          in[j] = src16[j] - dst16[j];
+#endif  // CONFIG_HIGHBITDEPTH
+        }
+      }
+
+      reference_16x16_dct_2d(in, out_r);
+      for (int j = 0; j < kNumCoeffs; ++j)
+        coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
+
+      if (bit_depth_ == AOM_BITS_8) {
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
+#if CONFIG_HIGHBITDEPTH
+      } else {
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), 16));
+#endif  // CONFIG_HIGHBITDEPTH
+      }
+
+      for (int j = 0; j < kNumCoeffs; ++j) {
+#if CONFIG_HIGHBITDEPTH
+        const int diff =
+            bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+#else
+        const int diff = dst[j] - src[j];
+#endif  // CONFIG_HIGHBITDEPTH
+        const uint32_t error = diff * diff;
+        EXPECT_GE(1u, error) << "Error: 16x16 IDCT has error " << error
+                             << " at index " << j;
+      }
+    }
+  }
+  // Sparse coefficients (eob = 10): RunInvTxfm must match ref_txfm exactly.
+  void CompareInvReference(IdctFunc ref_txfm, int thresh) {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 10000;
+    const int eob = 10;
+    const int16_t *scan = av1_default_scan_orders[TX_16X16].scan;
+    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
+#if CONFIG_HIGHBITDEPTH
+    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
+#endif  // CONFIG_HIGHBITDEPTH
+
+    for (int i = 0; i < count_test_block; ++i) {
+      for (int j = 0; j < kNumCoeffs; ++j) {
+        if (j < eob) {
+          // Random values less than the threshold, either positive or negative
+          coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
+        } else {
+          coeff[scan[j]] = 0;
+        }
+        if (bit_depth_ == AOM_BITS_8) {
+          dst[j] = 0;
+          ref[j] = 0;
+#if CONFIG_HIGHBITDEPTH
+        } else {
+          dst16[j] = 0;
+          ref16[j] = 0;
+#endif  // CONFIG_HIGHBITDEPTH
+        }
+      }
+      if (bit_depth_ == AOM_BITS_8) {
+        ref_txfm(coeff, ref, pitch_);
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
+      } else {
+#if CONFIG_HIGHBITDEPTH
+        ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
+#endif  // CONFIG_HIGHBITDEPTH
+      }
+
+      for (int j = 0; j < kNumCoeffs; ++j) {
+#if CONFIG_HIGHBITDEPTH
+        const int diff =
+            bit_depth_ == AOM_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
+#else
+        const int diff = dst[j] - ref[j];
+#endif  // CONFIG_HIGHBITDEPTH
+        const uint32_t error = diff * diff;
+        EXPECT_EQ(0u, error) << "Error: 16x16 IDCT Comparison has error "
+                             << error << " at index " << j;
+      }
+    }
+  }
+
+  int pitch_;  // stride handed to the transforms
+  int tx_type_;
+  aom_bit_depth_t bit_depth_;
+  int mask_;  // bound on the magnitude of generated samples
+  FhtFunc fwd_txfm_ref;  // reference forward transform
+  IhtFunc inv_txfm_ref;  // reference inverse transform
+};
+
+class Trans16x16DCT : public Trans16x16TestBase,
+                      public ::testing::TestWithParam<Dct16x16Param> {
+ public:
+  virtual ~Trans16x16DCT() {}
+
+  virtual void SetUp() {
+    // Parameter order: (forward fn, inverse fn, tx_type, bit depth).
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    tx_type_ = GET_PARAM(2);
+    bit_depth_ = GET_PARAM(3);
+    pitch_ = 16;
+    fwd_txfm_ref = fdct16x16_ref;
+    inv_txfm_ref = idct16x16_ref;
+    mask_ = (1 << bit_depth_) - 1;
+  }
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride);
+  }
+  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride);
+  }
+
+  FdctFunc fwd_txfm_;
+  IdctFunc inv_txfm_;
+};
+
+TEST_P(Trans16x16DCT, AccuracyCheck) { RunAccuracyCheck(); }
+
+TEST_P(Trans16x16DCT, CoeffCheck) { RunCoeffCheck(); }
+
+TEST_P(Trans16x16DCT, MemCheck) { RunMemCheck(); }
+
+TEST_P(Trans16x16DCT, QuantCheck) {
+  // Use the maximally allowed quantization step sizes: dc_thred = 1336 for
+  // the DC coefficient and ac_thred = 1828 for every AC coefficient.
+  RunQuantCheck(1336, 1828);
+}
+
+TEST_P(Trans16x16DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); }
+
+class Trans16x16HT : public Trans16x16TestBase,
+                     public ::testing::TestWithParam<Ht16x16Param> {
+ public:
+  virtual ~Trans16x16HT() {}
+
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    tx_type_ = GET_PARAM(2);
+    bit_depth_ = GET_PARAM(3);
+    pitch_ = 16;
+    fwd_txfm_ref = fht16x16_ref;
+    // inv_txfm_ref is selected below; both preprocessor branches assign it.
+    mask_ = (1 << bit_depth_) - 1;
+#if CONFIG_HIGHBITDEPTH
+    switch (bit_depth_) {
+      case AOM_BITS_10: inv_txfm_ref = iht16x16_10; break;
+      case AOM_BITS_12: inv_txfm_ref = iht16x16_12; break;
+      default: inv_txfm_ref = iht16x16_ref; break;
+    }
+#else
+    inv_txfm_ref = iht16x16_ref;
+#endif
+  }
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride, tx_type_);
+  }
+  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride, tx_type_);
+  }
+
+  FhtFunc fwd_txfm_;
+  IhtFunc inv_txfm_;
+};
+
+TEST_P(Trans16x16HT, AccuracyCheck) { RunAccuracyCheck(); }
+
+TEST_P(Trans16x16HT, CoeffCheck) { RunCoeffCheck(); }
+
+TEST_P(Trans16x16HT, MemCheck) { RunMemCheck(); }
+
+TEST_P(Trans16x16HT, QuantCheck) {
+  // Original note: the encoder skips any non-DC intra prediction modes when the
+  // quantization step size goes beyond 988. Here dc_thred = 429, ac_thred = 729.
+  RunQuantCheck(429, 729);
+}
+
+class InvTrans16x16DCT : public Trans16x16TestBase,
+                         public ::testing::TestWithParam<Idct16x16Param> {
+ public:
+  virtual ~InvTrans16x16DCT() {}
+
+  virtual void SetUp() {
+    pitch_ = 16;
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;
+    thresh_ = GET_PARAM(2);
+    inv_txfm_ = GET_PARAM(1);
+    ref_txfm_ = GET_PARAM(0);
+  }
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(int16_t *, tran_low_t *, int) {}  // intentionally a no-op
+  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride);
+  }
+
+  IdctFunc ref_txfm_;
+  IdctFunc inv_txfm_;
+  int thresh_;
+};
+
+TEST_P(InvTrans16x16DCT, CompareReference) {
+  CompareInvReference(ref_txfm_, thresh_);  // tested inverse vs. ref_txfm_
+}
+
+class PartialTrans16x16Test : public ::testing::TestWithParam<
+                                  std::tr1::tuple<FdctFunc, aom_bit_depth_t> > {
+ public:
+  virtual ~PartialTrans16x16Test() {}
+  virtual void SetUp() {
+    bit_depth_ = GET_PARAM(1);  // second tuple element
+    fwd_txfm_ = GET_PARAM(0);   // first tuple element: transform under test
+  }
+  // Calls libaom_test::ClearSystemState() after every test.
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  aom_bit_depth_t bit_depth_;
+  FdctFunc fwd_txfm_;
+};
+
+TEST_P(PartialTrans16x16Test, Extremes) {
+#if CONFIG_HIGHBITDEPTH
+  const int16_t maxval =
+      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
+#else
+  const int16_t maxval = 255;
+#endif
+  const int minval = -maxval;
+  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
+  // All-maxval input: only output[0] is checked, against (sum of inputs) >> 1.
+  for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval;
+  output[0] = 0;
+  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
+  EXPECT_EQ((maxval * kNumCoeffs) >> 1, output[0]);
+  // All-minval input: the same check with every sample negated.
+  for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval;
+  output[0] = 0;
+  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
+  EXPECT_EQ((minval * kNumCoeffs) >> 1, output[0]);
+}
+
+TEST_P(PartialTrans16x16Test, Random) {
+#if CONFIG_HIGHBITDEPTH
+  const int16_t maxval =
+      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
+#else
+  const int16_t maxval = 255;
+#endif
+  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  // Odd indices get negated random values; sum tracks the total of all inputs.
+  int sum = 0;
+  for (int i = 0; i < kNumCoeffs; ++i) {
+    const int val = (i & 1) ? -rnd(maxval + 1) : rnd(maxval + 1);
+    input[i] = val;
+    sum += val;
+  }
+  output[0] = 0;
+  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
+  EXPECT_EQ(sum >> 1, output[0]);  // only the first output is checked
+}
+
+using std::tr1::make_tuple;
+
+#if CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(C, Trans16x16DCT,
+                        ::testing::Values(make_tuple(&aom_fdct16x16_c,
+                                                     &aom_idct16x16_256_add_c,
+                                                     0, AOM_BITS_8)));
+#else  // NOTE(review): same arguments as the CONFIG_HIGHBITDEPTH branch above
+INSTANTIATE_TEST_CASE_P(C, Trans16x16DCT,
+                        ::testing::Values(make_tuple(&aom_fdct16x16_c,
+                                                     &aom_idct16x16_256_add_c,
+                                                     0, AOM_BITS_8)));
+#endif  // CONFIG_HIGHBITDEPTH
+
+#if CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    C, Trans16x16HT,
+    ::testing::Values(
+        make_tuple(&av1_highbd_fht16x16_c, &iht16x16_10, 0, AOM_BITS_10),
+        make_tuple(&av1_highbd_fht16x16_c, &iht16x16_10, 1, AOM_BITS_10),
+        make_tuple(&av1_highbd_fht16x16_c, &iht16x16_10, 2, AOM_BITS_10),
+        make_tuple(&av1_highbd_fht16x16_c, &iht16x16_10, 3, AOM_BITS_10),
+        make_tuple(&av1_highbd_fht16x16_c, &iht16x16_12, 0, AOM_BITS_12),
+        make_tuple(&av1_highbd_fht16x16_c, &iht16x16_12, 1, AOM_BITS_12),
+        make_tuple(&av1_highbd_fht16x16_c, &iht16x16_12, 2, AOM_BITS_12),
+        make_tuple(&av1_highbd_fht16x16_c, &iht16x16_12, 3, AOM_BITS_12),
+        make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 0, AOM_BITS_8),
+        make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 1, AOM_BITS_8),
+        make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 2, AOM_BITS_8),
+        make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 3, AOM_BITS_8)));
+INSTANTIATE_TEST_CASE_P(
+    C, PartialTrans16x16Test,
+    ::testing::Values(make_tuple(&aom_highbd_fdct16x16_1_c, AOM_BITS_8),
+                      make_tuple(&aom_highbd_fdct16x16_1_c, AOM_BITS_10),
+                      make_tuple(&aom_highbd_fdct16x16_1_c, AOM_BITS_12)));
+#else
+INSTANTIATE_TEST_CASE_P(
+    C, Trans16x16HT,
+    ::testing::Values(
+        make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 0, AOM_BITS_8),
+        make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 1, AOM_BITS_8),
+        make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 2, AOM_BITS_8),
+        make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 3, AOM_BITS_8)));
+INSTANTIATE_TEST_CASE_P(C, PartialTrans16x16Test,
+                        ::testing::Values(make_tuple(&aom_fdct16x16_1_c,
+                                                     AOM_BITS_8)));
+#endif  // CONFIG_HIGHBITDEPTH
+
+#if HAVE_NEON_ASM && !CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    NEON, Trans16x16DCT,
+    ::testing::Values(make_tuple(&aom_fdct16x16_c, &aom_idct16x16_256_add_neon,
+                                 0, AOM_BITS_8)));
+#endif
+
+#if HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Trans16x16DCT,
+    ::testing::Values(make_tuple(&aom_fdct16x16_sse2,
+                                 &aom_idct16x16_256_add_sse2, 0, AOM_BITS_8)));
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Trans16x16HT,
+    ::testing::Values(make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2,
+                                 0, AOM_BITS_8),
+                      make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2,
+                                 1, AOM_BITS_8),
+                      make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2,
+                                 2, AOM_BITS_8),
+                      make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2,
+                                 3, AOM_BITS_8)));
+INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test,
+                        ::testing::Values(make_tuple(&aom_fdct16x16_1_sse2,
+                                                     AOM_BITS_8)));
+#endif  // HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
+
+#if HAVE_AVX2 && !CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(AVX2, PartialTrans16x16Test,
+                        ::testing::Values(make_tuple(&aom_fdct16x16_1_avx2,
+                                                     AOM_BITS_8)));
+#endif  // HAVE_AVX2 && !CONFIG_HIGHBITDEPTH
+
+#if HAVE_SSE2 && CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(SSE2, Trans16x16DCT,
+                        ::testing::Values(make_tuple(&aom_fdct16x16_sse2,
+                                                     &aom_idct16x16_256_add_c,
+                                                     0, AOM_BITS_8)));
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Trans16x16HT,
+    ::testing::Values(
+        make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c, 0, AOM_BITS_8),
+        make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c, 1, AOM_BITS_8),
+        make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c, 2, AOM_BITS_8),
+        make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c, 3,
+                   AOM_BITS_8)));
+// TODO(luoyi):
+// For this test case, we should test function: aom_highbd_fdct16x16_1_sse2.
+// However, this function is not available yet. If we mistakenly test
+// aom_fdct16x16_1_sse2, it could only pass AOM_BITS_8/AOM_BITS_10 but not
+// AOM_BITS_12.
+INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test,
+                        ::testing::Values(make_tuple(&aom_fdct16x16_1_sse2,
+                                                     AOM_BITS_8)));
+#endif  // HAVE_SSE2 && CONFIG_HIGHBITDEPTH
+
+#if HAVE_MSA && !CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(MSA, Trans16x16DCT,
+                        ::testing::Values(make_tuple(&aom_fdct16x16_msa,
+                                                     &aom_idct16x16_256_add_msa,
+                                                     0, AOM_BITS_8)));
+#if !CONFIG_EXT_TX
+// TODO(yaowu): re-enable this after msa versions are updated to match C.
+INSTANTIATE_TEST_CASE_P(
+    DISABLED_MSA, Trans16x16HT,
+    ::testing::Values(
+        make_tuple(&av1_fht16x16_msa, &av1_iht16x16_256_add_msa, 0, AOM_BITS_8),
+        make_tuple(&av1_fht16x16_msa, &av1_iht16x16_256_add_msa, 1, AOM_BITS_8),
+        make_tuple(&av1_fht16x16_msa, &av1_iht16x16_256_add_msa, 2, AOM_BITS_8),
+        make_tuple(&av1_fht16x16_msa, &av1_iht16x16_256_add_msa, 3,
+                   AOM_BITS_8)));
+#endif  // !CONFIG_EXT_TX
+INSTANTIATE_TEST_CASE_P(MSA, PartialTrans16x16Test,
+                        ::testing::Values(make_tuple(&aom_fdct16x16_1_msa,
+                                                     AOM_BITS_8)));
+#endif  // HAVE_MSA && !CONFIG_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/dct32x32_test.cc b/third_party/aom/test/dct32x32_test.cc
new file mode 100644
index 000000000..7c1db6501
--- /dev/null
+++ b/third_party/aom/test/dct32x32_test.cc
@@ -0,0 +1,438 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./av1_rtcd.h"
+#include "./aom_config.h"
+#include "./aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "av1/common/entropy.h"
+#include "aom/aom_codec.h"
+#include "aom/aom_integer.h"
+#include "aom_ports/mem.h"
+#include "aom_ports/msvc.h"  // for round()
+
+using libaom_test::ACMRandom;
+
+namespace {
+
+const int kNumCoeffs = 1024;
+const double kPi = 3.141592653589793238462643383279502884;
+void reference_32x32_dct_1d(const double in[32], double out[32]) {
+  const double kInvSqrt2 = 0.707106781186547524400844362104;  // DC scaling
+  for (int k = 0; k < 32; ++k) {  // unnormalized DCT-II, one output per k
+    double acc = 0.0;
+    for (int n = 0; n < 32; ++n)
+      acc += in[n] * cos(kPi * (2 * n + 1) * k / 64.0);
+    out[k] = (k == 0) ? acc * kInvSqrt2 : acc;
+  }
+}
+
+void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
+                            double output[kNumCoeffs]) {
+  // Pass 1: run the 1-D reference DCT down each of the 32 columns.
+  for (int col = 0; col < 32; ++col) {
+    double line_in[32], line_out[32];
+    for (int r = 0; r < 32; ++r) line_in[r] = input[r * 32 + col];
+    reference_32x32_dct_1d(line_in, line_out);
+    for (int r = 0; r < 32; ++r) output[r * 32 + col] = line_out[r];
+  }
+  // Pass 2: run it along each row of the column-transformed data.
+  for (int row = 0; row < 32; ++row) {
+    double line_in[32], line_out[32];
+    for (int c = 0; c < 32; ++c) line_in[c] = output[row * 32 + c];
+    reference_32x32_dct_1d(line_in, line_out);
+    // Divide by 4: the overall scale factor applied to the 2-D output.
+    for (int c = 0; c < 32; ++c) output[row * 32 + c] = line_out[c] / 4;
+  }
+}
+
+typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
+typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
+
+typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, aom_bit_depth_t>
+    Trans32x32Param;
+
+class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> {
+ public:
+  virtual ~Trans32x32Test() {}
+  virtual void SetUp() {  // unpack the Trans32x32Param tuple for this case
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    version_ = GET_PARAM(2);  // 0: high precision forward transform
+                              // 1: low precision version for rd loop
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;  // low bit_depth_ bits set
+  }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  int version_;                // 0 or 1, see SetUp()
+  aom_bit_depth_t bit_depth_;  // tuple element 3
+  int mask_;                   // (1 << bit_depth_) - 1
+  FwdTxfmFunc fwd_txfm_;       // forward transform under test
+  InvTxfmFunc inv_txfm_;       // inverse transform under test
+};
+
+TEST_P(Trans32x32Test, AccuracyCheck) {  // fwd + inv round-trip error bounds
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  uint32_t max_error = 0;   // largest squared per-sample error seen so far
+  int64_t total_error = 0;  // squared error summed over every block
+  const int count_test_block = 10000;
+  DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
+#if CONFIG_HIGHBITDEPTH
+  DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
+#endif
+
+  for (int i = 0; i < count_test_block; ++i) {
+    // Initialize a test block with input range [-mask_, mask_].
+    for (int j = 0; j < kNumCoeffs; ++j) {
+      if (bit_depth_ == AOM_BITS_8) {
+        src[j] = rnd.Rand8();
+        dst[j] = rnd.Rand8();
+        test_input_block[j] = src[j] - dst[j];
+#if CONFIG_HIGHBITDEPTH
+      } else {
+        src16[j] = rnd.Rand16() & mask_;
+        dst16[j] = rnd.Rand16() & mask_;
+        test_input_block[j] = src16[j] - dst16[j];
+#endif
+      }
+    }
+
+    ASM_REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32));
+    if (bit_depth_ == AOM_BITS_8) {
+      ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
+#if CONFIG_HIGHBITDEPTH
+    } else {
+      ASM_REGISTER_STATE_CHECK(
+          inv_txfm_(test_temp_block, CONVERT_TO_BYTEPTR(dst16), 32));
+#endif
+    }
+
+    for (int j = 0; j < kNumCoeffs; ++j) {  // compare dst against src
+#if CONFIG_HIGHBITDEPTH
+      const int32_t diff =
+          bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+#else
+      const int32_t diff = dst[j] - src[j];
+#endif
+      const uint32_t error = diff * diff;
+      if (max_error < error) max_error = error;
+      total_error += error;
+    }
+  }
+
+  if (version_ == 1) {  // low precision rd version: relax both limits
+    max_error /= 2;
+    total_error /= 45;
+  }
+
+  EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error)
+      << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1";
+
+  EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
+      << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block";
+}
+
+TEST_P(Trans32x32Test, CoeffCheck) {  // fwd_txfm_ vs. aom_fdct32x32_c output
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int count_test_block = 1000;
+
+  DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
+
+  for (int i = 0; i < count_test_block; ++i) {
+    for (int j = 0; j < kNumCoeffs; ++j)  // random residual in [-mask_, mask_]
+      input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+
+    const int stride = 32;
+    aom_fdct32x32_c(input_block, output_ref_block, stride);
+    ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));
+
+    if (version_ == 0) {  // high precision version: must match exactly
+      for (int j = 0; j < kNumCoeffs; ++j)
+        EXPECT_EQ(output_block[j], output_ref_block[j])
+            << "Error: 32x32 FDCT versions have mismatched coefficients";
+    } else {  // low precision rd version: allow a difference of up to 6
+      for (int j = 0; j < kNumCoeffs; ++j)
+        EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
+            << "Error: 32x32 FDCT rd has mismatched coefficients";
+    }
+  }
+}
+
+TEST_P(Trans32x32Test, MemCheck) {  // extreme-valued input blocks
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int count_test_block = 2000;
+
+  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
+
+  for (int i = 0; i < count_test_block; ++i) {
+    // Initialize a test block where each sample is either mask_ or -mask_.
+    for (int j = 0; j < kNumCoeffs; ++j) {
+      input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_;
+    }
+    if (i == 0) {  // first block: every sample is mask_
+      for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_;
+    } else if (i == 1) {  // second block: every sample is -mask_
+      for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_;
+    }
+
+    const int stride = 32;
+    aom_fdct32x32_c(input_extreme_block, output_ref_block, stride);
+    ASM_REGISTER_STATE_CHECK(
+        fwd_txfm_(input_extreme_block, output_block, stride));
+
+    // The minimum quant value is 4.
+    for (int j = 0; j < kNumCoeffs; ++j) {
+      if (version_ == 0) {  // high precision version: must match exactly
+        EXPECT_EQ(output_block[j], output_ref_block[j])
+            << "Error: 32x32 FDCT versions have mismatched coefficients";
+      } else {  // low precision rd version: allow a difference of up to 6
+        EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
+            << "Error: 32x32 FDCT rd has mismatched coefficients";
+      }
+      EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_ref_block[j]))
+          << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
+      EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
+          << "Error: 32x32 FDCT has coefficient larger than "
+          << "4*DCT_MAX_VALUE";
+    }
+  }
+}
+
+TEST_P(Trans32x32Test, InverseAccuracy) {  // inv_txfm_ vs. double reference
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int count_test_block = 1000;
+  DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
+#if CONFIG_HIGHBITDEPTH
+  DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
+#endif
+
+  for (int i = 0; i < count_test_block; ++i) {
+    double out_r[kNumCoeffs];  // output of the floating-point reference DCT
+
+    // Initialize a test block with input range [-mask_, mask_].
+    for (int j = 0; j < kNumCoeffs; ++j) {
+      if (bit_depth_ == AOM_BITS_8) {
+        src[j] = rnd.Rand8();
+        dst[j] = rnd.Rand8();
+        in[j] = src[j] - dst[j];
+#if CONFIG_HIGHBITDEPTH
+      } else {
+        src16[j] = rnd.Rand16() & mask_;
+        dst16[j] = rnd.Rand16() & mask_;
+        in[j] = src16[j] - dst16[j];
+#endif
+      }
+    }
+
+    reference_32x32_dct_2d(in, out_r);
+    for (int j = 0; j < kNumCoeffs; ++j)  // round reference to integer coeffs
+      coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
+    if (bit_depth_ == AOM_BITS_8) {
+      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
+#if CONFIG_HIGHBITDEPTH
+    } else {
+      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CONVERT_TO_BYTEPTR(dst16), 32));
+#endif
+    }
+    for (int j = 0; j < kNumCoeffs; ++j) {  // compare dst against src
+#if CONFIG_HIGHBITDEPTH
+      const int diff =
+          bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+#else
+      const int diff = dst[j] - src[j];
+#endif
+      const int error = diff * diff;  // squared per-sample error, must be <= 1
+      EXPECT_GE(1, error) << "Error: 32x32 IDCT has error " << error
+                          << " at index " << j;
+    }
+  }
+}
+
+class PartialTrans32x32Test
+    : public ::testing::TestWithParam<
+          std::tr1::tuple<FwdTxfmFunc, aom_bit_depth_t> > {
+ public:
+  virtual ~PartialTrans32x32Test() {}
+  virtual void SetUp() {  // unpack the (function, bit depth) parameter tuple
+    fwd_txfm_ = GET_PARAM(0);
+    bit_depth_ = GET_PARAM(1);
+  }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  aom_bit_depth_t bit_depth_;  // tuple element 1
+  FwdTxfmFunc fwd_txfm_;       // tuple element 0: transform under test
+};
+
+TEST_P(PartialTrans32x32Test, Extremes) {  // all-max and all-min input blocks
+#if CONFIG_HIGHBITDEPTH
+  const int16_t maxval =
+      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
+#else
+  const int16_t maxval = 255;
+#endif
+  const int minval = -maxval;
+  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
+
+  for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval;
+  output[0] = 0;
+  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
+  EXPECT_EQ((maxval * kNumCoeffs) >> 3, output[0]);  // sum of inputs >> 3
+
+  for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval;
+  output[0] = 0;
+  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
+  EXPECT_EQ((minval * kNumCoeffs) >> 3, output[0]);  // sum of inputs >> 3
+}
+
+TEST_P(PartialTrans32x32Test, Random) {  // alternating-sign random input
+#if CONFIG_HIGHBITDEPTH
+  const int16_t maxval =
+      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
+#else
+  const int16_t maxval = 255;
+#endif
+  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+  int sum = 0;  // running sum of every input sample
+  for (int i = 0; i < kNumCoeffs; ++i) {
+    const int val = (i & 1) ? -rnd(maxval + 1) : rnd(maxval + 1);
+    input[i] = val;
+    sum += val;
+  }
+  output[0] = 0;
+  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
+  EXPECT_EQ(sum >> 3, output[0]);  // only output[0] is checked
+}
+
+using std::tr1::make_tuple;
+
+#if CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    C, Trans32x32Test,
+    ::testing::Values(make_tuple(&aom_fdct32x32_c, &aom_idct32x32_1024_add_c, 0,
+                                 AOM_BITS_8),
+                      make_tuple(&aom_fdct32x32_rd_c, &aom_idct32x32_1024_add_c,
+                                 1, AOM_BITS_8)));
+INSTANTIATE_TEST_CASE_P(
+    C, PartialTrans32x32Test,
+    ::testing::Values(make_tuple(&aom_highbd_fdct32x32_1_c, AOM_BITS_8),
+                      make_tuple(&aom_highbd_fdct32x32_1_c, AOM_BITS_10),
+                      make_tuple(&aom_highbd_fdct32x32_1_c, AOM_BITS_12)));
+#else
+INSTANTIATE_TEST_CASE_P(
+    C, Trans32x32Test,
+    ::testing::Values(make_tuple(&aom_fdct32x32_c, &aom_idct32x32_1024_add_c, 0,
+                                 AOM_BITS_8),
+                      make_tuple(&aom_fdct32x32_rd_c, &aom_idct32x32_1024_add_c,
+                                 1, AOM_BITS_8)));
+INSTANTIATE_TEST_CASE_P(C, PartialTrans32x32Test,
+                        ::testing::Values(make_tuple(&aom_fdct32x32_1_c,
+                                                     AOM_BITS_8)));
+#endif  // CONFIG_HIGHBITDEPTH
+
+#if HAVE_NEON && !CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    NEON, Trans32x32Test,
+    ::testing::Values(make_tuple(&aom_fdct32x32_c, &aom_idct32x32_1024_add_neon,
+                                 0, AOM_BITS_8),
+                      make_tuple(&aom_fdct32x32_rd_c,
+                                 &aom_idct32x32_1024_add_neon, 1, AOM_BITS_8)));
+#endif  // HAVE_NEON && !CONFIG_HIGHBITDEPTH
+
+#if HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Trans32x32Test,
+    ::testing::Values(make_tuple(&aom_fdct32x32_sse2,
+                                 &aom_idct32x32_1024_add_sse2, 0, AOM_BITS_8),
+                      make_tuple(&aom_fdct32x32_rd_sse2,
+                                 &aom_idct32x32_1024_add_sse2, 1, AOM_BITS_8)));
+INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
+                        ::testing::Values(make_tuple(&aom_fdct32x32_1_sse2,
+                                                     AOM_BITS_8)));
+#endif  // HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
+
+#if HAVE_AVX2 && !CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(AVX2, PartialTrans32x32Test,
+                        ::testing::Values(make_tuple(&aom_fdct32x32_1_avx2,
+                                                     AOM_BITS_8)));
+#endif  // HAVE_AVX2 && !CONFIG_HIGHBITDEPTH
+
+#if HAVE_SSE2 && CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Trans32x32Test,
+    ::testing::Values(make_tuple(&aom_fdct32x32_sse2, &aom_idct32x32_1024_add_c,
+                                 0, AOM_BITS_8),
+                      make_tuple(&aom_fdct32x32_rd_sse2,
+                                 &aom_idct32x32_1024_add_c, 1, AOM_BITS_8)));
+INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
+                        ::testing::Values(make_tuple(&aom_fdct32x32_1_sse2,
+                                                     AOM_BITS_8)));
+#endif  // HAVE_SSE2 && CONFIG_HIGHBITDEPTH
+
+#if HAVE_AVX2 && !CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    AVX2, Trans32x32Test,
+    ::testing::Values(make_tuple(&aom_fdct32x32_avx2,
+                                 &aom_idct32x32_1024_add_sse2, 0, AOM_BITS_8),
+                      make_tuple(&aom_fdct32x32_rd_avx2,
+                                 &aom_idct32x32_1024_add_sse2, 1, AOM_BITS_8)));
+#endif  // HAVE_AVX2 && !CONFIG_HIGHBITDEPTH
+
+#if HAVE_AVX2 && CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    AVX2, Trans32x32Test,
+    ::testing::Values(make_tuple(&aom_fdct32x32_avx2,
+                                 &aom_idct32x32_1024_add_sse2, 0, AOM_BITS_8),
+                      make_tuple(&aom_fdct32x32_rd_avx2,
+                                 &aom_idct32x32_1024_add_sse2, 1, AOM_BITS_8)));
+#endif  // HAVE_AVX2 && CONFIG_HIGHBITDEPTH
+
+#if HAVE_MSA && !CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    MSA, Trans32x32Test,
+    ::testing::Values(make_tuple(&aom_fdct32x32_msa,
+                                 &aom_idct32x32_1024_add_msa, 0, AOM_BITS_8),
+                      make_tuple(&aom_fdct32x32_rd_msa,
+                                 &aom_idct32x32_1024_add_msa, 1, AOM_BITS_8)));
+INSTANTIATE_TEST_CASE_P(MSA, PartialTrans32x32Test,
+                        ::testing::Values(make_tuple(&aom_fdct32x32_1_msa,
+                                                     AOM_BITS_8)));
+#endif  // HAVE_MSA && !CONFIG_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/decode_api_test.cc b/third_party/aom/test/decode_api_test.cc
new file mode 100644
index 000000000..6bd72a45d
--- /dev/null
+++ b/third_party/aom/test/decode_api_test.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "test/ivf_video_source.h"
+#include "aom/aomdx.h"
+#include "aom/aom_decoder.h"
+
+namespace {
+
+#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0]))
+
+TEST(DecodeAPI, InvalidParams) {  // decoder API must reject bad arguments
+  static const aom_codec_iface_t *kCodecs[] = {
+#if CONFIG_AV1_DECODER
+    &aom_codec_av1_dx_algo,
+#endif
+  };
+  uint8_t buf[1] = { 0 };  // one zero byte, expected to be rejected below
+  aom_codec_ctx_t dec;
+
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_dec_init(NULL, NULL, NULL, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_dec_init(&dec, NULL, NULL, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_decode(NULL, NULL, 0, NULL, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_decode(NULL, buf, 0, NULL, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+            aom_codec_decode(NULL, buf, NELEMENTS(buf), NULL, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+            aom_codec_decode(NULL, NULL, NELEMENTS(buf), NULL, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_destroy(NULL));
+  EXPECT_TRUE(aom_codec_error(NULL) != NULL);
+
+  for (int i = 0; i < NELEMENTS(kCodecs); ++i) {  // per-codec checks
+    EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+              aom_codec_dec_init(NULL, kCodecs[i], NULL, 0));
+
+    EXPECT_EQ(AOM_CODEC_OK, aom_codec_dec_init(&dec, kCodecs[i], NULL, 0));
+    EXPECT_EQ(AOM_CODEC_UNSUP_BITSTREAM,
+              aom_codec_decode(&dec, buf, NELEMENTS(buf), NULL, 0));
+    EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+              aom_codec_decode(&dec, NULL, NELEMENTS(buf), NULL, 0));
+    EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_decode(&dec, buf, 0, NULL, 0));
+
+    EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&dec));
+  }
+}
+
+}  // namespace
diff --git a/third_party/aom/test/decode_perf_test.cc b/third_party/aom/test/decode_perf_test.cc
new file mode 100644
index 000000000..ede4f8849
--- /dev/null
+++ b/third_party/aom/test/decode_perf_test.cc
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <string>
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/ivf_video_source.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#include "test/webm_video_source.h"
+#include "aom_ports/aom_timer.h"
+#include "./ivfenc.h"
+#include "./aom_version.h"
+
+using std::tr1::make_tuple;
+
+namespace {
+
+#define VIDEO_NAME 0
+#define THREADS 1
+
+const int kMaxPsnr = 100;
+const double kUsecsInSec = 1000000.0;
+const char kNewEncodeOutputFile[] = "new_encode.ivf";
+
+/*
+ DecodePerfTest takes a tuple of filename + number of threads to decode with
+ */
+typedef std::tr1::tuple<const char *, unsigned> DecodePerfParam;
+
+// TODO(jimbankoski): Add actual test vectors here when available.
+// const DecodePerfParam kAV1DecodePerfVectors[] = {};
+
+/*
+ In order to reflect real world performance as much as possible, Perf tests
+ *DO NOT* do any correctness checks. Please run them alongside correctness
+ tests to ensure proper codec integrity. Furthermore, in this test we
+ deliberately limit the amount of system calls we make to avoid OS
+ preemption.
+
+ TODO(joshualitt) create a more detailed perf measurement test to collect
+   power/temp/min max frame decode times/etc
+ */
+
+class DecodePerfTest : public ::testing::TestWithParam<DecodePerfParam> {};
+
+TEST_P(DecodePerfTest, PerfTest) {  // times a full decode, prints JSON stats
+  const char *const video_name = GET_PARAM(VIDEO_NAME);
+  const unsigned threads = GET_PARAM(THREADS);
+
+  libaom_test::WebMVideoSource video(video_name);
+  video.Init();
+
+  aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
+  cfg.threads = threads;
+  libaom_test::AV1Decoder decoder(cfg, 0);
+
+  aom_usec_timer t;
+  aom_usec_timer_start(&t);  // timed region starts here
+
+  for (video.Begin(); video.cxdata() != NULL; video.Next()) {
+    decoder.DecodeFrame(video.cxdata(), video.frame_size());
+  }
+
+  aom_usec_timer_mark(&t);  // timed region ends here
+  const double elapsed_secs = double(aom_usec_timer_elapsed(&t)) / kUsecsInSec;
+  const unsigned frames = video.frame_number();
+  const double fps = double(frames) / elapsed_secs;
+
+  printf("{\n");
+  printf("\t\"type\" : \"decode_perf_test\",\n");
+  printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
+  printf("\t\"videoName\" : \"%s\",\n", video_name);
+  printf("\t\"threadCount\" : %u,\n", threads);
+  printf("\t\"decodeTimeSecs\" : %f,\n", elapsed_secs);
+  printf("\t\"totalFrames\" : %u,\n", frames);
+  printf("\t\"framesPerSecond\" : %f\n", fps);
+  printf("}\n");
+}
+
+// TODO(jimbankoski): Enabled when we have actual AV1 Decode vectors.
+// INSTANTIATE_TEST_CASE_P(AV1, DecodePerfTest,
+//                        ::testing::ValuesIn(kAV1DecodePerfVectors));
+
+class AV1NewEncodeDecodePerfTest
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWithParam<libaom_test::TestMode> {
+ protected:
+  AV1NewEncodeDecodePerfTest()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), speed_(0),
+        outfile_(0), out_frames_(0) {}
+
+  virtual ~AV1NewEncodeDecodePerfTest() {}
+
+  virtual void SetUp() {  // fixed encoder configuration for the perf run
+    InitializeConfig();
+    SetMode(encoding_mode_);
+
+    cfg_.g_lag_in_frames = 25;
+    cfg_.rc_min_quantizer = 2;
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_undershoot_pct = 50;
+    cfg_.rc_overshoot_pct = 50;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 600;
+    cfg_.rc_resize_allowed = 0;
+    cfg_.rc_end_usage = AOM_VBR;
+  }
+
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 1) {  // apply the encoder controls once
+      encoder->Control(AOME_SET_CPUUSED, speed_);
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, 2);
+    }
+  }
+
+  virtual void BeginPassHook(unsigned int /*pass*/) {
+    const char *const dir = getenv("LIBAOM_TEST_DATA_PATH");  // NULL if unset
+    ASSERT_TRUE(dir != NULL) << "LIBAOM_TEST_DATA_PATH is not set";
+    const std::string path = std::string(dir) + "/" + kNewEncodeOutputFile;
+    outfile_ = fopen(path.c_str(), "wb");
+    ASSERT_TRUE(outfile_ != NULL);
+  }
+
+  virtual void EndPassHook() {
+    if (outfile_ != NULL) {
+      if (!fseek(outfile_, 0, SEEK_SET))  // rewrite header with final count
+        ivf_write_file_header(outfile_, &cfg_, AV1_FOURCC, out_frames_);
+      fclose(outfile_);
+      outfile_ = NULL;
+    }
+  }
+
+  virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
+    if (outfile_ == NULL) return;  // BeginPassHook() failed; already reported
+    ++out_frames_;
+    // Write initial file header if first frame.
+    if (pkt->data.frame.pts == 0)
+      ivf_write_file_header(outfile_, &cfg_, AV1_FOURCC, out_frames_);
+
+    // Write frame header and data.
+    ivf_write_frame_header(outfile_, out_frames_, pkt->data.frame.sz);
+    ASSERT_EQ(fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_),
+              pkt->data.frame.sz);
+  }
+
+  virtual bool DoDecode() { return false; }
+  void set_speed(unsigned int speed) { speed_ = speed; }
+
+ private:
+  libaom_test::TestMode encoding_mode_;  // second test parameter
+  uint32_t speed_;                       // value sent as AOME_SET_CPUUSED
+  FILE *outfile_;                        // IVF output, NULL while closed
+  uint32_t out_frames_;                  // frame packets written so far
+};
+
+struct EncodePerfTestVideo {
+  EncodePerfTestVideo(const char *name_, uint32_t width_, uint32_t height_,
+                      uint32_t bitrate_, int frames_)
+      : name(name_), width(width_), height(height_), bitrate(bitrate_),
+        frames(frames_) {}
+  const char *name;  // source file name handed to I420VideoSource
+  uint32_t width;    // frame width handed to I420VideoSource
+  uint32_t height;   // frame height handed to I420VideoSource
+  uint32_t bitrate;  // assigned to cfg_.rc_target_bitrate by the test
+  int frames;        // frame count handed to I420VideoSource
+};
+
+const EncodePerfTestVideo kAV1EncodePerfTestVectors[] = {
+  EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470),
+};
+
+TEST_P(AV1NewEncodeDecodePerfTest, PerfTest) {  // encode, then time a decode
+  SetUp();  // NOTE(review): gtest also calls SetUp() itself - confirm intent
+
+  // TODO(JBB): Make this work by going through the set of given files.
+  const int i = 0;
+  const aom_rational timebase = { 33333333, 1000000000 };
+  cfg_.g_timebase = timebase;
+  cfg_.rc_target_bitrate = kAV1EncodePerfTestVectors[i].bitrate;
+
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  const char *video_name = kAV1EncodePerfTestVectors[i].name;
+  libaom_test::I420VideoSource video(
+      video_name, kAV1EncodePerfTestVectors[i].width,
+      kAV1EncodePerfTestVectors[i].height, timebase.den, timebase.num, 0,
+      kAV1EncodePerfTestVectors[i].frames);
+  set_speed(2);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));  // encode to kNewEncodeOutputFile
+
+  const uint32_t threads = 4;  // decoder thread count (cfg.threads)
+
+  libaom_test::IVFVideoSource decode_video(kNewEncodeOutputFile);
+  decode_video.Init();
+
+  aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
+  cfg.threads = threads;
+  libaom_test::AV1Decoder decoder(cfg, 0);
+
+  aom_usec_timer t;
+  aom_usec_timer_start(&t);  // timed region starts here
+
+  for (decode_video.Begin(); decode_video.cxdata() != NULL;
+       decode_video.Next()) {
+    decoder.DecodeFrame(decode_video.cxdata(), decode_video.frame_size());
+  }
+
+  aom_usec_timer_mark(&t);  // timed region ends here
+  const double elapsed_secs =
+      static_cast<double>(aom_usec_timer_elapsed(&t)) / kUsecsInSec;
+  const unsigned decode_frames = decode_video.frame_number();
+  const double fps = static_cast<double>(decode_frames) / elapsed_secs;
+
+  printf("{\n");
+  printf("\t\"type\" : \"decode_perf_test\",\n");
+  printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
+  printf("\t\"videoName\" : \"%s\",\n", kNewEncodeOutputFile);
+  printf("\t\"threadCount\" : %u,\n", threads);
+  printf("\t\"decodeTimeSecs\" : %f,\n", elapsed_secs);
+  printf("\t\"totalFrames\" : %u,\n", decode_frames);
+  printf("\t\"framesPerSecond\" : %f\n", fps);
+  printf("}\n");
+}
+
+AV1_INSTANTIATE_TEST_CASE(AV1NewEncodeDecodePerfTest,
+                          ::testing::Values(::libaom_test::kTwoPassGood));
+}  // namespace
diff --git a/third_party/aom/test/decode_test_driver.cc b/third_party/aom/test/decode_test_driver.cc
new file mode 100644
index 000000000..35c28eafd
--- /dev/null
+++ b/third_party/aom/test/decode_test_driver.cc
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/register_state_check.h"
+#include "test/video_source.h"
+
+namespace libaom_test {
+
+const char kVP8Name[] = "WebM Project VP8";
+const char kAV1Name[] = "AOMedia Project AV1 Decoder";
+
+aom_codec_err_t Decoder::PeekStream(const uint8_t *cxdata, size_t size,
+                                    aom_codec_stream_info_t *stream_info) {
+  const unsigned int sz = static_cast<unsigned int>(size);  // API takes uint
+  return aom_codec_peek_stream_info(CodecInterface(), cxdata, sz, stream_info);
+}
+
+aom_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size) {
+  return DecodeFrame(cxdata, size, NULL);  // overload without user_priv
+}
+
+aom_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size,
+                                     void *user_priv) {
+  aom_codec_err_t res_dec;
+  InitOnce();  // NOTE(review): presumably lazy decoder init - confirm
+  API_REGISTER_STATE_CHECK(
+      res_dec = aom_codec_decode(
+          &decoder_, cxdata, static_cast<unsigned int>(size), user_priv, 0));
+  return res_dec;  // status reported by aom_codec_decode()
+}
+
+bool Decoder::IsVP8() const {
+  // Prefix comparison: only the first sizeof(kVP8Name) - 1 chars must match.
+  return strncmp(kVP8Name, GetDecoderName(), sizeof(kVP8Name) - 1) == 0;
+}
+
+bool Decoder::IsAV1() const {
+  // Prefix comparison: only the first sizeof(kAV1Name) - 1 chars must match.
+  return strncmp(kAV1Name, GetDecoderName(), sizeof(kAV1Name) - 1) == 0;
+}
+
+// Validates the status returned by Decoder::PeekStream() for the frame
+// |video| is currently positioned on. An unexpected status is reported
+// through ASSERT_EQ, i.e. as a fatal gtest failure raised in this function.
+void DecoderTest::HandlePeekResult(Decoder *const decoder,
+                                   CompressedVideoSource *video,
+                                   const aom_codec_err_t res_peek) {
+  /* Vp8's implementation of PeekStream returns an error if the frame you
+   * pass it is not a keyframe, so we only expect AOM_CODEC_OK on the first
+   * frame, which must be a keyframe. Later Vp8 frames are not checked. */
+  if (decoder->IsVP8() && video->frame_number() != 0) return;
+
+  /* The Av1 implementation of PeekStream returns an error only if the
+   * data passed to it isn't a valid Av1 chunk, so every frame is checked.
+   * The first Vp8 frame is checked by the same assertion. */
+  ASSERT_EQ(AOM_CODEC_OK, res_peek) << "Peek return failed: "
+                                    << aom_codec_err_to_string(res_peek);
+}
+
+
+void DecoderTest::RunLoop(CompressedVideoSource *video,
+                          const aom_codec_dec_cfg_t &dec_cfg) {
+  Decoder *const decoder = codec_->CreateDecoder(dec_cfg, flags_);
+  ASSERT_TRUE(decoder != NULL);
+  // The ASSERT_* macros below return early; the guard still frees |decoder|.
+  struct Guard { Decoder *d; ~Guard() { delete d; } } guard = { decoder };
+  bool end_of_file = false;
+
+  // Decode frames.
+  for (video->Begin(); !::testing::Test::HasFailure() && !end_of_file;
+       video->Next()) {
+    PreDecodeFrameHook(*video, decoder);
+
+    aom_codec_stream_info_t stream_info;
+    stream_info.sz = sizeof(stream_info);
+
+    if (video->cxdata() != NULL) {
+      const aom_codec_err_t res_peek = decoder->PeekStream(
+          video->cxdata(), video->frame_size(), &stream_info);
+      HandlePeekResult(decoder, video, res_peek);
+      ASSERT_FALSE(::testing::Test::HasFailure());
+
+      aom_codec_err_t res_dec =
+          decoder->DecodeFrame(video->cxdata(), video->frame_size());
+      if (!HandleDecodeResult(res_dec, decoder)) break;
+    } else {
+      // Signal end of the file to the decoder.
+      const aom_codec_err_t res_dec = decoder->DecodeFrame(NULL, 0);
+      ASSERT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+      end_of_file = true;
+    }
+
+    DxDataIterator dec_iter = decoder->GetDxData();
+    const aom_image_t *img = NULL;
+    // Get decompressed data
+    while ((img = dec_iter.Next()))
+      DecompressedFrameHook(*img, video->frame_number());
+  }
+}
+
+void DecoderTest::RunLoop(CompressedVideoSource *video) {
+  aom_codec_dec_cfg_t dec_cfg = aom_codec_dec_cfg_t();  // value-initialized
+  RunLoop(video, dec_cfg);
+}
+
+void DecoderTest::set_cfg(const aom_codec_dec_cfg_t &dec_cfg) {
+  memcpy(&cfg_, &dec_cfg, sizeof(cfg_));  // byte-wise copy into cfg_
+}
+
+void DecoderTest::set_flags(const aom_codec_flags_t flags) { flags_ = flags; }
+
+}  // namespace libaom_test
diff --git a/third_party/aom/test/decode_test_driver.h b/third_party/aom/test/decode_test_driver.h
new file mode 100644
index 000000000..e7deb389c
--- /dev/null
+++ b/third_party/aom/test/decode_test_driver.h
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef TEST_DECODE_TEST_DRIVER_H_
+#define TEST_DECODE_TEST_DRIVER_H_
+#include <cstring>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "./aom_config.h"
+#include "aom/aom_decoder.h"
+
+namespace libaom_test {
+
+class CodecFactory;
+class CompressedVideoSource;
+
+// Iterates over a decoder's output frames via aom_codec_get_frame().
+class DxDataIterator {
+ public:
+  explicit DxDataIterator(aom_codec_ctx_t *decoder)
+      : decoder_(decoder), iter_(NULL) {}
+
+  const aom_image_t *Next() { return aom_codec_get_frame(decoder_, &iter_); }
+
+ private:
+  aom_codec_ctx_t *decoder_;  // Not owned; never freed by this class.
+  aom_codec_iter_t iter_;     // Iteration state handed to aom_codec_get_frame().
+};
+
+// Provides a simplified interface to manage one video decoding. The codec
+// context is initialized lazily by InitOnce() and destroyed in the destructor;
+// subclasses supply the codec interface through CodecInterface().
+class Decoder {
+ public:
+  explicit Decoder(aom_codec_dec_cfg_t cfg)
+      : cfg_(cfg), flags_(0), init_done_(false) {
+    memset(&decoder_, 0, sizeof(decoder_));
+  }
+
+  Decoder(aom_codec_dec_cfg_t cfg, const aom_codec_flags_t flag)
+      : cfg_(cfg), flags_(flag), init_done_(false) {
+    memset(&decoder_, 0, sizeof(decoder_));
+  }
+
+  virtual ~Decoder() { aom_codec_destroy(&decoder_); }
+
+  aom_codec_err_t PeekStream(const uint8_t *cxdata, size_t size,
+                             aom_codec_stream_info_t *stream_info);
+
+  aom_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size);
+
+  aom_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size,
+                              void *user_priv);
+
+  DxDataIterator GetDxData() { return DxDataIterator(&decoder_); }
+
+  void Control(int ctrl_id, int arg) { Control(ctrl_id, arg, AOM_CODEC_OK); }
+
+  void Control(int ctrl_id, const void *arg) {
+    InitOnce();
+    const aom_codec_err_t res = aom_codec_control_(&decoder_, ctrl_id, arg);
+    ASSERT_EQ(AOM_CODEC_OK, res) << DecodeError();
+  }
+
+  void Control(int ctrl_id, int arg, aom_codec_err_t expected_value) {
+    InitOnce();
+    const aom_codec_err_t res = aom_codec_control_(&decoder_, ctrl_id, arg);
+    ASSERT_EQ(expected_value, res) << DecodeError();
+  }
+  // Returns the codec's detailed error text, or its generic error if none.
+  const char *DecodeError() {
+    const char *detail = aom_codec_error_detail(&decoder_);
+    return detail ? detail : aom_codec_error(&decoder_);
+  }
+
+  // Passes the external frame buffer information to libaom.
+  aom_codec_err_t SetFrameBufferFunctions(
+      aom_get_frame_buffer_cb_fn_t cb_get,
+      aom_release_frame_buffer_cb_fn_t cb_release, void *user_priv) {
+    InitOnce();
+    return aom_codec_set_frame_buffer_functions(&decoder_, cb_get, cb_release,
+                                                user_priv);
+  }
+
+  const char *GetDecoderName() const {
+    return aom_codec_iface_name(CodecInterface());
+  }
+
+  bool IsVP8() const;
+
+  bool IsAV1() const;
+
+  aom_codec_ctx_t *GetDecoder() { return &decoder_; }
+
+ protected:
+  virtual aom_codec_iface_t *CodecInterface() const = 0;
+
+  void InitOnce() {  // NOTE: a failed ASSERT_EQ returns from InitOnce() only.
+    if (!init_done_) {
+      const aom_codec_err_t res =
+          aom_codec_dec_init(&decoder_, CodecInterface(), &cfg_, flags_);
+      ASSERT_EQ(AOM_CODEC_OK, res) << DecodeError();
+      init_done_ = true;
+    }
+  }
+
+  aom_codec_ctx_t decoder_;  // Zeroed by both constructors.
+  aom_codec_dec_cfg_t cfg_;  // Passed to aom_codec_dec_init() by InitOnce().
+  aom_codec_flags_t flags_;  // Passed to aom_codec_dec_init() by InitOnce().
+  bool init_done_;           // Set once aom_codec_dec_init() has succeeded.
+};
+
+// Common test functionality for all Decoder tests: a decode loop plus hooks.
+class DecoderTest {
+ public:
+  // Main decoding loop; the one-argument form uses a default decoder config.
+  virtual void RunLoop(CompressedVideoSource *video);
+  virtual void RunLoop(CompressedVideoSource *video,
+                       const aom_codec_dec_cfg_t &dec_cfg);
+
+  virtual void set_cfg(const aom_codec_dec_cfg_t &dec_cfg);
+  virtual void set_flags(const aom_codec_flags_t flags);
+
+  // Hook to be called before decompressing every frame.
+  virtual void PreDecodeFrameHook(const CompressedVideoSource & /*video*/,
+                                  Decoder * /*decoder*/) {}
+
+  // Hook to be called to handle decode result. Return true to continue.
+  virtual bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                                  Decoder *decoder) {
+    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+    return AOM_CODEC_OK == res_dec;
+  }
+
+  // Hook to be called on every decompressed frame.
+  virtual void DecompressedFrameHook(const aom_image_t & /*img*/,
+                                     const unsigned int /*frame_number*/) {}
+
+  // Hook to be called with the PeekStream() result of each compressed frame.
+  virtual void HandlePeekResult(Decoder *const decoder,
+                                CompressedVideoSource *video,
+                                const aom_codec_err_t res_peek);
+
+ protected:
+  explicit DecoderTest(const CodecFactory *codec)
+      : codec_(codec), cfg_(), flags_(0) {}
+
+  virtual ~DecoderTest() {}
+
+  const CodecFactory *codec_;  // Not owned; RunLoop() creates decoders from it.
+  aom_codec_dec_cfg_t cfg_;    // Written by set_cfg().
+  aom_codec_flags_t flags_;    // Passed to CreateDecoder() by RunLoop().
+};
+
+}  // namespace libaom_test
+
+#endif  // TEST_DECODE_TEST_DRIVER_H_
diff --git a/third_party/aom/test/decode_to_md5.sh b/third_party/aom/test/decode_to_md5.sh
new file mode 100755
index 000000000..44c9f5f05
--- /dev/null
+++ b/third_party/aom/test/decode_to_md5.sh
@@ -0,0 +1,67 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file tests the libaom decode_to_md5 example. To add new tests to this
+## file, do the following:
+##   1. Write a shell function (this is your test).
+##   2. Add the function to decode_to_md5_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: Make sure input is available:
+#   $AV1_IVF_FILE must exist unless the AV1 encoder is available to create it.
+decode_to_md5_verify_environment() {
+  if [ "$(av1_encode_available)" != "yes" ] && [ ! -e "${AV1_IVF_FILE}" ]; then
+    return 1
+  fi
+}
+
+# Runs the decode_to_md5 binary on $1. $2 is the codec name, used solely to
+# name the output file. $3 is the expected md5 sum of the final frame. Returns
+# 1 if the binary is not executable, no output exists, or the md5 differs.
+decode_to_md5() {
+  local decoder="${LIBAOM_BIN_PATH}/decode_to_md5${AOM_TEST_EXE_SUFFIX}"
+  local input_file="$1"
+  local codec="$2"
+  local expected_md5="$3"
+  local output_file="${AOM_TEST_OUTPUT_DIR}/decode_to_md5_${codec}"
+
+  if [ ! -x "${decoder}" ]; then
+    elog "${decoder} does not exist or is not executable."
+    return 1
+  fi
+
+  eval "${AOM_TEST_PREFIX}" "${decoder}" "${input_file}" "${output_file}" \
+      ${devnull}
+
+  [ -e "${output_file}" ] || return 1
+
+  # The md5 is the first whitespace-separated field of the last output line.
+  local actual_md5="$(tail -n1 "${output_file}" | awk '{print $1}')"
+  [ "${actual_md5}" = "${expected_md5}" ] || return 1
+}
+
+decode_to_md5_av1() {
+  # Nothing to do (and not a failure) when the AV1 decoder is unavailable.
+  [ "$(av1_decode_available)" = "yes" ] || return 0
+  # Expected MD5 sum for the last frame.
+  local expected_md5="26d3ef1d60754a1f6acb603c3763efbe"
+  local file="${AV1_IVF_FILE}"
+  if [ ! -e "${file}" ]; then
+    # No input IVF present: produce one with encode_yuv_raw_input_av1.
+    file="${AOM_TEST_OUTPUT_DIR}/test_encode.ivf"
+    encode_yuv_raw_input_av1 "${file}" --ivf
+  fi
+  decode_to_md5 "${file}" "av1" "${expected_md5}"
+}
+
+decode_to_md5_tests="decode_to_md5_av1"
+
+run_tests decode_to_md5_verify_environment "${decode_to_md5_tests}"
diff --git a/third_party/aom/test/decode_with_drops.sh b/third_party/aom/test/decode_with_drops.sh
new file mode 100755
index 000000000..5978312f2
--- /dev/null
+++ b/third_party/aom/test/decode_with_drops.sh
@@ -0,0 +1,67 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file tests the libaom decode_with_drops example. To add new tests to
+## this file, do the following:
+##   1. Write a shell function (this is your test).
+##   2. Add the function to decode_with_drops_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: Make sure input is available:
+#   $AV1_IVF_FILE must exist unless the AV1 encoder is available to create it.
+decode_with_drops_verify_environment() {
+  if [ "$(av1_encode_available)" != "yes" ] && [ ! -e "${AV1_IVF_FILE}" ]; then
+    return 1
+  fi
+}
+
+# Runs the decode_with_drops binary on $1. $2 is the codec name, used only to
+# name the output file; $3 is the drop mode, passed through unchanged. Returns
+# 1 if the binary is not executable or no output file exists afterwards.
+decode_with_drops() {
+  local decoder="${LIBAOM_BIN_PATH}/decode_with_drops${AOM_TEST_EXE_SUFFIX}"
+  local input_file="$1"
+  local codec="$2"
+  local output_file="${AOM_TEST_OUTPUT_DIR}/decode_with_drops_${codec}"
+  local drop_mode="$3"
+
+  if [ ! -x "${decoder}" ]; then
+    elog "${decoder} does not exist or is not executable."
+    return 1
+  fi
+
+  eval "${AOM_TEST_PREFIX}" "${decoder}" "${input_file}" "${output_file}" \
+      "${drop_mode}" ${devnull}
+
+  [ -e "${output_file}" ] || return 1
+}
+
+
+# Decodes $AV1_IVF_FILE while dropping frames, twice: once in sequence mode,
+# and once in pattern mode. Fails if either of the two decodes fails.
+decode_with_drops_av1() {
+  if [ "$(av1_decode_available)" = "yes" ]; then
+    local file="${AV1_IVF_FILE}"
+    if [ ! -e "${AV1_IVF_FILE}" ]; then
+      file="${AOM_TEST_OUTPUT_DIR}/test_encode.ivf"
+      encode_yuv_raw_input_av1 "${file}" --ivf
+    fi
+    # Drop frames 2 and 3. Propagate failure without relying on errexit.
+    decode_with_drops "${file}" "av1" "2-3" || return 1
+
+    # Test pattern mode: Drop 3 of every 4 frames.
+    decode_with_drops "${file}" "av1" "3/4"
+  fi
+}
+
+decode_with_drops_tests="decode_with_drops_av1"
+
+run_tests decode_with_drops_verify_environment "${decode_with_drops_tests}"
diff --git a/third_party/aom/test/dering_test.cc b/third_party/aom/test/dering_test.cc
new file mode 100644
index 000000000..195a60ff8
--- /dev/null
+++ b/third_party/aom/test/dering_test.cc
@@ -0,0 +1,388 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <cstdlib>
+#include <string>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "./av1_rtcd.h"
+#include "aom_ports/aom_timer.h"
+#include "av1/common/od_dering.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+
+typedef std::tr1::tuple<od_filter_dering_direction_func,
+                        od_filter_dering_direction_func, int>
+    dering_dir_param_t;
+// Fixture parameterized on (function under test, reference function, size).
+class CDEFDeringDirTest : public ::testing::TestWithParam<dering_dir_param_t> {
+ public:
+  virtual ~CDEFDeringDirTest() {}
+  virtual void SetUp() {
+    dering = GET_PARAM(0);
+    ref_dering = GET_PARAM(1);
+    bsize = GET_PARAM(2);
+  }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  int bsize;                                   // GET_PARAM(2)
+  od_filter_dering_direction_func dering;      // GET_PARAM(0): under test
+  od_filter_dering_direction_func ref_dering;  // GET_PARAM(1): reference
+};
+// Alias used to declare and instantiate the speed tests.
+typedef CDEFDeringDirTest CDEFDeringSpeedTest;
+
+void test_dering(int bsize, int iterations,
+                 od_filter_dering_direction_func dering,
+                 od_filter_dering_direction_func ref_dering) {
+  const int size = 8;  // Width/height of the d and ref_d output buffers.
+  const int ysize = size + 2 * OD_FILT_VBORDER;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, uint16_t, s[ysize * OD_FILT_BSTRIDE]);
+  DECLARE_ALIGNED(16, static uint16_t, d[size * size]);
+  DECLARE_ALIGNED(16, static uint16_t, ref_d[size * size]);
+  memset(ref_d, 0, sizeof(ref_d));
+  memset(d, 0, sizeof(d));
+
+  int error = 0, threshold = 0, dir;
+  int boundary, damping, depth, bits, level, count,
+      errdepth = 0, errthreshold = 0, errboundary = 0, errdamping = 0;
+  unsigned int pos = 0;
+  // Compare dering with ref_dering over every parameter combination below.
+  for (boundary = 0; boundary < 16; boundary++) {
+    for (depth = 8; depth <= 12; depth += 2) {
+      for (damping = 5 + depth - 8; damping < 7 + depth - 8; damping++) {
+        for (count = 0; count < iterations; count++) {
+          for (level = 0; level < (1 << depth) && !error;
+               level += (1 + 4 * !!boundary) << (depth - 8)) {
+            for (bits = 1; bits <= depth && !error; bits++) {
+              for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
+                s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
+                             (1 << depth) - 1);
+              if (boundary) {  // Bit mask of borders set to the marker value.
+                if (boundary & 1) {  // Left
+                  for (int i = 0; i < ysize; i++)
+                    for (int j = 0; j < OD_FILT_HBORDER; j++)
+                      s[i * OD_FILT_BSTRIDE + j] = OD_DERING_VERY_LARGE;
+                }
+                if (boundary & 2) {  // Right
+                  for (int i = 0; i < ysize; i++)
+                    for (int j = OD_FILT_HBORDER + size; j < OD_FILT_BSTRIDE;
+                         j++)
+                      s[i * OD_FILT_BSTRIDE + j] = OD_DERING_VERY_LARGE;
+                }
+                if (boundary & 4) {  // Above
+                  for (int i = 0; i < OD_FILT_VBORDER; i++)
+                    for (int j = 0; j < OD_FILT_BSTRIDE; j++)
+                      s[i * OD_FILT_BSTRIDE + j] = OD_DERING_VERY_LARGE;
+                }
+                if (boundary & 8) {  // Below
+                  for (int i = OD_FILT_VBORDER + size; i < ysize; i++)
+                    for (int j = 0; j < OD_FILT_BSTRIDE; j++)
+                      s[i * OD_FILT_BSTRIDE + j] = OD_DERING_VERY_LARGE;
+                }
+              }
+              for (dir = 0; dir < 8; dir++) {
+                for (threshold = 0; threshold < 64 << (depth - 8) && !error;
+                     threshold += (1 + 4 * !!boundary) << (depth - 8)) {
+                  ref_dering(ref_d, size, s + OD_FILT_HBORDER +
+                                              OD_FILT_VBORDER * OD_FILT_BSTRIDE,
+                             threshold, dir, damping);
+                  // If dering and ref_dering are the same, we're just testing
+                  // speed
+                  if (dering != ref_dering)
+                    ASM_REGISTER_STATE_CHECK(dering(
+                        d, size,
+                        s + OD_FILT_HBORDER + OD_FILT_VBORDER * OD_FILT_BSTRIDE,
+                        threshold, dir, damping));
+                  if (ref_dering != dering) {
+                    for (pos = 0; pos < sizeof(d) / sizeof(*d) && !error;
+                         pos++) {
+                      error = ref_d[pos] != d[pos];
+                      errdepth = depth;
+                      errthreshold = threshold;
+                      errboundary = boundary;
+                      errdamping = damping;
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  // On a mismatch pos was advanced once past it; step back to report it.
+  pos--;
+  EXPECT_EQ(0, error) << "Error: CDEFDeringDirTest, SIMD and C mismatch."
+                      << std::endl
+                      << "First error at " << pos % size << "," << pos / size
+                      << " (" << (int16_t)ref_d[pos] << " : " << (int16_t)d[pos]
+                      << ") " << std::endl
+                      << "threshold: " << errthreshold << std::endl
+                      << "damping: " << errdamping << std::endl
+                      << "depth: " << errdepth << std::endl
+                      << "size: " << bsize << std::endl
+                      << "boundary: " << errboundary << std::endl
+                      << std::endl;
+}
+
+void test_dering_speed(int bsize, int iterations,
+                       od_filter_dering_direction_func dering,
+                       od_filter_dering_direction_func ref_dering) {
+  // Each run passes one function twice, so test_dering skips the comparison.
+  aom_usec_timer ref_timer;
+  aom_usec_timer_start(&ref_timer);
+  test_dering(bsize, iterations, ref_dering, ref_dering);
+  aom_usec_timer_mark(&ref_timer);
+  const int ref_usec = static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
+
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
+  test_dering(bsize, iterations, dering, dering);
+  aom_usec_timer_mark(&timer);
+  const int simd_usec = static_cast<int>(aom_usec_timer_elapsed(&timer));
+
+#if 0
+  std::cout << "[          ] C time = " << ref_usec / 1000
+            << " ms, SIMD time = " << simd_usec / 1000 << " ms" << std::endl;
+#endif
+
+  EXPECT_GT(ref_usec, simd_usec)
+      << "Error: CDEFDeringSpeedTest, SIMD slower than C." << std::endl
+      << "C time: " << ref_usec << " us" << std::endl
+      << "SIMD time: " << simd_usec << " us" << std::endl;
+}
+
+typedef int (*find_dir_t)(const od_dering_in *img, int stride, int32_t *var,
+                          int coeff_shift);
+// Test parameter: (function under test, reference function).
+typedef std::tr1::tuple<find_dir_t, find_dir_t> find_dir_param_t;
+
+class CDEFDeringFindDirTest
+    : public ::testing::TestWithParam<find_dir_param_t> {
+ public:
+  virtual ~CDEFDeringFindDirTest() {}
+  virtual void SetUp() {
+    finddir = GET_PARAM(0);
+    ref_finddir = GET_PARAM(1);
+  }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  find_dir_t finddir;      // GET_PARAM(0): implementation under test
+  find_dir_t ref_finddir;  // GET_PARAM(1): reference implementation
+};
+// Alias used to declare and instantiate the speed tests.
+typedef CDEFDeringFindDirTest CDEFDeringFindDirSpeedTest;
+
+void test_finddir(int (*finddir)(const od_dering_in *img, int stride,
+                                 int32_t *var, int coeff_shift),
+                  int (*ref_finddir)(const od_dering_in *img, int stride,
+                                     int32_t *var, int coeff_shift)) {
+  const int size = 8;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, uint16_t, s[size * size]);
+  // When finddir == ref_finddir (speed runs) only ref_finddir is called, x10.
+  int error = 0;
+  int depth, bits, level, count, errdepth = 0;
+  int ref_res = 0, res = 0;
+  int32_t ref_var = 0, var = 0;
+  // Every loop stops at the first mismatch so res/var keep the failing values.
+  for (depth = 8; depth <= 12 && !error; depth += 2) {
+    for (count = 0; count < 512 && !error; count++) {
+      for (level = 0; level < (1 << depth) && !error;
+           level += 1 << (depth - 8)) {
+        for (bits = 1; bits <= depth && !error; bits++) {
+          for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
+            s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
+                         (1 << depth) - 1);
+          for (int c = 0; c < 1 + 9 * (finddir == ref_finddir); c++)
+            ref_res = ref_finddir(s, size, &ref_var, depth - 8);
+          if (finddir != ref_finddir)
+            ASM_REGISTER_STATE_CHECK(res = finddir(s, size, &var, depth - 8));
+          if (ref_finddir != finddir) {
+            if (res != ref_res || var != ref_var) error = 1;
+            errdepth = depth;
+          }
+        }
+      }
+    }
+  }
+
+  EXPECT_EQ(0, error) << "Error: CDEFDeringFindDirTest, SIMD and C mismatch."
+                      << std::endl
+                      << "return: " << res << " : " << ref_res << std::endl
+                      << "var: " << var << " : " << ref_var << std::endl
+                      << "depth: " << errdepth << std::endl
+                      << std::endl;
+}
+
+void test_finddir_speed(int (*finddir)(const od_dering_in *img, int stride,
+                                       int32_t *var, int coeff_shift),
+                        int (*ref_finddir)(const od_dering_in *img, int stride,
+                                           int32_t *var, int coeff_shift)) {
+  // Each run passes one function twice, so test_finddir skips the comparison.
+  aom_usec_timer ref_timer;
+  aom_usec_timer_start(&ref_timer);
+  test_finddir(ref_finddir, ref_finddir);
+  aom_usec_timer_mark(&ref_timer);
+  const int ref_usec = static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
+
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
+  test_finddir(finddir, finddir);
+  aom_usec_timer_mark(&timer);
+  const int simd_usec = static_cast<int>(aom_usec_timer_elapsed(&timer));
+
+#if 0
+  std::cout << "[          ] C time = " << ref_usec / 1000
+            << " ms, SIMD time = " << simd_usec / 1000 << " ms" << std::endl;
+#endif
+
+  EXPECT_GT(ref_usec, simd_usec)
+      << "Error: CDEFDeringFindDirSpeedTest, SIMD slower than C." << std::endl
+      << "C time: " << ref_usec << " us" << std::endl
+      << "SIMD time: " << simd_usec << " us" << std::endl;
+}
+
+TEST_P(CDEFDeringDirTest, TestSIMDNoMismatch) {
+  test_dering(bsize, 1, dering, ref_dering);  // iterations = 1
+}
+
+TEST_P(CDEFDeringSpeedTest, DISABLED_TestSpeed) {
+  test_dering_speed(bsize, 4, dering, ref_dering);  // iterations = 4 per timing
+}
+
+TEST_P(CDEFDeringFindDirTest, TestSIMDNoMismatch) {
+  test_finddir(finddir, ref_finddir);  // results must match the reference
+}
+
+TEST_P(CDEFDeringFindDirSpeedTest, DISABLED_TestSpeed) {
+  test_finddir_speed(finddir, ref_finddir);  // expects finddir to be faster
+}
+
+using std::tr1::make_tuple;
+
+// VS compiling for 32 bit targets does not support vector types in
+// structs as arguments, which makes the v256 type of the intrinsics
+// hard to support, so optimizations for this target are disabled.
+#if defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, CDEFDeringDirTest,
+    ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_sse2,
+                                 &od_filter_dering_direction_4x4_c, 4),
+                      make_tuple(&od_filter_dering_direction_8x8_sse2,
+                                 &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringFindDirTest,
+                        ::testing::Values(make_tuple(&od_dir_find8_sse2,
+                                                     &od_dir_find8_c)));
+#endif  // HAVE_SSE2
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, CDEFDeringDirTest,
+    ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_ssse3,
+                                 &od_filter_dering_direction_4x4_c, 4),
+                      make_tuple(&od_filter_dering_direction_8x8_ssse3,
+                                 &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringFindDirTest,
+                        ::testing::Values(make_tuple(&od_dir_find8_ssse3,
+                                                     &od_dir_find8_c)));
+#endif  // HAVE_SSSE3
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, CDEFDeringDirTest,
+    ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_sse4_1,
+                                 &od_filter_dering_direction_4x4_c, 4),
+                      make_tuple(&od_filter_dering_direction_8x8_sse4_1,
+                                 &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringFindDirTest,
+                        ::testing::Values(make_tuple(&od_dir_find8_sse4_1,
+                                                     &od_dir_find8_c)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, CDEFDeringDirTest,
+    ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_neon,
+                                 &od_filter_dering_direction_4x4_c, 4),
+                      make_tuple(&od_filter_dering_direction_8x8_neon,
+                                 &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringFindDirTest,
+                        ::testing::Values(make_tuple(&od_dir_find8_neon,
+                                                     &od_dir_find8_c)));
+#endif  // HAVE_NEON
+
+// Test speed for all supported architectures
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, CDEFDeringSpeedTest,
+    ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_sse2,
+                                 &od_filter_dering_direction_4x4_c, 4),
+                      make_tuple(&od_filter_dering_direction_8x8_sse2,
+                                 &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringFindDirSpeedTest,
+                        ::testing::Values(make_tuple(&od_dir_find8_sse2,
+                                                     &od_dir_find8_c)));
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, CDEFDeringSpeedTest,
+    ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_ssse3,
+                                 &od_filter_dering_direction_4x4_c, 4),
+                      make_tuple(&od_filter_dering_direction_8x8_ssse3,
+                                 &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringFindDirSpeedTest,
+                        ::testing::Values(make_tuple(&od_dir_find8_ssse3,
+                                                     &od_dir_find8_c)));
+#endif  // HAVE_SSSE3
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, CDEFDeringSpeedTest,
+    ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_sse4_1,
+                                 &od_filter_dering_direction_4x4_c, 4),
+                      make_tuple(&od_filter_dering_direction_8x8_sse4_1,
+                                 &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringFindDirSpeedTest,
+                        ::testing::Values(make_tuple(&od_dir_find8_sse4_1,
+                                                     &od_dir_find8_c)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, CDEFDeringSpeedTest,
+    ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_neon,
+                                 &od_filter_dering_direction_4x4_c, 4),
+                      make_tuple(&od_filter_dering_direction_8x8_neon,
+                                 &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringFindDirSpeedTest,
+                        ::testing::Values(make_tuple(&od_dir_find8_neon,
+                                                     &od_dir_find8_c)));
+#endif  // HAVE_NEON
+
+#endif  // defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
+}  // namespace
diff --git a/third_party/aom/test/divu_small_test.cc b/third_party/aom/test/divu_small_test.cc
new file mode 100644
index 000000000..064f8ee45
--- /dev/null
+++ b/third_party/aom/test/divu_small_test.cc
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <stdlib.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "av1/common/odintrin.h"
+
+using libaom_test::ACMRandom;
+
+TEST(Daala, TestDIVUuptoMAX) {
+  for (int d = 1; d <= OD_DIVU_DMAX; d++) {  // divisors 1..OD_DIVU_DMAX
+    for (uint32_t x = 1; x <= 1000000; x++) {  // every x in [1, 1000000]
+      GTEST_ASSERT_EQ(x / d, OD_DIVU_SMALL(x, d))
+          << "x=" << x << " d=" << d << " x/d=" << (x / d)
+          << " != " << OD_DIVU_SMALL(x, d);
+    }
+  }
+}
+
+TEST(Daala, TestDIVUrandI31) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  for (int d = 1; d <= OD_DIVU_DMAX; d++) {  // inclusive, as in TestDIVUuptoMAX
+    for (int i = 0; i < 1000000; i++) {
+      uint32_t x = rnd.Rand31();  // random dividend from ACMRandom::Rand31()
+      GTEST_ASSERT_EQ(x / d, OD_DIVU_SMALL(x, d))
+          << "x=" << x << " d=" << d << " x/d=" << (x / d)
+          << " != " << OD_DIVU_SMALL(x, d);
+    }
+  }
+}
diff --git a/third_party/aom/test/encode_api_test.cc b/third_party/aom/test/encode_api_test.cc
new file mode 100644
index 000000000..14e43c847
--- /dev/null
+++ b/third_party/aom/test/encode_api_test.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "aom/aomcx.h"
+#include "aom/aom_encoder.h"
+
+namespace {
+
+#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0]))
+// Checks that the encoder API entry points reject NULL/invalid arguments.
+TEST(EncodeAPI, InvalidParams) {
+  static const aom_codec_iface_t *kCodecs[] = {
+#if CONFIG_AV1_ENCODER  // NOTE(review): the list is empty when this is 0.
+    &aom_codec_av1_cx_algo,
+#endif
+  };
+  uint8_t buf[1] = { 0 };
+  aom_image_t img;
+  aom_codec_ctx_t enc;
+  aom_codec_enc_cfg_t cfg;
+
+  EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, 1, 1, 1, buf));
+  // Calls with a NULL context and/or NULL interface must be rejected.
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(NULL, NULL, NULL, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, NULL, NULL, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_encode(NULL, NULL, 0, 0, 0, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_encode(NULL, &img, 0, 0, 0, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_destroy(NULL));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+            aom_codec_enc_config_default(NULL, NULL, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+            aom_codec_enc_config_default(NULL, &cfg, 0));
+  EXPECT_TRUE(aom_codec_error(NULL) != NULL);
+  // Per codec: bad arguments first, then a valid init / flush / destroy.
+  for (int i = 0; i < NELEMENTS(kCodecs); ++i) {
+    SCOPED_TRACE(aom_codec_iface_name(kCodecs[i]));
+    EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+              aom_codec_enc_init(NULL, kCodecs[i], NULL, 0));
+    EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+              aom_codec_enc_init(&enc, kCodecs[i], NULL, 0));
+    EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+              aom_codec_enc_config_default(kCodecs[i], &cfg, 1));
+
+    EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(kCodecs[i], &cfg, 0));
+    EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, kCodecs[i], &cfg, 0));
+    EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, NULL, 0, 0, 0, 0));
+
+    EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+  }
+}
+
+}  // namespace
diff --git a/third_party/aom/test/encode_perf_test.cc b/third_party/aom/test/encode_perf_test.cc
new file mode 100644
index 000000000..e2a4f2b71
--- /dev/null
+++ b/third_party/aom/test/encode_perf_test.cc
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <string>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "./aom_config.h"
+#include "./aom_version.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "aom_ports/aom_timer.h"
+
+namespace {
+
+const int kMaxPsnr = 100;
+const double kUsecsInSec = 1000000.0;
+
+struct EncodePerfTestVideo {  // One input clip plus the settings used for it.
+  EncodePerfTestVideo(const char *name_, uint32_t width_, uint32_t height_,
+                      uint32_t bitrate_, int frames_)
+      : name(name_), width(width_), height(height_), bitrate(bitrate_),
+        frames(frames_) {}
+  const char *name;  // Clip name handed to the video source.
+  uint32_t width;    // Frame width handed to the video source.
+  uint32_t height;   // Frame height handed to the video source.
+  uint32_t bitrate;  // Assigned to cfg_.rc_target_bitrate by the test.
+  int frames;        // Frame count handed to the video source.
+};
+
+const EncodePerfTestVideo kAV1EncodePerfTestVectors[] = {
+  EncodePerfTestVideo("desktop_640_360_30.yuv", 640, 360, 200, 2484),
+  EncodePerfTestVideo("kirland_640_480_30.yuv", 640, 480, 200, 300),
+  EncodePerfTestVideo("macmarcomoving_640_480_30.yuv", 640, 480, 200, 987),
+  EncodePerfTestVideo("macmarcostationary_640_480_30.yuv", 640, 480, 200, 718),
+  EncodePerfTestVideo("niklas_640_480_30.yuv", 640, 480, 200, 471),
+  EncodePerfTestVideo("tacomanarrows_640_480_30.yuv", 640, 480, 200, 300),
+  EncodePerfTestVideo("tacomasmallcameramovement_640_480_30.yuv", 640, 480, 200,
+                      300),
+  EncodePerfTestVideo("thaloundeskmtg_640_480_30.yuv", 640, 480, 200, 300),
+  EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470),
+};
+
+const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 8 };
+const int kEncodePerfTestThreads[] = { 1, 2, 4 };
+
+#define NELEMENTS(x) (sizeof((x)) / sizeof((x)[0]))
+
+class AV1EncodePerfTest  // Encoder fixture used to time encodes without decode.
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWithParam<libaom_test::TestMode> {
+ protected:
+  AV1EncodePerfTest()
+      : EncoderTest(GET_PARAM(0)), min_psnr_(kMaxPsnr), nframes_(0),
+        encoding_mode_(GET_PARAM(1)), speed_(0), threads_(1) {}
+
+  virtual ~AV1EncodePerfTest() {}
+  // Loads the default config, then applies this test's fixed CBR settings.
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(encoding_mode_);
+
+    cfg_.g_lag_in_frames = 0;
+    cfg_.rc_min_quantizer = 2;
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_undershoot_pct = 50;
+    cfg_.rc_overshoot_pct = 50;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 600;
+    cfg_.rc_resize_allowed = 0;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_error_resilient = 1;
+    cfg_.g_threads = threads_;  // set_threads() must be called before SetUp().
+  }
+  // Applies speed, tiling and alt-ref controls before the first frame only.
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 0) {
+      const int log2_tile_columns = 3;
+      encoder->Control(AOME_SET_CPUUSED, speed_);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, log2_tile_columns);
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0);
+    }
+  }
+  // Resets the per-pass statistics.
+  virtual void BeginPassHook(unsigned int /*pass*/) {
+    min_psnr_ = kMaxPsnr;
+    nframes_ = 0;
+  }
+  // Tracks the lowest psnr[0] value reported during the pass.
+  virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
+    if (pkt->data.psnr.psnr[0] < min_psnr_) {
+      min_psnr_ = pkt->data.psnr.psnr[0];
+    }
+  }
+  // Skip decoding for performance reasons. The const qualifier must match
+  // EncoderTest::DoDecode() const, otherwise this hides instead of overriding.
+  virtual bool DoDecode() const { return false; }
+
+  double min_psnr() const { return min_psnr_; }
+
+  void set_speed(unsigned int speed) { speed_ = speed; }
+
+  void set_threads(unsigned int threads) { threads_ = threads; }
+
+ private:
+  double min_psnr_;                      // Lowest psnr[0] seen in this pass.
+  unsigned int nframes_;                 // Reset per pass; not read here.
+  libaom_test::TestMode encoding_mode_;  // Test parameter 1.
+  unsigned speed_;                       // Value sent with AOME_SET_CPUUSED.
+  unsigned int threads_;                 // Copied into cfg_.g_threads.
+};
+
+TEST_P(AV1EncodePerfTest, PerfTest) {  // Times each clip/speed/thread combo.
+  for (size_t i = 0; i < NELEMENTS(kAV1EncodePerfTestVectors); ++i) {
+    for (size_t j = 0; j < NELEMENTS(kEncodePerfTestSpeeds); ++j) {
+      for (size_t k = 0; k < NELEMENTS(kEncodePerfTestThreads); ++k) {
+        if (kAV1EncodePerfTestVectors[i].width < 512 &&
+            kEncodePerfTestThreads[k] > 1)
+          continue;  // Clips narrower than 512 run with one thread only.
+        else if (kAV1EncodePerfTestVectors[i].width < 1024 &&
+                 kEncodePerfTestThreads[k] > 2)
+          continue;  // Clips narrower than 1024 run with at most two threads.
+        // Set the thread count first: SetUp() copies it into cfg_.g_threads.
+        set_threads(kEncodePerfTestThreads[k]);
+        SetUp();
+
+        const aom_rational timebase = { 33333333, 1000000000 };
+        cfg_.g_timebase = timebase;
+        cfg_.rc_target_bitrate = kAV1EncodePerfTestVectors[i].bitrate;
+        // Ask for PSNR packets; PSNRPktHook() consumes them for min_psnr().
+        init_flags_ = AOM_CODEC_USE_PSNR;
+
+        const unsigned frames = kAV1EncodePerfTestVectors[i].frames;
+        const char *video_name = kAV1EncodePerfTestVectors[i].name;
+        libaom_test::I420VideoSource video(
+            video_name, kAV1EncodePerfTestVectors[i].width,
+            kAV1EncodePerfTestVectors[i].height, timebase.den, timebase.num, 0,
+            kAV1EncodePerfTestVectors[i].frames);
+        set_speed(kEncodePerfTestSpeeds[j]);
+        // Time the complete encode loop for this combination.
+        aom_usec_timer t;
+        aom_usec_timer_start(&t);
+
+        ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+        aom_usec_timer_mark(&t);
+        const double elapsed_secs = aom_usec_timer_elapsed(&t) / kUsecsInSec;
+        const double fps = frames / elapsed_secs;
+        const double minimum_psnr = min_psnr();
+        std::string display_name(video_name);
+        if (kEncodePerfTestThreads[k] > 1) {
+          char thread_count[32];
+          snprintf(thread_count, sizeof(thread_count), "_t-%d",
+                   kEncodePerfTestThreads[k]);
+          display_name += thread_count;
+        }
+        // Print the measurements as one JSON-style record on stdout.
+        printf("{\n");
+        printf("\t\"type\" : \"encode_perf_test\",\n");
+        printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
+        printf("\t\"videoName\" : \"%s\",\n", display_name.c_str());
+        printf("\t\"encodeTimeSecs\" : %f,\n", elapsed_secs);
+        printf("\t\"totalFrames\" : %u,\n", frames);
+        printf("\t\"framesPerSecond\" : %f,\n", fps);
+        printf("\t\"minPsnr\" : %f,\n", minimum_psnr);
+        printf("\t\"speed\" : %d,\n", kEncodePerfTestSpeeds[j]);
+        printf("\t\"threads\" : %d\n", kEncodePerfTestThreads[k]);
+        printf("}\n");
+      }
+    }
+  }
+}
+
+AV1_INSTANTIATE_TEST_CASE(AV1EncodePerfTest,
+                          ::testing::Values(::libaom_test::kRealTime));
+}  // namespace
diff --git a/third_party/aom/test/encode_test_driver.cc b/third_party/aom/test/encode_test_driver.cc
new file mode 100644
index 000000000..80f155ab2
--- /dev/null
+++ b/third_party/aom/test/encode_test_driver.cc
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <string>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "aom_ports/mem.h"
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/encode_test_driver.h"
+#include "test/register_state_check.h"
+#include "test/video_source.h"
+
+namespace libaom_test {
+void Encoder::InitEncoder(VideoSource *video) {
+  aom_codec_err_t res;
+  const aom_image_t *img = video->img();
+  // Open the codec only if the source has a frame and priv is still unset.
+  if (video->img() && !encoder_.priv) {
+    cfg_.g_w = img->d_w;
+    cfg_.g_h = img->d_h;
+    cfg_.g_timebase = video->timebase();
+    cfg_.rc_twopass_stats_in = stats_->buf();
+    // cfg_ now carries the source's size/timebase and the collected stats.
+    res = aom_codec_enc_init(&encoder_, CodecInterface(), &cfg_, init_flags_);
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+
+#if CONFIG_AV1_ENCODER
+    if (CodecInterface() == &aom_codec_av1_cx_algo) {
+// Default to 1 tile column for AV1. With CONFIG_EXT_TILE, the
+// default is already the largest possible tile size
+#if !CONFIG_EXT_TILE
+      const int log2_tile_columns = 0;
+      res = aom_codec_control_(&encoder_, AV1E_SET_TILE_COLUMNS,
+                               log2_tile_columns);
+      ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+#endif  // !CONFIG_EXT_TILE
+    } else
+#endif
+    {  // Intentionally empty: no extra setup for other interfaces.
+    }
+  }
+}
+
+void Encoder::EncodeFrame(VideoSource *video, const unsigned long frame_flags) {
+  // Encodes the source's current image, or flushes the encoder when the source
+  // has no image left, then harvests any two-pass stats packets.
+  if (video->img()) {
+    EncodeFrameInternal(*video, frame_flags);
+  } else {
+    Flush();
+  }
+
+  // Only stats packets are consumed here; they are appended to stats_.
+  CxDataIterator packets = GetCxData();
+  while (const aom_codec_cx_pkt_t *pkt = packets.Next()) {
+    if (pkt->kind == AOM_CODEC_STATS_PKT) stats_->Append(*pkt);
+  }
+}
+
+void Encoder::EncodeFrameInternal(const VideoSource &video,
+                                  const unsigned long frame_flags) {
+  aom_codec_err_t res;
+  const aom_image_t *img = video.img();
+  // Encodes video's current image; cfg_ is updated first if its size changed.
+  // Handle frame resizing
+  if (cfg_.g_w != img->d_w || cfg_.g_h != img->d_h) {
+    cfg_.g_w = img->d_w;
+    cfg_.g_h = img->d_h;
+    res = aom_codec_enc_config_set(&encoder_, &cfg_);
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+  }
+
+  // Encode the frame with the source's pts and duration
+  API_REGISTER_STATE_CHECK(res = aom_codec_encode(&encoder_, img, video.pts(),
+                                                  video.duration(), frame_flags,
+                                                  deadline_));
+  ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+}
+
+void Encoder::Flush() {  // Flushes the encoder by submitting a NULL image.
+  const aom_codec_err_t res =
+      aom_codec_encode(&encoder_, NULL, 0, 0, 0, deadline_);
+  if (!encoder_.priv)  // No priv data on the context: the call must fail.
+    ASSERT_EQ(AOM_CODEC_ERROR, res) << EncoderError();
+  else
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+}
+
+void EncoderTest::InitializeConfig() {  // Loads default cfg_ and dec_cfg_.
+  const aom_codec_err_t res = codec_->DefaultEncoderConfig(&cfg_, 0);
+  dec_cfg_ = aom_codec_dec_cfg_t();  // Value-initialized decoder config.
+  ASSERT_EQ(AOM_CODEC_OK, res);
+}
+
+void EncoderTest::SetMode(TestMode mode) {
+  // Maps the test mode onto deadline_, passes_ and, for kRealTime, a zero
+  // frame lag. An unrecognized mode raises a fatal failure and returns
+  // without touching mode_ or passes_.
+  switch (mode) {
+    case kRealTime:
+      cfg_.g_lag_in_frames = 0;
+      deadline_ = AOM_DL_GOOD_QUALITY;
+      break;
+    case kOnePassGood:
+    case kTwoPassGood: deadline_ = AOM_DL_GOOD_QUALITY; break;
+    default: ASSERT_TRUE(false) << "Unexpected mode " << mode;
+  }
+  mode_ = mode;
+  passes_ = (mode == kTwoPassGood) ? 2 : 1;
+}
+
+static bool compare_plane(const uint8_t *const buf1, int stride1,
+                          const uint8_t *const buf2, int stride2, int w, int h,
+                          int *const mismatch_row, int *const mismatch_col,
+                          int *const mismatch_pix1, int *const mismatch_pix2) {
+  // Compares a w x h region of two 8-bit planes in raster order. Returns true
+  // when every sample matches. On the first difference it returns false and
+  // stores the position and both sample values through whichever output
+  // pointers are non-NULL.
+  for (int row = 0; row < h; ++row) {
+    for (int col = 0; col < w; ++col) {
+      const int sample1 = buf1[row * stride1 + col];
+      const int sample2 = buf2[row * stride2 + col];
+      if (sample1 == sample2) continue;
+
+      if (mismatch_row != NULL) *mismatch_row = row;
+      if (mismatch_col != NULL) *mismatch_col = col;
+      if (mismatch_pix1 != NULL) *mismatch_pix1 = sample1;
+      if (mismatch_pix2 != NULL) *mismatch_pix2 = sample2;
+      return false;
+    }
+  }
+  return true;
+}
+
+// Returns true when both images share format, color space and display size and
+// every Y, U and V sample matches; else reports the first mismatch found.
+static bool compare_img(const aom_image_t *img1, const aom_image_t *img2,
+                        int *const mismatch_row, int *const mismatch_col,
+                        int *const mismatch_plane, int *const mismatch_pix1,
+                        int *const mismatch_pix2) {
+  const unsigned int w_y = img1->d_w;
+  const unsigned int h_y = img1->d_h;
+  const unsigned int w_uv = ROUND_POWER_OF_TWO(w_y, img1->x_chroma_shift);
+  const unsigned int h_uv = ROUND_POWER_OF_TWO(h_y, img1->y_chroma_shift);
+  // Differing image properties are signalled by row/col == -1 only.
+  if (img1->fmt != img2->fmt || img1->cs != img2->cs ||
+      img1->d_w != img2->d_w || img1->d_h != img2->d_h) {
+    if (mismatch_row != NULL) *mismatch_row = -1;
+    if (mismatch_col != NULL) *mismatch_col = -1;
+    return false;
+  }
+
+  if (!compare_plane(img1->planes[AOM_PLANE_Y], img1->stride[AOM_PLANE_Y],
+                     img2->planes[AOM_PLANE_Y], img2->stride[AOM_PLANE_Y], w_y,
+                     h_y, mismatch_row, mismatch_col, mismatch_pix1,
+                     mismatch_pix2)) {
+    if (mismatch_plane != NULL) *mismatch_plane = AOM_PLANE_Y;
+    return false;
+  }
+
+  if (!compare_plane(img1->planes[AOM_PLANE_U], img1->stride[AOM_PLANE_U],
+                     img2->planes[AOM_PLANE_U], img2->stride[AOM_PLANE_U], w_uv,
+                     h_uv, mismatch_row, mismatch_col, mismatch_pix1,
+                     mismatch_pix2)) {
+    if (mismatch_plane != NULL) *mismatch_plane = AOM_PLANE_U;
+    return false;
+  }
+  // A V-plane difference must be reported as AOM_PLANE_V, not AOM_PLANE_U.
+  if (!compare_plane(img1->planes[AOM_PLANE_V], img1->stride[AOM_PLANE_V],
+                     img2->planes[AOM_PLANE_V], img2->stride[AOM_PLANE_V], w_uv,
+                     h_uv, mismatch_row, mismatch_col, mismatch_pix1,
+                     mismatch_pix2)) {
+    if (mismatch_plane != NULL) *mismatch_plane = AOM_PLANE_V;
+    return false;
+  }
+
+  return true;
+}
+
+void EncoderTest::MismatchHook(const aom_image_t *img_enc,
+                               const aom_image_t *img_dec) {
+  int mismatch_row = 0;
+  int mismatch_col = 0;
+  int mismatch_plane = 0;
+  int mismatch_pix_enc = 0;
+  int mismatch_pix_dec = 0;
+  // Re-run the comparison to collect the mismatch details; it must fail again.
+  ASSERT_FALSE(compare_img(img_enc, img_dec, &mismatch_row, &mismatch_col,
+                           &mismatch_plane, &mismatch_pix_enc,
+                           &mismatch_pix_dec));
+  // Always fails the test, reporting where the two frames first differ.
+  GTEST_FAIL() << "Encode/Decode mismatch found:" << std::endl
+               << "  pixel value enc/dec: " << mismatch_pix_enc << "/"
+               << mismatch_pix_dec << std::endl
+               << "                plane: " << mismatch_plane << std::endl
+               << "              row/col: " << mismatch_row << "/"
+               << mismatch_col << std::endl;
+}
+
+void EncoderTest::RunLoop(VideoSource *video) {
+  aom_codec_dec_cfg_t dec_cfg = aom_codec_dec_cfg_t();
+  // NOTE(review): the decoder below gets this local config, not dec_cfg_.
+  stats_.Reset();
+  // Runs one encode (and optional decode/compare) loop over the source per pass.
+  ASSERT_TRUE(passes_ == 1 || passes_ == 2);
+  for (unsigned int pass = 0; pass < passes_; pass++) {
+    last_pts_ = 0;
+
+    if (passes_ == 1)
+      cfg_.g_pass = AOM_RC_ONE_PASS;
+    else if (pass == 0)
+      cfg_.g_pass = AOM_RC_FIRST_PASS;
+    else
+      cfg_.g_pass = AOM_RC_LAST_PASS;
+
+    BeginPassHook(pass);
+    testing::internal::scoped_ptr<Encoder> encoder(
+        codec_->CreateEncoder(cfg_, deadline_, init_flags_, &stats_));
+    ASSERT_TRUE(encoder.get() != NULL);
+
+    ASSERT_NO_FATAL_FAILURE(video->Begin());
+    encoder->InitEncoder(video);
+
+    if (mode_ == kRealTime) {
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0);
+    }
+
+    ASSERT_FALSE(::testing::Test::HasFatalFailure());
+
+    unsigned long dec_init_flags = 0;  // NOLINT
+    // Use fragment decoder if encoder outputs partitions.
+    // NOTE: fragment decoder and partition encoder are only supported by VP8.
+    if (init_flags_ & AOM_CODEC_USE_OUTPUT_PARTITION)
+      dec_init_flags |= AOM_CODEC_USE_INPUT_FRAGMENTS;
+    testing::internal::scoped_ptr<Decoder> decoder(
+        codec_->CreateDecoder(dec_cfg, dec_init_flags));
+#if CONFIG_AV1 && CONFIG_EXT_TILE
+    if (decoder.get() != NULL && decoder->IsAV1()) {  // decoder may be NULL
+      // Set dec_cfg.tile_row = -1 and dec_cfg.tile_col = -1 so that the whole
+      // frame is decoded.
+      decoder->Control(AV1_SET_DECODE_TILE_ROW, -1);
+      decoder->Control(AV1_SET_DECODE_TILE_COL, -1);
+    }
+#endif
+    // Keep going while the source has frames or the encoder still emits data.
+    bool again;
+    for (again = true; again; video->Next()) {
+      again = (video->img() != NULL);
+
+      PreEncodeFrameHook(video);
+      PreEncodeFrameHook(video, encoder.get());
+      encoder->EncodeFrame(video, frame_flags_);
+
+      CxDataIterator iter = encoder->GetCxData();
+
+      bool has_cxdata = false;
+      bool has_dxdata = false;
+      while (const aom_codec_cx_pkt_t *pkt = iter.Next()) {
+        pkt = MutateEncoderOutputHook(pkt);
+        again = true;
+        switch (pkt->kind) {
+          case AOM_CODEC_CX_FRAME_PKT:
+            has_cxdata = true;
+            if (decoder.get() != NULL && DoDecode()) {
+              aom_codec_err_t res_dec = decoder->DecodeFrame(
+                  (const uint8_t *)pkt->data.frame.buf, pkt->data.frame.sz);
+
+              if (!HandleDecodeResult(res_dec, decoder.get())) break;
+              // (the break above only leaves the switch statement)
+              has_dxdata = true;
+            }
+            ASSERT_GE(pkt->data.frame.pts, last_pts_);
+            last_pts_ = pkt->data.frame.pts;
+            FramePktHook(pkt);
+            break;
+
+          case AOM_CODEC_PSNR_PKT: PSNRPktHook(pkt); break;
+
+          default: break;
+        }
+      }
+
+      // Flush the decoder when there are no more fragments.
+      if ((init_flags_ & AOM_CODEC_USE_OUTPUT_PARTITION) && has_dxdata) {
+        const aom_codec_err_t res_dec = decoder->DecodeFrame(NULL, 0);
+        if (!HandleDecodeResult(res_dec, decoder.get())) break;
+      }
+      // Compare the encoder's preview frame against the decoded frame.
+      if (has_dxdata && has_cxdata) {
+        const aom_image_t *img_enc = encoder->GetPreviewFrame();
+        DxDataIterator dec_iter = decoder->GetDxData();
+        const aom_image_t *img_dec = dec_iter.Next();
+        if (img_enc && img_dec) {
+          const bool res =
+              compare_img(img_enc, img_dec, NULL, NULL, NULL, NULL, NULL);
+          if (!res) {  // Mismatch
+            MismatchHook(img_enc, img_dec);
+          }
+        }
+        if (img_dec) DecompressedFrameHook(*img_dec, video->pts());
+      }
+      if (!Continue()) break;
+    }
+
+    EndPassHook();
+
+    if (!Continue()) break;
+  }
+}
+
+}  // namespace libaom_test
diff --git a/third_party/aom/test/encode_test_driver.h b/third_party/aom/test/encode_test_driver.h
new file mode 100644
index 000000000..91027b4f6
--- /dev/null
+++ b/third_party/aom/test/encode_test_driver.h
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef TEST_ENCODE_TEST_DRIVER_H_
+#define TEST_ENCODE_TEST_DRIVER_H_
+
+#include <string>
+#include <vector>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#if CONFIG_AV1_ENCODER
+#include "aom/aomcx.h"
+#endif
+#include "aom/aom_encoder.h"
+
+namespace libaom_test {
+
+class CodecFactory;
+class VideoSource;
+
+enum TestMode { kRealTime, kOnePassGood, kTwoPassGood };
+#define ALL_TEST_MODES                                                     \
+  ::testing::Values(::libaom_test::kRealTime, ::libaom_test::kOnePassGood, \
+                    ::libaom_test::kTwoPassGood)
+
+#define ONE_PASS_TEST_MODES \
+  ::testing::Values(::libaom_test::kRealTime, ::libaom_test::kOnePassGood)
+
+#define TWO_PASS_TEST_MODES ::testing::Values(::libaom_test::kTwoPassGood)
+
+// Provides an object to handle the libaom get_cx_data() iteration pattern
+class CxDataIterator {
+ public:
+  explicit CxDataIterator(aom_codec_ctx_t *encoder)
+      : encoder_(encoder), iter_(NULL) {}
+  // Returns the next packet from aom_codec_get_cx_data() for this encoder.
+  const aom_codec_cx_pkt_t *Next() {
+    return aom_codec_get_cx_data(encoder_, &iter_);
+  }
+
+ private:
+  aom_codec_ctx_t *encoder_;  // Codec context being iterated; not owned here.
+  aom_codec_iter_t iter_;     // Iteration state, starts out as NULL.
+};
+
+// Implements an in-memory store for libaom twopass statistics
+class TwopassStatsStore {
+ public:
+  void Append(const aom_codec_cx_pkt_t &pkt) {  // Appends the packet's stats.
+    buffer_.append(reinterpret_cast<char *>(pkt.data.twopass_stats.buf),
+                   pkt.data.twopass_stats.sz);
+  }
+  // Returns a pointer/size pair describing the bytes held in buffer_.
+  aom_fixed_buf_t buf() {
+    const aom_fixed_buf_t buf = { &buffer_[0], buffer_.size() };
+    return buf;
+  }
+  // Discards all accumulated stats.
+  void Reset() { buffer_.clear(); }
+
+ protected:
+  std::string buffer_;  // Raw stats bytes, in the order they were appended.
+};
+
+// Provides a simplified interface to manage one video encoding pass, given
+// a configuration and video source.
+//
+// TODO(jkoleszar): The exact services it provides and the appropriate
+// level of abstraction will be fleshed out as more tests are written.
+class Encoder {
+ public:
+  Encoder(aom_codec_enc_cfg_t cfg, unsigned long deadline,
+          const unsigned long init_flags, TwopassStatsStore *stats)
+      : cfg_(cfg), deadline_(deadline), init_flags_(init_flags), stats_(stats) {
+    memset(&encoder_, 0, sizeof(encoder_));
+  }
+  // Destroys the codec context.
+  virtual ~Encoder() { aom_codec_destroy(&encoder_); }
+  // Returns an iterator over this encoder's output packets.
+  CxDataIterator GetCxData() { return CxDataIterator(&encoder_); }
+  // Initializes the codec using the source's current image (see the .cc file).
+  void InitEncoder(VideoSource *video);
+  // Forwards to aom_codec_get_preview_frame() for this encoder.
+  const aom_image_t *GetPreviewFrame() {
+    return aom_codec_get_preview_frame(&encoder_);
+  }
+  // This is a thin wrapper around aom_codec_encode(), so refer to
+  // aom_encoder.h for its semantics.
+  void EncodeFrame(VideoSource *video, const unsigned long frame_flags);
+
+  // Convenience wrapper for EncodeFrame()
+  void EncodeFrame(VideoSource *video) { EncodeFrame(video, 0); }
+  // Control() overloads: forward to aom_codec_control_() and assert success.
+  void Control(int ctrl_id, int arg) {
+    const aom_codec_err_t res = aom_codec_control_(&encoder_, ctrl_id, arg);
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+  }
+
+  void Control(int ctrl_id, int *arg) {
+    const aom_codec_err_t res = aom_codec_control_(&encoder_, ctrl_id, arg);
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+  }
+
+  void Control(int ctrl_id, struct aom_scaling_mode *arg) {
+    const aom_codec_err_t res = aom_codec_control_(&encoder_, ctrl_id, arg);
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+  }
+
+#if CONFIG_AV1_ENCODER
+  void Control(int ctrl_id, aom_active_map_t *arg) {
+    const aom_codec_err_t res = aom_codec_control_(&encoder_, ctrl_id, arg);
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+  }
+#endif
+  // Applies a new configuration to the codec and remembers it in cfg_.
+  void Config(const aom_codec_enc_cfg_t *cfg) {
+    const aom_codec_err_t res = aom_codec_enc_config_set(&encoder_, cfg);
+    ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
+    cfg_ = *cfg;
+  }
+
+  void set_deadline(unsigned long deadline) { deadline_ = deadline; }
+
+ protected:
+  virtual aom_codec_iface_t *CodecInterface() const = 0;
+  // Returns the codec's detailed error text, or its generic one if absent.
+  const char *EncoderError() {
+    const char *detail = aom_codec_error_detail(&encoder_);
+    return detail ? detail : aom_codec_error(&encoder_);
+  }
+
+  // Encode an image
+  void EncodeFrameInternal(const VideoSource &video,
+                           const unsigned long frame_flags);
+
+  // Flush the encoder on EOS
+  void Flush();
+
+  aom_codec_ctx_t encoder_;    // Zero-filled by the constructor.
+  aom_codec_enc_cfg_t cfg_;    // Configuration used to initialize the codec.
+  unsigned long deadline_;     // Passed to every aom_codec_encode() call.
+  unsigned long init_flags_;   // Passed to aom_codec_enc_init().
+  TwopassStatsStore *stats_;   // Stats store supplied by the caller.
+};
+
+// Common test functionality for all Encoder tests.
+//
+// This class is a mixin which provides the main loop common to all
+// encoder tests. It provides hooks which can be overridden by subclasses
+// to implement each test's specific behavior, while centralizing the bulk
+// of the boilerplate. Note that it doesn't inherit the gtest testing
+// classes directly, so that tests can be parameterized differently.
+class EncoderTest {
+ protected:
+  explicit EncoderTest(const CodecFactory *codec)
+      : codec_(codec), abort_(false), init_flags_(0), frame_flags_(0),
+        last_pts_(0), mode_(kRealTime) {
+    // Default to 1 thread.
+    cfg_.g_threads = 1;
+  }
+
+  virtual ~EncoderTest() {}
+
+  // Initialize the cfg_ member with the default configuration.
+  void InitializeConfig();
+
+  // Map the TestMode enum to the deadline_ and passes_ variables.
+  void SetMode(TestMode mode);
+
+  // Set encoder flag.
+  void set_init_flags(unsigned long flag) {  // NOLINT(runtime/int)
+    init_flags_ = flag;
+  }
+
+  // Main loop
+  virtual void RunLoop(VideoSource *video);
+
+  // Hook to be called at the beginning of a pass.
+  virtual void BeginPassHook(unsigned int /*pass*/) {}
+
+  // Hook to be called at the end of a pass.
+  virtual void EndPassHook() {}
+
+  // Hook to be called before encoding a frame.
+  virtual void PreEncodeFrameHook(VideoSource * /*video*/) {}
+  virtual void PreEncodeFrameHook(VideoSource * /*video*/,
+                                  Encoder * /*encoder*/) {}
+
+  // Hook to be called on every compressed data packet.
+  virtual void FramePktHook(const aom_codec_cx_pkt_t * /*pkt*/) {}
+
+  // Hook to be called on every PSNR packet.
+  virtual void PSNRPktHook(const aom_codec_cx_pkt_t * /*pkt*/) {}
+
+  // Hook to determine whether the encode loop should continue.
+  virtual bool Continue() const {
+    return !(::testing::Test::HasFatalFailure() || abort_);
+  }
+  // Factory used to create the encoder and decoder for each pass.
+  const CodecFactory *codec_;
+  // Hook to determine whether to decode frame after encoding. Overrides must
+  virtual bool DoDecode() const { return 1; }  // keep the const qualifier.
+
+  // Hook to handle encode/decode mismatch
+  virtual void MismatchHook(const aom_image_t *img1, const aom_image_t *img2);
+
+  // Hook to be called on every decompressed frame.
+  virtual void DecompressedFrameHook(const aom_image_t & /*img*/,
+                                     aom_codec_pts_t /*pts*/) {}
+
+  // Hook to be called to handle decode result. Return true to continue.
+  virtual bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                                  Decoder *decoder) {
+    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+    return AOM_CODEC_OK == res_dec;
+  }
+
+  // Hook that can modify the encoder's output data
+  virtual const aom_codec_cx_pkt_t *MutateEncoderOutputHook(
+      const aom_codec_cx_pkt_t *pkt) {
+    return pkt;
+  }
+  // NOTE(review): the constructor does not initialize passes_ or deadline_.
+  bool abort_;
+  aom_codec_enc_cfg_t cfg_;
+  aom_codec_dec_cfg_t dec_cfg_;
+  unsigned int passes_;
+  unsigned long deadline_;
+  TwopassStatsStore stats_;
+  unsigned long init_flags_;
+  unsigned long frame_flags_;
+  aom_codec_pts_t last_pts_;
+  TestMode mode_;
+};
+
+}  // namespace libaom_test
+
+#endif  // TEST_ENCODE_TEST_DRIVER_H_
diff --git a/third_party/aom/test/encoder_parms_get_to_decoder.cc b/third_party/aom/test/encoder_parms_get_to_decoder.cc
new file mode 100644
index 000000000..ca6a24ebe
--- /dev/null
+++ b/third_party/aom/test/encoder_parms_get_to_decoder.cc
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "av1/av1_dx_iface.c"
+
+namespace {
+
+const int kCpuUsed = 2;
+
+struct EncodePerfTestVideo {  // Describes the input clip used by this test.
+  const char *name;  // Clip name handed to Y4mVideoSource.
+  uint32_t width;
+  uint32_t height;
+  uint32_t bitrate;  // Assigned to cfg_.rc_target_bitrate in SetUp().
+  int frames;        // Frame limit handed to Y4mVideoSource.
+};
+
+const EncodePerfTestVideo kAV1EncodePerfTestVectors[] = {
+  { "niklas_1280_720_30.y4m", 1280, 720, 600, 10 },
+};
+
+struct EncodeParameters {  // Encoder settings checked on the decoder side.
+  int32_t tile_rows;        // Compared against common->log2_tile_rows.
+  int32_t tile_cols;        // Compared against common->log2_tile_cols.
+  int32_t lossless;         // Sent with AV1E_SET_LOSSLESS.
+  int32_t error_resilient;  // Copied into cfg_.g_error_resilient.
+  int32_t frame_parallel;   // Sent with AV1E_SET_FRAME_PARALLEL_DECODING.
+  aom_color_range_t color_range;
+  aom_color_space_t cs;
+  int render_size[2];  // Width, height; only applied when both are > 0.
+  // TODO(JBB): quantizers / bitrate
+};
+
+const EncodeParameters kAV1EncodeParameterSet[] = {
+  { 0, 0, 0, 1, 0, AOM_CR_STUDIO_RANGE, AOM_CS_BT_601, { 0, 0 } },
+  { 0, 0, 0, 0, 0, AOM_CR_FULL_RANGE, AOM_CS_BT_709, { 0, 0 } },
+  { 0, 0, 1, 0, 0, AOM_CR_FULL_RANGE, AOM_CS_BT_2020, { 0, 0 } },
+  { 0, 2, 0, 0, 1, AOM_CR_STUDIO_RANGE, AOM_CS_UNKNOWN, { 640, 480 } },
+  // TODO(JBB): Test profiles (requires more work).
+};
+
+class AvxEncoderParmsGetToDecoder  // Checks encoder settings reach the decoder.
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWith2Params<EncodeParameters,
+                                                 EncodePerfTestVideo> {
+ protected:
+  AvxEncoderParmsGetToDecoder()
+      : EncoderTest(GET_PARAM(0)), encode_parms(GET_PARAM(1)) {}
+
+  virtual ~AvxEncoderParmsGetToDecoder() {}
+  // Two-pass setup; error resilience and bitrate come from the parameters.
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(::libaom_test::kTwoPassGood);
+    cfg_.g_lag_in_frames = 25;
+    cfg_.g_error_resilient = encode_parms.error_resilient;
+    dec_cfg_.threads = 4;  // NOTE(review): RunLoop() uses a local dec config.
+    test_video_ = GET_PARAM(2);
+    cfg_.rc_target_bitrate = test_video_.bitrate;
+  }
+  // Applies the parameter set through encoder controls when frame() == 1.
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      encoder->Control(AV1E_SET_COLOR_SPACE, encode_parms.cs);
+      encoder->Control(AV1E_SET_COLOR_RANGE, encode_parms.color_range);
+      encoder->Control(AV1E_SET_LOSSLESS, encode_parms.lossless);
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING,
+                       encode_parms.frame_parallel);
+      encoder->Control(AV1E_SET_TILE_ROWS, encode_parms.tile_rows);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, encode_parms.tile_cols);
+      encoder->Control(AOME_SET_CPUUSED, kCpuUsed);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+      if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0)
+        encoder->Control(AV1E_SET_RENDER_SIZE, encode_parms.render_size);
+    }
+  }
+  // Checks that the decoder's common state reflects the encoder parameters.
+  virtual bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                                  libaom_test::Decoder *decoder) {
+    aom_codec_ctx_t *const av1_decoder = decoder->GetDecoder();
+    aom_codec_alg_priv_t *const priv =
+        reinterpret_cast<aom_codec_alg_priv_t *>(av1_decoder->priv);
+    FrameWorkerData *const worker_data =
+        reinterpret_cast<FrameWorkerData *>(priv->frame_workers[0].data1);
+    AV1_COMMON *const common = &worker_data->pbi->common;
+    // The expectations below read the decoder's internal AV1_COMMON state.
+    if (encode_parms.lossless) {
+      EXPECT_EQ(0, common->base_qindex);
+      EXPECT_EQ(0, common->y_dc_delta_q);
+      EXPECT_EQ(0, common->uv_dc_delta_q);
+      EXPECT_EQ(0, common->uv_ac_delta_q);
+      EXPECT_EQ(ONLY_4X4, common->tx_mode);
+    }
+    EXPECT_EQ(encode_parms.error_resilient, common->error_resilient_mode);
+    if (encode_parms.error_resilient) {
+      EXPECT_EQ(0, common->use_prev_frame_mvs);
+    }
+    EXPECT_EQ(encode_parms.color_range, common->color_range);
+    EXPECT_EQ(encode_parms.cs, common->color_space);
+    if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0) {
+      EXPECT_EQ(encode_parms.render_size[0], common->render_width);
+      EXPECT_EQ(encode_parms.render_size[1], common->render_height);
+    }
+    EXPECT_EQ(encode_parms.tile_cols, common->log2_tile_cols);
+    EXPECT_EQ(encode_parms.tile_rows, common->log2_tile_rows);
+
+    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+    return AOM_CODEC_OK == res_dec;
+  }
+
+  EncodePerfTestVideo test_video_;  // Clip description, test parameter 2.
+
+ private:
+  EncodeParameters encode_parms;  // Settings under test, test parameter 1.
+};
+
+TEST_P(AvxEncoderParmsGetToDecoder, BitstreamParms) {
+  init_flags_ = AOM_CODEC_USE_PSNR;
+  // Encode the parameterized clip; HandleDecodeResult() performs the checks.
+  testing::internal::scoped_ptr<libaom_test::VideoSource> video(
+      new libaom_test::Y4mVideoSource(test_video_.name, 0, test_video_.frames));
+  ASSERT_TRUE(video.get() != NULL);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+}
+
+AV1_INSTANTIATE_TEST_CASE(AvxEncoderParmsGetToDecoder,
+                          ::testing::ValuesIn(kAV1EncodeParameterSet),
+                          ::testing::ValuesIn(kAV1EncodePerfTestVectors));
+}  // namespace
diff --git a/third_party/aom/test/end_to_end_test.cc b/third_party/aom/test/end_to_end_test.cc
new file mode 100644
index 000000000..0c8cbe274
--- /dev/null
+++ b/third_party/aom/test/end_to_end_test.cc
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "test/yuv_video_source.h"
+
+namespace {
+
+const unsigned int kWidth = 160;
+const unsigned int kHeight = 90;
+const unsigned int kFramerate = 50;
+const unsigned int kFrames = 10;
+const int kBitrate = 500;
+// List of psnr thresholds for speed settings 0-7 and 5 encoding modes
+const double kPsnrThreshold[][5] = {
+// Note:
+// AV1 HBD average PSNR is slightly lower than AV1.
+// We make two cases here to enable the testing and
+// guard picture quality.
+#if CONFIG_AV1_ENCODER && CONFIG_HIGHBITDEPTH
+  { 36.0, 37.0, 37.0, 37.0, 37.0 }, { 31.0, 36.0, 36.0, 36.0, 36.0 },
+  { 31.0, 35.0, 35.0, 35.0, 35.0 }, { 31.0, 34.0, 34.0, 34.0, 34.0 },
+  { 31.0, 33.0, 33.0, 33.0, 33.0 }, { 31.0, 32.0, 32.0, 32.0, 32.0 },
+  { 30.0, 31.0, 31.0, 31.0, 31.0 }, { 29.0, 30.0, 30.0, 30.0, 30.0 },
+#else
+  { 36.0, 37.0, 37.0, 37.0, 37.0 }, { 35.0, 36.0, 36.0, 36.0, 36.0 },
+  { 34.0, 35.0, 35.0, 35.0, 35.0 }, { 33.0, 34.0, 34.0, 34.0, 34.0 },
+  { 32.0, 33.0, 33.0, 33.0, 33.0 }, { 31.0, 32.0, 32.0, 32.0, 32.0 },
+  { 30.0, 31.0, 31.0, 31.0, 31.0 }, { 29.0, 30.0, 30.0, 30.0, 30.0 },
+#endif  // CONFIG_HIGHBITDEPTH && CONFIG_AV1_ENCODER
+};
+
+typedef struct {
+  const char *filename;
+  unsigned int input_bit_depth;
+  aom_img_fmt fmt;
+  aom_bit_depth_t bit_depth;
+  unsigned int profile;
+} TestVideoParam;
+
+const TestVideoParam kTestVectors[] = {
+  { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 },
+  { "park_joy_90p_8_422.y4m", 8, AOM_IMG_FMT_I422, AOM_BITS_8, 1 },
+  { "park_joy_90p_8_444.y4m", 8, AOM_IMG_FMT_I444, AOM_BITS_8, 1 },
+  { "park_joy_90p_8_440.yuv", 8, AOM_IMG_FMT_I440, AOM_BITS_8, 1 },
+#if CONFIG_HIGHBITDEPTH
+  { "park_joy_90p_10_420.y4m", 10, AOM_IMG_FMT_I42016, AOM_BITS_10, 2 },
+  { "park_joy_90p_10_422.y4m", 10, AOM_IMG_FMT_I42216, AOM_BITS_10, 3 },
+  { "park_joy_90p_10_444.y4m", 10, AOM_IMG_FMT_I44416, AOM_BITS_10, 3 },
+  { "park_joy_90p_10_440.yuv", 10, AOM_IMG_FMT_I44016, AOM_BITS_10, 3 },
+  { "park_joy_90p_12_420.y4m", 12, AOM_IMG_FMT_I42016, AOM_BITS_12, 2 },
+  { "park_joy_90p_12_422.y4m", 12, AOM_IMG_FMT_I42216, AOM_BITS_12, 3 },
+  { "park_joy_90p_12_444.y4m", 12, AOM_IMG_FMT_I44416, AOM_BITS_12, 3 },
+  { "park_joy_90p_12_440.yuv", 12, AOM_IMG_FMT_I44016, AOM_BITS_12, 3 },
+#endif  // CONFIG_HIGHBITDEPTH
+};
+
+// Encoding modes tested
+const libaom_test::TestMode kEncodingModeVectors[] = {
+  ::libaom_test::kTwoPassGood, ::libaom_test::kOnePassGood,
+  ::libaom_test::kRealTime,
+};
+
+// Speed settings tested
+const int kCpuUsedVectors[] = { 1, 2, 3, 5, 6 };
+
+// Returns 1 when |filename| ends in a ".y4m" extension and 0 otherwise. A name
+// without a '.', or whose last '.' is its first character, never matches.
+int is_extension_y4m(const char *filename) {
+  const char *const ext = strrchr(filename, '.');
+  const bool has_ext = ext != NULL && ext != filename;
+  return has_ext && strcmp(ext, ".y4m") == 0;
+}
+
+// Encoder test fixture parameterized on (encoding mode, test video, speed).
+// It encodes a short clip, accumulates the PSNR values reported by the
+// encoder and lets the test compare their average against the matching
+// kPsnrThreshold entry.
+class EndToEndTest
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode,
+                                                 TestVideoParam, int> {
+ protected:
+  // Parameters 1..3 are the encoding mode, the test video and cpu_used.
+  EndToEndTest()
+      : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(2)),
+        cpu_used_(GET_PARAM(3)), psnr_(0.0), nframes_(0),
+        encoding_mode_(GET_PARAM(1)) {}
+
+  virtual ~EndToEndTest() {}
+
+  // Realtime mode gets zero-lag CBR settings; every other mode lagged VBR.
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(encoding_mode_);
+    if (encoding_mode_ != ::libaom_test::kRealTime) {
+      cfg_.g_lag_in_frames = 5;
+      cfg_.rc_end_usage = AOM_VBR;
+    } else {
+      cfg_.g_lag_in_frames = 0;
+      cfg_.rc_end_usage = AOM_CBR;
+      cfg_.rc_buf_sz = 1000;
+      cfg_.rc_buf_initial_sz = 500;
+      cfg_.rc_buf_optimal_sz = 600;
+    }
+    dec_cfg_.threads = 4;
+  }
+
+  // Restarts the PSNR accumulation.
+  virtual void BeginPassHook(unsigned int) {
+    psnr_ = 0.0;
+    nframes_ = 0;
+  }
+
+  // Adds psnr[0] of the packet to the running sum and counts the packet.
+  virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
+    psnr_ += pkt->data.psnr.psnr[0];
+    nframes_++;
+  }
+
+  // Issues the encoder controls when the source reports frame 1.
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, 4);
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+#if CONFIG_PALETTE
+      // Test screen coding tools at cpu_used = 1 && encoding mode is two-pass.
+      if (cpu_used_ == 1 && encoding_mode_ == ::libaom_test::kTwoPassGood)
+        encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN);
+      else
+        encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT);
+#endif  // CONFIG_PALETTE
+      if (encoding_mode_ != ::libaom_test::kRealTime) {
+        encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+        encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+        encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+      }
+    }
+  }
+
+  // Mean of the accumulated PSNR values, or 0.0 when none were received.
+  double GetAveragePsnr() const {
+    if (nframes_) return psnr_ / nframes_;
+    return 0.0;
+  }
+
+  // Threshold for the current speed setting (row) and encoding mode (column).
+  double GetPsnrThreshold() {
+    return kPsnrThreshold[cpu_used_][encoding_mode_];
+  }
+
+  // Body shared by the EndtoEndPSNRTest cases of EndToEndTest and
+  // EndToEndTestLarge: configures the encoder from the test video parameter,
+  // picks the video source by file extension, runs the encode loop and
+  // expects the average PSNR to exceed the threshold.
+  void DoTest() {
+    cfg_.rc_target_bitrate = kBitrate;
+    cfg_.g_error_resilient = 0;
+    cfg_.g_profile = test_video_param_.profile;
+    cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
+    cfg_.g_bit_depth = test_video_param_.bit_depth;
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    // High bit depth has to be requested explicitly for depths above 8.
+    if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
+
+    testing::internal::scoped_ptr<libaom_test::VideoSource> video;
+    if (is_extension_y4m(test_video_param_.filename)) {
+      video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename,
+                                                  0, kFrames));
+    } else {
+      // Other files go through YUVVideoSource with explicit format and size.
+      video.reset(new libaom_test::YUVVideoSource(
+          test_video_param_.filename, test_video_param_.fmt, kWidth, kHeight,
+          kFramerate, 1, 0, kFrames));
+    }
+    ASSERT_TRUE(video.get() != NULL);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+    const double psnr = GetAveragePsnr();
+    EXPECT_GT(psnr, GetPsnrThreshold());
+  }
+
+  // Parameters of this instantiation.
+  TestVideoParam test_video_param_;
+  int cpu_used_;
+
+ private:
+  double psnr_;  // Sum of psnr[0] since the last BeginPassHook().
+  unsigned int nframes_;  // PSNR packets since the last BeginPassHook().
+  libaom_test::TestMode encoding_mode_;
+};
+
+// Same fixture under a separate name so that it can be instantiated with its
+// own parameter set.
+class EndToEndTestLarge : public EndToEndTest {};
+
+// Both cases run the shared EndToEndTest::DoTest() body.
+TEST_P(EndToEndTestLarge, EndtoEndPSNRTest) { DoTest(); }
+
+TEST_P(EndToEndTest, EndtoEndPSNRTest) { DoTest(); }
+
+AV1_INSTANTIATE_TEST_CASE(EndToEndTestLarge,
+                          ::testing::ValuesIn(kEncodingModeVectors),
+                          ::testing::ValuesIn(kTestVectors),
+                          ::testing::ValuesIn(kCpuUsedVectors));
+
+AV1_INSTANTIATE_TEST_CASE(EndToEndTest,
+                          ::testing::Values(kEncodingModeVectors[0]),
+                          ::testing::Values(kTestVectors[2]),  // 444
+                          ::testing::Values(kCpuUsedVectors[2]));
+}  // namespace
diff --git a/third_party/aom/test/error_block_test.cc b/third_party/aom/test/error_block_test.cc
new file mode 100644
index 000000000..227065fa9
--- /dev/null
+++ b/third_party/aom/test/error_block_test.cc
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <cmath>
+#include <cstdlib>
+#include <string>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "./av1_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "av1/common/entropy.h"
+#include "aom/aom_codec.h"
+#include "aom/aom_integer.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+#if CONFIG_HIGHBITDEPTH
+const int kNumIterations = 1000;
+
+typedef int64_t (*ErrorBlockFunc)(const tran_low_t *coeff,
+                                  const tran_low_t *dqcoeff,
+                                  intptr_t block_size, int64_t *ssz, int bps);
+
+typedef std::tr1::tuple<ErrorBlockFunc, ErrorBlockFunc, aom_bit_depth_t>
+    ErrorBlockParam;
+
+class ErrorBlockTest : public ::testing::TestWithParam<ErrorBlockParam> {
+ public:
+  virtual ~ErrorBlockTest() {}
+  virtual void SetUp() {
+    error_block_op_ = GET_PARAM(0);
+    ref_error_block_op_ = GET_PARAM(1);
+    bit_depth_ = GET_PARAM(2);
+  }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  aom_bit_depth_t bit_depth_;
+  ErrorBlockFunc error_block_op_;
+  ErrorBlockFunc ref_error_block_op_;
+};
+
+TEST_P(ErrorBlockTest, OperationCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);
+  // Coefficient magnitudes are drawn below 1 << msb.
+  const int msb = bit_depth_ + 8 - 1;
+  int total_mismatches = 0;
+  int first_failure = -1;
+  for (int iter = 0; iter < kNumIterations; ++iter) {
+    // All block sizes from 4x4, 8x4 ..64x64
+    const intptr_t block_size = 16 << (iter % 9);
+    for (int idx = 0; idx < block_size; ++idx) {
+      // coeff and dqcoeff will always have at least the same sign, and this
+      // can be used for optimization, so generate test input precisely.
+      // The draw order (sign, coeff, dqcoeff) fixes the random sequence.
+      const bool positive = rnd(2) != 0;
+      const int coeff_mag = rnd(1 << msb);
+      const int dqcoeff_mag = rnd(1 << msb);
+      coeff[idx] = positive ? coeff_mag : -coeff_mag;
+      dqcoeff[idx] = positive ? dqcoeff_mag : -dqcoeff_mag;
+    }
+    // Reference result first, then the function under test.
+    int64_t ref_ssz;
+    const int64_t ref_ret =
+        ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz, bit_depth_);
+    int64_t ssz;
+    int64_t ret;
+    ASM_REGISTER_STATE_CHECK(
+        ret = error_block_op_(coeff, dqcoeff, block_size, &ssz, bit_depth_));
+
+    // Both the returned error and the ssz output have to agree.
+    const bool mismatch = (ref_ret != ret) || (ref_ssz != ssz);
+    if (mismatch) {
+      // Remember only the first failing iteration for the message below.
+      if (total_mismatches == 0) first_failure = iter;
+      ++total_mismatches;
+    }
+  }
+  EXPECT_EQ(0, total_mismatches)
+      << "Error: Error Block Test, C output doesn't match optimized output. "
+      << "First failed at test case " << first_failure;
+}
+
+TEST_P(ErrorBlockTest, ExtremeValues) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);
+  const int msb = bit_depth_ + 8 - 1;
+  // Largest magnitude tested; halved at the start of every pattern-8 sweep.
+  int max_val = ((1 << msb) - 1);
+  int total_mismatches = 0;
+  int first_failure = -1;
+  for (int iter = 0; iter < kNumIterations; ++iter) {
+    // The pattern index advances once per sweep over the nine block sizes.
+    const int pattern = (iter / 9) % 9;
+
+    // Change the maximum coeff value, to test different bit boundaries
+    if (pattern == 8 && (iter % 9) == 0) {
+      max_val >>= 1;
+    }
+    // All block sizes from 4x4, 8x4 ..64x64
+    const intptr_t block_size = 16 << (iter % 9);
+    for (int idx = 0; idx < block_size; ++idx) {
+      if (pattern < 8) {
+        // Patterns 0-3 test at positive maximum values, 4-7 at negative
+        // maximum values; bit 0 selects coeff and bit 1 selects dqcoeff.
+        const int extreme = pattern < 4 ? max_val : -max_val;
+        coeff[idx] = pattern % 2 ? extreme : 0;
+        dqcoeff[idx] = (pattern >> 1) % 2 ? extreme : 0;
+      } else {
+        // Pattern 8: random values of a common sign below 1 << 14.
+        const bool positive = rnd(2) != 0;
+        const int coeff_mag = rnd(1 << 14);
+        const int dqcoeff_mag = rnd(1 << 14);
+        coeff[idx] = positive ? coeff_mag : -coeff_mag;
+        dqcoeff[idx] = positive ? dqcoeff_mag : -dqcoeff_mag;
+      }
+    }
+
+    // Reference result first, then the function under test.
+    int64_t ref_ssz;
+    const int64_t ref_ret =
+        ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz, bit_depth_);
+    int64_t ssz;
+    int64_t ret;
+    ASM_REGISTER_STATE_CHECK(
+        ret = error_block_op_(coeff, dqcoeff, block_size, &ssz, bit_depth_));
+
+    // Both the returned error and the ssz output have to agree.
+    const bool mismatch = (ref_ret != ret) || (ref_ssz != ssz);
+    if (mismatch) {
+      // Remember only the first failing iteration for the message below.
+      if (total_mismatches == 0) first_failure = iter;
+      ++total_mismatches;
+    }
+  }
+  EXPECT_EQ(0, total_mismatches)
+      << "Error: Error Block Test, C output doesn't match optimized output. "
+      << "First failed at test case " << first_failure;
+}
+
+#if HAVE_SSE2 || HAVE_AVX
+using std::tr1::make_tuple;
+
+INSTANTIATE_TEST_CASE_P(
+    SSE2, ErrorBlockTest,
+    ::testing::Values(make_tuple(&av1_highbd_block_error_sse2,
+                                 &av1_highbd_block_error_c, AOM_BITS_10),
+                      make_tuple(&av1_highbd_block_error_sse2,
+                                 &av1_highbd_block_error_c, AOM_BITS_12),
+                      make_tuple(&av1_highbd_block_error_sse2,
+                                 &av1_highbd_block_error_c, AOM_BITS_8)));
+#endif  // HAVE_SSE2 || HAVE_AVX
+
+#endif  // CONFIG_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/error_resilience_test.cc b/third_party/aom/test/error_resilience_test.cc
new file mode 100644
index 000000000..63f10012f
--- /dev/null
+++ b/third_party/aom/test/error_resilience_test.cc
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+const int kMaxErrorFrames = 12;
+const int kMaxDroppableFrames = 12;
+
+class ErrorResilienceTestLarge
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWithParam<libaom_test::TestMode> {
+ protected:
+  ErrorResilienceTestLarge()
+      : EncoderTest(GET_PARAM(0)), psnr_(0.0), nframes_(0), mismatch_psnr_(0.0),
+        mismatch_nframes_(0), encoding_mode_(GET_PARAM(1)) {
+    Reset();
+  }
+
+  virtual ~ErrorResilienceTestLarge() {}
+
+  void Reset() {
+    error_nframes_ = 0;
+    droppable_nframes_ = 0;
+    pattern_switch_ = 0;
+  }
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(encoding_mode_);
+  }
+
+  virtual void BeginPassHook(unsigned int /*pass*/) {
+    psnr_ = 0.0;
+    nframes_ = 0;
+    mismatch_psnr_ = 0.0;
+    mismatch_nframes_ = 0;
+  }
+
+  virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
+    psnr_ += pkt->data.psnr.psnr[0];
+    nframes_++;
+  }
+
+  virtual void PreEncodeFrameHook(libaom_test::VideoSource *video) {
+    frame_flags_ &=
+        ~(AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF);
+    if (droppable_nframes_ > 0 &&
+        (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
+      for (unsigned int i = 0; i < droppable_nframes_; ++i) {
+        if (droppable_frames_[i] == video->frame()) {
+          std::cout << "Encoding droppable frame: " << droppable_frames_[i]
+                    << "\n";
+          frame_flags_ |= (AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF |
+                           AOM_EFLAG_NO_UPD_ARF);
+          return;
+        }
+      }
+    }
+  }
+
+  double GetAveragePsnr() const {
+    if (nframes_) return psnr_ / nframes_;
+    return 0.0;
+  }
+
+  double GetAverageMismatchPsnr() const {
+    if (mismatch_nframes_) return mismatch_psnr_ / mismatch_nframes_;
+    return 0.0;
+  }
+
+  virtual bool DoDecode() const {
+    if (error_nframes_ > 0 &&
+        (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
+      for (unsigned int i = 0; i < error_nframes_; ++i) {
+        if (error_frames_[i] == nframes_ - 1) {
+          std::cout << "             Skipping decoding frame: "
+                    << error_frames_[i] << "\n";
+          return 0;
+        }
+      }
+    }
+    return 1;
+  }
+
+  virtual void MismatchHook(const aom_image_t *img1, const aom_image_t *img2) {
+    double mismatch_psnr = compute_psnr(img1, img2);
+    mismatch_psnr_ += mismatch_psnr;
+    ++mismatch_nframes_;
+    // std::cout << "Mismatch frame psnr: " << mismatch_psnr << "\n";
+    ::libaom_test::EncoderTest::MismatchHook(img1, img2);
+  }
+
+  void SetErrorFrames(int num, unsigned int *list) {
+    if (num > kMaxErrorFrames)
+      num = kMaxErrorFrames;
+    else if (num < 0)
+      num = 0;
+    error_nframes_ = num;
+    for (unsigned int i = 0; i < error_nframes_; ++i)
+      error_frames_[i] = list[i];
+  }
+
+  void SetDroppableFrames(int num, unsigned int *list) {
+    if (num > kMaxDroppableFrames)
+      num = kMaxDroppableFrames;
+    else if (num < 0)
+      num = 0;
+    droppable_nframes_ = num;
+    for (unsigned int i = 0; i < droppable_nframes_; ++i)
+      droppable_frames_[i] = list[i];
+  }
+
+  unsigned int GetMismatchFrames() { return mismatch_nframes_; }
+
+  void SetPatternSwitch(int frame_switch) { pattern_switch_ = frame_switch; }
+
+ private:
+  double psnr_;
+  unsigned int nframes_;
+  unsigned int error_nframes_;
+  unsigned int droppable_nframes_;
+  unsigned int pattern_switch_;
+  double mismatch_psnr_;
+  unsigned int mismatch_nframes_;
+  unsigned int error_frames_[kMaxErrorFrames];
+  unsigned int droppable_frames_[kMaxDroppableFrames];
+  libaom_test::TestMode encoding_mode_;
+};
+
+TEST_P(ErrorResilienceTestLarge, OnVersusOff) {
+  const aom_rational timebase = { 33333333, 1000000000 };
+  cfg_.g_timebase = timebase;
+  cfg_.rc_target_bitrate = 2000;
+  cfg_.g_lag_in_frames = 10;
+
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     timebase.den, timebase.num, 0, 30);
+
+  // Error resilient mode OFF.
+  cfg_.g_error_resilient = 0;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  const double psnr_resilience_off = GetAveragePsnr();
+  EXPECT_GT(psnr_resilience_off, 25.0);
+
+  // Error resilient mode ON.
+  cfg_.g_error_resilient = 1;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  const double psnr_resilience_on = GetAveragePsnr();
+  EXPECT_GT(psnr_resilience_on, 25.0);
+
+  // Test that turning on error resilient mode hurts by 10% at most.
+  if (psnr_resilience_off > 0.0) {
+    const double psnr_ratio = psnr_resilience_on / psnr_resilience_off;
+    EXPECT_GE(psnr_ratio, 0.9);
+    EXPECT_LE(psnr_ratio, 1.1);
+  }
+}
+
+// Check for successful decoding and no encoder/decoder mismatch
+// if we lose (i.e., drop before decoding) a set of droppable
+// frames (i.e., frames that don't update any reference buffers).
+// Check both isolated and consecutive loss.
+TEST_P(ErrorResilienceTestLarge, DropFramesWithoutRecovery) {
+  const aom_rational timebase = { 33333333, 1000000000 };
+  cfg_.g_timebase = timebase;
+  cfg_.rc_target_bitrate = 500;
+  // FIXME(debargha): Fix this to work for any lag.
+  // Currently this test only works for lag = 0
+  cfg_.g_lag_in_frames = 0;
+
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     timebase.den, timebase.num, 0, 40);
+
+  // Error resilient mode ON.
+  cfg_.g_error_resilient = 1;
+  cfg_.kf_mode = AOM_KF_DISABLED;
+
+  // Set an arbitrary set of error frames same as droppable frames.
+  // In addition to isolated loss/drop, add a long consecutive series
+  // (of size 9) of dropped frames.
+  unsigned int num_droppable_frames = 11;
+  unsigned int droppable_frame_list[] = { 5,  16, 22, 23, 24, 25,
+                                          26, 27, 28, 29, 30 };
+  SetDroppableFrames(num_droppable_frames, droppable_frame_list);
+  SetErrorFrames(num_droppable_frames, droppable_frame_list);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  // Test that no mismatches have been found
+  std::cout << "             Mismatch frames: " << GetMismatchFrames() << "\n";
+  EXPECT_EQ(GetMismatchFrames(), (unsigned int)0);
+
+  // Reset the previously set error/droppable frames.
+  Reset();
+
+#if 0
+  // TODO(jkoleszar): This test is disabled for the time being as too
+  // sensitive. It's not clear how to set a reasonable threshold for
+  // this behavior.
+
+  // Now set an arbitrary set of error frames that are non-droppable
+  unsigned int num_error_frames = 3;
+  unsigned int error_frame_list[] = {3, 10, 20};
+  SetErrorFrames(num_error_frames, error_frame_list);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  // Test that dropping an arbitrary set of inter frames does not hurt too much
+  // Note the Average Mismatch PSNR is the average of the PSNR between
+  // decoded frame and encoder's version of the same frame for all frames
+  // with mismatch.
+  const double psnr_resilience_mismatch = GetAverageMismatchPsnr();
+  std::cout << "             Mismatch PSNR: "
+            << psnr_resilience_mismatch << "\n";
+  EXPECT_GT(psnr_resilience_mismatch, 20.0);
+#endif
+}
+
+AV1_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);
+}  // namespace
diff --git a/third_party/aom/test/ethread_test.cc b/third_party/aom/test/ethread_test.cc
new file mode 100644
index 000000000..5b519f8fe
--- /dev/null
+++ b/third_party/aom/test/ethread_test.cc
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <string>
+#include <vector>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+
+namespace {
+class AVxEncoderThreadTest
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int> {
+ protected:
+  AVxEncoderThreadTest()
+      : EncoderTest(GET_PARAM(0)), encoder_initialized_(false),
+        encoding_mode_(GET_PARAM(1)), set_cpu_used_(GET_PARAM(2)) {
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
+    cfg.w = 1280;
+    cfg.h = 720;
+    decoder_ = codec_->CreateDecoder(cfg, 0);
+#if CONFIG_AV1 && CONFIG_EXT_TILE
+    if (decoder_->IsAV1()) {
+      decoder_->Control(AV1_SET_DECODE_TILE_ROW, -1);
+      decoder_->Control(AV1_SET_DECODE_TILE_COL, -1);
+    }
+#endif
+
+    size_enc_.clear();
+    md5_dec_.clear();
+    md5_enc_.clear();
+  }
+  virtual ~AVxEncoderThreadTest() { delete decoder_; }
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(encoding_mode_);
+
+    if (encoding_mode_ != ::libaom_test::kRealTime) {
+      cfg_.g_lag_in_frames = 3;
+      cfg_.rc_end_usage = AOM_VBR;
+      cfg_.rc_2pass_vbr_minsection_pct = 5;
+      cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+    } else {
+      cfg_.g_lag_in_frames = 0;
+      cfg_.rc_end_usage = AOM_CBR;
+      cfg_.g_error_resilient = 1;
+    }
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_min_quantizer = 0;
+  }
+
+  virtual void BeginPassHook(unsigned int /*pass*/) {
+    encoder_initialized_ = false;
+  }
+
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource * /*video*/,
+                                  ::libaom_test::Encoder *encoder) {
+    if (!encoder_initialized_) {
+#if CONFIG_AV1 && CONFIG_EXT_TILE
+      encoder->Control(AV1E_SET_TILE_COLUMNS, 1);
+      if (codec_ == &libaom_test::kAV1) {
+        // TODO(geza): Start using multiple tile rows when the multi-threaded
+        // encoder can handle them
+        encoder->Control(AV1E_SET_TILE_ROWS, 32);
+      } else {
+        encoder->Control(AV1E_SET_TILE_ROWS, 0);
+      }
+#else
+      // Encode 4 tile columns.
+      encoder->Control(AV1E_SET_TILE_COLUMNS, 2);
+      encoder->Control(AV1E_SET_TILE_ROWS, 0);
+#endif  // CONFIG_AV1 && CONFIG_EXT_TILE
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+      encoder->Control(AV1E_SET_TILE_LOOPFILTER, 0);
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
+      encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
+      if (encoding_mode_ != ::libaom_test::kRealTime) {
+        encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+        encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+        encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+        encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 0);
+      } else {
+        encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0);
+        encoder->Control(AV1E_SET_AQ_MODE, 3);
+      }
+      encoder_initialized_ = true;
+    }
+  }
+
+  virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
+    size_enc_.push_back(pkt->data.frame.sz);
+
+    ::libaom_test::MD5 md5_enc;
+    md5_enc.Add(reinterpret_cast<uint8_t *>(pkt->data.frame.buf),
+                pkt->data.frame.sz);
+    md5_enc_.push_back(md5_enc.Get());
+
+    const aom_codec_err_t res = decoder_->DecodeFrame(
+        reinterpret_cast<uint8_t *>(pkt->data.frame.buf), pkt->data.frame.sz);
+    if (res != AOM_CODEC_OK) {
+      abort_ = true;
+      ASSERT_EQ(AOM_CODEC_OK, res);
+    }
+    const aom_image_t *img = decoder_->GetDxData().Next();
+
+    if (img) {
+      ::libaom_test::MD5 md5_res;
+      md5_res.Add(img);
+      md5_dec_.push_back(md5_res.Get());
+    }
+  }
+
+  void DoTest() {
+    ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 15, 18);
+    cfg_.rc_target_bitrate = 1000;
+
+    // Encode using single thread.
+    cfg_.g_threads = 1;
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    std::vector<size_t> single_thr_size_enc;
+    std::vector<std::string> single_thr_md5_enc;
+    std::vector<std::string> single_thr_md5_dec;
+    single_thr_size_enc = size_enc_;
+    single_thr_md5_enc = md5_enc_;
+    single_thr_md5_dec = md5_dec_;
+    size_enc_.clear();
+    md5_enc_.clear();
+    md5_dec_.clear();
+
+    // Encode using multiple threads.
+    cfg_.g_threads = 4;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    std::vector<size_t> multi_thr_size_enc;
+    std::vector<std::string> multi_thr_md5_enc;
+    std::vector<std::string> multi_thr_md5_dec;
+    multi_thr_size_enc = size_enc_;
+    multi_thr_md5_enc = md5_enc_;
+    multi_thr_md5_dec = md5_dec_;
+    size_enc_.clear();
+    md5_enc_.clear();
+    md5_dec_.clear();
+
+    // Check that the vectors are equal.
+    ASSERT_EQ(single_thr_size_enc, multi_thr_size_enc);
+    ASSERT_EQ(single_thr_md5_enc, multi_thr_md5_enc);
+    ASSERT_EQ(single_thr_md5_dec, multi_thr_md5_dec);
+  }
+
+  bool encoder_initialized_;
+  ::libaom_test::TestMode encoding_mode_;
+  int set_cpu_used_;
+  ::libaom_test::Decoder *decoder_;
+  std::vector<size_t> size_enc_;
+  std::vector<std::string> md5_enc_;
+  std::vector<std::string> md5_dec_;
+};
+
+TEST_P(AVxEncoderThreadTest, EncoderResultTest) { DoTest(); }
+
+class AVxEncoderThreadTestLarge : public AVxEncoderThreadTest {};
+
+TEST_P(AVxEncoderThreadTestLarge, EncoderResultTest) { DoTest(); }
+
+// For AV1, only test speed 0 to 3.
+AV1_INSTANTIATE_TEST_CASE(AVxEncoderThreadTest,
+                          ::testing::Values(::libaom_test::kTwoPassGood,
+                                            ::libaom_test::kOnePassGood),
+                          ::testing::Range(2, 4));
+
+AV1_INSTANTIATE_TEST_CASE(AVxEncoderThreadTestLarge,
+                          ::testing::Values(::libaom_test::kTwoPassGood,
+                                            ::libaom_test::kOnePassGood),
+                          ::testing::Range(0, 2));
+}  // namespace
diff --git a/third_party/aom/test/examples.sh b/third_party/aom/test/examples.sh
new file mode 100755
index 000000000..d3152be7d
--- /dev/null
+++ b/third_party/aom/test/examples.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file runs all of the tests for the libaom examples.
+##
+. $(dirname $0)/tools_common.sh
+
+example_tests=$(ls $(dirname $0)/*.sh)
+
+# List of script names to exclude.
+exclude_list="examples tools_common decode_to_md5"
+
+# Filter out the scripts in $exclude_list.
+for word in ${exclude_list}; do
+  example_tests=$(filter_strings "${example_tests}" "${word}" exclude)
+done
+
+for test in ${example_tests}; do
+  # Source each test script so that exporting variables can be avoided.
+  AOM_TEST_NAME="$(basename ${test%.*})"
+  . "${test}"
+done
diff --git a/third_party/aom/test/fdct4x4_test.cc b/third_party/aom/test/fdct4x4_test.cc
new file mode 100644
index 000000000..ed265e84f
--- /dev/null
+++ b/third_party/aom/test/fdct4x4_test.cc
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./av1_rtcd.h"
+#include "./aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/transform_test_base.h"
+#include "test/util.h"
+#include "av1/common/entropy.h"
+#include "aom/aom_codec.h"
+#include "aom/aom_integer.h"
+#include "aom_ports/mem.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
+typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                        int tx_type);  // IdctFunc plus a tx_type argument
+using libaom_test::FhtFunc;
+
+typedef std::tr1::tuple<FdctFunc, IdctFunc, int, aom_bit_depth_t, int>
+    Dct4x4Param;  // (fwd, inv, tx_type, bit depth, num_coeffs) per SetUp()
+typedef std::tr1::tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht4x4Param;
+
+void fdct4x4_ref(const int16_t *in, tran_low_t *out, int stride,
+                 int /*tx_type: accepted but ignored*/) {
+  aom_fdct4x4_c(in, out, stride);  // reference is the plain C forward DCT
+}
+
+void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
+  av1_fht4x4_c(in, out, stride, tx_type);  // all arguments forwarded as-is
+}
+
+void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
+                 int /*tx_type: accepted but ignored*/) {
+  av1_fwht4x4_c(in, out, stride);  // reference is the C forward WHT
+}
+
+#if CONFIG_HIGHBITDEPTH  // adapters that pin the trailing argument to 10 or 12
+void idct4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
+  aom_highbd_idct4x4_16_add_c(in, out, stride, 10);  // presumably bit depth
+}
+
+void idct4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
+  aom_highbd_idct4x4_16_add_c(in, out, stride, 12);
+}
+
+void iht4x4_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  av1_highbd_iht4x4_16_add_c(in, out, stride, tx_type, 10);
+}
+
+void iht4x4_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  av1_highbd_iht4x4_16_add_c(in, out, stride, tx_type, 12);
+}
+
+void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
+  aom_highbd_iwht4x4_16_add_c(in, out, stride, 10);
+}
+
+void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
+  aom_highbd_iwht4x4_16_add_c(in, out, stride, 12);
+}
+
+#if HAVE_SSE2  // same adapters, routed to the SSE2 inverse DCT
+void idct4x4_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
+  aom_highbd_idct4x4_16_add_sse2(in, out, stride, 10);
+}
+
+void idct4x4_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
+  aom_highbd_idct4x4_16_add_sse2(in, out, stride, 12);
+}
+#endif  // HAVE_SSE2
+#endif  // CONFIG_HIGHBITDEPTH
+
+class Trans4x4DCT : public libaom_test::TransformTestBase,
+                    public ::testing::TestWithParam<Dct4x4Param> {
+ public:
+  virtual ~Trans4x4DCT() {}
+
+  virtual void SetUp() {  // unpacks the Dct4x4Param tuple field by field
+    fwd_txfm_ = GET_PARAM(0);  // forward transform under test
+    inv_txfm_ = GET_PARAM(1);  // inverse transform under test
+    tx_type_ = GET_PARAM(2);
+    pitch_ = 4;
+    height_ = 4;
+    fwd_txfm_ref = fdct4x4_ref;  // wraps aom_fdct4x4_c
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;  // low bit_depth_ bits set
+    num_coeffs_ = GET_PARAM(4);
+  }
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride);  // DCT entry points take no tx_type
+  }
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride);
+  }
+
+  FdctFunc fwd_txfm_;
+  IdctFunc inv_txfm_;
+};
+
+TEST_P(Trans4x4DCT, AccuracyCheck) { RunAccuracyCheck(0, 0.00001); }
+
+TEST_P(Trans4x4DCT, CoeffCheck) { RunCoeffCheck(); }
+
+TEST_P(Trans4x4DCT, MemCheck) { RunMemCheck(); }
+
+TEST_P(Trans4x4DCT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
+
+class Trans4x4HT : public libaom_test::TransformTestBase,
+                   public ::testing::TestWithParam<Ht4x4Param> {
+ public:
+  virtual ~Trans4x4HT() {}
+
+  virtual void SetUp() {  // unpacks the Ht4x4Param tuple field by field
+    fwd_txfm_ = GET_PARAM(0);  // forward transform under test
+    inv_txfm_ = GET_PARAM(1);  // inverse transform under test
+    tx_type_ = GET_PARAM(2);   // forwarded to both transforms below
+    pitch_ = 4;
+    height_ = 4;
+    fwd_txfm_ref = fht4x4_ref;  // wraps av1_fht4x4_c
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;  // low bit_depth_ bits set
+    num_coeffs_ = GET_PARAM(4);
+  }
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride, tx_type_);
+  }
+
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride, tx_type_);
+  }
+
+  FhtFunc fwd_txfm_;
+  IhtFunc inv_txfm_;
+};
+
+TEST_P(Trans4x4HT, AccuracyCheck) { RunAccuracyCheck(1, 0.005); }
+
+TEST_P(Trans4x4HT, CoeffCheck) { RunCoeffCheck(); }
+
+TEST_P(Trans4x4HT, MemCheck) { RunMemCheck(); }
+
+TEST_P(Trans4x4HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
+
+class Trans4x4WHT : public libaom_test::TransformTestBase,
+                    public ::testing::TestWithParam<Dct4x4Param> {
+ public:
+  virtual ~Trans4x4WHT() {}
+
+  virtual void SetUp() {  // same tuple layout as Trans4x4DCT (Dct4x4Param)
+    fwd_txfm_ = GET_PARAM(0);  // forward transform under test
+    inv_txfm_ = GET_PARAM(1);  // inverse transform under test
+    tx_type_ = GET_PARAM(2);
+    pitch_ = 4;
+    height_ = 4;
+    fwd_txfm_ref = fwht4x4_ref;  // wraps av1_fwht4x4_c
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;  // low bit_depth_ bits set
+    num_coeffs_ = GET_PARAM(4);
+  }
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride);  // WHT entry points take no tx_type
+  }
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride);
+  }
+
+  FdctFunc fwd_txfm_;
+  IdctFunc inv_txfm_;
+};
+
+TEST_P(Trans4x4WHT, AccuracyCheck) { RunAccuracyCheck(0, 0.00001); }
+
+TEST_P(Trans4x4WHT, CoeffCheck) { RunCoeffCheck(); }
+
+TEST_P(Trans4x4WHT, MemCheck) { RunMemCheck(); }
+
+TEST_P(Trans4x4WHT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
+using std::tr1::make_tuple;
+
+#if CONFIG_HIGHBITDEPTH  // 10-bit, 12-bit and 8-bit C DCT pairs
+INSTANTIATE_TEST_CASE_P(
+    C, Trans4x4DCT,
+    ::testing::Values(
+        make_tuple(&aom_highbd_fdct4x4_c, &idct4x4_10, 0, AOM_BITS_10, 16),
+        make_tuple(&aom_highbd_fdct4x4_c, &idct4x4_12, 0, AOM_BITS_12, 16),
+        make_tuple(&aom_fdct4x4_c, &aom_idct4x4_16_add_c, 0, AOM_BITS_8, 16)));
+#else  // !CONFIG_HIGHBITDEPTH: only the 8-bit C pair
+INSTANTIATE_TEST_CASE_P(C, Trans4x4DCT,
+                        ::testing::Values(make_tuple(&aom_fdct4x4_c,
+                                                     &aom_idct4x4_16_add_c, 0,
+                                                     AOM_BITS_8, 16)));
+#endif  // CONFIG_HIGHBITDEPTH
+
+#if CONFIG_HIGHBITDEPTH  // tx_type 0..3 at each of 10, 12 and 8 bits
+INSTANTIATE_TEST_CASE_P(
+    C, Trans4x4HT,
+    ::testing::Values(
+        make_tuple(&av1_highbd_fht4x4_c, &iht4x4_10, 0, AOM_BITS_10, 16),
+        make_tuple(&av1_highbd_fht4x4_c, &iht4x4_10, 1, AOM_BITS_10, 16),
+        make_tuple(&av1_highbd_fht4x4_c, &iht4x4_10, 2, AOM_BITS_10, 16),
+        make_tuple(&av1_highbd_fht4x4_c, &iht4x4_10, 3, AOM_BITS_10, 16),
+        make_tuple(&av1_highbd_fht4x4_c, &iht4x4_12, 0, AOM_BITS_12, 16),
+        make_tuple(&av1_highbd_fht4x4_c, &iht4x4_12, 1, AOM_BITS_12, 16),
+        make_tuple(&av1_highbd_fht4x4_c, &iht4x4_12, 2, AOM_BITS_12, 16),
+        make_tuple(&av1_highbd_fht4x4_c, &iht4x4_12, 3, AOM_BITS_12, 16),
+        make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 0, AOM_BITS_8, 16),
+        make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 1, AOM_BITS_8, 16),
+        make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 2, AOM_BITS_8, 16),
+        make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 3, AOM_BITS_8, 16)));
+#else  // !CONFIG_HIGHBITDEPTH: tx_type 0..3 at 8 bits only
+INSTANTIATE_TEST_CASE_P(
+    C, Trans4x4HT,
+    ::testing::Values(
+        make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 0, AOM_BITS_8, 16),
+        make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 1, AOM_BITS_8, 16),
+        make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 2, AOM_BITS_8, 16),
+        make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 3, AOM_BITS_8, 16)));
+#endif  // CONFIG_HIGHBITDEPTH
+
+#if CONFIG_HIGHBITDEPTH  // 10-bit, 12-bit and 8-bit C WHT pairs
+INSTANTIATE_TEST_CASE_P(
+    C, Trans4x4WHT,
+    ::testing::Values(
+        make_tuple(&av1_highbd_fwht4x4_c, &iwht4x4_10, 0, AOM_BITS_10, 16),
+        make_tuple(&av1_highbd_fwht4x4_c, &iwht4x4_12, 0, AOM_BITS_12, 16),
+        make_tuple(&av1_fwht4x4_c, &aom_iwht4x4_16_add_c, 0, AOM_BITS_8, 16)));
+#else  // !CONFIG_HIGHBITDEPTH: only the 8-bit C pair
+INSTANTIATE_TEST_CASE_P(C, Trans4x4WHT,
+                        ::testing::Values(make_tuple(&av1_fwht4x4_c,
+                                                     &aom_iwht4x4_16_add_c, 0,
+                                                     AOM_BITS_8, 16)));
+#endif  // CONFIG_HIGHBITDEPTH
+
+#if HAVE_NEON_ASM && !CONFIG_HIGHBITDEPTH  // C forward, NEON inverse
+INSTANTIATE_TEST_CASE_P(NEON, Trans4x4DCT,
+                        ::testing::Values(make_tuple(&aom_fdct4x4_c,
+                                                     &aom_idct4x4_16_add_neon,
+                                                     0, AOM_BITS_8, 16)));
+#endif  // HAVE_NEON_ASM && !CONFIG_HIGHBITDEPTH
+
+#if HAVE_NEON && !CONFIG_HIGHBITDEPTH  // C forward, NEON inverse, tx_type 0..3
+INSTANTIATE_TEST_CASE_P(
+    NEON, Trans4x4HT,
+    ::testing::Values(
+        make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_neon, 0, AOM_BITS_8, 16),
+        make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_neon, 1, AOM_BITS_8, 16),
+        make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_neon, 2, AOM_BITS_8, 16),
+        make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_neon, 3, AOM_BITS_8, 16)));
+#endif  // HAVE_NEON && !CONFIG_HIGHBITDEPTH
+
+#if HAVE_SSE2  // first tuple still pairs the C forward with the C inverse
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Trans4x4WHT,
+    ::testing::Values(make_tuple(&av1_fwht4x4_c, &aom_iwht4x4_16_add_c, 0,
+                                 AOM_BITS_8, 16),
+                      make_tuple(&av1_fwht4x4_c, &aom_iwht4x4_16_add_sse2, 0,
+                                 AOM_BITS_8, 16)));
+#endif  // HAVE_SSE2
+
+#if HAVE_SSE2 && !CONFIG_HIGHBITDEPTH  // SSE2 forward with SSE2 inverse
+INSTANTIATE_TEST_CASE_P(SSE2, Trans4x4DCT,
+                        ::testing::Values(make_tuple(&aom_fdct4x4_sse2,
+                                                     &aom_idct4x4_16_add_sse2,
+                                                     0, AOM_BITS_8, 16)));
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Trans4x4HT,
+    ::testing::Values(make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 0,
+                                 AOM_BITS_8, 16),
+                      make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 1,
+                                 AOM_BITS_8, 16),
+                      make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 2,
+                                 AOM_BITS_8, 16),
+                      make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 3,
+                                 AOM_BITS_8, 16)));
+#endif  // HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
+
+#if HAVE_SSE2 && CONFIG_HIGHBITDEPTH  // 8-bit rows: SSE2 forward, C inverse
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Trans4x4DCT,
+    ::testing::Values(
+        make_tuple(&aom_highbd_fdct4x4_c, &idct4x4_10_sse2, 0, AOM_BITS_10, 16),
+        make_tuple(&aom_highbd_fdct4x4_sse2, &idct4x4_10_sse2, 0, AOM_BITS_10,
+                   16),
+        make_tuple(&aom_highbd_fdct4x4_c, &idct4x4_12_sse2, 0, AOM_BITS_12, 16),
+        make_tuple(&aom_highbd_fdct4x4_sse2, &idct4x4_12_sse2, 0, AOM_BITS_12,
+                   16),
+        make_tuple(&aom_fdct4x4_sse2, &aom_idct4x4_16_add_c, 0, AOM_BITS_8,
+                   16)));
+
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Trans4x4HT,
+    ::testing::Values(
+        make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_c, 0, AOM_BITS_8, 16),
+        make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_c, 1, AOM_BITS_8, 16),
+        make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_c, 2, AOM_BITS_8, 16),
+        make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_c, 3, AOM_BITS_8, 16)));
+#endif  // HAVE_SSE2 && CONFIG_HIGHBITDEPTH
+
+#if HAVE_MSA && !CONFIG_HIGHBITDEPTH  // MSA forward with MSA inverse
+INSTANTIATE_TEST_CASE_P(MSA, Trans4x4DCT,
+                        ::testing::Values(make_tuple(&aom_fdct4x4_msa,
+                                                     &aom_idct4x4_16_add_msa, 0,
+                                                     AOM_BITS_8, 16)));
+#if !CONFIG_EXT_TX  // MSA hybrid transforms are only tested without EXT_TX
+INSTANTIATE_TEST_CASE_P(
+    MSA, Trans4x4HT,
+    ::testing::Values(
+        make_tuple(&av1_fht4x4_msa, &av1_iht4x4_16_add_msa, 0, AOM_BITS_8, 16),
+        make_tuple(&av1_fht4x4_msa, &av1_iht4x4_16_add_msa, 1, AOM_BITS_8, 16),
+        make_tuple(&av1_fht4x4_msa, &av1_iht4x4_16_add_msa, 2, AOM_BITS_8, 16),
+        make_tuple(&av1_fht4x4_msa, &av1_iht4x4_16_add_msa, 3, AOM_BITS_8,
+                   16)));
+#endif  // !CONFIG_EXT_TX
+#endif  // HAVE_MSA && !CONFIG_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/fdct8x8_test.cc b/third_party/aom/test/fdct8x8_test.cc
new file mode 100644
index 000000000..0e86c70aa
--- /dev/null
+++ b/third_party/aom/test/fdct8x8_test.cc
@@ -0,0 +1,699 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./av1_rtcd.h"
+#include "./aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "av1/common/entropy.h"
+#include "av1/common/scan.h"
+#include "aom/aom_codec.h"
+#include "aom/aom_integer.h"
+#include "aom_ports/mem.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+
+const int kNumCoeffs = 64;  // 8 * 8 coefficients per block
+const double kPi = 3.141592653589793238462643383279502884;
+
+const int kSignBiasMaxDiff255 = 1500;  // sign-count limit, full-range inputs
+const int kSignBiasMaxDiff15 = 10000;  // sign-count limit, range >> 4 inputs
+
+typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
+typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
+typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
+                        int tx_type);  // FdctFunc plus a tx_type argument
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                        int tx_type);  // IdctFunc plus a tx_type argument
+
+typedef std::tr1::tuple<FdctFunc, IdctFunc, int, aom_bit_depth_t> Dct8x8Param;
+typedef std::tr1::tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t> Ht8x8Param;
+typedef std::tr1::tuple<IdctFunc, IdctFunc, int, aom_bit_depth_t> Idct8x8Param;
+
+void reference_8x8_dct_1d(const double in[8], double out[8]) {
+  const double kInvSqrt2 = 0.707106781186547524400844362104;  // 1 / sqrt(2)
+  for (int k = 0; k < 8; k++) {  // k: output (frequency) index
+    out[k] = 0.0;
+    for (int n = 0; n < 8; n++)  // n: input (sample) index
+      out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 16.0);  // DCT-II term
+    if (k == 0) out[k] = out[k] * kInvSqrt2;  // only the DC output is rescaled
+  }
+}
+
+void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
+                          double output[kNumCoeffs]) {
+  // First transform columns: column i of input becomes column i of output.
+  for (int i = 0; i < 8; ++i) {
+    double temp_in[8], temp_out[8];
+    for (int j = 0; j < 8; ++j) temp_in[j] = input[j * 8 + i];
+    reference_8x8_dct_1d(temp_in, temp_out);
+    for (int j = 0; j < 8; ++j) output[j * 8 + i] = temp_out[j];
+  }
+  // Then transform rows, in place on the column-transformed output.
+  for (int i = 0; i < 8; ++i) {
+    double temp_in[8], temp_out[8];
+    for (int j = 0; j < 8; ++j) temp_in[j] = output[j + i * 8];
+    reference_8x8_dct_1d(temp_in, temp_out);
+    // Scale every output by 2 (the rationale is not stated in this file).
+    for (int j = 0; j < 8; ++j) output[j + i * 8] = temp_out[j] * 2;
+  }
+}
+
+void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride,
+                 int /*tx_type: accepted but ignored*/) {
+  aom_fdct8x8_c(in, out, stride);  // reference is the plain C forward DCT
+}
+
+void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
+  av1_fht8x8_c(in, out, stride, tx_type);  // all arguments forwarded as-is
+}
+
+#if CONFIG_HIGHBITDEPTH  // adapters that pin the trailing argument to 10 or 12
+void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  av1_highbd_iht8x8_64_add_c(in, out, stride, tx_type, 10);
+}
+
+void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  av1_highbd_iht8x8_64_add_c(in, out, stride, tx_type, 12);
+}
+
+#endif  // CONFIG_HIGHBITDEPTH
+
+class FwdTrans8x8TestBase {
+ public:
+  virtual ~FwdTrans8x8TestBase() {}
+
+ protected:
+  virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
+  virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;
+
+  void RunSignBiasCheck() {  // counts output signs over many random blocks
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
+    DECLARE_ALIGNED(16, tran_low_t, test_output_block[64]);
+    int count_sign_block[64][2];  // per coefficient: [0] negatives, [1] positives
+    const int count_test_block = 100000;
+
+    memset(count_sign_block, 0, sizeof(count_sign_block));
+
+    for (int i = 0; i < count_test_block; ++i) {
+      // Initialize a test block with input range [-mask_, mask_].
+      for (int j = 0; j < 64; ++j)
+        test_input_block[j] = ((rnd.Rand16() >> (16 - bit_depth_)) & mask_) -
+                              ((rnd.Rand16() >> (16 - bit_depth_)) & mask_);
+      ASM_REGISTER_STATE_CHECK(
+          RunFwdTxfm(test_input_block, test_output_block, pitch_));
+
+      for (int j = 0; j < 64; ++j) {  // zero outputs are counted in neither bin
+        if (test_output_block[j] < 0)
+          ++count_sign_block[j][0];
+        else if (test_output_block[j] > 0)
+          ++count_sign_block[j][1];
+      }
+    }
+
+    for (int j = 0; j < 64; ++j) {
+      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
+      const int max_diff = kSignBiasMaxDiff255;
+      EXPECT_LT(diff, max_diff << (bit_depth_ - 8))  // limit doubles per extra bit
+          << "Error: 8x8 FDCT/FHT has a sign bias > "
+          << 1. * max_diff / count_test_block * 100 << "%"
+          << " for input range [-255, 255] at index " << j
+          << " count0: " << count_sign_block[j][0]
+          << " count1: " << count_sign_block[j][1] << " diff: " << diff;
+    }
+
+    memset(count_sign_block, 0, sizeof(count_sign_block));  // second pass
+
+    for (int i = 0; i < count_test_block; ++i) {
+      // Initialize a test block with input range [-mask_ / 16, mask_ / 16].
+      for (int j = 0; j < 64; ++j)
+        test_input_block[j] =
+            ((rnd.Rand16() & mask_) >> 4) - ((rnd.Rand16() & mask_) >> 4);
+      ASM_REGISTER_STATE_CHECK(
+          RunFwdTxfm(test_input_block, test_output_block, pitch_));
+
+      for (int j = 0; j < 64; ++j) {
+        if (test_output_block[j] < 0)
+          ++count_sign_block[j][0];
+        else if (test_output_block[j] > 0)
+          ++count_sign_block[j][1];
+      }
+    }
+
+    for (int j = 0; j < 64; ++j) {
+      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
+      const int max_diff = kSignBiasMaxDiff15;
+      EXPECT_LT(diff, max_diff << (bit_depth_ - 8))  // limit doubles per extra bit
+          << "Error: 8x8 FDCT/FHT has a sign bias > "
+          << 1. * max_diff / count_test_block * 100 << "%"
+          << " for input range [-15, 15] at index " << j
+          << " count0: " << count_sign_block[j][0]
+          << " count1: " << count_sign_block[j][1] << " diff: " << diff;
+    }
+  }
+
+  void RunRoundTripErrorCheck() {  // forward, coarsen, inverse, compare to src
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    int max_error = 0;    // largest squared pixel error seen so far
+    int total_error = 0;  // sum of squared pixel errors over all blocks
+    const int count_test_block = 100000;
+    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
+    DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
+    DECLARE_ALIGNED(16, uint8_t, dst[64]);
+    DECLARE_ALIGNED(16, uint8_t, src[64]);
+#if CONFIG_HIGHBITDEPTH
+    DECLARE_ALIGNED(16, uint16_t, dst16[64]);
+    DECLARE_ALIGNED(16, uint16_t, src16[64]);
+#endif
+
+    for (int i = 0; i < count_test_block; ++i) {
+      // Initialize a test block with input range [-mask_, mask_].
+      for (int j = 0; j < 64; ++j) {
+        if (bit_depth_ == AOM_BITS_8) {
+          src[j] = rnd.Rand8();
+          dst[j] = rnd.Rand8();
+          test_input_block[j] = src[j] - dst[j];  // residual fed to the forward
+#if CONFIG_HIGHBITDEPTH
+        } else {
+          src16[j] = rnd.Rand16() & mask_;
+          dst16[j] = rnd.Rand16() & mask_;
+          test_input_block[j] = src16[j] - dst16[j];
+#endif
+        }
+      }
+
+      ASM_REGISTER_STATE_CHECK(
+          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
+      for (int j = 0; j < 64; ++j) {  // snap each coefficient to a multiple of 4
+        if (test_temp_block[j] > 0) {
+          test_temp_block[j] += 2;  // +2 then /4*4: round to nearest, positive
+          test_temp_block[j] /= 4;
+          test_temp_block[j] *= 4;
+        } else {
+          test_temp_block[j] -= 2;  // -2 then /4*4: round to nearest, negative
+          test_temp_block[j] /= 4;
+          test_temp_block[j] *= 4;
+        }
+      }
+      if (bit_depth_ == AOM_BITS_8) {  // dst is passed to the inverse transform
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
+#if CONFIG_HIGHBITDEPTH
+      } else {
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
+#endif
+      }
+
+      for (int j = 0; j < 64; ++j) {
+#if CONFIG_HIGHBITDEPTH
+        const int diff =
+            bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+#else
+        const int diff = dst[j] - src[j];
+#endif
+        const int error = diff * diff;
+        if (max_error < error) max_error = error;
+        total_error += error;
+      }
+    }
+
+    EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error)  // 1 at 8-bit
+        << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
+        << " roundtrip error > 1";
+
+    EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
+        << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
+        << "error > 1/5 per block";
+  }
+
+  void RunExtremalCheck() {  // round trip and reference match on extreme input
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    int max_error = 0;          // largest squared pixel error seen so far
+    int total_error = 0;        // running sum of squared pixel errors
+    int total_coeff_error = 0;  // running sum of |coeff - reference coeff|
+    const int count_test_block = 100000;
+    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
+    DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
+    DECLARE_ALIGNED(16, tran_low_t, ref_temp_block[64]);
+    DECLARE_ALIGNED(16, uint8_t, dst[64]);
+    DECLARE_ALIGNED(16, uint8_t, src[64]);
+#if CONFIG_HIGHBITDEPTH
+    DECLARE_ALIGNED(16, uint16_t, dst16[64]);
+    DECLARE_ALIGNED(16, uint16_t, src16[64]);
+#endif
+
+    for (int i = 0; i < count_test_block; ++i) {
+      // Fill the block with extremes only: each input is -max, 0 or +max.
+      for (int j = 0; j < 64; ++j) {
+        if (bit_depth_ == AOM_BITS_8) {
+          if (i == 0) {  // block 0: every input is +255
+            src[j] = 255;
+            dst[j] = 0;
+          } else if (i == 1) {  // block 1: every input is -255
+            src[j] = 0;
+            dst[j] = 255;
+          } else {  // later blocks: random mix of the extremes
+            src[j] = rnd.Rand8() % 2 ? 255 : 0;
+            dst[j] = rnd.Rand8() % 2 ? 255 : 0;
+          }
+          test_input_block[j] = src[j] - dst[j];
+#if CONFIG_HIGHBITDEPTH
+        } else {  // same three cases with mask_ as the maximum
+          if (i == 0) {
+            src16[j] = mask_;
+            dst16[j] = 0;
+          } else if (i == 1) {
+            src16[j] = 0;
+            dst16[j] = mask_;
+          } else {
+            src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
+            dst16[j] = rnd.Rand8() % 2 ? mask_ : 0;
+          }
+          test_input_block[j] = src16[j] - dst16[j];
+#endif
+        }
+      }
+
+      ASM_REGISTER_STATE_CHECK(
+          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
+      ASM_REGISTER_STATE_CHECK(
+          fwd_txfm_ref(test_input_block, ref_temp_block, pitch_, tx_type_));
+      if (bit_depth_ == AOM_BITS_8) {  // dst is passed to the inverse transform
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
+#if CONFIG_HIGHBITDEPTH
+      } else {
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
+#endif
+      }
+
+      for (int j = 0; j < 64; ++j) {
+#if CONFIG_HIGHBITDEPTH
+        const int diff =
+            bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+#else
+        const int diff = dst[j] - src[j];
+#endif
+        const int error = diff * diff;
+        if (max_error < error) max_error = error;
+        total_error += error;
+
+        const int coeff_diff = test_temp_block[j] - ref_temp_block[j];
+        total_coeff_error += abs(coeff_diff);
+      }
+
+      EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error)  // checked every block
+          << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has "
+          << "an individual roundtrip error > 1";
+
+      EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
+          << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
+          << " roundtrip error > 1/5 per block";
+
+      EXPECT_EQ(0, total_coeff_error)  // tested and reference must agree exactly
+          << "Error: Extremal 8x8 FDCT/FHT has "
+          << "overflow issues in the intermediate steps > 1";
+    }
+  }
+
+  void RunInvAccuracyCheck() {  // inverse transform vs. a double-precision DCT
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 1000;
+    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
+#if CONFIG_HIGHBITDEPTH
+    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+#endif
+
+    for (int i = 0; i < count_test_block; ++i) {
+      double out_r[kNumCoeffs];
+
+      // Every input is +255 or -255 (+/-mask_ when not 8-bit).
+      for (int j = 0; j < kNumCoeffs; ++j) {
+        if (bit_depth_ == AOM_BITS_8) {
+          src[j] = rnd.Rand8() % 2 ? 255 : 0;
+          dst[j] = src[j] > 0 ? 0 : 255;  // always the opposite extreme of src
+          in[j] = src[j] - dst[j];
+#if CONFIG_HIGHBITDEPTH
+        } else {
+          src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
+          dst16[j] = src16[j] > 0 ? 0 : mask_;
+          in[j] = src16[j] - dst16[j];
+#endif
+        }
+      }
+
+      reference_8x8_dct_2d(in, out_r);  // double-precision forward reference
+      for (int j = 0; j < kNumCoeffs; ++j)
+        coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
+
+      if (bit_depth_ == AOM_BITS_8) {  // dst is passed to the inverse transform
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
+#if CONFIG_HIGHBITDEPTH
+      } else {
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
+#endif
+      }
+
+      for (int j = 0; j < kNumCoeffs; ++j) {
+#if CONFIG_HIGHBITDEPTH
+        const int diff =
+            bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+#else
+        const int diff = dst[j] - src[j];
+#endif
+        const uint32_t error = diff * diff;
+        EXPECT_GE(1u << 2 * (bit_depth_ - 8), error)  // 1 at 8-bit
+            << "Error: 8x8 IDCT has error " << error << " at index " << j;
+      }
+    }
+  }
+
+  void RunFwdAccuracyCheck() {  // forward transform vs. a double-precision DCT
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 1000;
+    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, coeff_r[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
+
+    for (int i = 0; i < count_test_block; ++i) {
+      double out_r[kNumCoeffs];
+
+      // Every input sample is exactly mask_ or -mask_.
+      for (int j = 0; j < kNumCoeffs; ++j)
+        in[j] = rnd.Rand8() % 2 == 0 ? mask_ : -mask_;
+
+      RunFwdTxfm(in, coeff, pitch_);
+      reference_8x8_dct_2d(in, out_r);  // double-precision forward reference
+      for (int j = 0; j < kNumCoeffs; ++j)
+        coeff_r[j] = static_cast<tran_low_t>(round(out_r[j]));
+
+      for (int j = 0; j < kNumCoeffs; ++j) {
+        const int32_t diff = coeff[j] - coeff_r[j];
+        const uint32_t error = diff * diff;
+        EXPECT_GE(9u << 2 * (bit_depth_ - 8), error)  // |diff| <= 3 at 8-bit
+            << "Error: 8x8 DCT has error " << error << " at index " << j;
+      }
+    }
+  }
+
+  void CompareInvReference(IdctFunc ref_txfm, int thresh) {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 10000;
+    const int eob = 12;  // only the first 12 scan positions are non-zero
+    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
+#if CONFIG_HIGHBITDEPTH
+    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
+#endif
+    const int16_t *scan = av1_default_scan_orders[TX_8X8].scan;
+
+    for (int i = 0; i < count_test_block; ++i) {
+      for (int j = 0; j < kNumCoeffs; ++j) {
+        if (j < eob) {
+          // Random value below the threshold; negated when i is odd.
+          coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
+        } else {
+          coeff[scan[j]] = 0;
+        }
+        if (bit_depth_ == AOM_BITS_8) {  // both outputs start from zero
+          dst[j] = 0;
+          ref[j] = 0;
+#if CONFIG_HIGHBITDEPTH
+        } else {
+          dst16[j] = 0;
+          ref16[j] = 0;
+#endif
+        }
+      }
+      if (bit_depth_ == AOM_BITS_8) {  // same coeff through both transforms
+        ref_txfm(coeff, ref, pitch_);
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
+#if CONFIG_HIGHBITDEPTH
+      } else {
+        ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
+#endif
+      }
+
+      for (int j = 0; j < kNumCoeffs; ++j) {
+#if CONFIG_HIGHBITDEPTH
+        const int diff =
+            bit_depth_ == AOM_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
+#else
+        const int diff = dst[j] - ref[j];
+#endif
+        const uint32_t error = diff * diff;  // outputs must match exactly
+        EXPECT_EQ(0u, error) << "Error: 8x8 IDCT has error " << error
+                             << " at index " << j;
+      }
+    }
+  }
+  int pitch_;               // passed as the stride argument of the transforms
+  int tx_type_;             // passed to fwd_txfm_ref in RunExtremalCheck
+  FhtFunc fwd_txfm_ref;     // reference forward transform (RunExtremalCheck)
+  aom_bit_depth_t bit_depth_;  // selects the 8-bit or 16-bit pixel buffers
+  int mask_;                // subclasses set it to (1 << bit_depth_) - 1
+};
+
+class FwdTrans8x8DCT : public FwdTrans8x8TestBase,
+                       public ::testing::TestWithParam<Dct8x8Param> {
+ public:
+  virtual ~FwdTrans8x8DCT() {}
+
+  virtual void SetUp() {  // unpacks the Dct8x8Param tuple field by field
+    fwd_txfm_ = GET_PARAM(0);  // forward transform under test
+    inv_txfm_ = GET_PARAM(1);  // inverse transform under test
+    tx_type_ = GET_PARAM(2);
+    pitch_ = 8;
+    fwd_txfm_ref = fdct8x8_ref;  // wraps aom_fdct8x8_c
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;  // low bit_depth_ bits set
+  }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride);  // DCT entry points take no tx_type
+  }
+  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride);
+  }
+
+  FdctFunc fwd_txfm_;
+  IdctFunc inv_txfm_;
+};
+
+TEST_P(FwdTrans8x8DCT, SignBiasCheck) { RunSignBiasCheck(); }
+
+TEST_P(FwdTrans8x8DCT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }
+
+TEST_P(FwdTrans8x8DCT, ExtremalCheck) { RunExtremalCheck(); }
+
+TEST_P(FwdTrans8x8DCT, FwdAccuracyCheck) { RunFwdAccuracyCheck(); }
+
+TEST_P(FwdTrans8x8DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); }
+
+class FwdTrans8x8HT : public FwdTrans8x8TestBase,
+                      public ::testing::TestWithParam<Ht8x8Param> {
+ public:
+  virtual ~FwdTrans8x8HT() {}
+
+  virtual void SetUp() {  // unpacks the Ht8x8Param tuple field by field
+    fwd_txfm_ = GET_PARAM(0);  // forward transform under test
+    inv_txfm_ = GET_PARAM(1);  // inverse transform under test
+    tx_type_ = GET_PARAM(2);   // forwarded to both transforms below
+    pitch_ = 8;
+    fwd_txfm_ref = fht8x8_ref;  // wraps av1_fht8x8_c
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;  // low bit_depth_ bits set
+  }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride, tx_type_);
+  }
+  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride, tx_type_);
+  }
+
+  FhtFunc fwd_txfm_;
+  IhtFunc inv_txfm_;
+};
+
+TEST_P(FwdTrans8x8HT, SignBiasCheck) { RunSignBiasCheck(); }
+
+TEST_P(FwdTrans8x8HT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }
+
+TEST_P(FwdTrans8x8HT, ExtremalCheck) { RunExtremalCheck(); }
+
+class InvTrans8x8DCT : public FwdTrans8x8TestBase,
+                       public ::testing::TestWithParam<Idct8x8Param> {
+ public:
+  virtual ~InvTrans8x8DCT() {}
+
+  virtual void SetUp() {
+    ref_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    thresh_ = GET_PARAM(2);
+    pitch_ = 8;
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;
+  }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride);
+  }
+  void RunFwdTxfm(int16_t * /*out*/, tran_low_t * /*dst*/, int /*stride*/) {}
+
+  IdctFunc ref_txfm_;  // Tuple element 0; handed to CompareInvReference().
+  IdctFunc inv_txfm_;  // Tuple element 1; invoked by RunInvTxfm().
+  int thresh_;         // Tuple element 2; handed to CompareInvReference().
+};
+
+TEST_P(InvTrans8x8DCT, CompareReference) {
+  CompareInvReference(ref_txfm_, thresh_);
+}
+
+using std::tr1::make_tuple;
+
+#if CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(C, FwdTrans8x8DCT,
+                        ::testing::Values(make_tuple(&aom_fdct8x8_c,
+                                                     &aom_idct8x8_64_add_c, 0,
+                                                     AOM_BITS_8)));
+#else
+INSTANTIATE_TEST_CASE_P(C, FwdTrans8x8DCT,
+                        ::testing::Values(make_tuple(&aom_fdct8x8_c,
+                                                     &aom_idct8x8_64_add_c, 0,
+                                                     AOM_BITS_8)));
+#endif  // CONFIG_HIGHBITDEPTH
+
+#if CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    C, FwdTrans8x8HT,
+    ::testing::Values(
+        make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 0, AOM_BITS_8),
+        make_tuple(&av1_highbd_fht8x8_c, &iht8x8_10, 0, AOM_BITS_10),
+        make_tuple(&av1_highbd_fht8x8_c, &iht8x8_10, 1, AOM_BITS_10),
+        make_tuple(&av1_highbd_fht8x8_c, &iht8x8_10, 2, AOM_BITS_10),
+        make_tuple(&av1_highbd_fht8x8_c, &iht8x8_10, 3, AOM_BITS_10),
+        make_tuple(&av1_highbd_fht8x8_c, &iht8x8_12, 0, AOM_BITS_12),
+        make_tuple(&av1_highbd_fht8x8_c, &iht8x8_12, 1, AOM_BITS_12),
+        make_tuple(&av1_highbd_fht8x8_c, &iht8x8_12, 2, AOM_BITS_12),
+        make_tuple(&av1_highbd_fht8x8_c, &iht8x8_12, 3, AOM_BITS_12),
+        make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 1, AOM_BITS_8),
+        make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 2, AOM_BITS_8),
+        make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 3, AOM_BITS_8)));
+#else
+INSTANTIATE_TEST_CASE_P(
+    C, FwdTrans8x8HT,
+    ::testing::Values(
+        make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 0, AOM_BITS_8),
+        make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 1, AOM_BITS_8),
+        make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 2, AOM_BITS_8),
+        make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 3, AOM_BITS_8)));
+#endif  // CONFIG_HIGHBITDEPTH
+
+#if HAVE_NEON_ASM && !CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(NEON, FwdTrans8x8DCT,
+                        ::testing::Values(make_tuple(&aom_fdct8x8_neon,
+                                                     &aom_idct8x8_64_add_neon,
+                                                     0, AOM_BITS_8)));
+#endif  // HAVE_NEON_ASM && !CONFIG_HIGHBITDEPTH
+
+#if HAVE_NEON && !CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    NEON, FwdTrans8x8HT,
+    ::testing::Values(
+        make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_neon, 0, AOM_BITS_8),
+        make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_neon, 1, AOM_BITS_8),
+        make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_neon, 2, AOM_BITS_8),
+        make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_neon, 3, AOM_BITS_8)));
+#endif  // HAVE_NEON && !CONFIG_HIGHBITDEPTH
+
+#if HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(SSE2, FwdTrans8x8DCT,
+                        ::testing::Values(make_tuple(&aom_fdct8x8_sse2,
+                                                     &aom_idct8x8_64_add_sse2,
+                                                     0, AOM_BITS_8)));
+INSTANTIATE_TEST_CASE_P(
+    SSE2, FwdTrans8x8HT,
+    ::testing::Values(
+        make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 0, AOM_BITS_8),
+        make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 1, AOM_BITS_8),
+        make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 2, AOM_BITS_8),
+        make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 3, AOM_BITS_8)));
+#endif  // HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
+
+#if HAVE_SSE2 && CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(SSE2, FwdTrans8x8DCT,
+                        ::testing::Values(make_tuple(&aom_fdct8x8_sse2,
+                                                     &aom_idct8x8_64_add_c, 0,
+                                                     AOM_BITS_8)));
+
+INSTANTIATE_TEST_CASE_P(
+    SSE2, FwdTrans8x8HT,
+    ::testing::Values(
+        make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c, 0, AOM_BITS_8),
+        make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c, 1, AOM_BITS_8),
+        make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c, 2, AOM_BITS_8),
+        make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c, 3, AOM_BITS_8)));
+
+#endif  // HAVE_SSE2 && CONFIG_HIGHBITDEPTH
+
+#if HAVE_SSSE3 && ARCH_X86_64
+INSTANTIATE_TEST_CASE_P(SSSE3, FwdTrans8x8DCT,
+                        ::testing::Values(make_tuple(&aom_fdct8x8_ssse3,
+                                                     &aom_idct8x8_64_add_ssse3,
+                                                     0, AOM_BITS_8)));
+#endif
+
+#if HAVE_MSA && !CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(MSA, FwdTrans8x8DCT,
+                        ::testing::Values(make_tuple(&aom_fdct8x8_msa,
+                                                     &aom_idct8x8_64_add_msa, 0,
+                                                     AOM_BITS_8)));
+#if !CONFIG_EXT_TX
+INSTANTIATE_TEST_CASE_P(
+    MSA, FwdTrans8x8HT,
+    ::testing::Values(
+        make_tuple(&av1_fht8x8_msa, &av1_iht8x8_64_add_msa, 0, AOM_BITS_8),
+        make_tuple(&av1_fht8x8_msa, &av1_iht8x8_64_add_msa, 1, AOM_BITS_8),
+        make_tuple(&av1_fht8x8_msa, &av1_iht8x8_64_add_msa, 2, AOM_BITS_8),
+        make_tuple(&av1_fht8x8_msa, &av1_iht8x8_64_add_msa, 3, AOM_BITS_8)));
+#endif  // !CONFIG_EXT_TX
+#endif  // HAVE_MSA && !CONFIG_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/fht32x32_test.cc b/third_party/aom/test/fht32x32_test.cc
new file mode 100644
index 000000000..56ac597c0
--- /dev/null
+++ b/third_party/aom/test/fht32x32_test.cc
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./av1_rtcd.h"
+#include "./aom_dsp_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/transform_test_base.h"
+#include "test/util.h"
+#include "aom_ports/mem.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                        int tx_type);
+using std::tr1::tuple;
+using libaom_test::FhtFunc;
+typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht32x32Param;
+
+void fht32x32_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
+  av1_fht32x32_c(in, out, stride, tx_type);
+}
+
+#if CONFIG_HIGHBITDEPTH
+typedef void (*IHbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                           int tx_type, int bd);
+typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
+                          int tx_type, int bd);
+
+// Target optimized function, tx_type, bit depth
+typedef tuple<HbdHtFunc, int, int> HighbdHt32x32Param;
+
+void highbd_fht32x32_ref(const int16_t *in, int32_t *out, int stride,
+                         int tx_type, int bd) {
+  av1_fwd_txfm2d_32x32_c(in, out, stride, tx_type, bd);
+}
+#endif  // CONFIG_HIGHBITDEPTH
+
+#if HAVE_SSE2 || HAVE_AVX2
+// Placeholder for the IhtFunc slot of Ht32x32Param; it deliberately does
+// nothing.
+// Both the SSE2 and the AVX2 parameter tables in this file take its address,
+// so it has to be defined when either instruction set is enabled: guarding it
+// with HAVE_AVX2 alone fails to compile in an SSE2-only build.
+void dummy_inv_txfm(const tran_low_t * /*in*/, uint8_t * /*out*/,
+                    int /*stride*/, int /*tx_type*/) {}
+#endif  // HAVE_SSE2 || HAVE_AVX2
+
+class AV1Trans32x32HT : public libaom_test::TransformTestBase,
+                        public ::testing::TestWithParam<Ht32x32Param> {
+ public:
+  virtual ~AV1Trans32x32HT() {}
+
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    tx_type_ = GET_PARAM(2);
+    pitch_ = 32;
+    height_ = 32;
+    fwd_txfm_ref = fht32x32_ref;
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;
+    num_coeffs_ = GET_PARAM(4);
+  }
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride, tx_type_);
+  }
+
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride, tx_type_);
+  }
+
+  FhtFunc fwd_txfm_;
+  IhtFunc inv_txfm_;
+};
+
+TEST_P(AV1Trans32x32HT, CoeffCheck) { RunCoeffCheck(); }
+TEST_P(AV1Trans32x32HT, MemCheck) { RunMemCheck(); }
+
+#if CONFIG_HIGHBITDEPTH
+class AV1HighbdTrans32x32HT
+    : public ::testing::TestWithParam<HighbdHt32x32Param> {
+ public:
+  virtual ~AV1HighbdTrans32x32HT() {}
+
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    fwd_txfm_ref_ = highbd_fht32x32_ref;
+    tx_type_ = GET_PARAM(1);
+    bit_depth_ = GET_PARAM(2);
+    mask_ = (1 << bit_depth_) - 1;
+    num_coeffs_ = 1024;
+
+    input_ = reinterpret_cast<int16_t *>(
+        aom_memalign(32, sizeof(int16_t) * num_coeffs_));
+    output_ = reinterpret_cast<int32_t *>(
+        aom_memalign(32, sizeof(int32_t) * num_coeffs_));
+    output_ref_ = reinterpret_cast<int32_t *>(
+        aom_memalign(32, sizeof(int32_t) * num_coeffs_));
+  }
+
+  virtual void TearDown() {
+    aom_free(input_);
+    aom_free(output_);
+    aom_free(output_ref_);
+    libaom_test::ClearSystemState();
+  }
+
+ protected:
+  void RunBitexactCheck();
+
+ private:
+  HbdHtFunc fwd_txfm_;
+  HbdHtFunc fwd_txfm_ref_;
+  int tx_type_;
+  int bit_depth_;
+  int mask_;
+  int num_coeffs_;
+  int16_t *input_;
+  int32_t *output_;
+  int32_t *output_ref_;
+};
+
+// Runs 1000 random blocks through the reference and the tested forward
+// transform and expects every output coefficient to be identical.
+void AV1HighbdTrans32x32HT::RunBitexactCheck() {
+  const int kStride = 32;
+  const int kNumTests = 1000;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+  for (int block = 0; block < kNumTests; ++block) {
+    for (int k = 0; k < num_coeffs_; ++k) {
+      input_[k] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+    }
+    fwd_txfm_ref_(input_, output_ref_, kStride, tx_type_, bit_depth_);
+    ASM_REGISTER_STATE_CHECK(
+        fwd_txfm_(input_, output_, kStride, tx_type_, bit_depth_));
+    for (int k = 0; k < num_coeffs_; ++k) {
+      EXPECT_EQ(output_ref_[k], output_[k])
+          << "Not bit-exact result at index: " << k << " at test block: "
+          << block;
+    }
+  }
+}
+
+TEST_P(AV1HighbdTrans32x32HT, HighbdCoeffCheck) { RunBitexactCheck(); }
+#endif  // CONFIG_HIGHBITDEPTH
+
+using std::tr1::make_tuple;
+
+#if HAVE_SSE2
+const Ht32x32Param kArrayHt32x32Param_sse2[] = {
+  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 0, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 1, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 2, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 3, AOM_BITS_8, 1024),
+#if CONFIG_EXT_TX
+  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 4, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 5, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 6, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 7, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 8, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 9, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 10, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 11, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 12, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 13, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 14, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 15, AOM_BITS_8, 1024)
+#endif  // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans32x32HT,
+                        ::testing::ValuesIn(kArrayHt32x32Param_sse2));
+#endif  // HAVE_SSE2
+
+#if HAVE_AVX2
+const Ht32x32Param kArrayHt32x32Param_avx2[] = {
+  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 0, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 1, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 2, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 3, AOM_BITS_8, 1024),
+#if CONFIG_EXT_TX
+  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 4, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 5, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 6, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 7, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 8, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 9, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 10, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 11, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 12, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 13, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 14, AOM_BITS_8, 1024),
+  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 15, AOM_BITS_8, 1024)
+#endif  // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(AVX2, AV1Trans32x32HT,
+                        ::testing::ValuesIn(kArrayHt32x32Param_avx2));
+#endif  // HAVE_AVX2
+}  // namespace
diff --git a/third_party/aom/test/filterintra_predictors_test.cc b/third_party/aom/test/filterintra_predictors_test.cc
new file mode 100644
index 000000000..5c6b56d14
--- /dev/null
+++ b/third_party/aom/test/filterintra_predictors_test.cc
@@ -0,0 +1,331 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./av1_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "av1/common/enums.h"
+
+namespace {
+
+using std::tr1::tuple;
+using libaom_test::ACMRandom;
+
+typedef void (*Predictor)(uint8_t *dst, ptrdiff_t stride, int bs,
+                          const uint8_t *above, const uint8_t *left);
+
+// Note:
+//  Test parameter list:
+//  Reference predictor, optimized predictor, prediction mode, block size
+//
+typedef tuple<Predictor, Predictor, int> PredFuncMode;
+typedef tuple<PredFuncMode, int> PredParams;
+
+#if CONFIG_HIGHBITDEPTH
+typedef void (*HbdPredictor)(uint16_t *dst, ptrdiff_t stride, int bs,
+                             const uint16_t *above, const uint16_t *left,
+                             int bd);
+
+// Note:
+//  Test parameter list:
+//  Reference predictor, optimized predictor, prediction mode, block size,
+//  bit depth
+//
+typedef tuple<HbdPredictor, HbdPredictor, int> HbdPredFuncMode;
+typedef tuple<HbdPredFuncMode, int, int> HbdPredParams;
+#endif
+
+const int MaxBlkSize = 32;
+
+// By default, disable speed test
+#define PREDICTORS_SPEED_TEST (0)
+
+#if PREDICTORS_SPEED_TEST
+const int MaxTestNum = 100000;
+#else
+const int MaxTestNum = 100;
+#endif
+
+// Checks that an optimized 8-bit filter-intra predictor is bit-exact with its
+// reference implementation for one (predictor pair, block size) parameter.
+class AV1FilterIntraPredOptimzTest
+    : public ::testing::TestWithParam<PredParams> {
+ public:
+  virtual ~AV1FilterIntraPredOptimzTest() {}
+  virtual void SetUp() {
+    PredFuncMode funcMode = GET_PARAM(0);
+    predFuncRef_ = std::tr1::get<0>(funcMode);
+    predFunc_ = std::tr1::get<1>(funcMode);
+    mode_ = std::tr1::get<2>(funcMode);
+    blockSize_ = GET_PARAM(1);
+
+    alloc_ = new uint8_t[3 * MaxBlkSize + 2];
+    predRef_ = new uint8_t[MaxBlkSize * MaxBlkSize];
+    pred_ = new uint8_t[MaxBlkSize * MaxBlkSize];
+  }
+
+  virtual void TearDown() {
+    delete[] alloc_;
+    delete[] predRef_;
+    delete[] pred_;
+    libaom_test::ClearSystemState();
+  }
+
+ protected:
+  void RunTest() const {
+    // One generator for the whole run: re-seeding it before every iteration
+    // would hand both predictors the same edge pixels MaxTestNum times.
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int stride = blockSize_;
+    const uint8_t *left = alloc_;
+    const uint8_t *above = alloc_ + MaxBlkSize + 1;
+    for (int tstIndex = 0; tstIndex < MaxTestNum; ++tstIndex) {
+      PrepareBuffer(&rnd);
+      predFuncRef_(predRef_, stride, blockSize_, &above[1], left);
+      ASM_REGISTER_STATE_CHECK(
+          predFunc_(pred_, stride, blockSize_, &above[1], left));
+      DiffPred(tstIndex);
+    }
+  }
+
+  // Speed run of the reference predictor on a single fixed edge buffer.
+  void RunSpeedTestC() const {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int stride = blockSize_;
+    const uint8_t *left = alloc_;
+    const uint8_t *above = alloc_ + MaxBlkSize + 1;
+    PrepareBuffer(&rnd);
+    for (int tstIndex = 0; tstIndex < MaxTestNum; ++tstIndex) {
+      predFuncRef_(predRef_, stride, blockSize_, &above[1], left);
+    }
+  }
+
+  // Speed run of the optimized predictor; it writes to its own pred_ buffer.
+  void RunSpeedTestSSE() const {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int stride = blockSize_;
+    const uint8_t *left = alloc_;
+    const uint8_t *above = alloc_ + MaxBlkSize + 1;
+    PrepareBuffer(&rnd);
+    for (int tstIndex = 0; tstIndex < MaxTestNum; ++tstIndex) {
+      predFunc_(pred_, stride, blockSize_, &above[1], left);
+    }
+  }
+
+ private:
+  // Fills the whole edge buffer with the next 8-bit values drawn from |rnd|.
+  void PrepareBuffer(ACMRandom *rnd) const {
+    for (int i = 0; i < 3 * MaxBlkSize + 2; ++i) {
+      alloc_[i] = rnd->Rand8();
+    }
+  }
+
+  // Expects pred_ and predRef_ to agree on blockSize_ * blockSize_ samples.
+  void DiffPred(int testNum) const {
+    for (int i = 0; i < blockSize_ * blockSize_; ++i) {
+      EXPECT_EQ(predRef_[i], pred_[i]) << "Error at position: " << i << " "
+                                       << "Block size: " << blockSize_ << " "
+                                       << "Test number: " << testNum;
+    }
+  }
+
+  Predictor predFunc_;     // Optimized predictor (tuple element 1).
+  Predictor predFuncRef_;  // Reference predictor (tuple element 0).
+  int mode_;
+  int blockSize_;
+  uint8_t *alloc_;    // Edge pixels: left at [0], above at [MaxBlkSize + 1].
+  uint8_t *pred_;     // Output of predFunc_.
+  uint8_t *predRef_;  // Output of predFuncRef_.
+};
+
+#if CONFIG_HIGHBITDEPTH
+// High bit depth counterpart: checks that an optimized filter-intra predictor
+// is bit-exact with its reference for one (predictors, block size, bit depth).
+class AV1HbdFilterIntraPredOptimzTest
+    : public ::testing::TestWithParam<HbdPredParams> {
+ public:
+  virtual ~AV1HbdFilterIntraPredOptimzTest() {}
+  virtual void SetUp() {
+    HbdPredFuncMode funcMode = GET_PARAM(0);
+    predFuncRef_ = std::tr1::get<0>(funcMode);
+    predFunc_ = std::tr1::get<1>(funcMode);
+    mode_ = std::tr1::get<2>(funcMode);
+    blockSize_ = GET_PARAM(1);
+    bd_ = GET_PARAM(2);
+
+    alloc_ = new uint16_t[3 * MaxBlkSize + 2];
+    predRef_ = new uint16_t[MaxBlkSize * MaxBlkSize];
+    pred_ = new uint16_t[MaxBlkSize * MaxBlkSize];
+  }
+
+  virtual void TearDown() {
+    delete[] alloc_;
+    delete[] predRef_;
+    delete[] pred_;
+    libaom_test::ClearSystemState();
+  }
+
+ protected:
+  void RunTest() const {
+    // One generator for the whole run: re-seeding it before every iteration
+    // would hand both predictors the same edge pixels MaxTestNum times.
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int stride = blockSize_;
+    const uint16_t *left = alloc_;
+    const uint16_t *above = alloc_ + MaxBlkSize + 1;
+    for (int tstIndex = 0; tstIndex < MaxTestNum; ++tstIndex) {
+      PrepareBuffer(&rnd);
+      predFuncRef_(predRef_, stride, blockSize_, &above[1], left, bd_);
+      ASM_REGISTER_STATE_CHECK(
+          predFunc_(pred_, stride, blockSize_, &above[1], left, bd_));
+      DiffPred(tstIndex);
+    }
+  }
+
+  // Speed run of the reference predictor on a single fixed edge buffer.
+  void RunSpeedTestC() const {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int stride = blockSize_;
+    const uint16_t *left = alloc_;
+    const uint16_t *above = alloc_ + MaxBlkSize + 1;
+    PrepareBuffer(&rnd);
+    for (int tstIndex = 0; tstIndex < MaxTestNum; ++tstIndex) {
+      predFuncRef_(predRef_, stride, blockSize_, &above[1], left, bd_);
+    }
+  }
+
+  // Speed run of the optimized predictor; it writes to its own pred_ buffer.
+  void RunSpeedTestSSE() const {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int stride = blockSize_;
+    const uint16_t *left = alloc_;
+    const uint16_t *above = alloc_ + MaxBlkSize + 1;
+    PrepareBuffer(&rnd);
+    for (int tstIndex = 0; tstIndex < MaxTestNum; ++tstIndex) {
+      predFunc_(pred_, stride, blockSize_, &above[1], left, bd_);
+    }
+  }
+
+ private:
+  // Fills the whole edge buffer with the next bd_-bit values drawn from |rnd|.
+  void PrepareBuffer(ACMRandom *rnd) const {
+    for (int i = 0; i < 3 * MaxBlkSize + 2; ++i) {
+      alloc_[i] = rnd->Rand16() & ((1 << bd_) - 1);
+    }
+  }
+
+  // Expects pred_ and predRef_ to agree on blockSize_ * blockSize_ samples.
+  void DiffPred(int testNum) const {
+    for (int i = 0; i < blockSize_ * blockSize_; ++i) {
+      EXPECT_EQ(predRef_[i], pred_[i]) << "Error at position: " << i << " "
+                                       << "Block size: " << blockSize_ << " "
+                                       << "Bit depth: " << bd_ << " "
+                                       << "Test number: " << testNum;
+    }
+  }
+
+  HbdPredictor predFunc_;     // Optimized predictor (tuple element 1).
+  HbdPredictor predFuncRef_;  // Reference predictor (tuple element 0).
+  int mode_;
+  int blockSize_;
+  int bd_;
+  uint16_t *alloc_;    // Edge pixels: left at [0], above at [MaxBlkSize + 1].
+  uint16_t *pred_;     // Output of predFunc_.
+  uint16_t *predRef_;  // Output of predFuncRef_.
+};
+#endif  // CONFIG_HIGHBITDEPTH
+
+TEST_P(AV1FilterIntraPredOptimzTest, BitExactCheck) { RunTest(); }
+
+#if PREDICTORS_SPEED_TEST
+TEST_P(AV1FilterIntraPredOptimzTest, SpeedCheckC) { RunSpeedTestC(); }
+
+TEST_P(AV1FilterIntraPredOptimzTest, SpeedCheckSSE) { RunSpeedTestSSE(); }
+#endif
+
+#if CONFIG_HIGHBITDEPTH
+TEST_P(AV1HbdFilterIntraPredOptimzTest, BitExactCheck) { RunTest(); }
+
+#if PREDICTORS_SPEED_TEST
+TEST_P(AV1HbdFilterIntraPredOptimzTest, SpeedCheckC) { RunSpeedTestC(); }
+
+TEST_P(AV1HbdFilterIntraPredOptimzTest, SpeedCheckSSE) { RunSpeedTestSSE(); }
+#endif  // PREDICTORS_SPEED_TEST
+#endif  // CONFIG_HIGHBITDEPTH
+
+using std::tr1::make_tuple;
+
+const PredFuncMode kPredFuncMdArray[] = {
+  make_tuple(av1_dc_filter_predictor_c, av1_dc_filter_predictor_sse4_1,
+             DC_PRED),
+  make_tuple(av1_v_filter_predictor_c, av1_v_filter_predictor_sse4_1, V_PRED),
+  make_tuple(av1_h_filter_predictor_c, av1_h_filter_predictor_sse4_1, H_PRED),
+  make_tuple(av1_d45_filter_predictor_c, av1_d45_filter_predictor_sse4_1,
+             D45_PRED),
+  make_tuple(av1_d135_filter_predictor_c, av1_d135_filter_predictor_sse4_1,
+             D135_PRED),
+  make_tuple(av1_d117_filter_predictor_c, av1_d117_filter_predictor_sse4_1,
+             D117_PRED),
+  make_tuple(av1_d153_filter_predictor_c, av1_d153_filter_predictor_sse4_1,
+             D153_PRED),
+  make_tuple(av1_d207_filter_predictor_c, av1_d207_filter_predictor_sse4_1,
+             D207_PRED),
+  make_tuple(av1_d63_filter_predictor_c, av1_d63_filter_predictor_sse4_1,
+             D63_PRED),
+  make_tuple(av1_tm_filter_predictor_c, av1_tm_filter_predictor_sse4_1,
+             TM_PRED),
+};
+
+const int kBlkSize[] = { 4, 8, 16, 32 };
+
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, AV1FilterIntraPredOptimzTest,
+    ::testing::Combine(::testing::ValuesIn(kPredFuncMdArray),
+                       ::testing::ValuesIn(kBlkSize)));
+
+#if CONFIG_HIGHBITDEPTH
+const HbdPredFuncMode kHbdPredFuncMdArray[] = {
+  make_tuple(av1_highbd_dc_filter_predictor_c,
+             av1_highbd_dc_filter_predictor_sse4_1, DC_PRED),
+  make_tuple(av1_highbd_v_filter_predictor_c,
+             av1_highbd_v_filter_predictor_sse4_1, V_PRED),
+  make_tuple(av1_highbd_h_filter_predictor_c,
+             av1_highbd_h_filter_predictor_sse4_1, H_PRED),
+  make_tuple(av1_highbd_d45_filter_predictor_c,
+             av1_highbd_d45_filter_predictor_sse4_1, D45_PRED),
+  make_tuple(av1_highbd_d135_filter_predictor_c,
+             av1_highbd_d135_filter_predictor_sse4_1, D135_PRED),
+  make_tuple(av1_highbd_d117_filter_predictor_c,
+             av1_highbd_d117_filter_predictor_sse4_1, D117_PRED),
+  make_tuple(av1_highbd_d153_filter_predictor_c,
+             av1_highbd_d153_filter_predictor_sse4_1, D153_PRED),
+  make_tuple(av1_highbd_d207_filter_predictor_c,
+             av1_highbd_d207_filter_predictor_sse4_1, D207_PRED),
+  make_tuple(av1_highbd_d63_filter_predictor_c,
+             av1_highbd_d63_filter_predictor_sse4_1, D63_PRED),
+  make_tuple(av1_highbd_tm_filter_predictor_c,
+             av1_highbd_tm_filter_predictor_sse4_1, TM_PRED),
+};
+
+const int kBd[] = { 10, 12 };
+
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, AV1HbdFilterIntraPredOptimzTest,
+    ::testing::Combine(::testing::ValuesIn(kHbdPredFuncMdArray),
+                       ::testing::ValuesIn(kBlkSize),
+                       ::testing::ValuesIn(kBd)));
+#endif  // CONFIG_HIGHBITDEPTH
+
+}  // namespace
diff --git a/third_party/aom/test/frame_size_tests.cc b/third_party/aom/test/frame_size_tests.cc
new file mode 100644
index 000000000..73cc9c075
--- /dev/null
+++ b/third_party/aom/test/frame_size_tests.cc
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/video_source.h"
+
+namespace {
+
+class AV1FrameSizeTests : public ::libaom_test::EncoderTest,
+                          public ::testing::Test {
+ protected:
+  AV1FrameSizeTests()
+      : EncoderTest(&::libaom_test::kAV1), expected_res_(AOM_CODEC_OK) {}
+  virtual ~AV1FrameSizeTests() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(::libaom_test::kRealTime);
+  }
+
+  virtual bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                                  libaom_test::Decoder *decoder) {
+    EXPECT_EQ(expected_res_, res_dec) << decoder->DecodeError();
+    return !::testing::Test::HasFailure();
+  }
+
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      encoder->Control(AOME_SET_CPUUSED, 7);
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+    }
+  }
+
+  int expected_res_;
+};
+
+TEST_F(AV1FrameSizeTests, TestInvalidSizes) {
+  ::libaom_test::RandomVideoSource video;
+
+#if CONFIG_SIZE_LIMIT
+  video.SetSize(DECODE_WIDTH_LIMIT + 16, DECODE_HEIGHT_LIMIT + 16);
+  video.set_limit(2);
+  expected_res_ = AOM_CODEC_CORRUPT_FRAME;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+#endif
+}
+
+TEST_F(AV1FrameSizeTests, LargeValidSizes) {
+  ::libaom_test::RandomVideoSource video;
+
+#if CONFIG_SIZE_LIMIT
+  video.SetSize(DECODE_WIDTH_LIMIT, DECODE_HEIGHT_LIMIT);
+  video.set_limit(2);
+  expected_res_ = AOM_CODEC_OK;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+#else
+  // This test produces a pretty large single frame allocation (roughly
+  // 25 megabytes). The encoder allocates a good number of these frames:
+  // one for each lag in frames (for 2 pass), and then one for each possible
+  // reference buffer (8) - we can end up with up to 30 buffers of roughly this
+  // size, or almost 1 gig of memory.
+  // In total the allocations will exceed 2GiB, which may cause a failure on
+  // non-64-bit platforms, so use a smaller size in that case.
+  if (sizeof(void *) < 8)
+    video.SetSize(2560, 1440);
+  else
+    video.SetSize(4096, 4096);
+
+  video.set_limit(2);
+  expected_res_ = AOM_CODEC_OK;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+#endif
+}
+
+TEST_F(AV1FrameSizeTests, OneByOneVideo) {
+  ::libaom_test::RandomVideoSource video;
+
+  video.SetSize(1, 1);
+  video.set_limit(2);
+  expected_res_ = AOM_CODEC_OK;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+#undef ONE_BY_ONE_VIDEO_NAME
+}  // namespace
diff --git a/third_party/aom/test/function_equivalence_test.h b/third_party/aom/test/function_equivalence_test.h
new file mode 100644
index 000000000..4b22c74a2
--- /dev/null
+++ b/third_party/aom/test/function_equivalence_test.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef TEST_FUNCTION_EQUIVALENCE_TEST_H_
+#define TEST_FUNCTION_EQUIVALENCE_TEST_H_
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/util.h"
+
+using libaom_test::ACMRandom;
+
+namespace libaom_test {
+// Base class for tests that compare 2 implementations of the same function
+// for equivalence. The template parameter should be pointer to a function
+// that is being tested.
+//
+// The test takes a 'FuncParam' struct encapsulating three parameters:
+//   - Pointer to reference function
+//   - Pointer to tested function
+//   - Integer bit depth (defaults to 0).
+//
+// These values are then accessible in the tests as members of params_:
+// params_.ref_func, params_.tst_func, and params_.bit_depth.
+//
+
+template <typename T>
+struct FuncParam {
+  FuncParam(T ref = NULL, T tst = NULL, int bit_depth = 0)
+      : ref_func(ref), tst_func(tst), bit_depth(bit_depth) {}
+  T ref_func;     // Pointer to the reference function.
+  T tst_func;     // Pointer to the tested function.
+  int bit_depth;  // Integer bit depth; 0 unless the caller supplies one.
+};
+
+template <typename T>
+std::ostream &operator<<(std::ostream &os, const FuncParam<T> &p) {
+  return os << "bit_depth:" << p.bit_depth
+            << " function:" << reinterpret_cast<const void *>(p.ref_func)
+            << " function:" << reinterpret_cast<const void *>(p.tst_func);
+}
+
+template <typename T>
+class FunctionEquivalenceTest : public ::testing::TestWithParam<FuncParam<T> > {
+ public:
+  FunctionEquivalenceTest() : rng_(ACMRandom::DeterministicSeed()) {}
+
+  virtual ~FunctionEquivalenceTest() {}
+
+  virtual void SetUp() { params_ = this->GetParam(); }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  ACMRandom rng_;        // Seeded with DeterministicSeed() by the constructor.
+  FuncParam<T> params_;  // Copy of GetParam(), assigned in SetUp().
+};
+
+}  // namespace libaom_test
+#endif  // TEST_FUNCTION_EQUIVALENCE_TEST_H_
diff --git a/third_party/aom/test/hadamard_test.cc b/third_party/aom/test/hadamard_test.cc
new file mode 100644
index 000000000..db5cb7474
--- /dev/null
+++ b/third_party/aom/test/hadamard_test.cc
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <algorithm>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_dsp_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+
+namespace {
+
+using ::libaom_test::ACMRandom;
+
+typedef void (*HadamardFunc)(const int16_t *a, int a_stride, int16_t *b);
+
+// One 8-point pass of the reference Hadamard transform.
+//
+// Reads eight samples a[0], a[a_stride], ..., a[7 * a_stride], runs three
+// add/subtract stages on them and writes the eight results to out[0..7] in
+// the permuted order given by the index tables below. Every intermediate
+// value is stored as int16_t.
+void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) {
+  // Stage 1: sum and difference of each adjacent pair of inputs.
+  int16_t stage1[8];
+  for (int pair = 0; pair < 4; ++pair) {
+    const int16_t first = a[(2 * pair) * a_stride];
+    const int16_t second = a[(2 * pair + 1) * a_stride];
+    stage1[2 * pair] = first + second;
+    stage1[2 * pair + 1] = first - second;
+  }
+
+  // Stage 2: the same butterfly, applied inside each group of four.
+  int16_t stage2[8];
+  for (int group = 0; group < 2; ++group) {
+    const int16_t *in = stage1 + 4 * group;
+    int16_t *res = stage2 + 4 * group;
+    res[0] = in[0] + in[2];
+    res[1] = in[1] + in[3];
+    res[2] = in[0] - in[2];
+    res[3] = in[1] - in[3];
+  }
+
+  // Stage 3: combine the two groups; sums and differences land at the
+  // output positions listed here.
+  static const int kSumPos[4] = { 0, 7, 3, 4 };
+  static const int kDiffPos[4] = { 2, 6, 1, 5 };
+  for (int k = 0; k < 4; ++k) {
+    out[kSumPos[k]] = stage2[k] + stage2[k + 4];
+    out[kDiffPos[k]] = stage2[k] - stage2[k + 4];
+  }
+}
+
+// Reference 8x8 Hadamard transform built from two rounds of hadamard_loop().
+// |a| is the input block with row stride |a_stride|; |b| receives 64 values.
+void reference_hadamard8x8(const int16_t *a, int a_stride, int16_t *b) {
+  int16_t intermediate[64];
+
+  // Round 1: transform each input column (stride a_stride) and store the
+  // result contiguously, 8 values per column.
+  for (int col = 0; col < 8; ++col) {
+    hadamard_loop(&a[col], a_stride, &intermediate[col * 8]);
+  }
+
+  // Round 2: the same pass over the intermediate buffer (stride 8).
+  for (int col = 0; col < 8; ++col) {
+    hadamard_loop(&intermediate[col], 8, &b[col * 8]);
+  }
+}
+
+// Reference 16x16 Hadamard transform. The source is a 16x16 block with row
+// stride |a_stride|; |b| receives 256 values. Input is 9 bit.
+void reference_hadamard16x16(const int16_t *a, int a_stride, int16_t *b) {
+  /* Transform the four 8x8 quadrants (top-left, top-right, bottom-left,
+   * bottom-right); quadrant q is written to b[64 * q .. 64 * q + 63]. */
+  for (int quad = 0; quad < 4; ++quad) {
+    const int col = (quad & 1) * 8;
+    const int row = (quad >> 1) * 8;
+    reference_hadamard8x8(a + col + row * a_stride, a_stride, b + quad * 64);
+  }
+
+  /* Overlay the 8x8 blocks and combine them element by element. */
+  for (int i = 0; i < 64; ++i) {
+    /* 8x8 steps the range up to 15 bits. */
+    const int16_t q0 = b[i];
+    const int16_t q1 = b[i + 64];
+    const int16_t q2 = b[i + 128];
+    const int16_t q3 = b[i + 192];
+
+    /* Halve the first butterfly to keep the result inside int16_t. */
+    const int16_t sum01 = (q0 + q1) >> 1;
+    const int16_t diff01 = (q0 - q1) >> 1;
+    const int16_t sum23 = (q2 + q3) >> 1;
+    const int16_t diff23 = (q2 - q3) >> 1;
+
+    /* Store a 16 bit value. */
+    b[i] = sum01 + sum23;
+    b[i + 64] = diff01 + diff23;
+    b[i + 128] = sum01 - sum23;
+    b[i + 192] = diff01 - diff23;
+  }
+}
+
+// Shared fixture for the Hadamard tests: holds the function under test
+// (taken from the test parameter) and a deterministically seeded generator.
+class HadamardTestBase : public ::testing::TestWithParam<HadamardFunc> {
+ public:
+  virtual void SetUp() {
+    // Reseed for every test so each one sees the same random sequence.
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    h_func_ = GetParam();
+  }
+
+ protected:
+  HadamardFunc h_func_;
+  ACMRandom rnd_;
+};
+
+// Fixture for the 8x8 Hadamard implementations.
+class Hadamard8x8Test : public HadamardTestBase {};
+
+// Feeds one random 8x8 block (stride 8) to the reference and to the function
+// under test and requires the same multiset of output coefficients.
+TEST_P(Hadamard8x8Test, CompareReferenceRandom) {
+  const int kNumCoeffs = 64;
+  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
+  DECLARE_ALIGNED(16, int16_t, actual[kNumCoeffs]);
+  int16_t expected[kNumCoeffs];
+
+  for (int idx = 0; idx < kNumCoeffs; ++idx) {
+    input[idx] = rnd_.Rand9Signed();
+  }
+  memset(actual, 0, sizeof(actual));
+  memset(expected, 0, sizeof(expected));
+
+  reference_hadamard8x8(input, 8, expected);
+  ASM_REGISTER_STATE_CHECK(h_func_(input, 8, actual));
+
+  // Output ordering is implementation-defined, so compare sorted copies.
+  std::sort(actual, actual + kNumCoeffs);
+  std::sort(expected, expected + kNumCoeffs);
+  EXPECT_EQ(0, memcmp(actual, expected, sizeof(actual)));
+}
+
+// Repeats the reference comparison for input strides 8, 16, ..., 56 over a
+// single random source buffer.
+TEST_P(Hadamard8x8Test, VaryStride) {
+  const int kNumCoeffs = 64;
+  const int kInputSize = 64 * 8;
+  DECLARE_ALIGNED(16, int16_t, input[kInputSize]);
+  DECLARE_ALIGNED(16, int16_t, actual[kNumCoeffs]);
+  int16_t expected[kNumCoeffs];
+
+  for (int idx = 0; idx < kInputSize; ++idx) {
+    input[idx] = rnd_.Rand9Signed();
+  }
+
+  for (int stride = 8; stride < 64; stride += 8) {
+    memset(actual, 0, sizeof(actual));
+    memset(expected, 0, sizeof(expected));
+
+    reference_hadamard8x8(input, stride, expected);
+    ASM_REGISTER_STATE_CHECK(h_func_(input, stride, actual));
+
+    // Output ordering is implementation-defined, so compare sorted copies.
+    std::sort(actual, actual + kNumCoeffs);
+    std::sort(expected, expected + kNumCoeffs);
+    EXPECT_EQ(0, memcmp(actual, expected, sizeof(actual)));
+  }
+}
+
+// Run the Hadamard8x8Test suite against the C implementation.
+INSTANTIATE_TEST_CASE_P(C, Hadamard8x8Test,
+                        ::testing::Values(&aom_hadamard_8x8_c));
+
+// The SIMD variants below are registered only when the matching HAVE_* (and,
+// for SSSE3, ARCH_X86_64) configuration macro is set.
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, Hadamard8x8Test,
+                        ::testing::Values(&aom_hadamard_8x8_sse2));
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3 && ARCH_X86_64
+INSTANTIATE_TEST_CASE_P(SSSE3, Hadamard8x8Test,
+                        ::testing::Values(&aom_hadamard_8x8_ssse3));
+#endif  // HAVE_SSSE3 && ARCH_X86_64
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test,
+                        ::testing::Values(&aom_hadamard_8x8_neon));
+#endif  // HAVE_NEON
+
+// Fixture for the 16x16 Hadamard implementations.
+class Hadamard16x16Test : public HadamardTestBase {};
+
+// Feeds one random 16x16 block (stride 16) to the reference and to the
+// function under test and requires the same multiset of output coefficients.
+TEST_P(Hadamard16x16Test, CompareReferenceRandom) {
+  const int kNumCoeffs = 16 * 16;
+  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
+  DECLARE_ALIGNED(16, int16_t, actual[kNumCoeffs]);
+  int16_t expected[kNumCoeffs];
+
+  for (int idx = 0; idx < kNumCoeffs; ++idx) {
+    input[idx] = rnd_.Rand9Signed();
+  }
+  memset(actual, 0, sizeof(actual));
+  memset(expected, 0, sizeof(expected));
+
+  reference_hadamard16x16(input, 16, expected);
+  ASM_REGISTER_STATE_CHECK(h_func_(input, 16, actual));
+
+  // Output ordering is implementation-defined, so compare sorted copies.
+  std::sort(actual, actual + kNumCoeffs);
+  std::sort(expected, expected + kNumCoeffs);
+  EXPECT_EQ(0, memcmp(actual, expected, sizeof(actual)));
+}
+
+// Repeats the reference comparison for input strides 8, 16, ..., 56 over a
+// single random source buffer.
+TEST_P(Hadamard16x16Test, VaryStride) {
+  const int kNumCoeffs = 16 * 16;
+  const int kInputSize = 16 * 16 * 8;
+  DECLARE_ALIGNED(16, int16_t, input[kInputSize]);
+  DECLARE_ALIGNED(16, int16_t, actual[kNumCoeffs]);
+  int16_t expected[kNumCoeffs];
+
+  for (int idx = 0; idx < kInputSize; ++idx) {
+    input[idx] = rnd_.Rand9Signed();
+  }
+
+  for (int stride = 8; stride < 64; stride += 8) {
+    memset(actual, 0, sizeof(actual));
+    memset(expected, 0, sizeof(expected));
+
+    reference_hadamard16x16(input, stride, expected);
+    ASM_REGISTER_STATE_CHECK(h_func_(input, stride, actual));
+
+    // Output ordering is implementation-defined, so compare sorted copies.
+    std::sort(actual, actual + kNumCoeffs);
+    std::sort(expected, expected + kNumCoeffs);
+    EXPECT_EQ(0, memcmp(actual, expected, sizeof(actual)));
+  }
+}
+
+// Run the Hadamard16x16Test suite against the C implementation.
+INSTANTIATE_TEST_CASE_P(C, Hadamard16x16Test,
+                        ::testing::Values(&aom_hadamard_16x16_c));
+
+// The SIMD variants below are registered only when the matching HAVE_*
+// configuration macro is set.
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, Hadamard16x16Test,
+                        ::testing::Values(&aom_hadamard_16x16_sse2));
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, Hadamard16x16Test,
+                        ::testing::Values(&aom_hadamard_16x16_neon));
+#endif  // HAVE_NEON
+}  // namespace
diff --git a/third_party/aom/test/hbd_metrics_test.cc b/third_party/aom/test/hbd_metrics_test.cc
new file mode 100644
index 000000000..4def53b21
--- /dev/null
+++ b/third_party/aom/test/hbd_metrics_test.cc
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <new>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+#include "./aom_config.h"
+#include "aom_dsp/psnr.h"
+#include "aom_dsp/ssim.h"
+#include "aom_ports/mem.h"
+#include "aom_ports/msvc.h"
+#include "aom_scale/yv12config.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+
+// Signature of a metric computed on a pair of frame buffers with no bit-depth
+// arguments (the "low bit depth" side of each comparison).
+typedef double (*LBDMetricFunc)(const YV12_BUFFER_CONFIG *source,
+                                const YV12_BUFFER_CONFIG *dest);
+// Signature of the high-bit-depth counterpart; it additionally receives the
+// input bit depth (in_bd) and the bit depth (bd).
+typedef double (*HBDMetricFunc)(const YV12_BUFFER_CONFIG *source,
+                                const YV12_BUFFER_CONFIG *dest, uint32_t in_bd,
+                                uint32_t bd);
+
+// HBDMetricFunc adapter around aom_calc_highbd_psnr(). Note the argument
+// order: this wrapper takes (in_bd, bd) while the callee is passed
+// (bd, in_bd). Returns entry 0 of the resulting PSNR_STATS::psnr array.
+double compute_hbd_psnr(const YV12_BUFFER_CONFIG *source,
+                        const YV12_BUFFER_CONFIG *dest, uint32_t in_bd,
+                        uint32_t bd) {
+  PSNR_STATS stats;
+  aom_calc_highbd_psnr(source, dest, &stats, bd, in_bd);
+  const double result = stats.psnr[0];
+  return result;
+}
+
+// LBDMetricFunc adapter around aom_calc_psnr(). Returns entry 0 of the
+// resulting PSNR_STATS::psnr array.
+double compute_psnr(const YV12_BUFFER_CONFIG *source,
+                    const YV12_BUFFER_CONFIG *dest) {
+  PSNR_STATS stats;
+  aom_calc_psnr(source, dest, &stats);
+  const double result = stats.psnr[0];
+  return result;
+}
+
+// HBDMetricFunc adapter around aom_psnrhvs(), forwarding (bd, in_bd). The
+// three out-parameters are required by the callee but discarded here.
+double compute_hbd_psnrhvs(const YV12_BUFFER_CONFIG *source,
+                           const YV12_BUFFER_CONFIG *dest, uint32_t in_bd,
+                           uint32_t bd) {
+  double unused_y, unused_u, unused_v;
+  const double score =
+      aom_psnrhvs(source, dest, &unused_y, &unused_u, &unused_v, bd, in_bd);
+  return score;
+}
+
+// LBDMetricFunc adapter around aom_psnrhvs() with both bit depths fixed at 8.
+// The three out-parameters are required by the callee but discarded here.
+double compute_psnrhvs(const YV12_BUFFER_CONFIG *source,
+                       const YV12_BUFFER_CONFIG *dest) {
+  double unused_y, unused_u, unused_v;
+  const double score =
+      aom_psnrhvs(source, dest, &unused_y, &unused_u, &unused_v, 8, 8);
+  return score;
+}
+
+// HBDMetricFunc adapter around aom_calc_fastssim(), forwarding (bd, in_bd).
+// The three out-parameters are required by the callee but discarded here.
+double compute_hbd_fastssim(const YV12_BUFFER_CONFIG *source,
+                            const YV12_BUFFER_CONFIG *dest, uint32_t in_bd,
+                            uint32_t bd) {
+  double unused_y, unused_u, unused_v;
+  const double score = aom_calc_fastssim(source, dest, &unused_y, &unused_u,
+                                         &unused_v, bd, in_bd);
+  return score;
+}
+
+// LBDMetricFunc adapter around aom_calc_fastssim() with both bit depths fixed
+// at 8. The three out-parameters are required by the callee but discarded.
+double compute_fastssim(const YV12_BUFFER_CONFIG *source,
+                        const YV12_BUFFER_CONFIG *dest) {
+  double unused_y, unused_u, unused_v;
+  const double score =
+      aom_calc_fastssim(source, dest, &unused_y, &unused_u, &unused_v, 8, 8);
+  return score;
+}
+
+// HBDMetricFunc adapter around aom_highbd_calc_ssim(), forwarding
+// (bd, in_bd). The returned SSIM is divided by the weight the callee writes
+// back, raised to the 8th power and scaled by 100.
+double compute_hbd_aomssim(const YV12_BUFFER_CONFIG *source,
+                           const YV12_BUFFER_CONFIG *dest, uint32_t in_bd,
+                           uint32_t bd) {
+  double weight;
+  const double ssim = aom_highbd_calc_ssim(source, dest, &weight, bd, in_bd);
+  const double normalized = ssim / weight;
+  return 100 * pow(normalized, 8.0);
+}
+
+// LBDMetricFunc adapter around aom_calc_ssim(). The returned SSIM is divided
+// by the weight the callee writes back, raised to the 8th power and scaled
+// by 100.
+double compute_aomssim(const YV12_BUFFER_CONFIG *source,
+                       const YV12_BUFFER_CONFIG *dest) {
+  double weight;
+  const double ssim = aom_calc_ssim(source, dest, &weight);
+  const double normalized = ssim / weight;
+  return 100 * pow(normalized, 8.0);
+}
+
+// Holds the configuration of one metric comparison (the two metric functions,
+// the bit depths and the allowed difference) and the check that runs it.
+class HBDMetricsTestBase {
+ public:
+  virtual ~HBDMetricsTestBase() {}
+
+ protected:
+  // Builds a constant-valued 8-bit source frame and a distorted destination
+  // frame, mirrors both into 16-bit buffers with every sample shifted left by
+  // (bit_depth_ - 8), and expects lbd_metric_ and hbd_metric_ to agree within
+  // threshold_. The check is repeated for three distortion levels.
+  void RunAccuracyCheck() {
+    const int width = 1920;
+    const int height = 1080;
+    const uint8_t kPixFiller = 128;
+    YV12_BUFFER_CONFIG lbd_src, lbd_dst;
+    YV12_BUFFER_CONFIG hbd_src, hbd_dst;
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+    memset(&lbd_src, 0, sizeof(lbd_src));
+    memset(&lbd_dst, 0, sizeof(lbd_dst));
+    memset(&hbd_src, 0, sizeof(hbd_src));
+    memset(&hbd_dst, 0, sizeof(hbd_dst));
+
+    aom_alloc_frame_buffer(&lbd_src, width, height, 1, 1, 0, 32, 16);
+    aom_alloc_frame_buffer(&lbd_dst, width, height, 1, 1, 0, 32, 16);
+    aom_alloc_frame_buffer(&hbd_src, width, height, 1, 1, 1, 32, 16);
+    aom_alloc_frame_buffer(&hbd_dst, width, height, 1, 1, 1, 32, 16);
+
+    // Pass 1: flat source, full-range random destination.
+    memset(lbd_src.buffer_alloc, kPixFiller, lbd_src.buffer_alloc_sz);
+    for (size_t i = 0; i < lbd_src.buffer_alloc_sz; ++i) {
+      const uint16_t spel = lbd_src.buffer_alloc[i];
+      const uint16_t dpel = rnd.Rand8();
+      lbd_dst.buffer_alloc[i] = (uint8_t)dpel;
+      ((uint16_t *)(hbd_src.buffer_alloc))[i] = spel << (bit_depth_ - 8);
+      ((uint16_t *)(hbd_dst.buffer_alloc))[i] = dpel << (bit_depth_ - 8);
+    }
+
+    double lbd_db = lbd_metric_(&lbd_src, &lbd_dst);
+    double hbd_db =
+        hbd_metric_(&hbd_src, &hbd_dst, input_bit_depth_, bit_depth_);
+    EXPECT_LE(fabs(lbd_db - hbd_db), threshold_);
+
+    // Pass 2: destination values in [120, 135].
+    for (size_t i = 0; i < lbd_src.buffer_alloc_sz; ++i) {
+      const uint16_t dpel = 120 + (rnd.Rand8() >> 4);
+      lbd_dst.buffer_alloc[i] = (uint8_t)dpel;
+      ((uint16_t *)(hbd_dst.buffer_alloc))[i] = dpel << (bit_depth_ - 8);
+    }
+
+    lbd_db = lbd_metric_(&lbd_src, &lbd_dst);
+    hbd_db = hbd_metric_(&hbd_src, &hbd_dst, input_bit_depth_, bit_depth_);
+    EXPECT_LE(fabs(lbd_db - hbd_db), threshold_);
+
+    // Pass 3: destination values in [126, 129].
+    for (size_t i = 0; i < lbd_src.buffer_alloc_sz; ++i) {
+      const uint16_t dpel = 126 + (rnd.Rand8() >> 6);
+      lbd_dst.buffer_alloc[i] = (uint8_t)dpel;
+      ((uint16_t *)(hbd_dst.buffer_alloc))[i] = dpel << (bit_depth_ - 8);
+    }
+
+    lbd_db = lbd_metric_(&lbd_src, &lbd_dst);
+    hbd_db = hbd_metric_(&hbd_src, &hbd_dst, input_bit_depth_, bit_depth_);
+    EXPECT_LE(fabs(lbd_db - hbd_db), threshold_);
+
+    aom_free_frame_buffer(&lbd_src);
+    aom_free_frame_buffer(&lbd_dst);
+    aom_free_frame_buffer(&hbd_src);
+    aom_free_frame_buffer(&hbd_dst);
+  }
+
+  int input_bit_depth_;       // Passed to hbd_metric_ as in_bd.
+  int bit_depth_;             // Passed to hbd_metric_ as bd; sets the shift.
+  double threshold_;          // Largest accepted |lbd - hbd| difference.
+  LBDMetricFunc lbd_metric_;  // Metric run on the 8-bit buffers.
+  HBDMetricFunc hbd_metric_;  // Metric run on the 16-bit buffers.
+};
+
+// Test parameter layout:
+//   (lbd metric, hbd metric, input bit depth, bit depth, threshold).
+typedef std::tr1::tuple<LBDMetricFunc, HBDMetricFunc, int, int, double>
+    MetricTestTParam;
+
+// gtest fixture that loads HBDMetricsTestBase's configuration from the
+// current MetricTestTParam.
+class HBDMetricsTest : public HBDMetricsTestBase,
+                       public ::testing::TestWithParam<MetricTestTParam> {
+ public:
+  virtual void SetUp() {
+    // The two metric implementations being compared.
+    lbd_metric_ = GET_PARAM(0);
+    hbd_metric_ = GET_PARAM(1);
+    // Bit depths and tolerance for this instantiation.
+    input_bit_depth_ = GET_PARAM(2);
+    bit_depth_ = GET_PARAM(3);
+    threshold_ = GET_PARAM(4);
+  }
+
+  virtual void TearDown() {}
+};
+
+TEST_P(HBDMetricsTest, RunAccuracyCheck) {
+  RunAccuracyCheck();
+}
+
+// Thresholds: largest accepted absolute difference between the low and high
+// bit-depth results of a metric.
+// Allow small variation due to floating point operations.
+static const double kSsim_thresh = 0.001;
+// Allow some additional errors accumulated in floating point operations.
+static const double kFSsim_thresh = 0.03;
+// Allow some extra variation due to rounding error accumulated in dct.
+static const double kPhvs_thresh = 0.3;
+
+// Each instantiation below runs one metric pair with (input bit depth, bit
+// depth) = (8, 10), (10, 10), (8, 12) and (12, 12).
+// NOTE(review): the AOMSSIM cases with equal depths (10/10 and 12/12) use
+// kPhvs_thresh rather than kSsim_thresh - confirm this is intentional.
+INSTANTIATE_TEST_CASE_P(
+    AOMSSIM, HBDMetricsTest,
+    ::testing::Values(MetricTestTParam(&compute_aomssim, &compute_hbd_aomssim,
+                                       8, 10, kSsim_thresh),
+                      MetricTestTParam(&compute_aomssim, &compute_hbd_aomssim,
+                                       10, 10, kPhvs_thresh),
+                      MetricTestTParam(&compute_aomssim, &compute_hbd_aomssim,
+                                       8, 12, kSsim_thresh),
+                      MetricTestTParam(&compute_aomssim, &compute_hbd_aomssim,
+                                       12, 12, kPhvs_thresh)));
+INSTANTIATE_TEST_CASE_P(
+    FASTSSIM, HBDMetricsTest,
+    ::testing::Values(MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim,
+                                       8, 10, kFSsim_thresh),
+                      MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim,
+                                       10, 10, kFSsim_thresh),
+                      MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim,
+                                       8, 12, kFSsim_thresh),
+                      MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim,
+                                       12, 12, kFSsim_thresh)));
+INSTANTIATE_TEST_CASE_P(
+    PSNRHVS, HBDMetricsTest,
+    ::testing::Values(MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs,
+                                       8, 10, kPhvs_thresh),
+                      MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs,
+                                       10, 10, kPhvs_thresh),
+                      MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs,
+                                       8, 12, kPhvs_thresh),
+                      MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs,
+                                       12, 12, kPhvs_thresh)));
+INSTANTIATE_TEST_CASE_P(
+    PSNR, HBDMetricsTest,
+    ::testing::Values(
+        MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 8, 10, kPhvs_thresh),
+        MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 10, 10,
+                         kPhvs_thresh),
+        MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 8, 12, kPhvs_thresh),
+        MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 12, 12,
+                         kPhvs_thresh)));
+}  // namespace
diff --git a/third_party/aom/test/i420_video_source.h b/third_party/aom/test/i420_video_source.h
new file mode 100644
index 000000000..0825296d7
--- /dev/null
+++ b/third_party/aom/test/i420_video_source.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef TEST_I420_VIDEO_SOURCE_H_
+#define TEST_I420_VIDEO_SOURCE_H_
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+
+#include "test/yuv_video_source.h"
+
+namespace libaom_test {
+
+// Convenience wrapper around YUVVideoSource for raw I420 files, so that we
+// can do actual file encodes. It forwards every constructor argument
+// unchanged and fixes the image format to AOM_IMG_FMT_I420.
+class I420VideoSource : public YUVVideoSource {
+ public:
+  // All arguments are passed straight through to YUVVideoSource.
+  I420VideoSource(const std::string &file_name, unsigned int width,
+                  unsigned int height, int rate_numerator, int rate_denominator,
+                  unsigned int start, int limit)
+      : YUVVideoSource(file_name, AOM_IMG_FMT_I420, width, height,
+                       rate_numerator, rate_denominator, start, limit) {}
+};
+
+}  // namespace libaom_test
+
+#endif  // TEST_I420_VIDEO_SOURCE_H_
diff --git a/third_party/aom/test/idct8x8_test.cc b/third_party/aom/test/idct8x8_test.cc
new file mode 100644
index 000000000..f99a4075f
--- /dev/null
+++ b/third_party/aom/test/idct8x8_test.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "aom/aom_integer.h"
+#include "aom_ports/msvc.h"  // for round()
+
+using libaom_test::ACMRandom;
+
+namespace {
+
+// Floating-point reference 8-point DCT:
+//   output[k] = sum over n of input[n] * cos(pi * (2n + 1) * k / 16),
+// with the k == 0 term additionally scaled by 1/sqrt(2).
+void reference_dct_1d(double input[8], double output[8]) {
+  const double kPi = 3.141592653589793238462643383279502884;
+  const double kInvSqrt2 = 0.707106781186547524400844362104;
+  for (int freq = 0; freq < 8; ++freq) {
+    // Accumulate directly in the output slot.
+    double &acc = output[freq];
+    acc = 0.0;
+    for (int sample = 0; sample < 8; ++sample) {
+      acc += input[sample] * cos(kPi * (2 * sample + 1) * freq / 16.0);
+    }
+    if (freq == 0) {
+      acc = acc * kInvSqrt2;
+    }
+  }
+}
+
+// Floating-point reference 8x8 DCT: applies reference_dct_1d() to every
+// column of |input|, then to every row of the intermediate result, and
+// finally doubles each coefficient. Both arrays are row-major 8x8.
+void reference_dct_2d(int16_t input[64], double output[64]) {
+  double line_in[8], line_out[8];
+
+  // Column pass.
+  for (int col = 0; col < 8; ++col) {
+    for (int row = 0; row < 8; ++row) line_in[row] = input[row * 8 + col];
+    reference_dct_1d(line_in, line_out);
+    for (int row = 0; row < 8; ++row) output[row * 8 + col] = line_out[row];
+  }
+
+  // Row pass, in place on the column-transformed data.
+  for (int row = 0; row < 8; ++row) {
+    double *const row_ptr = output + row * 8;
+    for (int col = 0; col < 8; ++col) line_in[col] = row_ptr[col];
+    reference_dct_1d(line_in, line_out);
+    for (int col = 0; col < 8; ++col) row_ptr[col] = line_out[col];
+  }
+
+  // Scale by some magic number
+  for (int idx = 0; idx < 64; ++idx) output[idx] *= 2;
+}
+
+// Round trip check: forward-transform a random residual with the floating
+// point reference DCT, round the coefficients, add the inverse transform
+// (aom_idct8x8_64_add_c) back onto the prediction and require every
+// reconstructed pixel to be within 1 of the source pixel.
+TEST(AV1Idct8x8Test, AccuracyCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int count_test_block = 10000;
+  for (int block = 0; block < count_test_block; ++block) {
+    uint8_t src[64], dst[64];
+    int16_t input[64];
+    double output_r[64];
+    tran_low_t coeff[64];
+
+    // Random source/prediction pixels; the residual therefore spans
+    // [-255, 255].
+    for (int j = 0; j < 64; ++j) {
+      src[j] = rnd.Rand8();
+      dst[j] = rnd.Rand8();
+    }
+    for (int j = 0; j < 64; ++j) {
+      input[j] = src[j] - dst[j];
+    }
+
+    reference_dct_2d(input, output_r);
+    for (int j = 0; j < 64; ++j) {
+      coeff[j] = static_cast<tran_low_t>(round(output_r[j]));
+    }
+
+    // Reconstruct in place on top of the prediction.
+    aom_idct8x8_64_add_c(coeff, dst, 8);
+
+    for (int j = 0; j < 64; ++j) {
+      const int diff = dst[j] - src[j];
+      const int error = diff * diff;
+      EXPECT_GE(1, error) << "Error: 8x8 FDCT/IDCT has error " << error
+                          << " at index " << j;
+    }
+  }
+}
+
+}  // namespace
diff --git a/third_party/aom/test/idct_test.cc b/third_party/aom/test/idct_test.cc
new file mode 100644
index 000000000..a880a9182
--- /dev/null
+++ b/third_party/aom/test/idct_test.cc
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include "./aom_config.h"
+#include "./aom_rtcd.h"
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "aom/aom_integer.h"
+
+// Signature of the inverse-transform functions exercised by IDCTTest: takes
+// the coefficient input, a prediction buffer with its stride, and a
+// destination buffer with its stride.
+typedef void (*IdctFunc)(int16_t *input, unsigned char *pred_ptr,
+                         int pred_stride, unsigned char *dst_ptr,
+                         int dst_stride);
+namespace {
+// Fixture for the 4x4 inverse-transform tests. The output buffer is treated
+// as 16 bytes per row: the 4x4 block in its top-left corner starts at 0 and
+// every other byte is a guard value of 255.
+class IDCTTest : public ::testing::TestWithParam<IdctFunc> {
+ protected:
+  virtual void SetUp() {
+    UUT = GetParam();
+    memset(input, 0, sizeof(input));
+    /* Set up guard blocks */
+    for (int i = 0; i < 256; i++) {
+      const bool in_block = (i & 0xF) < 4 && (i < 64);
+      if (in_block) {
+        output[i] = 0;
+      } else {
+        output[i] = 255;
+      }
+    }
+  }
+
+  virtual void TearDown() {
+    libaom_test::ClearSystemState();
+  }
+
+  IdctFunc UUT;                // Function under test.
+  int16_t input[16];           // Coefficients, zeroed in SetUp().
+  unsigned char output[256];   // Destination buffer with guard bytes.
+  unsigned char predict[256];  // Prediction buffer; filled by tests.
+};
+
+// Checks the buffer layout produced by SetUp(): zeros inside the 4x4 block,
+// 255 everywhere else.
+TEST_P(IDCTTest, TestGuardBlocks) {
+  for (int i = 0; i < 256; i++) {
+    const bool in_block = (i & 0xF) < 4 && i < 64;
+    if (in_block) {
+      EXPECT_EQ(0, output[i]) << i;
+    } else {
+      EXPECT_EQ(255, output[i]);
+    }
+  }
+}
+
+// With all-zero coefficients and the output buffer used as both prediction
+// and destination, the block must stay 0 and the guard bytes must stay 255.
+TEST_P(IDCTTest, TestAllZeros) {
+  ASM_REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+
+  for (int i = 0; i < 256; i++) {
+    const bool in_block = (i & 0xF) < 4 && i < 64;
+    if (in_block) {
+      EXPECT_EQ(0, output[i]) << "i==" << i;
+    } else {
+      EXPECT_EQ(255, output[i]) << "i==" << i;
+    }
+  }
+}
+
+// With only input[0] set to 4 and a zero prediction, every pixel of the block
+// is expected to become 1 while the guard bytes stay 255.
+TEST_P(IDCTTest, TestAllOnes) {
+  input[0] = 4;
+  ASM_REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+
+  for (int i = 0; i < 256; i++) {
+    const bool in_block = (i & 0xF) < 4 && i < 64;
+    if (in_block) {
+      EXPECT_EQ(1, output[i]) << "i==" << i;
+    } else {
+      EXPECT_EQ(255, output[i]) << "i==" << i;
+    }
+  }
+}
+
+// With only input[0] set to 4 and predict[i] == i, every pixel of the block
+// is expected to be its prediction plus one; guard bytes must stay 255.
+TEST_P(IDCTTest, TestAddOne) {
+  for (int i = 0; i < 256; i++) {
+    predict[i] = i;
+  }
+  input[0] = 4;
+  ASM_REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16));
+
+  for (int i = 0; i < 256; i++) {
+    const bool in_block = (i & 0xF) < 4 && i < 64;
+    if (in_block) {
+      EXPECT_EQ(i + 1, output[i]) << "i==" << i;
+    } else {
+      EXPECT_EQ(255, output[i]) << "i==" << i;
+    }
+  }
+}
+
+// Transforms the coefficients 0..15 onto a zero prediction and compares the
+// block against fixed expected values; guard bytes must stay 255.
+TEST_P(IDCTTest, TestWithData) {
+  for (int i = 0; i < 16; i++) {
+    input[i] = i;
+  }
+
+  ASM_REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+
+  for (int i = 0; i < 256; i++) {
+    const bool is_guard = (i & 0xF) > 3 || i > 63;
+    if (is_guard) {
+      EXPECT_EQ(255, output[i]) << "i==" << i;
+      continue;
+    }
+    // Inside the 4x4 block: a handful of non-zero pixels, zero elsewhere.
+    if (i == 0) {
+      EXPECT_EQ(11, output[i]) << "i==" << i;
+    } else if (i == 34) {
+      EXPECT_EQ(1, output[i]) << "i==" << i;
+    } else if (i == 2 || i == 17 || i == 32) {
+      EXPECT_EQ(3, output[i]) << "i==" << i;
+    } else {
+      EXPECT_EQ(0, output[i]) << "i==" << i;
+    }
+  }
+}
+
+// Run the IDCTTest suite against the C implementation; the MMX and MSA
+// variants are registered only when HAVE_MMX / HAVE_MSA is set.
+INSTANTIATE_TEST_CASE_P(C, IDCTTest, ::testing::Values(aom_short_idct4x4llm_c));
+#if HAVE_MMX
+INSTANTIATE_TEST_CASE_P(MMX, IDCTTest,
+                        ::testing::Values(aom_short_idct4x4llm_mmx));
+#endif
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(MSA, IDCTTest,
+                        ::testing::Values(aom_short_idct4x4llm_msa));
+#endif
+}
diff --git a/third_party/aom/test/intrabc_test.cc b/third_party/aom/test/intrabc_test.cc
new file mode 100644
index 000000000..84cfa5c48
--- /dev/null
+++ b/third_party/aom/test/intrabc_test.cc
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "av1/common/enums.h"
+#include "av1/common/mv.h"
+#include "av1/common/mvref_common.h"
+#include "av1/common/tile_common.h"
+
+namespace {
+// Table-driven test of is_dv_valid(): each case supplies a displacement
+// vector, a block position (as an offset from the top-left corner of a
+// fixed tile) and a block size, together with the expected verdict.
+TEST(IntrabcTest, DvValidation) {
+  struct DvTestCase {
+    MV dv;              // Displacement vector passed to is_dv_valid().
+    int mi_row_offset;  // Added to tile.mi_row_start to get the block row.
+    int mi_col_offset;  // Added to tile.mi_col_start to get the block column.
+    BLOCK_SIZE bsize;   // Block size passed to is_dv_valid().
+    bool valid;         // Expected return value.
+  };
+  // Multiplier applied to pixel distances when building dv components
+  // (presumably dv is in 1/8-pel units - TODO confirm).
+  const int kSubPelScale = 8;
+  // Tile width used below, in multiples of MAX_MIB_SIZE.
+  const int kTileMaxMibWidth = 8;
+  const DvTestCase kDvCases[] = {
+  // A zero vector is expected to be rejected for every block size.
+#if CONFIG_EXT_PARTITION
+    { { 0, 0 }, 0, 0, BLOCK_128X128, false },
+#endif
+    { { 0, 0 }, 0, 0, BLOCK_64X64, false },
+    { { 0, 0 }, 0, 0, BLOCK_32X32, false },
+    { { 0, 0 }, 0, 0, BLOCK_16X16, false },
+    { { 0, 0 }, 0, 0, BLOCK_8X8, false },
+    { { 0, 0 }, 0, 0, BLOCK_4X4, false },
+    // 16x16 block placed one MAX_SB_SIZE into the tile in both directions,
+    // displaced by a whole MAX_SB_SIZE.
+    { { -MAX_SB_SIZE * kSubPelScale, -MAX_SB_SIZE * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_16X16,
+      true },
+    { { 0, -MAX_SB_SIZE * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_16X16,
+      true },
+    { { -MAX_SB_SIZE * kSubPelScale, 0 },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_16X16,
+      true },
+    { { MAX_SB_SIZE * kSubPelScale, 0 },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_16X16,
+      false },
+    { { 0, MAX_SB_SIZE * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_16X16,
+      false },
+    // 32x32 block: whole-pixel vectors, a different block position, and
+    // vectors with a fractional (kSubPelScale / 2) component.
+    { { -32 * kSubPelScale, -32 * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_32X32,
+      true },
+    { { -32 * kSubPelScale, -32 * kSubPelScale },
+      32 / MI_SIZE,
+      32 / MI_SIZE,
+      BLOCK_32X32,
+      false },
+    { { -32 * kSubPelScale - kSubPelScale / 2, -32 * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_32X32,
+      false },
+    { { -33 * kSubPelScale, -32 * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_32X32,
+      true },
+    { { -32 * kSubPelScale, -32 * kSubPelScale - kSubPelScale / 2 },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_32X32,
+      false },
+    { { -32 * kSubPelScale, -33 * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_32X32,
+      true },
+    // BLOCK_LARGEST: vectors one pixel either side of MAX_SB_SIZE.
+    { { -MAX_SB_SIZE * kSubPelScale, -MAX_SB_SIZE * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_LARGEST,
+      true },
+    { { -(MAX_SB_SIZE + 1) * kSubPelScale, -MAX_SB_SIZE * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_LARGEST,
+      false },
+    { { -MAX_SB_SIZE * kSubPelScale, -(MAX_SB_SIZE + 1) * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_LARGEST,
+      false },
+    { { -(MAX_SB_SIZE - 1) * kSubPelScale, -MAX_SB_SIZE * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_LARGEST,
+      true },
+    { { -MAX_SB_SIZE * kSubPelScale, -(MAX_SB_SIZE - 1) * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_LARGEST,
+      true },
+    { { -(MAX_SB_SIZE - 1) * kSubPelScale, -(MAX_SB_SIZE - 1) * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_LARGEST,
+      false },
+    // BLOCK_LARGEST: positive second component, up to and one pixel past
+    // (kTileMaxMibWidth - 2) * MAX_SB_SIZE.
+    { { -MAX_SB_SIZE * kSubPelScale, MAX_SB_SIZE * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_LARGEST,
+      true },
+    { { -MAX_SB_SIZE * kSubPelScale,
+        (kTileMaxMibWidth - 2) * MAX_SB_SIZE * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_LARGEST,
+      true },
+    { { -MAX_SB_SIZE * kSubPelScale,
+        ((kTileMaxMibWidth - 2) * MAX_SB_SIZE + 1) * kSubPelScale },
+      MAX_SB_SIZE / MI_SIZE,
+      MAX_SB_SIZE / MI_SIZE,
+      BLOCK_LARGEST,
+      false },
+  };
+  // Tile used by every case: rows [8, 16) * MAX_MIB_SIZE and
+  // kTileMaxMibWidth * MAX_MIB_SIZE columns starting at 24 * MAX_MIB_SIZE.
+  TileInfo tile;
+  tile.mi_row_start = 8 * MAX_MIB_SIZE;
+  tile.mi_row_end = 16 * MAX_MIB_SIZE;
+  tile.mi_col_start = 24 * MAX_MIB_SIZE;
+  tile.mi_col_end = tile.mi_col_start + kTileMaxMibWidth * MAX_MIB_SIZE;
+  // Evaluate each case; the failure message names the table index.
+  for (int i = 0; i < static_cast<int>(GTEST_ARRAY_SIZE_(kDvCases)); ++i) {
+    EXPECT_EQ(kDvCases[i].valid,
+              is_dv_valid(kDvCases[i].dv, &tile,
+                          tile.mi_row_start + kDvCases[i].mi_row_offset,
+                          tile.mi_col_start + kDvCases[i].mi_col_offset,
+                          kDvCases[i].bsize))
+        << "DvCases[" << i << "]";
+  }
+}
+}  // namespace
diff --git a/third_party/aom/test/intrapred_test.cc b/third_party/aom/test/intrapred_test.cc
new file mode 100644
index 000000000..4efed57b6
--- /dev/null
+++ b/third_party/aom/test/intrapred_test.cc
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <string>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "./aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "av1/common/blockd.h"
+#include "av1/common/pred_common.h"
+#include "aom_mem/aom_mem.h"
+
+namespace {
+
+using libaom_test::ACMRandom;
+
+const int count_test_block = 100000;  // Iterations run by RunTest().
+
+typedef void (*IntraPred)(uint16_t *dst, ptrdiff_t stride,  // Predictor type.
+                          const uint16_t *above, const uint16_t *left, int bps);
+
+struct IntraPredFunc {  // One test case: function pair, block size, bit depth.
+  IntraPred pred_fn;  // Implementation under test.
+  IntraPred ref_fn;   // Reference implementation it is compared against.
+  int block_size;     // Edge length of the square block, in pixels.
+  int bit_depth;      // Sets the sample mask; passed as the last argument.
+
+  IntraPredFunc(IntraPred pred = NULL, IntraPred ref = NULL,
+                int block_size_value = 0, int bit_depth_value = 0)
+      : pred_fn(pred), ref_fn(ref), block_size(block_size_value),
+        bit_depth(bit_depth_value) {}
+};
+
+class AV1IntraPredTest : public ::testing::TestWithParam<IntraPredFunc> {
+ public:  // Fixture that checks params_.pred_fn against params_.ref_fn.
+  void RunTest(uint16_t *left_col, uint16_t *above_data, uint16_t *dst,
+               uint16_t *ref_dst) {  // All four buffers come from the caller.
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int block_size = params_.block_size;
+    above_row_ = above_data + 16;  // Keeps index -1 (written below) in bounds.
+    left_col_ = left_col;
+    dst_ = dst;
+    ref_dst_ = ref_dst;
+    int error_count = 0;
+    for (int i = 0; i < count_test_block; ++i) {
+      // Fill edges with random data, try first with saturated values.
+      for (int x = -1; x <= block_size * 2; x++) {
+        if (i == 0) {
+          above_row_[x] = mask_;
+        } else {
+          above_row_[x] = rnd.Rand16() & mask_;
+        }
+      }
+      for (int y = 0; y < block_size; y++) {
+        if (i == 0) {
+          left_col_[y] = mask_;
+        } else {
+          left_col_[y] = rnd.Rand16() & mask_;
+        }
+      }
+      Predict();
+      CheckPrediction(i, &error_count);
+    }
+    ASSERT_EQ(0, error_count);
+  }
+
+ protected:
+  virtual void SetUp() {
+    params_ = GetParam();
+    stride_ = params_.block_size * 3;  // Row pitch used for dst_ and ref_dst_.
+    mask_ = (1 << params_.bit_depth) - 1;  // Largest value for this bit depth.
+  }
+
+  void Predict() {  // Runs the reference first, then the function under test.
+    const int bit_depth = params_.bit_depth;
+    params_.ref_fn(ref_dst_, stride_, above_row_, left_col_, bit_depth);
+    ASM_REGISTER_STATE_CHECK(
+        params_.pred_fn(dst_, stride_, above_row_, left_col_, bit_depth));
+  }
+
+  void CheckPrediction(int test_case_number, int *error_count) const {
+    // For each pixel ensure that the calculated value is the same as reference.
+    const int block_size = params_.block_size;
+    for (int y = 0; y < block_size; y++) {
+      for (int x = 0; x < block_size; x++) {
+        *error_count += ref_dst_[x + y * stride_] != dst_[x + y * stride_];
+        if (*error_count == 1) {  // Only reported while the count is exactly 1.
+          ASSERT_EQ(ref_dst_[x + y * stride_], dst_[x + y * stride_])
+              << " Failed on Test Case Number " << test_case_number;
+        }
+      }
+    }
+  }
+
+  uint16_t *above_row_;
+  uint16_t *left_col_;
+  uint16_t *dst_;
+  uint16_t *ref_dst_;
+  ptrdiff_t stride_;
+  int mask_;
+
+  IntraPredFunc params_;
+};
+
+TEST_P(AV1IntraPredTest, IntraPredTests) {
+  // Buffers are sized for the largest block size instantiated here, 32.
+  DECLARE_ALIGNED(16, uint16_t, left_col[2 * 32]);
+  DECLARE_ALIGNED(16, uint16_t, above_data[2 * 32 + 32]);
+  DECLARE_ALIGNED(16, uint16_t, dst[3 * 32 * 32]);  // 32 rows, pitch 3 * 32.
+  DECLARE_ALIGNED(16, uint16_t, ref_dst[3 * 32 * 32]);
+  RunTest(left_col, above_data, dst, ref_dst);
+}
+
+#if HAVE_SSE2
+#if CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    SSE2_TO_C_8, AV1IntraPredTest,  // Entries: (SSE2, C, block size, depth 8).
+    ::testing::Values(IntraPredFunc(&aom_highbd_dc_predictor_32x32_sse2,
+                                    &aom_highbd_dc_predictor_32x32_c, 32, 8),
+#if !CONFIG_ALT_INTRA
+                      IntraPredFunc(&aom_highbd_tm_predictor_16x16_sse2,
+                                    &aom_highbd_tm_predictor_16x16_c, 16, 8),
+                      IntraPredFunc(&aom_highbd_tm_predictor_32x32_sse2,
+                                    &aom_highbd_tm_predictor_32x32_c, 32, 8),
+#endif  // !CONFIG_ALT_INTRA
+
+                      IntraPredFunc(&aom_highbd_dc_predictor_4x4_sse2,
+                                    &aom_highbd_dc_predictor_4x4_c, 4, 8),
+                      IntraPredFunc(&aom_highbd_dc_predictor_8x8_sse2,
+                                    &aom_highbd_dc_predictor_8x8_c, 8, 8),
+                      IntraPredFunc(&aom_highbd_dc_predictor_16x16_sse2,
+                                    &aom_highbd_dc_predictor_16x16_c, 16, 8),
+                      IntraPredFunc(&aom_highbd_v_predictor_4x4_sse2,
+                                    &aom_highbd_v_predictor_4x4_c, 4, 8),
+                      IntraPredFunc(&aom_highbd_v_predictor_8x8_sse2,
+                                    &aom_highbd_v_predictor_8x8_c, 8, 8),
+                      IntraPredFunc(&aom_highbd_v_predictor_16x16_sse2,
+                                    &aom_highbd_v_predictor_16x16_c, 16, 8),
+                      IntraPredFunc(&aom_highbd_v_predictor_32x32_sse2,
+                                    &aom_highbd_v_predictor_32x32_c, 32, 8)
+#if !CONFIG_ALT_INTRA
+                          ,  // Separator needed only when the entries below exist.
+                      IntraPredFunc(&aom_highbd_tm_predictor_4x4_sse2,
+                                    &aom_highbd_tm_predictor_4x4_c, 4, 8),
+                      IntraPredFunc(&aom_highbd_tm_predictor_8x8_sse2,
+                                    &aom_highbd_tm_predictor_8x8_c, 8, 8)
+#endif  // !CONFIG_ALT_INTRA
+                          ));
+
+INSTANTIATE_TEST_CASE_P(
+    SSE2_TO_C_10, AV1IntraPredTest,  // Entries: (SSE2, C, block size, depth 10).
+    ::testing::Values(IntraPredFunc(&aom_highbd_dc_predictor_32x32_sse2,
+                                    &aom_highbd_dc_predictor_32x32_c, 32, 10),
+#if !CONFIG_ALT_INTRA
+                      IntraPredFunc(&aom_highbd_tm_predictor_16x16_sse2,
+                                    &aom_highbd_tm_predictor_16x16_c, 16, 10),
+                      IntraPredFunc(&aom_highbd_tm_predictor_32x32_sse2,
+                                    &aom_highbd_tm_predictor_32x32_c, 32, 10),
+#endif  // !CONFIG_ALT_INTRA
+                      IntraPredFunc(&aom_highbd_dc_predictor_4x4_sse2,
+                                    &aom_highbd_dc_predictor_4x4_c, 4, 10),
+                      IntraPredFunc(&aom_highbd_dc_predictor_8x8_sse2,
+                                    &aom_highbd_dc_predictor_8x8_c, 8, 10),
+                      IntraPredFunc(&aom_highbd_dc_predictor_16x16_sse2,
+                                    &aom_highbd_dc_predictor_16x16_c, 16, 10),
+                      IntraPredFunc(&aom_highbd_v_predictor_4x4_sse2,
+                                    &aom_highbd_v_predictor_4x4_c, 4, 10),
+                      IntraPredFunc(&aom_highbd_v_predictor_8x8_sse2,
+                                    &aom_highbd_v_predictor_8x8_c, 8, 10),
+                      IntraPredFunc(&aom_highbd_v_predictor_16x16_sse2,
+                                    &aom_highbd_v_predictor_16x16_c, 16, 10),
+                      IntraPredFunc(&aom_highbd_v_predictor_32x32_sse2,
+                                    &aom_highbd_v_predictor_32x32_c, 32, 10)
+#if !CONFIG_ALT_INTRA
+                          ,  // Separator needed only when the entries below exist.
+                      IntraPredFunc(&aom_highbd_tm_predictor_4x4_sse2,
+                                    &aom_highbd_tm_predictor_4x4_c, 4, 10),
+                      IntraPredFunc(&aom_highbd_tm_predictor_8x8_sse2,
+                                    &aom_highbd_tm_predictor_8x8_c, 8, 10)
+#endif  // !CONFIG_ALT_INTRA
+                          ));
+
+INSTANTIATE_TEST_CASE_P(
+    SSE2_TO_C_12, AV1IntraPredTest,  // Entries: (SSE2, C, block size, depth 12).
+    ::testing::Values(IntraPredFunc(&aom_highbd_dc_predictor_32x32_sse2,
+                                    &aom_highbd_dc_predictor_32x32_c, 32, 12),
+#if !CONFIG_ALT_INTRA
+                      IntraPredFunc(&aom_highbd_tm_predictor_16x16_sse2,
+                                    &aom_highbd_tm_predictor_16x16_c, 16, 12),
+                      IntraPredFunc(&aom_highbd_tm_predictor_32x32_sse2,
+                                    &aom_highbd_tm_predictor_32x32_c, 32, 12),
+#endif  // !CONFIG_ALT_INTRA
+                      IntraPredFunc(&aom_highbd_dc_predictor_4x4_sse2,
+                                    &aom_highbd_dc_predictor_4x4_c, 4, 12),
+                      IntraPredFunc(&aom_highbd_dc_predictor_8x8_sse2,
+                                    &aom_highbd_dc_predictor_8x8_c, 8, 12),
+                      IntraPredFunc(&aom_highbd_dc_predictor_16x16_sse2,
+                                    &aom_highbd_dc_predictor_16x16_c, 16, 12),
+                      IntraPredFunc(&aom_highbd_v_predictor_4x4_sse2,
+                                    &aom_highbd_v_predictor_4x4_c, 4, 12),
+                      IntraPredFunc(&aom_highbd_v_predictor_8x8_sse2,
+                                    &aom_highbd_v_predictor_8x8_c, 8, 12),
+                      IntraPredFunc(&aom_highbd_v_predictor_16x16_sse2,
+                                    &aom_highbd_v_predictor_16x16_c, 16, 12),
+                      IntraPredFunc(&aom_highbd_v_predictor_32x32_sse2,
+                                    &aom_highbd_v_predictor_32x32_c, 32, 12)
+#if !CONFIG_ALT_INTRA
+                          ,  // Separator needed only when the entries below exist.
+                      IntraPredFunc(&aom_highbd_tm_predictor_4x4_sse2,
+                                    &aom_highbd_tm_predictor_4x4_c, 4, 12),
+                      IntraPredFunc(&aom_highbd_tm_predictor_8x8_sse2,
+                                    &aom_highbd_tm_predictor_8x8_c, 8, 12)
+#endif  // !CONFIG_ALT_INTRA
+                          ));
+
+#endif  // CONFIG_HIGHBITDEPTH
+#endif  // HAVE_SSE2
+}  // namespace
diff --git a/third_party/aom/test/ivf_video_source.h b/third_party/aom/test/ivf_video_source.h
new file mode 100644
index 000000000..0d3e9f9cb
--- /dev/null
+++ b/third_party/aom/test/ivf_video_source.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef TEST_IVF_VIDEO_SOURCE_H_
+#define TEST_IVF_VIDEO_SOURCE_H_
+#include <cstdio>
+#include <cstdlib>
+#include <new>
+#include <string>
+#include "test/video_source.h"
+
+namespace libaom_test {
+const unsigned int kCodeBufferSize = 256 * 1024;  // Frame buffer size, bytes.
+const unsigned int kIvfFileHdrSize = 32;   // Bytes read as the IVF file header.
+const unsigned int kIvfFrameHdrSize = 12;  // Bytes read as each frame header.
+
+static unsigned int MemGetLe32(const uint8_t *mem) {  // mem: 4 bytes, LE.
+  return (uint32_t(mem[3]) << 24) | (mem[2] << 16) | (mem[1] << 8) | (mem[0]);
+}  // mem[3] is widened to unsigned first: as int, "<< 24" overflows at 0x80+.
+
+// This class extends VideoSource to allow parsing of ivf files,
+// so that we can do actual file decodes.
+class IVFVideoSource : public CompressedVideoSource {
+ public:
+  explicit IVFVideoSource(const std::string &file_name)
+      : file_name_(file_name), input_file_(NULL), compressed_frame_buf_(NULL),
+        frame_sz_(0), frame_(0), end_of_file_(false) {}
+
+  virtual ~IVFVideoSource() {
+    delete[] compressed_frame_buf_;
+    if (input_file_) fclose(input_file_);
+  }
+
+  virtual void Init() {
+    // Allocate a buffer for read in the compressed video frame. The nothrow
+    // form returns NULL on failure, which is what the check below relies on.
+    compressed_frame_buf_ = new (std::nothrow) uint8_t[kCodeBufferSize];
+    ASSERT_TRUE(compressed_frame_buf_ != NULL)
+        << "Allocate frame buffer failed";
+  }
+
+  virtual void Begin() {
+    if (input_file_) fclose(input_file_);  // Handle from an earlier Begin().
+    input_file_ = OpenTestDataFile(file_name_);
+    ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
+                                     << file_name_;
+
+    // Read file header
+    uint8_t file_hdr[kIvfFileHdrSize];
+    ASSERT_EQ(kIvfFileHdrSize, fread(file_hdr, 1, kIvfFileHdrSize, input_file_))
+        << "File header read failed.";
+    // Check file header
+    ASSERT_TRUE(file_hdr[0] == 'D' && file_hdr[1] == 'K' &&
+                file_hdr[2] == 'I' && file_hdr[3] == 'F')
+        << "Input is not an IVF file.";
+
+    FillFrame();
+  }
+
+  virtual void Next() {
+    ++frame_;
+    FillFrame();
+  }
+
+  void FillFrame() {
+    ASSERT_TRUE(input_file_ != NULL);
+    uint8_t frame_hdr[kIvfFrameHdrSize];
+    // Check frame header and read a frame from input_file.
+    if (fread(frame_hdr, 1, kIvfFrameHdrSize, input_file_) !=
+        kIvfFrameHdrSize) {
+      end_of_file_ = true;  // A short header read is treated as end of file.
+    } else {
+      end_of_file_ = false;
+      frame_sz_ = MemGetLe32(frame_hdr);  // Size is the header's first 4 bytes.
+      ASSERT_LE(frame_sz_, kCodeBufferSize)
+          << "Frame is too big for allocated code buffer";
+      ASSERT_EQ(frame_sz_,
+                fread(compressed_frame_buf_, 1, frame_sz_, input_file_))
+          << "Failed to read complete frame";
+    }
+  }
+
+  virtual const uint8_t *cxdata() const {
+    return end_of_file_ ? NULL : compressed_frame_buf_;
+  }
+  virtual size_t frame_size() const { return frame_sz_; }
+  virtual unsigned int frame_number() const { return frame_; }
+
+ protected:
+  std::string file_name_;
+  FILE *input_file_;
+  uint8_t *compressed_frame_buf_;
+  size_t frame_sz_;
+  unsigned int frame_;
+  bool end_of_file_;
+};
+
+}  // namespace libaom_test
+
+#endif  // TEST_IVF_VIDEO_SOURCE_H_
diff --git a/third_party/aom/test/level_test.cc b/third_party/aom/test/level_test.cc
new file mode 100644
index 000000000..1049d4901
--- /dev/null
+++ b/third_party/aom/test/level_test.cc
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+class LevelTest
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int> {
+ protected:  // Params: (EncoderTest ctor argument, encoding mode, cpu_used).
+  LevelTest()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        cpu_used_(GET_PARAM(2)), min_gf_internal_(24), target_level_(0),
+        level_(0) {}
+  virtual ~LevelTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(encoding_mode_);
+    if (encoding_mode_ != ::libaom_test::kRealTime) {
+      cfg_.g_lag_in_frames = 25;
+      cfg_.rc_end_usage = AOM_VBR;
+    } else {
+      cfg_.g_lag_in_frames = 0;
+      cfg_.rc_end_usage = AOM_CBR;
+    }
+    cfg_.rc_2pass_vbr_minsection_pct = 5;
+    cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+    cfg_.rc_target_bitrate = 400;  // Tests may override this before RunLoop().
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_min_quantizer = 0;
+  }
+
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 0) {  // Controls below are sent for frame 0 only.
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      encoder->Control(AV1E_SET_TARGET_LEVEL, target_level_);
+      encoder->Control(AV1E_SET_MIN_GF_INTERVAL, min_gf_internal_);
+      if (encoding_mode_ != ::libaom_test::kRealTime) {
+        encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+        encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+        encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+      }
+    }
+    encoder->Control(AV1E_GET_LEVEL, &level_);  // Queried on every call.
+    ASSERT_LE(level_, 51);
+    ASSERT_GE(level_, 0);
+  }
+
+  ::libaom_test::TestMode encoding_mode_;
+  int cpu_used_;
+  int min_gf_internal_;  // Value sent with AV1E_SET_MIN_GF_INTERVAL.
+  int target_level_;     // Value sent with AV1E_SET_TARGET_LEVEL.
+  int level_;            // Last value read back with AV1E_GET_LEVEL.
+};
+
+// Test for keeping level stats only (target level 0).
+TEST_P(LevelTest, TestTargetLevel0) {
+  ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+                                       40);
+  target_level_ = 0;
+  min_gf_internal_ = 4;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(11, level_);  // Expected at the bitrate of 400 set in SetUp().
+
+  cfg_.rc_target_bitrate = 1600;  // Second run at a higher target bitrate.
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(20, level_);
+}
+
+// Test for level control being turned off (target level 255).
+TEST_P(LevelTest, TestTargetLevel255) {
+  ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+                                       30);
+  target_level_ = 255;  // Sent via AV1E_SET_TARGET_LEVEL for frame 0.
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));  // No final level is asserted.
+}
+
+TEST_P(LevelTest, TestTargetLevelApi) {  // Accepted target-level values.
+  // Setup failures are fatal: |enc| must not be used unless init succeeded.
+  static const aom_codec_iface_t *codec = &aom_codec_av1_cx_algo;
+  aom_codec_ctx_t enc;
+  aom_codec_enc_cfg_t cfg;
+  ASSERT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(codec, &cfg, 0));
+  ASSERT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, codec, &cfg, 0));
+  for (int level = 0; level <= 256; ++level) {
+    if (level == 10 || level == 11 || level == 20 || level == 21 ||
+        level == 30 || level == 31 || level == 40 || level == 41 ||
+        level == 50 || level == 51 || level == 52 || level == 60 ||
+        level == 61 || level == 62 || level == 0 || level == 255)
+      EXPECT_EQ(AOM_CODEC_OK,
+                aom_codec_control(&enc, AV1E_SET_TARGET_LEVEL, level));
+    else
+      EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
+                aom_codec_control(&enc, AV1E_SET_TARGET_LEVEL, level));
+  }
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
+}
+
+AV1_INSTANTIATE_TEST_CASE(LevelTest,
+                          ::testing::Values(::libaom_test::kTwoPassGood,
+                                            ::libaom_test::kOnePassGood),
+                          ::testing::Range(0, 9));  // cpu_used_ from 0 to 8.
+}  // namespace
diff --git a/third_party/aom/test/lossless_test.cc b/third_party/aom/test/lossless_test.cc
new file mode 100644
index 000000000..5c5b32d93
--- /dev/null
+++ b/third_party/aom/test/lossless_test.cc
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+
+namespace {
+
+const int kMaxPsnr = 100;  // Starting minimum; tests expect PSNR >= this.
+
+class LosslessTestLarge
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWithParam<libaom_test::TestMode> {
+ protected:  // Fixture that tracks the lowest PSNR reported during a pass.
+  LosslessTestLarge()
+      : EncoderTest(GET_PARAM(0)), psnr_(kMaxPsnr), nframes_(0),
+        encoding_mode_(GET_PARAM(1)) {}
+
+  virtual ~LosslessTestLarge() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(encoding_mode_);
+  }
+
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      // Only call Control if quantizer > 0 to verify that using quantizer
+      // alone will activate lossless
+      if (cfg_.rc_max_quantizer > 0 || cfg_.rc_min_quantizer > 0) {
+        encoder->Control(AV1E_SET_LOSSLESS, 1);
+      }
+    }
+  }
+
+  virtual void BeginPassHook(unsigned int /*pass*/) {
+    psnr_ = kMaxPsnr;  // Restart the running minimum for this pass.
+    nframes_ = 0;
+  }
+
+  virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
+    if (pkt->data.psnr.psnr[0] < psnr_) psnr_ = pkt->data.psnr.psnr[0];
+  }
+
+  double GetMinPsnr() const { return psnr_; }
+
+ private:
+  double psnr_;           // Smallest psnr[0] seen since BeginPassHook().
+  unsigned int nframes_;  // Only ever reset to 0 within this class.
+  libaom_test::TestMode encoding_mode_;
+};
+
+TEST_P(LosslessTestLarge, TestLossLessEncoding) {
+  init_flags_ = AOM_CODEC_USE_PSNR;
+  // Both quantizer bounds are 0, so no AV1E_SET_LOSSLESS control is issued.
+  cfg_.rc_max_quantizer = 0;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.g_lag_in_frames = 25;
+  cfg_.rc_target_bitrate = 2000;
+  const aom_rational frame_tb = { 33333333, 1000000000 };
+  cfg_.g_timebase = frame_tb;
+
+  // intentionally changed the dimension for better testing coverage
+  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     frame_tb.den, frame_tb.num, 0, 5);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  const double psnr_lossless = GetMinPsnr();
+  EXPECT_GE(psnr_lossless, kMaxPsnr);
+}
+
+TEST_P(LosslessTestLarge, TestLossLessEncoding444) {
+  // Quantizer-only lossless run on a Y4M source with g_profile set to 1.
+  libaom_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 5);
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  cfg_.rc_max_quantizer = 0;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.g_lag_in_frames = 25;
+  cfg_.rc_target_bitrate = 2000;
+  cfg_.g_timebase = video.timebase();
+  cfg_.g_profile = 1;
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  const double psnr_lossless = GetMinPsnr();
+  EXPECT_GE(psnr_lossless, kMaxPsnr);
+}
+
+TEST_P(LosslessTestLarge, TestLossLessEncodingCtrl) {
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  // A non-zero quantizer range is deliberate: lossless must then come from
+  // the AV1E_SET_LOSSLESS control issued in PreEncodeFrameHook().
+  cfg_.rc_max_quantizer = 20;
+  cfg_.rc_min_quantizer = 10;
+  cfg_.g_lag_in_frames = 25;
+  cfg_.rc_target_bitrate = 2000;
+  const aom_rational frame_tb = { 33333333, 1000000000 };
+  cfg_.g_timebase = frame_tb;
+
+  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     frame_tb.den, frame_tb.num, 0, 5);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  const double psnr_lossless = GetMinPsnr();
+  EXPECT_GE(psnr_lossless, kMaxPsnr);
+}
+
+AV1_INSTANTIATE_TEST_CASE(LosslessTestLarge,  // One run per listed mode.
+                          ::testing::Values(::libaom_test::kOnePassGood,
+                                            ::libaom_test::kTwoPassGood));
+}  // namespace
diff --git a/third_party/aom/test/lpf_8_test.cc b/third_party/aom/test/lpf_8_test.cc
new file mode 100644
index 000000000..cee0d3b81
--- /dev/null
+++ b/third_party/aom/test/lpf_8_test.cc
@@ -0,0 +1,624 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <cmath>
+#include <cstdlib>
+#include <string>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "./aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "av1/common/av1_loopfilter.h"
+#include "av1/common/entropy.h"
+#include "aom/aom_integer.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+// Horizontally and Vertically need 32x32: 8  Coeffs preceding filtered section
+//                                         16 Coeffs within filtered section
+//                                         8  Coeffs following filtered section
+const int kNumCoeffs = 1024;  // 32 * 32 samples per test buffer.
+
+const int number_of_iterations = 10000;  // Random trials per test.
+
+#if CONFIG_HIGHBITDEPTH  // Filters take uint16_t samples and a trailing bd.
+typedef void (*loop_op_t)(uint16_t *s, int p, const uint8_t *blimit,
+                          const uint8_t *limit, const uint8_t *thresh, int bd);
+typedef void (*dual_loop_op_t)(uint16_t *s, int p, const uint8_t *blimit0,
+                               const uint8_t *limit0, const uint8_t *thresh0,
+                               const uint8_t *blimit1, const uint8_t *limit1,
+                               const uint8_t *thresh1, int bd);
+#else  // Filters take uint8_t samples and no bd argument.
+typedef void (*loop_op_t)(uint8_t *s, int p, const uint8_t *blimit,
+                          const uint8_t *limit, const uint8_t *thresh);
+typedef void (*dual_loop_op_t)(uint8_t *s, int p, const uint8_t *blimit0,
+                               const uint8_t *limit0, const uint8_t *thresh0,
+                               const uint8_t *blimit1, const uint8_t *limit1,
+                               const uint8_t *thresh1);
+#endif  // CONFIG_HIGHBITDEPTH
+// Test parameters: (filter under test, reference filter, bit depth).
+typedef std::tr1::tuple<loop_op_t, loop_op_t, int> loop8_param_t;
+typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t;
+
+class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
+ public:  // Fixture for loop_op_t filters: (test, reference, bit depth).
+  virtual ~Loop8Test6Param() {}
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+  virtual void SetUp() {
+    bit_depth_ = GET_PARAM(2);
+    mask_ = (1 << bit_depth_) - 1;  // All-ones value of bit_depth_ bits.
+    ref_loopfilter_op_ = GET_PARAM(1);
+    loopfilter_op_ = GET_PARAM(0);
+  }
+
+ protected:
+  loop_op_t loopfilter_op_;      // Implementation under test.
+  loop_op_t ref_loopfilter_op_;  // Reference it is compared against.
+  int bit_depth_;
+  int mask_;
+};
+
+class Loop8Test9Param : public ::testing::TestWithParam<dualloop8_param_t> {
+ public:  // Fixture for dual_loop_op_t filters: (test, reference, bit depth).
+  virtual ~Loop8Test9Param() {}
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+  virtual void SetUp() {
+    bit_depth_ = GET_PARAM(2);
+    mask_ = (1 << bit_depth_) - 1;  // All-ones value of bit_depth_ bits.
+    ref_loopfilter_op_ = GET_PARAM(1);
+    loopfilter_op_ = GET_PARAM(0);
+  }
+
+ protected:
+  dual_loop_op_t loopfilter_op_;      // Implementation under test.
+  dual_loop_op_t ref_loopfilter_op_;  // Reference it is compared against.
+  int bit_depth_;
+  int mask_;
+};
+
+TEST_P(Loop8Test6Param, OperationCheck) {  // Structured (run-based) input.
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int count_test_block = number_of_iterations;
+#if CONFIG_HIGHBITDEPTH
+  int32_t bd = bit_depth_;
+  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
+#else
+  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
+#endif  // CONFIG_HIGHBITDEPTH
+  int err_count_total = 0;
+  int first_failure = -1;  // First iteration index that showed a mismatch.
+  for (int i = 0; i < count_test_block; ++i) {
+    int err_count = 0;
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = rnd.Rand8();
+    DECLARE_ALIGNED(16, const uint8_t,
+                    thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    int32_t p = kNumCoeffs / 32;  // Pitch passed to the filters: 32 per row.
+
+    uint16_t tmp_s[kNumCoeffs];
+    int j = 0;
+    while (j < kNumCoeffs) {
+      uint8_t val = rnd.Rand8();
+      if (val & 0x80) {  // 50% chance to choose a new value.
+        tmp_s[j] = rnd.Rand16();
+        j++;
+      } else {  // 50% chance to repeat previous value in row X times
+        int k = 0;
+        while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) {
+          if (j < 1) {
+            tmp_s[j] = rnd.Rand16();
+          } else if (val & 0x20) {  // Increment by a value within the limit
+            tmp_s[j] = (tmp_s[j - 1] + (*limit - 1));
+          } else {  // Decrement by a value within the limit
+            tmp_s[j] = (tmp_s[j - 1] - (*limit - 1));
+          }
+          j++;
+        }
+      }
+    }
+    for (j = 0; j < kNumCoeffs; j++) {
+      if (i % 2) {
+        s[j] = tmp_s[j] & mask_;
+      } else {
+        s[j] = tmp_s[p * (j % p) + j / p] & mask_;  // Transposed copy.
+      }
+      ref_s[j] = s[j];
+    }
+#if CONFIG_HIGHBITDEPTH
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd);
+    ASM_REGISTER_STATE_CHECK(
+        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd));
+#else
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh);
+    ASM_REGISTER_STATE_CHECK(
+        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh));
+#endif  // CONFIG_HIGHBITDEPTH
+
+    for (j = 0; j < kNumCoeffs; ++j) {  // Compare the two buffers in full.
+      err_count += ref_s[j] != s[j];
+    }
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Loop8Test6Param, C output doesn't match SSE2 "
+         "loopfilter output. "
+      << "First failed at test case " << first_failure;
+}
+
+TEST_P(Loop8Test6Param, ValueCheck) {  // Fully random, masked input samples.
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int count_test_block = number_of_iterations;
+#if CONFIG_HIGHBITDEPTH
+  const int32_t bd = bit_depth_;
+  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
+#else
+  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
+#endif  // CONFIG_HIGHBITDEPTH
+  int err_count_total = 0;
+  int first_failure = -1;  // First iteration index that showed a mismatch.
+
+  // NOTE: The code in av1_loopfilter.c:update_sharpness computes mblim as a
+  // function of sharpness_lvl and the loopfilter lvl as:
+  // block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));
+  // ...
+  // memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
+  //        SIMD_WIDTH);
+  // This means that the largest value for mblim will occur when sharpness_lvl
+  // is equal to 0, and lvl is equal to its greatest value (MAX_LOOP_FILTER).
+  // In this case block_inside_limit will be equal to MAX_LOOP_FILTER and
+  // therefore mblim will be equal to (2 * (lvl + 2) + block_inside_limit) =
+  // 2 * (MAX_LOOP_FILTER + 2) + MAX_LOOP_FILTER = 3 * MAX_LOOP_FILTER + 4
+
+  for (int i = 0; i < count_test_block; ++i) {
+    int err_count = 0;
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = rnd.Rand8();
+    DECLARE_ALIGNED(16, const uint8_t,
+                    thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    int32_t p = kNumCoeffs / 32;  // Pitch passed to the filters: 32 per row.
+    for (int j = 0; j < kNumCoeffs; ++j) {
+      s[j] = rnd.Rand16() & mask_;
+      ref_s[j] = s[j];
+    }
+#if CONFIG_HIGHBITDEPTH
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd);
+    ASM_REGISTER_STATE_CHECK(
+        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd));
+#else
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh);
+    ASM_REGISTER_STATE_CHECK(
+        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh));
+#endif  // CONFIG_HIGHBITDEPTH
+    for (int j = 0; j < kNumCoeffs; ++j) {  // Compare the two buffers in full.
+      err_count += ref_s[j] != s[j];
+    }
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Loop8Test6Param, C output doesn't match SSE2 "
+         "loopfilter output. "
+      << "First failed at test case " << first_failure;
+}
+
+TEST_P(Loop8Test9Param, OperationCheck) {  // Input built from stepped runs.
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int count_test_block = number_of_iterations;
+#if CONFIG_HIGHBITDEPTH
+  const int32_t bd = bit_depth_;
+  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
+#else
+  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
+#endif  // CONFIG_HIGHBITDEPTH
+  int err_count_total = 0;
+  int first_failure = -1;
+  for (int i = 0; i < count_test_block; ++i) {
+    int err_count = 0;
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = rnd.Rand8();
+    DECLARE_ALIGNED(16, const uint8_t,
+                    thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = rnd.Rand8();
+    DECLARE_ALIGNED(16, const uint8_t,
+                    thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    int32_t p = kNumCoeffs / 32;
+    uint16_t tmp_s[kNumCoeffs];  // Staging buffer for the generated samples.
+    int j = 0;
+    const uint8_t limit = *limit0 < *limit1 ? *limit0 : *limit1;  // Smaller one.
+    while (j < kNumCoeffs) {
+      uint8_t val = rnd.Rand8();
+      if (val & 0x80) {  // 50% chance to choose a new value.
+        tmp_s[j] = rnd.Rand16();
+        j++;
+      } else {  // 50% chance to emit a run of up to (val & 0x1f) + 1 samples.
+        int k = 0;
+        while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) {
+          if (j < 1) {  // No previous sample to derive from yet.
+            tmp_s[j] = rnd.Rand16();
+          } else if (val & 0x20) {  // Previous sample plus (limit - 1).
+            tmp_s[j] = (tmp_s[j - 1] + (limit - 1));
+          } else {  // Previous sample minus (limit - 1).
+            tmp_s[j] = (tmp_s[j - 1] - (limit - 1));
+          }
+          j++;
+        }
+      }
+    }
+    for (j = 0; j < kNumCoeffs; j++) {
+      if (i % 2) {  // Odd iterations copy tmp_s in order.
+        s[j] = tmp_s[j] & mask_;
+      } else {  // Even iterations swap row and column (row length p).
+        s[j] = tmp_s[p * (j % p) + j / p] & mask_;
+      }
+      ref_s[j] = s[j];
+    }
+#if CONFIG_HIGHBITDEPTH
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,
+                       limit1, thresh1, bd);
+    ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
+                                            thresh0, blimit1, limit1, thresh1,
+                                            bd));
+#else
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,
+                       limit1, thresh1);
+    ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
+                                            thresh0, blimit1, limit1, thresh1));
+#endif  // CONFIG_HIGHBITDEPTH
+    for (j = 0; j < kNumCoeffs; ++j) {  // Count samples that differ.
+      err_count += ref_s[j] != s[j];
+    }
+    if (err_count && !err_count_total) {  // Remember only the first failure.
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Loop8Test9Param, C output doesn't match SSE2 "
+         "loopfilter output. "
+      << "First failed at test case " << first_failure;
+}
+
+// Feeds unstructured random samples and two independently drawn parameter
+// sets to the filter and its reference, then compares every output sample.
+TEST_P(Loop8Test9Param, ValueCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int count_test_block = number_of_iterations;
+#if CONFIG_HIGHBITDEPTH
+  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
+#else
+  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
+#endif  // CONFIG_HIGHBITDEPTH
+  int err_count_total = 0;
+  int first_failure = -1;
+  for (int i = 0; i < count_test_block; ++i) {
+    int err_count = 0;
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = rnd.Rand8();
+    DECLARE_ALIGNED(16, const uint8_t,
+                    thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = rnd.Rand8();
+    DECLARE_ALIGNED(16, const uint8_t,
+                    thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    int32_t p = kNumCoeffs / 32;  // TODO(pdlf) can we have non-square here?
+    for (int j = 0; j < kNumCoeffs; ++j) {  // Fill with masked random samples.
+      s[j] = rnd.Rand16() & mask_;
+      ref_s[j] = s[j];
+    }
+#if CONFIG_HIGHBITDEPTH
+    const int32_t bd = bit_depth_;
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,
+                       limit1, thresh1, bd);
+    ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
+                                            thresh0, blimit1, limit1, thresh1,
+                                            bd));
+#else
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,
+                       limit1, thresh1);
+    ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
+                                            thresh0, blimit1, limit1, thresh1));
+#endif  // CONFIG_HIGHBITDEPTH
+    for (int j = 0; j < kNumCoeffs; ++j) {  // Count samples that differ.
+      err_count += ref_s[j] != s[j];
+    }
+    if (err_count && !err_count_total) first_failure = i;  // First failure only.
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Loop8Test9Param, C output doesn't match SSE2 "
+         "loopfilter output. "
+      << "First failed at test case " << first_failure;
+}
+
+using std::tr1::make_tuple;
+
+#if HAVE_SSE2
+#if CONFIG_HIGHBITDEPTH  // Each kernel appears with third field 8, 10 and 12.
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Loop8Test6Param,  // Filters taking one (blimit, limit, thresh) set.
+    ::testing::Values(make_tuple(&aom_highbd_lpf_horizontal_4_sse2,
+                                 &aom_highbd_lpf_horizontal_4_c, 8),
+                      make_tuple(&aom_highbd_lpf_vertical_4_sse2,
+                                 &aom_highbd_lpf_vertical_4_c, 8),
+                      make_tuple(&aom_highbd_lpf_horizontal_8_sse2,
+                                 &aom_highbd_lpf_horizontal_8_c, 8),
+                      make_tuple(&aom_highbd_lpf_horizontal_edge_8_sse2,
+                                 &aom_highbd_lpf_horizontal_edge_8_c, 8),
+                      make_tuple(&aom_highbd_lpf_horizontal_edge_16_sse2,
+                                 &aom_highbd_lpf_horizontal_edge_16_c, 8),
+                      make_tuple(&aom_highbd_lpf_vertical_8_sse2,
+                                 &aom_highbd_lpf_vertical_8_c, 8),
+                      make_tuple(&aom_highbd_lpf_vertical_16_sse2,
+                                 &aom_highbd_lpf_vertical_16_c, 8),
+                      make_tuple(&aom_highbd_lpf_horizontal_4_sse2,
+                                 &aom_highbd_lpf_horizontal_4_c, 10),
+                      make_tuple(&aom_highbd_lpf_vertical_4_sse2,
+                                 &aom_highbd_lpf_vertical_4_c, 10),
+                      make_tuple(&aom_highbd_lpf_horizontal_8_sse2,
+                                 &aom_highbd_lpf_horizontal_8_c, 10),
+                      make_tuple(&aom_highbd_lpf_horizontal_edge_8_sse2,
+                                 &aom_highbd_lpf_horizontal_edge_8_c, 10),
+                      make_tuple(&aom_highbd_lpf_horizontal_edge_16_sse2,
+                                 &aom_highbd_lpf_horizontal_edge_16_c, 10),
+                      make_tuple(&aom_highbd_lpf_vertical_8_sse2,
+                                 &aom_highbd_lpf_vertical_8_c, 10),
+                      make_tuple(&aom_highbd_lpf_vertical_16_sse2,
+                                 &aom_highbd_lpf_vertical_16_c, 10),
+                      make_tuple(&aom_highbd_lpf_horizontal_4_sse2,
+                                 &aom_highbd_lpf_horizontal_4_c, 12),
+                      make_tuple(&aom_highbd_lpf_vertical_4_sse2,
+                                 &aom_highbd_lpf_vertical_4_c, 12),
+                      make_tuple(&aom_highbd_lpf_horizontal_8_sse2,
+                                 &aom_highbd_lpf_horizontal_8_c, 12),
+                      make_tuple(&aom_highbd_lpf_horizontal_edge_8_sse2,
+                                 &aom_highbd_lpf_horizontal_edge_8_c, 12),
+                      make_tuple(&aom_highbd_lpf_horizontal_edge_16_sse2,
+                                 &aom_highbd_lpf_horizontal_edge_16_c, 12),
+                      make_tuple(&aom_highbd_lpf_vertical_8_sse2,
+                                 &aom_highbd_lpf_vertical_8_c, 12),
+                      make_tuple(&aom_highbd_lpf_vertical_16_sse2,
+                                 &aom_highbd_lpf_vertical_16_c, 12),
+                      make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2,
+                                 &aom_highbd_lpf_vertical_16_dual_c, 8),
+                      make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2,
+                                 &aom_highbd_lpf_vertical_16_dual_c, 10),
+                      make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2,
+                                 &aom_highbd_lpf_vertical_16_dual_c, 12)));
+#else  // !CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Loop8Test6Param,  // Filters taking one (blimit, limit, thresh) set.
+    ::testing::Values(
+        make_tuple(&aom_lpf_horizontal_4_sse2, &aom_lpf_horizontal_4_c, 8),
+        make_tuple(&aom_lpf_horizontal_8_sse2, &aom_lpf_horizontal_8_c, 8),
+        make_tuple(&aom_lpf_horizontal_edge_8_sse2,
+                   &aom_lpf_horizontal_edge_8_c, 8),
+        make_tuple(&aom_lpf_horizontal_edge_16_sse2,
+                   &aom_lpf_horizontal_edge_16_c, 8),
+        make_tuple(&aom_lpf_vertical_4_sse2, &aom_lpf_vertical_4_c, 8),
+        make_tuple(&aom_lpf_vertical_8_sse2, &aom_lpf_vertical_8_c, 8),
+        make_tuple(&aom_lpf_vertical_16_sse2, &aom_lpf_vertical_16_c, 8),
+        make_tuple(&aom_lpf_vertical_16_dual_sse2, &aom_lpf_vertical_16_dual_c,
+                   8)));
+#endif  // CONFIG_HIGHBITDEPTH
+#endif  // HAVE_SSE2
+
+#if HAVE_AVX2 && (!CONFIG_HIGHBITDEPTH)
+INSTANTIATE_TEST_CASE_P(
+    AVX2, Loop8Test6Param,  // Each tuple: (AVX2 kernel, C kernel, third field).
+    ::testing::Values(make_tuple(&aom_lpf_horizontal_edge_8_avx2,
+                                 &aom_lpf_horizontal_edge_8_c, 8),
+                      make_tuple(&aom_lpf_horizontal_edge_16_avx2,
+                                 &aom_lpf_horizontal_edge_16_c, 8)));
+#endif  // HAVE_AVX2 && (!CONFIG_HIGHBITDEPTH)
+
+#if HAVE_SSE2
+#if CONFIG_HIGHBITDEPTH  // Each kernel appears with third field 8, 10 and 12.
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Loop8Test9Param,  // Filters taking two (blimit, limit, thresh) sets.
+    ::testing::Values(make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
+                                 &aom_highbd_lpf_horizontal_4_dual_c, 8),
+                      make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
+                                 &aom_highbd_lpf_horizontal_8_dual_c, 8),
+                      make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
+                                 &aom_highbd_lpf_vertical_4_dual_c, 8),
+                      make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
+                                 &aom_highbd_lpf_vertical_8_dual_c, 8),
+                      make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
+                                 &aom_highbd_lpf_horizontal_4_dual_c, 10),
+                      make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
+                                 &aom_highbd_lpf_horizontal_8_dual_c, 10),
+                      make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
+                                 &aom_highbd_lpf_vertical_4_dual_c, 10),
+                      make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
+                                 &aom_highbd_lpf_vertical_8_dual_c, 10),
+                      make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
+                                 &aom_highbd_lpf_horizontal_4_dual_c, 12),
+                      make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
+                                 &aom_highbd_lpf_horizontal_8_dual_c, 12),
+                      make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
+                                 &aom_highbd_lpf_vertical_4_dual_c, 12),
+                      make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
+                                 &aom_highbd_lpf_vertical_8_dual_c, 12)));
+#else  // !CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Loop8Test9Param,  // Filters taking two (blimit, limit, thresh) sets.
+    ::testing::Values(make_tuple(&aom_lpf_horizontal_4_dual_sse2,
+                                 &aom_lpf_horizontal_4_dual_c, 8),
+                      make_tuple(&aom_lpf_horizontal_8_dual_sse2,
+                                 &aom_lpf_horizontal_8_dual_c, 8),
+                      make_tuple(&aom_lpf_vertical_4_dual_sse2,
+                                 &aom_lpf_vertical_4_dual_c, 8),
+                      make_tuple(&aom_lpf_vertical_8_dual_sse2,
+                                 &aom_lpf_vertical_8_dual_c, 8)));
+#endif  // CONFIG_HIGHBITDEPTH
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+#if CONFIG_HIGHBITDEPTH
+// No neon high bitdepth functions.
+#else  // !CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    NEON, Loop8Test6Param,
+    ::testing::Values(
+#if HAVE_NEON_ASM
+        // Using #if inside the macro is unsupported on MSVS, but that is
+        // acceptable here because the tests are not currently built for MSVS
+        // with ARM and NEON.
+        make_tuple(&aom_lpf_horizontal_edge_8_neon,
+                   &aom_lpf_horizontal_edge_8_c, 8),
+        make_tuple(&aom_lpf_horizontal_edge_16_neon,
+                   &aom_lpf_horizontal_edge_16_c, 8),
+        make_tuple(&aom_lpf_vertical_16_neon, &aom_lpf_vertical_16_c, 8),
+        make_tuple(&aom_lpf_vertical_16_dual_neon, &aom_lpf_vertical_16_dual_c,
+                   8),
+#endif  // HAVE_NEON_ASM
+        make_tuple(&aom_lpf_horizontal_8_neon, &aom_lpf_horizontal_8_c, 8),
+        make_tuple(&aom_lpf_vertical_8_neon, &aom_lpf_vertical_8_c, 8),
+        make_tuple(&aom_lpf_horizontal_4_neon, &aom_lpf_horizontal_4_c, 8),
+        make_tuple(&aom_lpf_vertical_4_neon, &aom_lpf_vertical_4_c, 8)));
+INSTANTIATE_TEST_CASE_P(NEON, Loop8Test9Param,
+                        ::testing::Values(
+#if HAVE_NEON_ASM  // These two rows exist only when NEON assembly is enabled.
+                            make_tuple(&aom_lpf_horizontal_8_dual_neon,
+                                       &aom_lpf_horizontal_8_dual_c, 8),
+                            make_tuple(&aom_lpf_vertical_8_dual_neon,
+                                       &aom_lpf_vertical_8_dual_c, 8),
+#endif  // HAVE_NEON_ASM
+                            make_tuple(&aom_lpf_horizontal_4_dual_neon,
+                                       &aom_lpf_horizontal_4_dual_c, 8),
+                            make_tuple(&aom_lpf_vertical_4_dual_neon,
+                                       &aom_lpf_vertical_4_dual_c, 8)));
+#endif  // CONFIG_HIGHBITDEPTH
+#endif  // HAVE_NEON
+
+#if HAVE_DSPR2 && !CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    DSPR2, Loop8Test6Param,  // NOTE(review): the edge_8 and edge_16 rows below
+    ::testing::Values(       // use one symbol as both test and reference.
+        make_tuple(&aom_lpf_horizontal_4_dspr2, &aom_lpf_horizontal_4_c, 8),
+        make_tuple(&aom_lpf_horizontal_8_dspr2, &aom_lpf_horizontal_8_c, 8),
+        make_tuple(&aom_lpf_horizontal_edge_8, &aom_lpf_horizontal_edge_8, 8),
+        make_tuple(&aom_lpf_horizontal_edge_16, &aom_lpf_horizontal_edge_16, 8),
+        make_tuple(&aom_lpf_vertical_4_dspr2, &aom_lpf_vertical_4_c, 8),
+        make_tuple(&aom_lpf_vertical_8_dspr2, &aom_lpf_vertical_8_c, 8),
+        make_tuple(&aom_lpf_vertical_16_dspr2, &aom_lpf_vertical_16_c, 8),
+        make_tuple(&aom_lpf_vertical_16_dual_dspr2, &aom_lpf_vertical_16_dual_c,
+                   8)));
+
+INSTANTIATE_TEST_CASE_P(
+    DSPR2, Loop8Test9Param,  // Filters taking two (blimit, limit, thresh) sets.
+    ::testing::Values(make_tuple(&aom_lpf_horizontal_4_dual_dspr2,
+                                 &aom_lpf_horizontal_4_dual_c, 8),
+                      make_tuple(&aom_lpf_horizontal_8_dual_dspr2,
+                                 &aom_lpf_horizontal_8_dual_c, 8),
+                      make_tuple(&aom_lpf_vertical_4_dual_dspr2,
+                                 &aom_lpf_vertical_4_dual_c, 8),
+                      make_tuple(&aom_lpf_vertical_8_dual_dspr2,
+                                 &aom_lpf_vertical_8_dual_c, 8)));
+#endif  // HAVE_DSPR2 && !CONFIG_HIGHBITDEPTH
+
+#if HAVE_MSA && (!CONFIG_HIGHBITDEPTH)
+INSTANTIATE_TEST_CASE_P(
+    MSA, Loop8Test6Param,  // Filters taking one (blimit, limit, thresh) set.
+    ::testing::Values(
+        make_tuple(&aom_lpf_horizontal_4_msa, &aom_lpf_horizontal_4_c, 8),
+        make_tuple(&aom_lpf_horizontal_8_msa, &aom_lpf_horizontal_8_c, 8),
+        make_tuple(&aom_lpf_horizontal_edge_8_msa, &aom_lpf_horizontal_edge_8_c,
+                   8),
+        make_tuple(&aom_lpf_horizontal_edge_16_msa,
+                   &aom_lpf_horizontal_edge_16_c, 8),
+        make_tuple(&aom_lpf_vertical_4_msa, &aom_lpf_vertical_4_c, 8),
+        make_tuple(&aom_lpf_vertical_8_msa, &aom_lpf_vertical_8_c, 8),
+        make_tuple(&aom_lpf_vertical_16_msa, &aom_lpf_vertical_16_c, 8)));
+
+INSTANTIATE_TEST_CASE_P(
+    MSA, Loop8Test9Param,  // Filters taking two (blimit, limit, thresh) sets.
+    ::testing::Values(make_tuple(&aom_lpf_horizontal_4_dual_msa,
+                                 &aom_lpf_horizontal_4_dual_c, 8),
+                      make_tuple(&aom_lpf_horizontal_8_dual_msa,
+                                 &aom_lpf_horizontal_8_dual_c, 8),
+                      make_tuple(&aom_lpf_vertical_4_dual_msa,
+                                 &aom_lpf_vertical_4_dual_c, 8),
+                      make_tuple(&aom_lpf_vertical_8_dual_msa,
+                                 &aom_lpf_vertical_8_dual_c, 8)));
+#endif  // HAVE_MSA && (!CONFIG_HIGHBITDEPTH)
+
+}  // namespace
diff --git a/third_party/aom/test/masked_sad_test.cc b/third_party/aom/test/masked_sad_test.cc
new file mode 100644
index 000000000..53f85eef7
--- /dev/null
+++ b/third_party/aom/test/masked_sad_test.cc
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+#include "./aom_config.h"
+#include "./aom_dsp_rtcd.h"
+#include "aom/aom_integer.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+const int number_of_iterations = 500;  // Random trials run by each test.
+// Kernel signature: buffers a and b and mask m, each followed by its stride.
+typedef unsigned int (*MaskedSADFunc)(const uint8_t *a, int a_stride,
+                                      const uint8_t *b, int b_stride,
+                                      const uint8_t *m, int m_stride);
+typedef std::tr1::tuple<MaskedSADFunc, MaskedSADFunc> MaskedSADParam;
+
+class MaskedSADTest : public ::testing::TestWithParam<MaskedSADParam> {
+ public:
+  virtual ~MaskedSADTest() {}
+  virtual void SetUp() {
+    maskedSAD_op_ = GET_PARAM(0);      // Tuple field 0: kernel under test.
+    ref_maskedSAD_op_ = GET_PARAM(1);  // Tuple field 1: reference kernel.
+  }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  MaskedSADFunc maskedSAD_op_;      // Its result is checked by OperationCheck.
+  MaskedSADFunc ref_maskedSAD_op_;  // Its result is the expected value.
+};
+
+TEST_P(MaskedSADTest, OperationCheck) {  // Random-input comparison.
+  unsigned int ref_ret, ret;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
+  DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
+  DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
+  int err_count = 0;
+  int first_failure = -1;
+  int src_stride = MAX_SB_SIZE;
+  int ref_stride = MAX_SB_SIZE;
+  int msk_stride = MAX_SB_SIZE;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) {
+      src_ptr[j] = rnd.Rand8();
+      ref_ptr[j] = rnd.Rand8();
+      msk_ptr[j] = ((rnd.Rand8() & 0x7f) > 64) ? rnd.Rand8() & 0x3f : 64;
+      assert(msk_ptr[j] <= 64);  // Mask is either 64 or a 6-bit value.
+    }
+    // Evaluate the reference and the tested kernel on identical inputs.
+    ref_ret = ref_maskedSAD_op_(src_ptr, src_stride, ref_ptr, ref_stride,
+                                msk_ptr, msk_stride);
+    ASM_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src_ptr, src_stride, ref_ptr,
+                                                 ref_stride, msk_ptr,
+                                                 msk_stride));
+    if (ret != ref_ret) {
+      err_count++;
+      if (first_failure == -1) first_failure = i;  // Keep the earliest.
+    }
+  }
+  EXPECT_EQ(0, err_count)
+      << "Error: Masked SAD Test, C output doesn't match SSSE3 output. "
+      << "First failed at test case " << first_failure;
+}
+
+#if CONFIG_HIGHBITDEPTH
+typedef unsigned int (*HighbdMaskedSADFunc)(const uint8_t *a, int a_stride,
+                                            const uint8_t *b, int b_stride,
+                                            const uint8_t *m, int m_stride);
+typedef std::tr1::tuple<HighbdMaskedSADFunc, HighbdMaskedSADFunc>
+    HighbdMaskedSADParam;  // (kernel under test, reference kernel)
+
+class HighbdMaskedSADTest  // Fixture storing the two kernels of a test tuple.
+    : public ::testing::TestWithParam<HighbdMaskedSADParam> {
+ public:
+  virtual ~HighbdMaskedSADTest() {}
+  virtual void SetUp() {
+    maskedSAD_op_ = GET_PARAM(0);      // Tuple field 0: kernel under test.
+    ref_maskedSAD_op_ = GET_PARAM(1);  // Tuple field 1: reference kernel.
+  }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  HighbdMaskedSADFunc maskedSAD_op_;      // Its result is checked by the test.
+  HighbdMaskedSADFunc ref_maskedSAD_op_;  // Its result is the expected value.
+};
+
+TEST_P(HighbdMaskedSADTest, OperationCheck) {  // Random-input comparison.
+  unsigned int ref_ret, ret;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
+  DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
+  DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
+  uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);  // uint8_t* form for the call.
+  uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
+  int err_count = 0;
+  int first_failure = -1;
+  int src_stride = MAX_SB_SIZE;
+  int ref_stride = MAX_SB_SIZE;
+  int msk_stride = MAX_SB_SIZE;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) {
+      src_ptr[j] = rnd.Rand16() & 0xfff;  // Keep the low 12 bits.
+      ref_ptr[j] = rnd.Rand16() & 0xfff;
+      msk_ptr[j] = ((rnd.Rand8() & 0x7f) > 64) ? rnd.Rand8() & 0x3f : 64;
+    }
+    // Evaluate the reference and the tested kernel on identical inputs.
+    ref_ret = ref_maskedSAD_op_(src8_ptr, src_stride, ref8_ptr, ref_stride,
+                                msk_ptr, msk_stride);
+    ASM_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src8_ptr, src_stride, ref8_ptr,
+                                                 ref_stride, msk_ptr,
+                                                 msk_stride));
+    if (ret != ref_ret) {
+      err_count++;
+      if (first_failure == -1) first_failure = i;  // Keep the earliest.
+    }
+  }
+  EXPECT_EQ(0, err_count)
+      << "Error: High BD Masked SAD Test, C output doesn't match SSSE3 output. "
+      << "First failed at test case " << first_failure;
+}
+#endif  // CONFIG_HIGHBITDEPTH
+
+using std::tr1::make_tuple;
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+    SSSE3_C_COMPARE, MaskedSADTest,  // Each tuple: (SSSE3 kernel, C kernel).
+    ::testing::Values(
+#if CONFIG_EXT_PARTITION
+        make_tuple(&aom_masked_sad128x128_ssse3, &aom_masked_sad128x128_c),
+        make_tuple(&aom_masked_sad128x64_ssse3, &aom_masked_sad128x64_c),
+        make_tuple(&aom_masked_sad64x128_ssse3, &aom_masked_sad64x128_c),
+#endif  // CONFIG_EXT_PARTITION
+        make_tuple(&aom_masked_sad64x64_ssse3, &aom_masked_sad64x64_c),
+        make_tuple(&aom_masked_sad64x32_ssse3, &aom_masked_sad64x32_c),
+        make_tuple(&aom_masked_sad32x64_ssse3, &aom_masked_sad32x64_c),
+        make_tuple(&aom_masked_sad32x32_ssse3, &aom_masked_sad32x32_c),
+        make_tuple(&aom_masked_sad32x16_ssse3, &aom_masked_sad32x16_c),
+        make_tuple(&aom_masked_sad16x32_ssse3, &aom_masked_sad16x32_c),
+        make_tuple(&aom_masked_sad16x16_ssse3, &aom_masked_sad16x16_c),
+        make_tuple(&aom_masked_sad16x8_ssse3, &aom_masked_sad16x8_c),
+        make_tuple(&aom_masked_sad8x16_ssse3, &aom_masked_sad8x16_c),
+        make_tuple(&aom_masked_sad8x8_ssse3, &aom_masked_sad8x8_c),
+        make_tuple(&aom_masked_sad8x4_ssse3, &aom_masked_sad8x4_c),
+        make_tuple(&aom_masked_sad4x8_ssse3, &aom_masked_sad4x8_c),
+        make_tuple(&aom_masked_sad4x4_ssse3, &aom_masked_sad4x4_c)));
+#if CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(SSSE3_C_COMPARE, HighbdMaskedSADTest,  // (SSSE3, C)
+                        ::testing::Values(
+#if CONFIG_EXT_PARTITION
+                            make_tuple(&aom_highbd_masked_sad128x128_ssse3,
+                                       &aom_highbd_masked_sad128x128_c),
+                            make_tuple(&aom_highbd_masked_sad128x64_ssse3,
+                                       &aom_highbd_masked_sad128x64_c),
+                            make_tuple(&aom_highbd_masked_sad64x128_ssse3,
+                                       &aom_highbd_masked_sad64x128_c),
+#endif  // CONFIG_EXT_PARTITION
+                            make_tuple(&aom_highbd_masked_sad64x64_ssse3,
+                                       &aom_highbd_masked_sad64x64_c),
+                            make_tuple(&aom_highbd_masked_sad64x32_ssse3,
+                                       &aom_highbd_masked_sad64x32_c),
+                            make_tuple(&aom_highbd_masked_sad32x64_ssse3,
+                                       &aom_highbd_masked_sad32x64_c),
+                            make_tuple(&aom_highbd_masked_sad32x32_ssse3,
+                                       &aom_highbd_masked_sad32x32_c),
+                            make_tuple(&aom_highbd_masked_sad32x16_ssse3,
+                                       &aom_highbd_masked_sad32x16_c),
+                            make_tuple(&aom_highbd_masked_sad16x32_ssse3,
+                                       &aom_highbd_masked_sad16x32_c),
+                            make_tuple(&aom_highbd_masked_sad16x16_ssse3,
+                                       &aom_highbd_masked_sad16x16_c),
+                            make_tuple(&aom_highbd_masked_sad16x8_ssse3,
+                                       &aom_highbd_masked_sad16x8_c),
+                            make_tuple(&aom_highbd_masked_sad8x16_ssse3,
+                                       &aom_highbd_masked_sad8x16_c),
+                            make_tuple(&aom_highbd_masked_sad8x8_ssse3,
+                                       &aom_highbd_masked_sad8x8_c),
+                            make_tuple(&aom_highbd_masked_sad8x4_ssse3,
+                                       &aom_highbd_masked_sad8x4_c),
+                            make_tuple(&aom_highbd_masked_sad4x8_ssse3,
+                                       &aom_highbd_masked_sad4x8_c),
+                            make_tuple(&aom_highbd_masked_sad4x4_ssse3,
+                                       &aom_highbd_masked_sad4x4_c)));
+#endif  // CONFIG_HIGHBITDEPTH
+#endif  // HAVE_SSSE3
+}  // namespace
diff --git a/third_party/aom/test/masked_variance_test.cc b/third_party/aom/test/masked_variance_test.cc
new file mode 100644
index 000000000..65e852aea
--- /dev/null
+++ b/third_party/aom/test/masked_variance_test.cc
@@ -0,0 +1,790 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+#include "./aom_config.h"
+#include "./aom_dsp_rtcd.h"
+#include "aom/aom_codec.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/aom_filter.h"
+#include "aom_mem/aom_mem.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+const int number_of_iterations = 500;  // Passes of each random-data loop.
+// Function pointer taking buffers a, b and m with strides, plus an sse output.
+typedef unsigned int (*MaskedVarianceFunc)(const uint8_t *a, int a_stride,
+                                           const uint8_t *b, int b_stride,
+                                           const uint8_t *m, int m_stride,
+                                           unsigned int *sse);
+// Test parameter: (function under test, reference function).
+typedef std::tr1::tuple<MaskedVarianceFunc, MaskedVarianceFunc>
+    MaskedVarianceParam;
+
+// Fixture holding an (optimized, reference) masked variance function pair.
+class MaskedVarianceTest
+    : public ::testing::TestWithParam<MaskedVarianceParam> {
+ public:
+  virtual ~MaskedVarianceTest() {}
+  virtual void SetUp() {
+    const MaskedVarianceParam &fn_pair = GetParam();
+    opt_func_ = std::tr1::get<0>(fn_pair);
+    ref_func_ = std::tr1::get<1>(fn_pair);
+  }
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+ protected:
+  MaskedVarianceFunc opt_func_;  // Implementation under test.
+  MaskedVarianceFunc ref_func_;  // Reference it is compared against.
+};
+
+// Compares both functions on random pixels and masks over many iterations.
+TEST_P(MaskedVarianceTest, OperationCheck) {
+  const int kStride = MAX_SB_SIZE;
+  const int kBufSize = MAX_SB_SIZE * MAX_SB_SIZE;
+  DECLARE_ALIGNED(16, uint8_t, src_buf[MAX_SB_SIZE * MAX_SB_SIZE]);
+  DECLARE_ALIGNED(16, uint8_t, ref_buf[MAX_SB_SIZE * MAX_SB_SIZE]);
+  DECLARE_ALIGNED(16, uint8_t, msk_buf[MAX_SB_SIZE * MAX_SB_SIZE]);
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int mismatches = 0;
+  int first_bad_iter = -1;
+
+  for (int iter = 0; iter < number_of_iterations; ++iter) {
+    // The src, ref, mask draw order per element is kept as-is on purpose.
+    for (int px = 0; px < kBufSize; ++px) {
+      src_buf[px] = rnd.Rand8();
+      ref_buf[px] = rnd.Rand8();
+      msk_buf[px] = rnd(65);
+    }
+
+    unsigned int ref_sse, opt_sse, opt_var;
+    const unsigned int ref_var = ref_func_(src_buf, kStride, ref_buf, kStride,
+                                           msk_buf, kStride, &ref_sse);
+    ASM_REGISTER_STATE_CHECK(
+        opt_var = opt_func_(src_buf, kStride, ref_buf, kStride, msk_buf,
+                            kStride, &opt_sse));
+
+    const bool same = (opt_var == ref_var) && (opt_sse == ref_sse);
+    if (same) continue;
+    ++mismatches;
+    if (first_bad_iter < 0) first_bad_iter = iter;
+  }
+
+  EXPECT_EQ(0, mismatches) << "Error: Masked Variance Test OperationCheck,"
+                           << "C output doesn't match SSSE3 output. "
+                           << "First failed at test case " << first_bad_iter;
+}
+
+// Compares both functions on saturated inputs. Bits 0, 1 and 2 of i select
+// all-255 (else all-0) src pixels, all-255 (else all-0) ref pixels and an
+// all-64 (else all-0) mask. No random data is used, so no ACMRandom is made.
+TEST_P(MaskedVarianceTest, ExtremeValues) {
+  unsigned int ref_ret, opt_ret;
+  unsigned int ref_sse, opt_sse;
+  DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
+  DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
+  DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
+  int err_count = 0;
+  int first_failure = -1;
+  const int stride = MAX_SB_SIZE;  // Common to src, ref and mask.
+
+  for (int i = 0; i < 8; ++i) {
+    memset(src_ptr, (i & 0x1) ? 255 : 0, MAX_SB_SIZE * MAX_SB_SIZE);
+    memset(ref_ptr, (i & 0x2) ? 255 : 0, MAX_SB_SIZE * MAX_SB_SIZE);
+    memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_SB_SIZE * MAX_SB_SIZE);
+
+    ref_ret = ref_func_(src_ptr, stride, ref_ptr, stride, msk_ptr, stride,
+                        &ref_sse);
+    ASM_REGISTER_STATE_CHECK(opt_ret = opt_func_(src_ptr, stride, ref_ptr,
+                                                 stride, msk_ptr, stride,
+                                                 &opt_sse));
+
+    if (opt_ret != ref_ret || opt_sse != ref_sse) {
+      err_count++;
+      if (first_failure == -1) first_failure = i;
+    }
+  }
+
+  EXPECT_EQ(0, err_count) << "Error: Masked Variance Test ExtremeValues,"
+                          << "C output doesn't match SSSE3 output. "
+                          << "First failed at test case " << first_failure;
+}
+
+typedef unsigned int (*MaskedSubPixelVarianceFunc)(
+    const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b,
+    int b_stride, const uint8_t *m, int m_stride, unsigned int *sse);
+// Test parameter: (function under test, reference function).
+typedef std::tr1::tuple<MaskedSubPixelVarianceFunc, MaskedSubPixelVarianceFunc>
+    MaskedSubPixelVarianceParam;
+
+// Fixture holding an (optimized, reference) sub-pixel variance function pair.
+class MaskedSubPixelVarianceTest
+    : public ::testing::TestWithParam<MaskedSubPixelVarianceParam> {
+ public:
+  virtual ~MaskedSubPixelVarianceTest() {}
+  virtual void SetUp() {
+    const MaskedSubPixelVarianceParam &fn_pair = GetParam();
+    opt_func_ = std::tr1::get<0>(fn_pair);
+    ref_func_ = std::tr1::get<1>(fn_pair);
+  }
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+ protected:
+  MaskedSubPixelVarianceFunc opt_func_;  // Implementation under test.
+  MaskedSubPixelVarianceFunc ref_func_;  // Reference it is compared against.
+};
+
+// Random-data check at x/y offsets 0, 4 and one random value per iteration.
+TEST_P(MaskedSubPixelVarianceTest, OperationCheck) {
+  unsigned int ref_ret, opt_ret, ref_sse, opt_sse;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
+  DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
+  DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
+  int err_count = 0;
+  int first_failure = -1;
+  int first_failure_x = -1;
+  int first_failure_y = -1;
+  const int stride = MAX_SB_SIZE + 1;  // Common to src, ref and mask.
+
+  for (int i = 0; i < number_of_iterations; ++i) {
+    int xoffsets[] = { 0, 4, rnd(BIL_SUBPEL_SHIFTS) };
+    int yoffsets[] = { 0, 4, rnd(BIL_SUBPEL_SHIFTS) };
+    for (int j = 0; j < (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1); j++) {
+      src_ptr[j] = rnd.Rand8();
+      ref_ptr[j] = rnd.Rand8();
+      msk_ptr[j] = rnd(65);
+    }
+    for (int k = 0; k < 3; k++) {
+      for (int l = 0; l < 3; l++) {
+        const int xoffset = xoffsets[k];
+        const int yoffset = yoffsets[l];
+        ref_ret = ref_func_(src_ptr, stride, xoffset, yoffset, ref_ptr, stride,
+                            msk_ptr, stride, &ref_sse);
+        ASM_REGISTER_STATE_CHECK(
+            opt_ret = opt_func_(src_ptr, stride, xoffset, yoffset, ref_ptr,
+                                stride, msk_ptr, stride, &opt_sse));
+        if (opt_ret != ref_ret || opt_sse != ref_sse) {
+          err_count++;
+          if (first_failure == -1) {  // Keep the first mismatch's details.
+            first_failure = i;
+            first_failure_x = xoffset;
+            first_failure_y = yoffset;
+          }
+        }
+      }
+    }
+  }
+
+  EXPECT_EQ(0, err_count)
+      << "Error: Masked Sub Pixel Variance Test OperationCheck,"
+      << "C output doesn't match SSSE3 output. "
+      << "First failed at test case " << first_failure
+      << " x_offset = " << first_failure_x << " y_offset = " << first_failure_y;
+}
+
+// Saturated-input check over every x/y offset; bits 0-2 of i pick the inputs.
+TEST_P(MaskedSubPixelVarianceTest, ExtremeValues) {
+  unsigned int ref_ret, opt_ret;
+  unsigned int ref_sse, opt_sse;
+  DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
+  DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
+  DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
+  int first_failure_x = -1;
+  int first_failure_y = -1;
+  int err_count = 0;
+  int first_failure = -1;
+  int src_stride = (MAX_SB_SIZE + 1);
+  int ref_stride = (MAX_SB_SIZE + 1);
+  int msk_stride = (MAX_SB_SIZE + 1);
+
+  for (int xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) {
+    for (int yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) {
+      for (int i = 0; i < 8; ++i) {
+        memset(src_ptr, (i & 0x1) ? 255 : 0,
+               (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
+        memset(ref_ptr, (i & 0x2) ? 255 : 0,
+               (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
+        memset(msk_ptr, (i & 0x4) ? 64 : 0,
+               (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
+
+        ref_ret = ref_func_(src_ptr, src_stride, xoffset, yoffset, ref_ptr,
+                            ref_stride, msk_ptr, msk_stride, &ref_sse);
+        ASM_REGISTER_STATE_CHECK(
+            opt_ret = opt_func_(src_ptr, src_stride, xoffset, yoffset, ref_ptr,
+                                ref_stride, msk_ptr, msk_stride, &opt_sse));
+
+        if (opt_ret != ref_ret || opt_sse != ref_sse) {
+          err_count++;
+          if (first_failure == -1) {
+            first_failure = i;
+            first_failure_x = xoffset;
+            first_failure_y = yoffset;
+          }
+        }
+      }
+    }
+  }
+
+  EXPECT_EQ(0, err_count)
+      << "Error: Masked Sub Pixel Variance Test ExtremeValues,"
+      << "C output doesn't match SSSE3 output. "
+      << "First failed at test case " << first_failure
+      << " x_offset = " << first_failure_x << " y_offset = " << first_failure_y;
+}
+
+#if CONFIG_HIGHBITDEPTH
+typedef std::tr1::tuple<MaskedVarianceFunc, MaskedVarianceFunc, aom_bit_depth_t>
+    HighbdMaskedVarianceParam;  // (function under test, reference, bit depth)
+
+// Fixture: (optimized, reference) function pair plus the bit depth to test.
+class HighbdMaskedVarianceTest
+    : public ::testing::TestWithParam<HighbdMaskedVarianceParam> {
+ public:
+  virtual ~HighbdMaskedVarianceTest() {}
+  virtual void SetUp() {
+    const HighbdMaskedVarianceParam &param = GetParam();
+    opt_func_ = std::tr1::get<0>(param);
+    ref_func_ = std::tr1::get<1>(param);
+    bit_depth_ = std::tr1::get<2>(param);
+  }
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+ protected:
+  MaskedVarianceFunc opt_func_;
+  MaskedVarianceFunc ref_func_;
+  aom_bit_depth_t bit_depth_;
+};
+
+// Compares both functions on random samples limited to bit_depth_ bits.
+TEST_P(HighbdMaskedVarianceTest, OperationCheck) {
+  const int kStride = MAX_SB_SIZE;
+  const int kBufSize = MAX_SB_SIZE * MAX_SB_SIZE;
+  DECLARE_ALIGNED(16, uint16_t, src_buf[MAX_SB_SIZE * MAX_SB_SIZE]);
+  DECLARE_ALIGNED(16, uint16_t, ref_buf[MAX_SB_SIZE * MAX_SB_SIZE]);
+  DECLARE_ALIGNED(16, uint8_t, msk_buf[MAX_SB_SIZE * MAX_SB_SIZE]);
+  uint8_t *const src8 = CONVERT_TO_BYTEPTR(src_buf);
+  uint8_t *const ref8 = CONVERT_TO_BYTEPTR(ref_buf);
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int sample_mask = (1 << bit_depth_) - 1;
+  int mismatches = 0;
+  int first_bad_iter = -1;
+
+  for (int iter = 0; iter < number_of_iterations; ++iter) {
+    for (int px = 0; px < kBufSize; ++px) {
+      src_buf[px] = rnd.Rand16() & sample_mask;
+      ref_buf[px] = rnd.Rand16() & sample_mask;
+      msk_buf[px] = rnd(65);
+    }
+
+    unsigned int ref_sse, opt_sse, opt_var;
+    const unsigned int ref_var =
+        ref_func_(src8, kStride, ref8, kStride, msk_buf, kStride, &ref_sse);
+    ASM_REGISTER_STATE_CHECK(
+        opt_var = opt_func_(src8, kStride, ref8, kStride, msk_buf, kStride,
+                            &opt_sse));
+
+    const bool same = (opt_var == ref_var) && (opt_sse == ref_sse);
+    if (same) continue;
+    ++mismatches;
+    if (first_bad_iter < 0) first_bad_iter = iter;
+  }
+
+  EXPECT_EQ(0, mismatches) << "Error: Masked Variance Test OperationCheck,"
+                           << "C output doesn't match SSSE3 output. "
+                           << "First failed at test case " << first_bad_iter;
+}
+
+// Saturated-input check; bits 0-2 of i pick max-or-zero src, ref and mask.
+TEST_P(HighbdMaskedVarianceTest, ExtremeValues) {
+  unsigned int ref_ret, opt_ret;
+  unsigned int ref_sse, opt_sse;
+  DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
+  DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
+  DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
+  uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
+  uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
+  int err_count = 0;
+  int first_failure = -1;
+  int src_stride = MAX_SB_SIZE;
+  int ref_stride = MAX_SB_SIZE;
+  int msk_stride = MAX_SB_SIZE;
+
+  for (int i = 0; i < 8; ++i) {
+    aom_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0,
+                 MAX_SB_SIZE * MAX_SB_SIZE);
+    aom_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0,
+                 MAX_SB_SIZE * MAX_SB_SIZE);
+    memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_SB_SIZE * MAX_SB_SIZE);
+
+    ref_ret = ref_func_(src8_ptr, src_stride, ref8_ptr, ref_stride, msk_ptr,
+                        msk_stride, &ref_sse);
+    ASM_REGISTER_STATE_CHECK(opt_ret = opt_func_(src8_ptr, src_stride, ref8_ptr,
+                                                 ref_stride, msk_ptr,
+                                                 msk_stride, &opt_sse));
+
+    if (opt_ret != ref_ret || opt_sse != ref_sse) {
+      err_count++;
+      if (first_failure == -1) first_failure = i;
+    }
+  }
+
+  EXPECT_EQ(0, err_count) << "Error: Masked Variance Test ExtremeValues,"
+                          << "C output doesn't match SSSE3 output. "
+                          << "First failed at test case " << first_failure;
+}
+
+typedef std::tr1::tuple<MaskedSubPixelVarianceFunc, MaskedSubPixelVarianceFunc,
+                        aom_bit_depth_t>
+    HighbdMaskedSubPixelVarianceParam;  // (under test, reference, bit depth)
+
+// Fixture: (optimized, reference) sub-pixel function pair plus a bit depth.
+class HighbdMaskedSubPixelVarianceTest
+    : public ::testing::TestWithParam<HighbdMaskedSubPixelVarianceParam> {
+ public:
+  virtual ~HighbdMaskedSubPixelVarianceTest() {}
+  virtual void SetUp() {
+    const HighbdMaskedSubPixelVarianceParam &param = GetParam();
+    opt_func_ = std::tr1::get<0>(param);
+    ref_func_ = std::tr1::get<1>(param);
+    bit_depth_ = std::tr1::get<2>(param);
+  }
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+ protected:
+  MaskedSubPixelVarianceFunc opt_func_;
+  MaskedSubPixelVarianceFunc ref_func_;
+  aom_bit_depth_t bit_depth_;
+};
+
+// Compares both functions at every x/y offset, drawing fresh random samples
+// (limited to bit_depth_ bits) and masks before each comparison.
+TEST_P(HighbdMaskedSubPixelVarianceTest, OperationCheck) {
+  const int kStride = MAX_SB_SIZE + 1;
+  const int kBufSize = (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1);
+  DECLARE_ALIGNED(16, uint16_t, src_buf[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
+  DECLARE_ALIGNED(16, uint16_t, ref_buf[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
+  DECLARE_ALIGNED(16, uint8_t, msk_buf[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
+  uint8_t *const src8 = CONVERT_TO_BYTEPTR(src_buf);
+  uint8_t *const ref8 = CONVERT_TO_BYTEPTR(ref_buf);
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int sample_mask = (1 << bit_depth_) - 1;
+  int mismatches = 0;
+  int first_bad_iter = -1;
+  int first_bad_x = -1;
+  int first_bad_y = -1;
+
+  for (int iter = 0; iter < number_of_iterations; ++iter) {
+    for (int xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; ++xoffset) {
+      for (int yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; ++yoffset) {
+        for (int px = 0; px < kBufSize; ++px) {
+          src_buf[px] = rnd.Rand16() & sample_mask;
+          ref_buf[px] = rnd.Rand16() & sample_mask;
+          msk_buf[px] = rnd(65);
+        }
+
+        unsigned int ref_sse, opt_sse;
+        unsigned int opt_var;
+        const unsigned int ref_var =
+            ref_func_(src8, kStride, xoffset, yoffset, ref8, kStride, msk_buf,
+                      kStride, &ref_sse);
+        ASM_REGISTER_STATE_CHECK(
+            opt_var = opt_func_(src8, kStride, xoffset, yoffset, ref8, kStride,
+                                msk_buf, kStride, &opt_sse));
+
+        if (opt_var == ref_var && opt_sse == ref_sse) continue;
+        ++mismatches;
+        // Only the first mismatch's iteration and offsets are reported.
+        if (first_bad_iter != -1) continue;
+        first_bad_iter = iter;
+        first_bad_x = xoffset;
+        first_bad_y = yoffset;
+      }
+    }
+  }
+
+  EXPECT_EQ(0, mismatches)
+      << "Error: Masked Sub Pixel Variance Test OperationCheck,"
+      << "C output doesn't match SSSE3 output. "
+      << "First failed at test case " << first_bad_iter
+      << " x_offset = " << first_bad_x << " y_offset = " << first_bad_y;
+}
+
+// Saturated-input check over every x/y offset; bits 0-2 of i pick the inputs.
+TEST_P(HighbdMaskedSubPixelVarianceTest, ExtremeValues) {
+  unsigned int ref_ret, opt_ret;
+  unsigned int ref_sse, opt_sse;
+  DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
+  DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
+  DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
+  uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
+  uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
+  int first_failure_x = -1;
+  int first_failure_y = -1;
+  int err_count = 0;
+  int first_failure = -1;
+  int src_stride = (MAX_SB_SIZE + 1);
+  int ref_stride = (MAX_SB_SIZE + 1);
+  int msk_stride = (MAX_SB_SIZE + 1);
+
+  for (int xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) {
+    for (int yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) {
+      for (int i = 0; i < 8; ++i) {
+        aom_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0,
+                     (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
+        aom_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0,
+                     (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
+        memset(msk_ptr, (i & 0x4) ? 64 : 0,
+               (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
+
+        ref_ret = ref_func_(src8_ptr, src_stride, xoffset, yoffset, ref8_ptr,
+                            ref_stride, msk_ptr, msk_stride, &ref_sse);
+        ASM_REGISTER_STATE_CHECK(opt_ret =
+                                     opt_func_(src8_ptr, src_stride, xoffset,
+                                               yoffset, ref8_ptr, ref_stride,
+                                               msk_ptr, msk_stride, &opt_sse));
+
+        if (opt_ret != ref_ret || opt_sse != ref_sse) {
+          err_count++;
+          if (first_failure == -1) {
+            first_failure = i;
+            first_failure_x = xoffset;
+            first_failure_y = yoffset;
+          }
+        }
+      }
+    }
+  }
+
+  EXPECT_EQ(0, err_count)
+      << "Error: Masked Sub Pixel Variance Test ExtremeValues,"
+      << "C output doesn't match SSSE3 output. "
+      << "First failed at test case " << first_failure
+      << " x_offset = " << first_failure_x << " y_offset = " << first_failure_y;
+}
+#endif  // CONFIG_HIGHBITDEPTH
+
+using std::tr1::make_tuple;
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+    SSSE3_C_COMPARE, MaskedVarianceTest,  // Tuples are (SSSE3, C) functions.
+    ::testing::Values(
+#if CONFIG_EXT_PARTITION  // 128-pixel block sizes.
+        make_tuple(&aom_masked_variance128x128_ssse3,
+                   &aom_masked_variance128x128_c),
+        make_tuple(&aom_masked_variance128x64_ssse3,
+                   &aom_masked_variance128x64_c),
+        make_tuple(&aom_masked_variance64x128_ssse3,
+                   &aom_masked_variance64x128_c),
+#endif  // CONFIG_EXT_PARTITION
+        make_tuple(&aom_masked_variance64x64_ssse3,
+                   &aom_masked_variance64x64_c),
+        make_tuple(&aom_masked_variance64x32_ssse3,
+                   &aom_masked_variance64x32_c),
+        make_tuple(&aom_masked_variance32x64_ssse3,
+                   &aom_masked_variance32x64_c),
+        make_tuple(&aom_masked_variance32x32_ssse3,
+                   &aom_masked_variance32x32_c),
+        make_tuple(&aom_masked_variance32x16_ssse3,
+                   &aom_masked_variance32x16_c),
+        make_tuple(&aom_masked_variance16x32_ssse3,
+                   &aom_masked_variance16x32_c),
+        make_tuple(&aom_masked_variance16x16_ssse3,
+                   &aom_masked_variance16x16_c),
+        make_tuple(&aom_masked_variance16x8_ssse3, &aom_masked_variance16x8_c),
+        make_tuple(&aom_masked_variance8x16_ssse3, &aom_masked_variance8x16_c),
+        make_tuple(&aom_masked_variance8x8_ssse3, &aom_masked_variance8x8_c),
+        make_tuple(&aom_masked_variance8x4_ssse3, &aom_masked_variance8x4_c),
+        make_tuple(&aom_masked_variance4x8_ssse3, &aom_masked_variance4x8_c),
+        make_tuple(&aom_masked_variance4x4_ssse3, &aom_masked_variance4x4_c)));
+
+INSTANTIATE_TEST_CASE_P(
+    SSSE3_C_COMPARE, MaskedSubPixelVarianceTest,  // Tuples are (SSSE3, C).
+    ::testing::Values(
+#if CONFIG_EXT_PARTITION  // 128-pixel block sizes.
+        make_tuple(&aom_masked_sub_pixel_variance128x128_ssse3,
+                   &aom_masked_sub_pixel_variance128x128_c),
+        make_tuple(&aom_masked_sub_pixel_variance128x64_ssse3,
+                   &aom_masked_sub_pixel_variance128x64_c),
+        make_tuple(&aom_masked_sub_pixel_variance64x128_ssse3,
+                   &aom_masked_sub_pixel_variance64x128_c),
+#endif  // CONFIG_EXT_PARTITION
+        make_tuple(&aom_masked_sub_pixel_variance64x64_ssse3,
+                   &aom_masked_sub_pixel_variance64x64_c),
+        make_tuple(&aom_masked_sub_pixel_variance64x32_ssse3,
+                   &aom_masked_sub_pixel_variance64x32_c),
+        make_tuple(&aom_masked_sub_pixel_variance32x64_ssse3,
+                   &aom_masked_sub_pixel_variance32x64_c),
+        make_tuple(&aom_masked_sub_pixel_variance32x32_ssse3,
+                   &aom_masked_sub_pixel_variance32x32_c),
+        make_tuple(&aom_masked_sub_pixel_variance32x16_ssse3,
+                   &aom_masked_sub_pixel_variance32x16_c),
+        make_tuple(&aom_masked_sub_pixel_variance16x32_ssse3,
+                   &aom_masked_sub_pixel_variance16x32_c),
+        make_tuple(&aom_masked_sub_pixel_variance16x16_ssse3,
+                   &aom_masked_sub_pixel_variance16x16_c),
+        make_tuple(&aom_masked_sub_pixel_variance16x8_ssse3,
+                   &aom_masked_sub_pixel_variance16x8_c),
+        make_tuple(&aom_masked_sub_pixel_variance8x16_ssse3,
+                   &aom_masked_sub_pixel_variance8x16_c),
+        make_tuple(&aom_masked_sub_pixel_variance8x8_ssse3,
+                   &aom_masked_sub_pixel_variance8x8_c),
+        make_tuple(&aom_masked_sub_pixel_variance8x4_ssse3,
+                   &aom_masked_sub_pixel_variance8x4_c),
+        make_tuple(&aom_masked_sub_pixel_variance4x8_ssse3,
+                   &aom_masked_sub_pixel_variance4x8_c),
+        make_tuple(&aom_masked_sub_pixel_variance4x4_ssse3,
+                   &aom_masked_sub_pixel_variance4x4_c)));
+
+#if CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    SSSE3_C_COMPARE, HighbdMaskedVarianceTest,  // (SSSE3, C, bit depth).
+    ::testing::Values(
+#if CONFIG_EXT_PARTITION  // AOM_BITS_8 group, 128-pixel block sizes first.
+        make_tuple(&aom_highbd_masked_variance128x128_ssse3,
+                   &aom_highbd_masked_variance128x128_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_variance128x64_ssse3,
+                   &aom_highbd_masked_variance128x64_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_variance64x128_ssse3,
+                   &aom_highbd_masked_variance64x128_c, AOM_BITS_8),
+#endif  // CONFIG_EXT_PARTITION
+        make_tuple(&aom_highbd_masked_variance64x64_ssse3,
+                   &aom_highbd_masked_variance64x64_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_variance64x32_ssse3,
+                   &aom_highbd_masked_variance64x32_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_variance32x64_ssse3,
+                   &aom_highbd_masked_variance32x64_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_variance32x32_ssse3,
+                   &aom_highbd_masked_variance32x32_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_variance32x16_ssse3,
+                   &aom_highbd_masked_variance32x16_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_variance16x32_ssse3,
+                   &aom_highbd_masked_variance16x32_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_variance16x16_ssse3,
+                   &aom_highbd_masked_variance16x16_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_variance16x8_ssse3,
+                   &aom_highbd_masked_variance16x8_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_variance8x16_ssse3,
+                   &aom_highbd_masked_variance8x16_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_variance8x8_ssse3,
+                   &aom_highbd_masked_variance8x8_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_variance8x4_ssse3,
+                   &aom_highbd_masked_variance8x4_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_variance4x8_ssse3,
+                   &aom_highbd_masked_variance4x8_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_variance4x4_ssse3,
+                   &aom_highbd_masked_variance4x4_c, AOM_BITS_8),
+#if CONFIG_EXT_PARTITION  // AOM_BITS_10 group, 128-pixel block sizes first.
+        make_tuple(&aom_highbd_10_masked_variance128x128_ssse3,
+                   &aom_highbd_10_masked_variance128x128_c, AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_variance128x64_ssse3,
+                   &aom_highbd_10_masked_variance128x64_c, AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_variance64x128_ssse3,
+                   &aom_highbd_10_masked_variance64x128_c, AOM_BITS_10),
+#endif  // CONFIG_EXT_PARTITION
+        make_tuple(&aom_highbd_10_masked_variance64x64_ssse3,
+                   &aom_highbd_10_masked_variance64x64_c, AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_variance64x32_ssse3,
+                   &aom_highbd_10_masked_variance64x32_c, AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_variance32x64_ssse3,
+                   &aom_highbd_10_masked_variance32x64_c, AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_variance32x32_ssse3,
+                   &aom_highbd_10_masked_variance32x32_c, AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_variance32x16_ssse3,
+                   &aom_highbd_10_masked_variance32x16_c, AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_variance16x32_ssse3,
+                   &aom_highbd_10_masked_variance16x32_c, AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_variance16x16_ssse3,
+                   &aom_highbd_10_masked_variance16x16_c, AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_variance16x8_ssse3,
+                   &aom_highbd_10_masked_variance16x8_c, AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_variance8x16_ssse3,
+                   &aom_highbd_10_masked_variance8x16_c, AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_variance8x8_ssse3,
+                   &aom_highbd_10_masked_variance8x8_c, AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_variance8x4_ssse3,
+                   &aom_highbd_10_masked_variance8x4_c, AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_variance4x8_ssse3,
+                   &aom_highbd_10_masked_variance4x8_c, AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_variance4x4_ssse3,
+                   &aom_highbd_10_masked_variance4x4_c, AOM_BITS_10),
+#if CONFIG_EXT_PARTITION  // AOM_BITS_12 group, 128-pixel block sizes first.
+        make_tuple(&aom_highbd_12_masked_variance128x128_ssse3,
+                   &aom_highbd_12_masked_variance128x128_c, AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_variance128x64_ssse3,
+                   &aom_highbd_12_masked_variance128x64_c, AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_variance64x128_ssse3,
+                   &aom_highbd_12_masked_variance64x128_c, AOM_BITS_12),
+#endif  // CONFIG_EXT_PARTITION
+        make_tuple(&aom_highbd_12_masked_variance64x64_ssse3,
+                   &aom_highbd_12_masked_variance64x64_c, AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_variance64x32_ssse3,
+                   &aom_highbd_12_masked_variance64x32_c, AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_variance32x64_ssse3,
+                   &aom_highbd_12_masked_variance32x64_c, AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_variance32x32_ssse3,
+                   &aom_highbd_12_masked_variance32x32_c, AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_variance32x16_ssse3,
+                   &aom_highbd_12_masked_variance32x16_c, AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_variance16x32_ssse3,
+                   &aom_highbd_12_masked_variance16x32_c, AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_variance16x16_ssse3,
+                   &aom_highbd_12_masked_variance16x16_c, AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_variance16x8_ssse3,
+                   &aom_highbd_12_masked_variance16x8_c, AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_variance8x16_ssse3,
+                   &aom_highbd_12_masked_variance8x16_c, AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_variance8x8_ssse3,
+                   &aom_highbd_12_masked_variance8x8_c, AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_variance8x4_ssse3,
+                   &aom_highbd_12_masked_variance8x4_c, AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_variance4x8_ssse3,
+                   &aom_highbd_12_masked_variance4x8_c, AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_variance4x4_ssse3,
+                   &aom_highbd_12_masked_variance4x4_c, AOM_BITS_12)));
+
+INSTANTIATE_TEST_CASE_P(
+    SSSE3_C_COMPARE, HighbdMaskedSubPixelVarianceTest,
+    ::testing::Values(
+#if CONFIG_EXT_PARTITION
+        make_tuple(&aom_highbd_masked_sub_pixel_variance128x128_ssse3,
+                   &aom_highbd_masked_sub_pixel_variance128x128_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_sub_pixel_variance128x64_ssse3,
+                   &aom_highbd_masked_sub_pixel_variance128x64_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_sub_pixel_variance64x128_ssse3,
+                   &aom_highbd_masked_sub_pixel_variance64x128_c, AOM_BITS_8),
+#endif  // CONFIG_EXT_PARTITION
+        make_tuple(&aom_highbd_masked_sub_pixel_variance64x64_ssse3,
+                   &aom_highbd_masked_sub_pixel_variance64x64_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_sub_pixel_variance64x32_ssse3,
+                   &aom_highbd_masked_sub_pixel_variance64x32_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_sub_pixel_variance32x64_ssse3,
+                   &aom_highbd_masked_sub_pixel_variance32x64_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_sub_pixel_variance32x32_ssse3,
+                   &aom_highbd_masked_sub_pixel_variance32x32_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_sub_pixel_variance32x16_ssse3,
+                   &aom_highbd_masked_sub_pixel_variance32x16_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_sub_pixel_variance16x32_ssse3,
+                   &aom_highbd_masked_sub_pixel_variance16x32_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_sub_pixel_variance16x16_ssse3,
+                   &aom_highbd_masked_sub_pixel_variance16x16_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_sub_pixel_variance16x8_ssse3,
+                   &aom_highbd_masked_sub_pixel_variance16x8_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_sub_pixel_variance8x16_ssse3,
+                   &aom_highbd_masked_sub_pixel_variance8x16_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_sub_pixel_variance8x8_ssse3,
+                   &aom_highbd_masked_sub_pixel_variance8x8_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_sub_pixel_variance8x4_ssse3,
+                   &aom_highbd_masked_sub_pixel_variance8x4_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_sub_pixel_variance4x8_ssse3,
+                   &aom_highbd_masked_sub_pixel_variance4x8_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_masked_sub_pixel_variance4x4_ssse3,
+                   &aom_highbd_masked_sub_pixel_variance4x4_c, AOM_BITS_8),
+#if CONFIG_EXT_PARTITION
+        make_tuple(&aom_highbd_10_masked_sub_pixel_variance128x128_ssse3,
+                   &aom_highbd_10_masked_sub_pixel_variance128x128_c,
+                   AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_sub_pixel_variance128x64_ssse3,
+                   &aom_highbd_10_masked_sub_pixel_variance128x64_c,
+                   AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x128_ssse3,
+                   &aom_highbd_10_masked_sub_pixel_variance64x128_c,
+                   AOM_BITS_10),
+#endif  // CONFIG_EXT_PARTITION
+        make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x64_ssse3,
+                   &aom_highbd_10_masked_sub_pixel_variance64x64_c,
+                   AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x32_ssse3,
+                   &aom_highbd_10_masked_sub_pixel_variance64x32_c,
+                   AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x64_ssse3,
+                   &aom_highbd_10_masked_sub_pixel_variance32x64_c,
+                   AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x32_ssse3,
+                   &aom_highbd_10_masked_sub_pixel_variance32x32_c,
+                   AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x16_ssse3,
+                   &aom_highbd_10_masked_sub_pixel_variance32x16_c,
+                   AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x32_ssse3,
+                   &aom_highbd_10_masked_sub_pixel_variance16x32_c,
+                   AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x16_ssse3,
+                   &aom_highbd_10_masked_sub_pixel_variance16x16_c,
+                   AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x8_ssse3,
+                   &aom_highbd_10_masked_sub_pixel_variance16x8_c, AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x16_ssse3,
+                   &aom_highbd_10_masked_sub_pixel_variance8x16_c, AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x8_ssse3,
+                   &aom_highbd_10_masked_sub_pixel_variance8x8_c, AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x4_ssse3,
+                   &aom_highbd_10_masked_sub_pixel_variance8x4_c, AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_sub_pixel_variance4x8_ssse3,
+                   &aom_highbd_10_masked_sub_pixel_variance4x8_c, AOM_BITS_10),
+        make_tuple(&aom_highbd_10_masked_sub_pixel_variance4x4_ssse3,
+                   &aom_highbd_10_masked_sub_pixel_variance4x4_c, AOM_BITS_10),
+#if CONFIG_EXT_PARTITION
+        make_tuple(&aom_highbd_12_masked_sub_pixel_variance128x128_ssse3,
+                   &aom_highbd_12_masked_sub_pixel_variance128x128_c,
+                   AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_sub_pixel_variance128x64_ssse3,
+                   &aom_highbd_12_masked_sub_pixel_variance128x64_c,
+                   AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x128_ssse3,
+                   &aom_highbd_12_masked_sub_pixel_variance64x128_c,
+                   AOM_BITS_12),
+#endif  // CONFIG_EXT_PARTITION
+        make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x64_ssse3,
+                   &aom_highbd_12_masked_sub_pixel_variance64x64_c,
+                   AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x32_ssse3,
+                   &aom_highbd_12_masked_sub_pixel_variance64x32_c,
+                   AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x64_ssse3,
+                   &aom_highbd_12_masked_sub_pixel_variance32x64_c,
+                   AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x32_ssse3,
+                   &aom_highbd_12_masked_sub_pixel_variance32x32_c,
+                   AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x16_ssse3,
+                   &aom_highbd_12_masked_sub_pixel_variance32x16_c,
+                   AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x32_ssse3,
+                   &aom_highbd_12_masked_sub_pixel_variance16x32_c,
+                   AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x16_ssse3,
+                   &aom_highbd_12_masked_sub_pixel_variance16x16_c,
+                   AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x8_ssse3,
+                   &aom_highbd_12_masked_sub_pixel_variance16x8_c, AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x16_ssse3,
+                   &aom_highbd_12_masked_sub_pixel_variance8x16_c, AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x8_ssse3,
+                   &aom_highbd_12_masked_sub_pixel_variance8x8_c, AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x4_ssse3,
+                   &aom_highbd_12_masked_sub_pixel_variance8x4_c, AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_sub_pixel_variance4x8_ssse3,
+                   &aom_highbd_12_masked_sub_pixel_variance4x8_c, AOM_BITS_12),
+        make_tuple(&aom_highbd_12_masked_sub_pixel_variance4x4_ssse3,
+                   &aom_highbd_12_masked_sub_pixel_variance4x4_c,
+                   AOM_BITS_12)));
+#endif  // CONFIG_HIGHBITDEPTH
+
+#endif  // HAVE_SSSE3
+}  // namespace
diff --git a/third_party/aom/test/md5_helper.h b/third_party/aom/test/md5_helper.h
new file mode 100644
index 000000000..8c9d4f706
--- /dev/null
+++ b/third_party/aom/test/md5_helper.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef TEST_MD5_HELPER_H_
+#define TEST_MD5_HELPER_H_
+
+#include "./md5_utils.h"
+#include "aom/aom_decoder.h"
+
+namespace libaom_test {
+class MD5 {
+ public:
+  MD5() { MD5Init(&md5_); }
+
+  // Hashes the d_w x d_h area of the first three planes of |img|, one row at
+  // a time, so only the bytes inside each row's width are digested.
+  void Add(const aom_image_t *img) {
+    const int bytes_per_sample = (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1;
+    for (int plane = 0; plane < 3; ++plane) {
+      const bool chroma = plane != 0;
+      // Chroma dimensions are rounded up rather than down so that no pixel is
+      // skipped: the shift amount is added before shifting, which is only
+      // correct for a chroma shift of 0 or 1.
+      const int rows =
+          chroma ? (img->d_h + img->y_chroma_shift) >> img->y_chroma_shift
+                 : img->d_h;
+      const int row_bytes =
+          (chroma ? (img->d_w + img->x_chroma_shift) >> img->x_chroma_shift
+                  : img->d_w) *
+          bytes_per_sample;
+
+      const uint8_t *row = img->planes[plane];
+      for (int y = 0; y < rows; ++y, row += img->stride[plane]) {
+        MD5Update(&md5_, row, row_bytes);
+      }
+    }
+  }
+
+  // Hashes |size| raw bytes starting at |data|.
+  void Add(const uint8_t *data, size_t size) {
+    MD5Update(&md5_, data, static_cast<uint32_t>(size));
+  }
+
+  // Returns the digest of everything added so far as a NUL-terminated string
+  // of 32 lowercase hex digits. Only a copy of the context is finalized.
+  const char *Get(void) {
+    static const char kHexDigits[] = "0123456789abcdef";
+    uint8_t digest[16];
+    MD5Context snapshot = md5_;
+    MD5Final(digest, &snapshot);
+
+    char *out = res_;
+    for (int i = 0; i < 16; ++i) {
+      *out++ = kHexDigits[digest[i] >> 4];
+      *out++ = kHexDigits[digest[i] & 0xf];
+    }
+    *out = 0;
+    return res_;
+  }
+
+ protected:
+  char res_[33];
+  MD5Context md5_;
+};
+
+}  // namespace libaom_test
+
+#endif  // TEST_MD5_HELPER_H_
diff --git a/third_party/aom/test/minmax_test.cc b/third_party/aom/test/minmax_test.cc
new file mode 100644
index 000000000..f82529192
--- /dev/null
+++ b/third_party/aom/test/minmax_test.cc
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_dsp_rtcd.h"
+#include "aom/aom_integer.h"
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+
+namespace {
+
+using ::libaom_test::ACMRandom;
+
+typedef void (*MinMaxFunc)(const uint8_t *a, int a_stride, const uint8_t *b,
+                           int b_stride, int *min, int *max);
+
+// Fixture whose parameter is the MinMaxFunc implementation to exercise.
+class MinMaxTest : public ::testing::TestWithParam<MinMaxFunc> {
+ public:
+  virtual void SetUp() {
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    mm_func_ = GetParam();
+  }
+ protected:
+  MinMaxFunc mm_func_;  // Function under test, taken from the test parameter.
+  ACMRandom rnd_;       // Generator used by the random-input tests.
+};
+
+// Scalar reference for the 8x8 min/max of absolute pixel differences.
+void reference_minmax(const uint8_t *a, int a_stride, const uint8_t *b,
+                      int b_stride, int *min_ret, int *max_ret) {
+  int lo = 255, hi = 0;
+  for (int row = 0; row < 8; ++row) {
+    const int a_off = row * a_stride, b_off = row * b_stride;
+    for (int col = 0; col < 8; ++col) {
+      const int delta = abs(a[a_off + col] - b[b_off + col]);
+      lo = delta < lo ? delta : lo;
+      hi = delta > hi ? delta : hi;
+    }
+  }
+  *min_ret = lo;
+  *max_ret = hi;
+}
+
+// A single difference of |i| among differences of 255 must be the minimum.
+TEST_P(MinMaxTest, MinValue) {
+  for (int i = 0; i < 64; ++i) {
+    uint8_t a[64] = { 0 };
+    uint8_t b[64];
+    memset(b, 255, sizeof(b));
+    b[i] = i;  // Every other position differs from |a| by 255.
+    int min, max;
+    ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+    EXPECT_EQ(255, max);
+    EXPECT_EQ(i, min);
+  }
+}
+
+// A single difference of |i| among zero differences must be the maximum.
+TEST_P(MinMaxTest, MaxValue) {
+  for (int i = 0; i < 64; ++i) {
+    uint8_t a[64] = { 0 };
+    uint8_t b[64] = { 0 };
+    b[i] = i;  // Every other position is identical in |a| and |b|.
+
+    int min, max;
+    ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+    EXPECT_EQ(i, max);
+    EXPECT_EQ(0, min);
+  }
+}
+
+// Random 8x8 blocks: the function under test must match reference_minmax().
+TEST_P(MinMaxTest, CompareReference) {
+  uint8_t a[64], b[64];
+  int min_ref, max_ref, min, max;
+  for (int idx = 0; idx < 64; ++idx) {
+    a[idx] = rnd_.Rand8();  // Keep the a-then-b draw order of the generator.
+    b[idx] = rnd_.Rand8();
+  }
+  reference_minmax(a, 8, b, 8, &min_ref, &max_ref);
+  ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+  EXPECT_EQ(max_ref, max);
+  EXPECT_EQ(min_ref, min);
+}
+
+TEST_P(MinMaxTest, CompareReferenceAndVaryStride) {  // Every stride pairing.
+  uint8_t a[8 * 64], b[8 * 64];  // Eight rows at the largest stride of 64.
+  for (int idx = 0; idx < 8 * 64; ++idx) {
+    a[idx] = rnd_.Rand8();
+    b[idx] = rnd_.Rand8();
+  }
+  for (int a_stride = 8; a_stride <= 64; a_stride += 8) {
+    for (int b_stride = 8; b_stride <= 64; b_stride += 8) {
+      int min_ref, max_ref, min, max;
+      reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref);
+      ASM_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max));
+      EXPECT_EQ(max_ref, max) << "when a_stride = " << a_stride
+                              << " and b_stride = " << b_stride;
+      EXPECT_EQ(min_ref, min) << "when a_stride = " << a_stride
+                              << " and b_stride = " << b_stride;
+    }
+  }
+}
+
+// The C version is always tested; SIMD versions only where the build has them.
+INSTANTIATE_TEST_CASE_P(C, MinMaxTest, ::testing::Values(&aom_minmax_8x8_c));
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, MinMaxTest,
+                        ::testing::Values(&aom_minmax_8x8_sse2));
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, MinMaxTest,
+                        ::testing::Values(&aom_minmax_8x8_neon));
+#endif  // HAVE_NEON
+
+}  // namespace
diff --git a/third_party/aom/test/motion_vector_test.cc b/third_party/aom/test/motion_vector_test.cc
new file mode 100644
index 000000000..403a8f1a7
--- /dev/null
+++ b/third_party/aom/test/motion_vector_test.cc
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/yuv_video_source.h"
+
+namespace {
+#if defined(__has_feature)
+#if __has_feature(address_sanitizer)
+#define BUILDING_WITH_ASAN  // Makes OverallTest use a smaller clip on 32-bit.
+#endif  // __has_feature(address_sanitizer)
+#endif  // defined(__has_feature)
+
+#define MAX_EXTREME_MV 1
+#define MIN_EXTREME_MV 2
+
+// Encoding modes the test is instantiated with.
+const libaom_test::TestMode kEncodingModeVectors[] = {
+  ::libaom_test::kTwoPassGood, ::libaom_test::kOnePassGood,
+};
+
+// Encoding speeds; the fixture passes its value to AOME_SET_CPUUSED.
+const int kCpuUsedVectors[] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+// MV test modes: 1 - always use maximum MV; 2 - always use minimum MV.
+const int kMVTestModes[] = { MAX_EXTREME_MV, MIN_EXTREME_MV };
+
+// Encodes with the encoder's motion vector unit-test control switched on.
+class MotionVectorTestLarge
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode, int,
+                                                 int> {
+ protected:
+  MotionVectorTestLarge()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        cpu_used_(GET_PARAM(2)), mv_test_mode_(GET_PARAM(3)) {}
+
+  virtual ~MotionVectorTestLarge() {}
+
+  // Real-time: CBR with no lag. Other modes: VBR with a lag of 3 frames.
+  virtual void SetUp() {
+    const bool realtime = encoding_mode_ == ::libaom_test::kRealTime;
+    InitializeConfig();
+    SetMode(encoding_mode_);
+    cfg_.g_lag_in_frames = realtime ? 0 : 3;
+    cfg_.rc_end_usage = realtime ? AOM_CBR : AOM_VBR;
+    if (realtime) {
+      cfg_.rc_buf_sz = 1000;
+      cfg_.rc_buf_initial_sz = 500;
+      cfg_.rc_buf_optimal_sz = 600;
+    }
+  }
+
+  // Applies the encoder controls when the source reports frame 1.
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() != 1) return;
+    encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+    encoder->Control(AV1E_ENABLE_MOTION_VECTOR_UNIT_TEST, mv_test_mode_);
+    if (encoding_mode_ == ::libaom_test::kRealTime) return;
+    // The remaining controls are skipped in real-time mode.
+    encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+    encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+    encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+  }
+
+  libaom_test::TestMode encoding_mode_;  // GET_PARAM(1)
+  int cpu_used_;                         // GET_PARAM(2)
+  int mv_test_mode_;                     // GET_PARAM(3)
+};
+
+// Runs the encode loop on a YUV source opened at 3840x2160 (see ASan note).
+TEST_P(MotionVectorTestLarge, OverallTest) {
+  int width = 3840;
+  int height = 2160;
+
+#ifdef BUILDING_WITH_ASAN
+  // AddressSanitizer's instrumentation overhead makes the 4k clip run out of
+  // memory on 32-bit systems, so a smaller resolution is used there.
+  const bool is_32bit = sizeof(void *) == 4;
+  if (is_32bit) {
+    width = 2048;
+    height = 1080;
+  }
+#endif  // BUILDING_WITH_ASAN
+
+  init_flags_ = AOM_CODEC_USE_PSNR;
+  cfg_.g_profile = 0;
+  cfg_.rc_target_bitrate = 24000;
+
+  testing::internal::scoped_ptr<libaom_test::VideoSource> video;
+  video.reset(new libaom_test::YUVVideoSource(
+      "niklas_640_480_30.yuv", AOM_IMG_FMT_I420, width, height, 30, 1, 0, 5));
+
+  ASSERT_TRUE(video.get() != NULL);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+}
+
+AV1_INSTANTIATE_TEST_CASE(MotionVectorTestLarge,
+                          ::testing::ValuesIn(kEncodingModeVectors),  // modes
+                          ::testing::ValuesIn(kCpuUsedVectors),  // speeds
+                          ::testing::ValuesIn(kMVTestModes));  // MV test modes
+}  // namespace
diff --git a/third_party/aom/test/obmc_sad_test.cc b/third_party/aom/test/obmc_sad_test.cc
new file mode 100644
index 000000000..219c5d810
--- /dev/null
+++ b/third_party/aom/test/obmc_sad_test.cc
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/function_equivalence_test.h"
+#include "test/register_state_check.h"
+
+#include "./aom_config.h"
+#include "./aom_dsp_rtcd.h"
+#include "aom/aom_integer.h"
+
+#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE)
+
+using libaom_test::FunctionEquivalenceTest;
+
+namespace {
+
+static const int kIterations = 1000;  // Trials in each RandomValues test.
+static const int kMaskMax = 64;  // Largest mask used is kMaskMax * kMaskMax.
+
+typedef unsigned int (*ObmcSadF)(const uint8_t *pre, int pre_stride,
+                                 const int32_t *wsrc, const int32_t *mask);
+typedef libaom_test::FuncParam<ObmcSadF> TestFuncs;  // Reference/test pair.
+
+////////////////////////////////////////////////////////////////////////////////
+// 8 bit
+////////////////////////////////////////////////////////////////////////////////
+
+class ObmcSadTest : public FunctionEquivalenceTest<ObmcSadF> {};
+
+// Random inputs: the tested function must return the reference's result.
+TEST_P(ObmcSadTest, RandomValues) {
+  DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
+
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    const int pre_stride = rng_(MAX_SB_SIZE + 1);
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      pre[i] = rng_.Rand8();
+      // Separate statements fix the draw order, which '*' leaves unspecified.
+      const int sample = rng_.Rand8();
+      wsrc[i] = sample * rng_(kMaskMax * kMaskMax + 1);
+      mask[i] = rng_(kMaskMax * kMaskMax + 1);
+    }
+    const unsigned int ref_res = params_.ref_func(pre, pre_stride, wsrc, mask);
+    unsigned int tst_res;
+    ASM_REGISTER_STATE_CHECK(tst_res =
+                                 params_.tst_func(pre, pre_stride, wsrc, mask));
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+// Largest input values at every pre_stride from 0 to MAX_SB_SIZE - 1.
+TEST_P(ObmcSadTest, ExtremeValues) {
+  DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
+  const int32_t kFullMask = kMaskMax * kMaskMax;
+
+  for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) {
+    const int pre_stride = iter;
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      pre[i] = UINT8_MAX;
+      wsrc[i] = UINT8_MAX * kFullMask;
+      mask[i] = kFullMask;
+    }
+    const unsigned int ref_res = params_.ref_func(pre, pre_stride, wsrc, mask);
+    unsigned int tst_res;
+    ASM_REGISTER_STATE_CHECK(tst_res =
+                                 params_.tst_func(pre, pre_stride, wsrc, mask));
+
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+#if HAVE_SSE4_1
+#if CONFIG_MOTION_VAR
+// Each entry pairs a C function with its SSE4.1 counterpart for comparison.
+const ObmcSadTest::ParamType sse4_functions[] = {
+#if CONFIG_EXT_PARTITION
+  TestFuncs(aom_obmc_sad128x128_c, aom_obmc_sad128x128_sse4_1),
+  TestFuncs(aom_obmc_sad128x64_c, aom_obmc_sad128x64_sse4_1),
+  TestFuncs(aom_obmc_sad64x128_c, aom_obmc_sad64x128_sse4_1),
+#endif  // CONFIG_EXT_PARTITION
+  TestFuncs(aom_obmc_sad64x64_c, aom_obmc_sad64x64_sse4_1),
+  TestFuncs(aom_obmc_sad64x32_c, aom_obmc_sad64x32_sse4_1),
+  TestFuncs(aom_obmc_sad32x64_c, aom_obmc_sad32x64_sse4_1),
+  TestFuncs(aom_obmc_sad32x32_c, aom_obmc_sad32x32_sse4_1),
+  TestFuncs(aom_obmc_sad32x16_c, aom_obmc_sad32x16_sse4_1),
+  TestFuncs(aom_obmc_sad16x32_c, aom_obmc_sad16x32_sse4_1),
+  TestFuncs(aom_obmc_sad16x16_c, aom_obmc_sad16x16_sse4_1),
+  TestFuncs(aom_obmc_sad16x8_c, aom_obmc_sad16x8_sse4_1),
+  TestFuncs(aom_obmc_sad8x16_c, aom_obmc_sad8x16_sse4_1),
+  TestFuncs(aom_obmc_sad8x8_c, aom_obmc_sad8x8_sse4_1),
+  TestFuncs(aom_obmc_sad8x4_c, aom_obmc_sad8x4_sse4_1),
+  TestFuncs(aom_obmc_sad4x8_c, aom_obmc_sad4x8_sse4_1),
+  TestFuncs(aom_obmc_sad4x4_c, aom_obmc_sad4x4_sse4_1)
+};
+INSTANTIATE_TEST_CASE_P(SSE4_1, ObmcSadTest,
+                        ::testing::ValuesIn(sse4_functions));
+#endif  // CONFIG_MOTION_VAR
+#endif  // HAVE_SSE4_1
+
+////////////////////////////////////////////////////////////////////////////////
+// High bit-depth
+////////////////////////////////////////////////////////////////////////////////
+
+#if CONFIG_HIGHBITDEPTH
+class ObmcSadHBDTest : public FunctionEquivalenceTest<ObmcSadF> {};
+
+// Random inputs: the tested function must return the reference's result.
+TEST_P(ObmcSadHBDTest, RandomValues) {
+  DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
+
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    const int pre_stride = rng_(MAX_SB_SIZE + 1);
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      pre[i] = rng_(1 << 12);
+      // Separate statements fix the draw order, which '*' leaves unspecified.
+      const int sample = rng_(1 << 12);
+      wsrc[i] = sample * rng_(kMaskMax * kMaskMax + 1);
+      mask[i] = rng_(kMaskMax * kMaskMax + 1);
+    }
+    const unsigned int ref_res =
+        params_.ref_func(CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask);
+    unsigned int tst_res;
+    ASM_REGISTER_STATE_CHECK(
+        tst_res =
+            params_.tst_func(CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask));
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+// Largest 12-bit input values at every pre_stride from 0 to MAX_SB_SIZE - 1.
+TEST_P(ObmcSadHBDTest, ExtremeValues) {
+  DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
+  const int32_t kMaxPixel = (1 << 12) - 1;
+  const int32_t kFullMask = kMaskMax * kMaskMax;
+
+  for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) {
+    const int pre_stride = iter;
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      pre[i] = kMaxPixel;
+      wsrc[i] = kMaxPixel * kFullMask;
+      mask[i] = kFullMask;
+    }
+    const unsigned int ref_res =
+        params_.ref_func(CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask);
+    unsigned int tst_res;
+    ASM_REGISTER_STATE_CHECK(
+        tst_res =
+            params_.tst_func(CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask));
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+#if HAVE_SSE4_1
+#if CONFIG_MOTION_VAR
+// Each entry pairs a C function with its SSE4.1 counterpart for comparison.
+const ObmcSadHBDTest::ParamType sse4_functions_hbd[] = {
+#if CONFIG_EXT_PARTITION
+  TestFuncs(aom_highbd_obmc_sad128x128_c, aom_highbd_obmc_sad128x128_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad128x64_c, aom_highbd_obmc_sad128x64_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad64x128_c, aom_highbd_obmc_sad64x128_sse4_1),
+#endif  // CONFIG_EXT_PARTITION
+  TestFuncs(aom_highbd_obmc_sad64x64_c, aom_highbd_obmc_sad64x64_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad64x32_c, aom_highbd_obmc_sad64x32_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad32x64_c, aom_highbd_obmc_sad32x64_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad32x32_c, aom_highbd_obmc_sad32x32_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad32x16_c, aom_highbd_obmc_sad32x16_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad16x32_c, aom_highbd_obmc_sad16x32_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad16x16_c, aom_highbd_obmc_sad16x16_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad16x8_c, aom_highbd_obmc_sad16x8_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad8x16_c, aom_highbd_obmc_sad8x16_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad8x8_c, aom_highbd_obmc_sad8x8_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad8x4_c, aom_highbd_obmc_sad8x4_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad4x8_c, aom_highbd_obmc_sad4x8_sse4_1),
+  TestFuncs(aom_highbd_obmc_sad4x4_c, aom_highbd_obmc_sad4x4_sse4_1)
+};
+INSTANTIATE_TEST_CASE_P(SSE4_1, ObmcSadHBDTest,
+                        ::testing::ValuesIn(sse4_functions_hbd));
+#endif  // CONFIG_MOTION_VAR
+#endif  // HAVE_SSE4_1
+#endif  // CONFIG_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/obmc_variance_test.cc b/third_party/aom/test/obmc_variance_test.cc
new file mode 100644
index 000000000..1b30645a5
--- /dev/null
+++ b/third_party/aom/test/obmc_variance_test.cc
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+
+#include "test/function_equivalence_test.h"
+#include "test/register_state_check.h"
+
+#include "./aom_config.h"
+#include "./aom_dsp_rtcd.h"
+#include "aom/aom_integer.h"
+
+#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE)
+
+using libaom_test::ACMRandom;
+using libaom_test::FunctionEquivalenceTest;
+
+namespace {
+
+static const int kIterations = 1000;  // Trials in each RandomValues test.
+static const int kMaskMax = 64;  // Largest mask used is kMaskMax * kMaskMax.
+
+typedef unsigned int (*ObmcVarF)(const uint8_t *pre, int pre_stride,
+                                 const int32_t *wsrc, const int32_t *mask,
+                                 unsigned int *sse);
+typedef libaom_test::FuncParam<ObmcVarF> TestFuncs;  // Reference/test pair.
+
+////////////////////////////////////////////////////////////////////////////////
+// 8 bit
+////////////////////////////////////////////////////////////////////////////////
+
+class ObmcVarianceTest : public FunctionEquivalenceTest<ObmcVarF> {};
+
+// Random inputs: return value and SSE output must both match the reference.
+TEST_P(ObmcVarianceTest, RandomValues) {
+  DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
+
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    const int pre_stride = this->rng_(MAX_SB_SIZE + 1);
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      pre[i] = this->rng_.Rand8();
+      // Separate statements fix the draw order, which '*' leaves unspecified.
+      const int sample = this->rng_.Rand8();
+      wsrc[i] = sample * this->rng_(kMaskMax * kMaskMax + 1);
+      mask[i] = this->rng_(kMaskMax * kMaskMax + 1);
+    }
+    unsigned int ref_sse, tst_sse;
+    const unsigned int ref_res =
+        params_.ref_func(pre, pre_stride, wsrc, mask, &ref_sse);
+    unsigned int tst_res;
+    ASM_REGISTER_STATE_CHECK(
+        tst_res = params_.tst_func(pre, pre_stride, wsrc, mask, &tst_sse));
+    ASSERT_EQ(ref_res, tst_res);
+    ASSERT_EQ(ref_sse, tst_sse);
+  }
+}
+
+// Largest input values at every pre_stride from 0 to MAX_SB_SIZE - 1.
+TEST_P(ObmcVarianceTest, ExtremeValues) {
+  DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
+  const int32_t kFullMask = kMaskMax * kMaskMax;
+
+  for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) {
+    const int pre_stride = iter;
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      pre[i] = UINT8_MAX;
+      wsrc[i] = UINT8_MAX * kFullMask;
+      mask[i] = kFullMask;
+    }
+    unsigned int ref_sse, tst_sse;
+    const unsigned int ref_res =
+        params_.ref_func(pre, pre_stride, wsrc, mask, &ref_sse);
+    unsigned int tst_res;
+    ASM_REGISTER_STATE_CHECK(
+        tst_res = params_.tst_func(pre, pre_stride, wsrc, mask, &tst_sse));
+
+    ASSERT_EQ(ref_res, tst_res);
+    ASSERT_EQ(ref_sse, tst_sse);
+  }
+}
+
+#if HAVE_SSE4_1
+#if CONFIG_MOTION_VAR
+// Each entry pairs a C function with its SSE4.1 counterpart for comparison.
+const ObmcVarianceTest::ParamType sse4_functions[] = {
+#if CONFIG_EXT_PARTITION
+  TestFuncs(aom_obmc_variance128x128_c, aom_obmc_variance128x128_sse4_1),
+  TestFuncs(aom_obmc_variance128x64_c, aom_obmc_variance128x64_sse4_1),
+  TestFuncs(aom_obmc_variance64x128_c, aom_obmc_variance64x128_sse4_1),
+#endif  // CONFIG_EXT_PARTITION
+  TestFuncs(aom_obmc_variance64x64_c, aom_obmc_variance64x64_sse4_1),
+  TestFuncs(aom_obmc_variance64x32_c, aom_obmc_variance64x32_sse4_1),
+  TestFuncs(aom_obmc_variance32x64_c, aom_obmc_variance32x64_sse4_1),
+  TestFuncs(aom_obmc_variance32x32_c, aom_obmc_variance32x32_sse4_1),
+  TestFuncs(aom_obmc_variance32x16_c, aom_obmc_variance32x16_sse4_1),
+  TestFuncs(aom_obmc_variance16x32_c, aom_obmc_variance16x32_sse4_1),
+  TestFuncs(aom_obmc_variance16x16_c, aom_obmc_variance16x16_sse4_1),
+  TestFuncs(aom_obmc_variance16x8_c, aom_obmc_variance16x8_sse4_1),
+  TestFuncs(aom_obmc_variance8x16_c, aom_obmc_variance8x16_sse4_1),
+  TestFuncs(aom_obmc_variance8x8_c, aom_obmc_variance8x8_sse4_1),
+  TestFuncs(aom_obmc_variance8x4_c, aom_obmc_variance8x4_sse4_1),
+  TestFuncs(aom_obmc_variance4x8_c, aom_obmc_variance4x8_sse4_1),
+  TestFuncs(aom_obmc_variance4x4_c, aom_obmc_variance4x4_sse4_1)
+};
+INSTANTIATE_TEST_CASE_P(SSE4_1, ObmcVarianceTest,
+                        ::testing::ValuesIn(sse4_functions));
+#endif  // CONFIG_MOTION_VAR
+#endif  // HAVE_SSE4_1
+
+////////////////////////////////////////////////////////////////////////////////
+// High bit-depth
+////////////////////////////////////////////////////////////////////////////////
+
+#if CONFIG_HIGHBITDEPTH
+class ObmcVarianceHBDTest : public FunctionEquivalenceTest<ObmcVarF> {};
+
+// Random inputs: return value and SSE output must both match the reference.
+TEST_P(ObmcVarianceHBDTest, RandomValues) {
+  DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
+
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    const int pre_stride = this->rng_(MAX_SB_SIZE + 1);
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      pre[i] = this->rng_(1 << params_.bit_depth);
+      // Separate statements fix the draw order, which '*' leaves unspecified.
+      const int sample = this->rng_(1 << params_.bit_depth);
+      wsrc[i] = sample * this->rng_(kMaskMax * kMaskMax + 1);
+      mask[i] = this->rng_(kMaskMax * kMaskMax + 1);
+    }
+
+    unsigned int ref_sse, tst_sse;
+    const unsigned int ref_res = params_.ref_func(
+        CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask, &ref_sse);
+    unsigned int tst_res;
+    ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(CONVERT_TO_BYTEPTR(pre),
+                                                        pre_stride, wsrc, mask,
+                                                        &tst_sse));
+    ASSERT_EQ(ref_res, tst_res);
+    ASSERT_EQ(ref_sse, tst_sse);
+  }
+}
+
+// Largest values for params_.bit_depth at every pre_stride below MAX_SB_SIZE.
+TEST_P(ObmcVarianceHBDTest, ExtremeValues) {
+  DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
+  const int max_pixel = (1 << params_.bit_depth) - 1;
+  const int32_t kFullMask = kMaskMax * kMaskMax;
+
+  for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) {
+    const int pre_stride = iter;
+    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+      pre[i] = max_pixel;
+      wsrc[i] = max_pixel * kFullMask;
+      mask[i] = kFullMask;
+    }
+    unsigned int ref_sse, tst_sse;
+    const unsigned int ref_res = params_.ref_func(
+        CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask, &ref_sse);
+    unsigned int tst_res;
+    ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(CONVERT_TO_BYTEPTR(pre),
+                                                        pre_stride, wsrc, mask,
+                                                        &tst_sse));
+    ASSERT_EQ(ref_res, tst_res);
+    ASSERT_EQ(ref_sse, tst_sse);
+  }
+}
+
+#if HAVE_SSE4_1
+#if CONFIG_MOTION_VAR
+ObmcVarianceHBDTest::ParamType sse4_functions_hbd[] = {  // C vs. SSE4.1 pairs
+#if CONFIG_EXT_PARTITION
+  TestFuncs(aom_highbd_obmc_variance128x128_c,
+            aom_highbd_obmc_variance128x128_sse4_1, 8),
+  TestFuncs(aom_highbd_obmc_variance128x64_c,
+            aom_highbd_obmc_variance128x64_sse4_1, 8),
+  TestFuncs(aom_highbd_obmc_variance64x128_c,
+            aom_highbd_obmc_variance64x128_sse4_1, 8),
+#endif  // CONFIG_EXT_PARTITION
+  TestFuncs(aom_highbd_obmc_variance64x64_c,
+            aom_highbd_obmc_variance64x64_sse4_1, 8),
+  TestFuncs(aom_highbd_obmc_variance64x32_c,
+            aom_highbd_obmc_variance64x32_sse4_1, 8),
+  TestFuncs(aom_highbd_obmc_variance32x64_c,
+            aom_highbd_obmc_variance32x64_sse4_1, 8),
+  TestFuncs(aom_highbd_obmc_variance32x32_c,
+            aom_highbd_obmc_variance32x32_sse4_1, 8),
+  TestFuncs(aom_highbd_obmc_variance32x16_c,
+            aom_highbd_obmc_variance32x16_sse4_1, 8),
+  TestFuncs(aom_highbd_obmc_variance16x32_c,
+            aom_highbd_obmc_variance16x32_sse4_1, 8),
+  TestFuncs(aom_highbd_obmc_variance16x16_c,
+            aom_highbd_obmc_variance16x16_sse4_1, 8),
+  TestFuncs(aom_highbd_obmc_variance16x8_c, aom_highbd_obmc_variance16x8_sse4_1,
+            8),
+  TestFuncs(aom_highbd_obmc_variance8x16_c, aom_highbd_obmc_variance8x16_sse4_1,
+            8),
+  TestFuncs(aom_highbd_obmc_variance8x8_c, aom_highbd_obmc_variance8x8_sse4_1,
+            8),
+  TestFuncs(aom_highbd_obmc_variance8x4_c, aom_highbd_obmc_variance8x4_sse4_1,
+            8),
+  TestFuncs(aom_highbd_obmc_variance4x8_c, aom_highbd_obmc_variance4x8_sse4_1,
+            8),
+  TestFuncs(aom_highbd_obmc_variance4x4_c, aom_highbd_obmc_variance4x4_sse4_1,
+            8),
+#if CONFIG_EXT_PARTITION
+  TestFuncs(aom_highbd_10_obmc_variance128x128_c,
+            aom_highbd_10_obmc_variance128x128_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance128x64_c,
+            aom_highbd_10_obmc_variance128x64_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance64x128_c,
+            aom_highbd_10_obmc_variance64x128_sse4_1, 10),
+#endif  // CONFIG_EXT_PARTITION
+  TestFuncs(aom_highbd_10_obmc_variance64x64_c,
+            aom_highbd_10_obmc_variance64x64_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance64x32_c,
+            aom_highbd_10_obmc_variance64x32_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance32x64_c,
+            aom_highbd_10_obmc_variance32x64_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance32x32_c,
+            aom_highbd_10_obmc_variance32x32_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance32x16_c,
+            aom_highbd_10_obmc_variance32x16_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance16x32_c,
+            aom_highbd_10_obmc_variance16x32_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance16x16_c,
+            aom_highbd_10_obmc_variance16x16_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance16x8_c,
+            aom_highbd_10_obmc_variance16x8_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance8x16_c,
+            aom_highbd_10_obmc_variance8x16_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance8x8_c,
+            aom_highbd_10_obmc_variance8x8_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance8x4_c,
+            aom_highbd_10_obmc_variance8x4_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance4x8_c,
+            aom_highbd_10_obmc_variance4x8_sse4_1, 10),
+  TestFuncs(aom_highbd_10_obmc_variance4x4_c,
+            aom_highbd_10_obmc_variance4x4_sse4_1, 10),
+#if CONFIG_EXT_PARTITION
+  TestFuncs(aom_highbd_12_obmc_variance128x128_c,
+            aom_highbd_12_obmc_variance128x128_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance128x64_c,
+            aom_highbd_12_obmc_variance128x64_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance64x128_c,
+            aom_highbd_12_obmc_variance64x128_sse4_1, 12),
+#endif  // CONFIG_EXT_PARTITION
+  TestFuncs(aom_highbd_12_obmc_variance64x64_c,
+            aom_highbd_12_obmc_variance64x64_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance64x32_c,
+            aom_highbd_12_obmc_variance64x32_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance32x64_c,
+            aom_highbd_12_obmc_variance32x64_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance32x32_c,
+            aom_highbd_12_obmc_variance32x32_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance32x16_c,
+            aom_highbd_12_obmc_variance32x16_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance16x32_c,
+            aom_highbd_12_obmc_variance16x32_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance16x16_c,
+            aom_highbd_12_obmc_variance16x16_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance16x8_c,
+            aom_highbd_12_obmc_variance16x8_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance8x16_c,
+            aom_highbd_12_obmc_variance8x16_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance8x8_c,
+            aom_highbd_12_obmc_variance8x8_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance8x4_c,
+            aom_highbd_12_obmc_variance8x4_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance4x8_c,
+            aom_highbd_12_obmc_variance4x8_sse4_1, 12),
+  TestFuncs(aom_highbd_12_obmc_variance4x4_c,
+            aom_highbd_12_obmc_variance4x4_sse4_1, 12)
+};
+
+INSTANTIATE_TEST_CASE_P(SSE4_1, ObmcVarianceHBDTest,
+                        ::testing::ValuesIn(sse4_functions_hbd));
+#endif  // CONFIG_MOTION_VAR
+#endif  // HAVE_SSE4_1
+#endif  // CONFIG_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/partial_idct_test.cc b/third_party/aom/test/partial_idct_test.cc
new file mode 100644
index 000000000..0899b60c3
--- /dev/null
+++ b/third_party/aom/test/partial_idct_test.cc
@@ -0,0 +1,485 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./av1_rtcd.h"
+#include "./aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "av1/common/blockd.h"
+#include "av1/common/scan.h"
+#include "aom/aom_integer.h"
+#include "aom_ports/aom_timer.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
+typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
+typedef void (*InvTxfmWithBdFunc)(const tran_low_t *in, uint8_t *out,
+                                  int stride, int bd);
+
+template <InvTxfmFunc fn>  // Adapts a 3-argument inverse txfm to the bd form.
+void wrapper(const tran_low_t *in, uint8_t *out, int stride, int /*bd*/) {
+  // The bit depth is accepted for signature compatibility only; it is unused.
+  fn(in, out, stride);
+}
+
+#if CONFIG_HIGHBITDEPTH
+template <InvTxfmWithBdFunc fn>  // Forwards to fn with |out| run through
+void highbd_wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) {
+  fn(in, CONVERT_TO_BYTEPTR(out), stride, bd);  // CONVERT_TO_BYTEPTR.
+}
+#endif  // CONFIG_HIGHBITDEPTH
+
+typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmWithBdFunc, InvTxfmWithBdFunc,
+                        TX_SIZE, int, int, int>
+    PartialInvTxfmParam;  // fwd, full, partial, tx, last_nonzero, bd, px_size
+const int kMaxNumCoeffs = 1024;  // capacity of the on-stack coefficient arrays
+const int kCountTestBlock = 1000;  // iterations of the looped tests
+
+// Fixture that checks a partial inverse transform against the full one.
+class PartialIDctTest : public ::testing::TestWithParam<PartialInvTxfmParam> {
+ public:
+  // Null the buffers so TearDown() frees nothing if SetUp() fails early.
+  PartialIDctTest()
+      : input_block_(NULL), output_block_(NULL), output_block_ref_(NULL) {}
+  virtual ~PartialIDctTest() {}
+  virtual void SetUp() {
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    ftxfm_ = GET_PARAM(0);
+    full_itxfm_ = GET_PARAM(1);
+    partial_itxfm_ = GET_PARAM(2);
+    tx_size_ = GET_PARAM(3);
+    last_nonzero_ = GET_PARAM(4);
+    bit_depth_ = GET_PARAM(5);
+    pixel_size_ = GET_PARAM(6);
+    mask_ = (1 << bit_depth_) - 1;
+    switch (tx_size_) {
+      case TX_4X4: size_ = 4; break;
+      case TX_8X8: size_ = 8; break;
+      case TX_16X16: size_ = 16; break;
+      case TX_32X32: size_ = 32; break;
+      default: FAIL() << "Wrong Size!"; break;
+    }
+    // Randomize stride_ to at most 1024, but never below the block width.
+    stride_ = rnd_(1024) + 1;
+    if (stride_ < size_) stride_ = size_;
+    // Align stride_ to 16 if it's bigger than 16.
+    if (stride_ > 16) stride_ &= ~15;
+
+    input_block_size_ = size_ * size_;
+    output_block_size_ = size_ * stride_;
+
+    input_block_ = reinterpret_cast<tran_low_t *>(
+        aom_memalign(16, sizeof(*input_block_) * input_block_size_));
+    output_block_ = reinterpret_cast<uint8_t *>(
+        aom_memalign(16, pixel_size_ * output_block_size_));
+    output_block_ref_ = reinterpret_cast<uint8_t *>(
+        aom_memalign(16, pixel_size_ * output_block_size_));
+  }
+
+  virtual void TearDown() {
+    aom_free(input_block_);
+    input_block_ = NULL;
+    aom_free(output_block_);
+    output_block_ = NULL;
+    aom_free(output_block_ref_);
+    output_block_ref_ = NULL;
+    libaom_test::ClearSystemState();
+  }
+
+  // Zeroes the input and fills both output buffers with the same random data.
+  void InitMem() {
+    memset(input_block_, 0, sizeof(*input_block_) * input_block_size_);
+    if (pixel_size_ == 1) {
+      for (int j = 0; j < output_block_size_; ++j) {
+        output_block_[j] = output_block_ref_[j] = rnd_.Rand16() & mask_;
+      }
+    } else {
+      ASSERT_EQ(2, pixel_size_);
+      uint16_t *const output = reinterpret_cast<uint16_t *>(output_block_);
+      uint16_t *const output_ref =
+          reinterpret_cast<uint16_t *>(output_block_ref_);
+      for (int j = 0; j < output_block_size_; ++j) {
+        output[j] = output_ref[j] = rnd_.Rand16() & mask_;
+      }
+    }
+  }
+
+  // Fills the first last_nonzero_ scan positions with random coefficients.
+  void InitInput() {
+    const int max_coeff = 32766 / 4;
+    int max_energy_leftover = max_coeff * max_coeff;
+    for (int j = 0; j < last_nonzero_; ++j) {
+      int16_t coeff = static_cast<int16_t>(sqrt(1.0 * max_energy_leftover) *
+                                           (rnd_.Rand16() - 32768) / 65536);
+      max_energy_leftover -= coeff * coeff;
+      if (max_energy_leftover < 0) {
+        max_energy_leftover = 0;
+        coeff = 0;
+      }
+      input_block_[av1_default_scan_orders[tx_size_].scan[j]] = coeff;
+    }
+  }
+
+ protected:
+  int last_nonzero_;
+  TX_SIZE tx_size_;
+  tran_low_t *input_block_;
+  uint8_t *output_block_;
+  uint8_t *output_block_ref_;
+  int size_;
+  int stride_;
+  int pixel_size_;
+  int input_block_size_;
+  int output_block_size_;
+  int bit_depth_;
+  int mask_;
+  FwdTxfmFunc ftxfm_;
+  InvTxfmWithBdFunc full_itxfm_;
+  InvTxfmWithBdFunc partial_itxfm_;
+  ACMRandom rnd_;
+};
+
+// Feeds +/-mask_ samples through the forward transform, keeps the first
+// last_nonzero_ coefficients with the remainder of /4 dropped, then compares.
+TEST_P(PartialIDctTest, RunQuantCheck) {
+  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);
+  const size_t output_bytes = pixel_size_ * output_block_size_;
+
+  InitMem();
+  for (int block = 0; block < kCountTestBlock; ++block) {
+    // Block 0 is all +mask_, block 1 all -mask_, later blocks random signs.
+    for (int k = 0; k < input_block_size_; ++k) {
+      int16_t sample = mask_;
+      if (block == 1) {
+        sample = -mask_;
+      } else if (block > 1 && rnd_.Rand8() % 2 == 0) {
+        sample = -mask_;
+      }
+      input_extreme_block[k] = sample;
+    }
+
+    ftxfm_(input_extreme_block, output_ref_block, size_);
+
+    // Quantization with minimum allowed step sizes: index 0 is always
+    // written, followed by scan positions 1 .. last_nonzero_ - 1.
+    input_block_[0] = (output_ref_block[0] / 4) * 4;
+    for (int k = 1; k < last_nonzero_; ++k) {
+      const int pos = av1_default_scan_orders[tx_size_].scan[k];
+      input_block_[pos] = (output_ref_block[pos] / 4) * 4;
+    }
+
+    // Run the reference (full) and the tested (partial) inverse transform.
+    ASM_REGISTER_STATE_CHECK(
+        full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
+    ASM_REGISTER_STATE_CHECK(
+        partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
+    ASSERT_EQ(0, memcmp(output_block_ref_, output_block_, output_bytes))
+        << "Error: partial inverse transform produces different results";
+  }
+}
+
+// Partial vs. full inverse transform on the random input from InitInput().
+TEST_P(PartialIDctTest, ResultsMatch) {
+  const size_t output_bytes = pixel_size_ * output_block_size_;
+  for (int block = 0; block < kCountTestBlock; ++block) {
+    InitMem();
+    InitInput();
+    ASM_REGISTER_STATE_CHECK(
+        full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
+    ASM_REGISTER_STATE_CHECK(
+        partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
+    ASSERT_EQ(0, memcmp(output_block_ref_, output_block_, output_bytes))
+        << "Error: partial inverse transform produces different results";
+  }
+}
+
+// Constant coefficients (10) applied on top of randomized output buffers.
+TEST_P(PartialIDctTest, AddOutputBlock) {
+  const size_t output_bytes = pixel_size_ * output_block_size_;
+  for (int block = 0; block < kCountTestBlock; ++block) {
+    InitMem();
+    for (int k = 0; k < last_nonzero_; ++k) {
+      input_block_[av1_default_scan_orders[tx_size_].scan[k]] = 10;
+    }
+    ASM_REGISTER_STATE_CHECK(
+        full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
+    ASM_REGISTER_STATE_CHECK(
+        partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
+    ASSERT_EQ(0, memcmp(output_block_ref_, output_block_, output_bytes))
+        << "Error: Transform results are not correctly added to output.";
+  }
+}
+
+// One extreme (int16 max, then min) coefficient at a time, per scan position.
+TEST_P(PartialIDctTest, SingleExtremeCoeff) {
+  const int kExtremes[2] = { std::numeric_limits<int16_t>::max(),
+                             std::numeric_limits<int16_t>::min() };
+  const size_t output_bytes = pixel_size_ * output_block_size_;
+  for (int i = 0; i < last_nonzero_; ++i) {
+    const int pos = av1_default_scan_orders[tx_size_].scan[i];
+    memset(input_block_, 0, sizeof(*input_block_) * input_block_size_);
+    // Max first, then min, both written to the same position.
+    for (int pass = 0; pass < 2; ++pass) {
+      const int coeff = kExtremes[pass];
+      memset(output_block_, 0, output_bytes);
+      memset(output_block_ref_, 0, output_bytes);
+      input_block_[pos] = coeff;
+      ASM_REGISTER_STATE_CHECK(
+          full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
+      ASM_REGISTER_STATE_CHECK(
+          partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
+      ASSERT_EQ(0, memcmp(output_block_ref_, output_block_, output_bytes))
+          << "Error: Fails with single coeff of " << coeff << " at " << i
+          << ".";
+    }
+  }
+}
+
+// Times partial_itxfm_ and checks its output against the untimed full pass.
+TEST_P(PartialIDctTest, DISABLED_Speed) {
+  // Keep runtime stable with transform size.
+  const int kCountSpeedTestBlock = 500000000 / input_block_size_;
+  const size_t output_bytes = pixel_size_ * output_block_size_;
+  InitMem();
+  InitInput();
+  // Untimed reference pass: same number of calls as the timed loop below.
+  for (int run = 0; run < kCountSpeedTestBlock; ++run) {
+    ASM_REGISTER_STATE_CHECK(
+        full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
+  }
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
+  for (int run = 0; run < kCountSpeedTestBlock; ++run) {
+    partial_itxfm_(input_block_, output_block_, stride_, bit_depth_);
+  }
+  libaom_test::ClearSystemState();
+  aom_usec_timer_mark(&timer);
+  const int elapsed_ms =
+      static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
+  printf("idct%dx%d_%d (bitdepth %d) time: %5d ms\n", size_, size_,
+         last_nonzero_, bit_depth_, elapsed_ms);
+  ASSERT_EQ(0, memcmp(output_block_ref_, output_block_, output_bytes))
+      << "Error: partial inverse transform produces different results";
+}
+
+using std::tr1::make_tuple;
+
+const PartialInvTxfmParam c_partial_idct_tests[] = {  // C vs. C kernels
+#if CONFIG_HIGHBITDEPTH
+  make_tuple(&aom_highbd_fdct4x4_c,
+             &highbd_wrapper<aom_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<aom_highbd_idct4x4_16_add_c>, TX_4X4, 16, 8, 2),
+  make_tuple(&aom_highbd_fdct4x4_c,
+             &highbd_wrapper<aom_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<aom_highbd_idct4x4_16_add_c>, TX_4X4, 16, 10, 2),
+  make_tuple(&aom_highbd_fdct4x4_c,
+             &highbd_wrapper<aom_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<aom_highbd_idct4x4_16_add_c>, TX_4X4, 16, 12, 2),
+  make_tuple(&aom_highbd_fdct4x4_c,
+             &highbd_wrapper<aom_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<aom_highbd_idct4x4_1_add_c>, TX_4X4, 1, 8, 2),
+  make_tuple(&aom_highbd_fdct4x4_c,
+             &highbd_wrapper<aom_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<aom_highbd_idct4x4_1_add_c>, TX_4X4, 1, 10, 2),
+  make_tuple(&aom_highbd_fdct4x4_c,
+             &highbd_wrapper<aom_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<aom_highbd_idct4x4_1_add_c>, TX_4X4, 1, 12, 2),
+#endif  // CONFIG_HIGHBITDEPTH
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_1024_add_c>, TX_32X32, 1024, 8, 1),
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_135_add_c>, TX_32X32, 135, 8, 1),
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_34_add_c>, TX_32X32, 34, 8, 1),
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_1_add_c>, TX_32X32, 1, 8, 1),
+  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
+             &wrapper<aom_idct16x16_256_add_c>, TX_16X16, 256, 8, 1),
+  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
+             &wrapper<aom_idct16x16_38_add_c>, TX_16X16, 38, 8, 1),
+  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
+             &wrapper<aom_idct16x16_10_add_c>, TX_16X16, 10, 8, 1),
+  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
+             &wrapper<aom_idct16x16_1_add_c>, TX_16X16, 1, 8, 1),
+  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
+             &wrapper<aom_idct8x8_64_add_c>, TX_8X8, 64, 8, 1),
+  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
+             &wrapper<aom_idct8x8_12_add_c>, TX_8X8, 12, 8, 1),
+  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
+             &wrapper<aom_idct8x8_1_add_c>, TX_8X8, 1, 8, 1),
+  make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>,
+             &wrapper<aom_idct4x4_16_add_c>, TX_4X4, 16, 8, 1),
+  make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>,
+             &wrapper<aom_idct4x4_1_add_c>, TX_4X4, 1, 8, 1)
+};
+
+INSTANTIATE_TEST_CASE_P(C, PartialIDctTest,
+                        ::testing::ValuesIn(c_partial_idct_tests));
+
+#if HAVE_NEON && !CONFIG_HIGHBITDEPTH
+const PartialInvTxfmParam neon_partial_idct_tests[] = {  // C ref vs. NEON
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_1_add_neon>, TX_32X32, 1, 8, 1),
+  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
+             &wrapper<aom_idct16x16_10_add_neon>, TX_16X16, 10, 8, 1),
+  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
+             &wrapper<aom_idct16x16_1_add_neon>, TX_16X16, 1, 8, 1),
+  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
+             &wrapper<aom_idct8x8_12_add_neon>, TX_8X8, 12, 8, 1),
+  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
+             &wrapper<aom_idct8x8_1_add_neon>, TX_8X8, 1, 8, 1),
+  make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>,
+             &wrapper<aom_idct4x4_1_add_neon>, TX_4X4, 1, 8, 1)
+};
+
+INSTANTIATE_TEST_CASE_P(NEON, PartialIDctTest,
+                        ::testing::ValuesIn(neon_partial_idct_tests));
+#endif  // HAVE_NEON && !CONFIG_HIGHBITDEPTH
+
+#if HAVE_SSE2
+const PartialInvTxfmParam sse2_partial_idct_tests[] = {  // 135 reuses 1024 fn
+#if CONFIG_HIGHBITDEPTH
+  make_tuple(&aom_highbd_fdct4x4_c,
+             &highbd_wrapper<aom_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<aom_highbd_idct4x4_16_add_sse2>, TX_4X4, 16, 8, 2),
+  make_tuple(
+      &aom_highbd_fdct4x4_c, &highbd_wrapper<aom_highbd_idct4x4_16_add_c>,
+      &highbd_wrapper<aom_highbd_idct4x4_16_add_sse2>, TX_4X4, 16, 10, 2),
+  make_tuple(
+      &aom_highbd_fdct4x4_c, &highbd_wrapper<aom_highbd_idct4x4_16_add_c>,
+      &highbd_wrapper<aom_highbd_idct4x4_16_add_sse2>, TX_4X4, 16, 12, 2),
+#endif  // CONFIG_HIGHBITDEPTH
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_1024_add_sse2>, TX_32X32, 1024, 8, 1),
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_1024_add_sse2>, TX_32X32, 135, 8, 1),
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_34_add_sse2>, TX_32X32, 34, 8, 1),
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_1_add_sse2>, TX_32X32, 1, 8, 1),
+  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
+             &wrapper<aom_idct16x16_256_add_sse2>, TX_16X16, 256, 8, 1),
+  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
+             &wrapper<aom_idct16x16_10_add_sse2>, TX_16X16, 10, 8, 1),
+  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
+             &wrapper<aom_idct16x16_1_add_sse2>, TX_16X16, 1, 8, 1),
+  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
+             &wrapper<aom_idct8x8_64_add_sse2>, TX_8X8, 64, 8, 1),
+  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
+             &wrapper<aom_idct8x8_12_add_sse2>, TX_8X8, 12, 8, 1),
+  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
+             &wrapper<aom_idct8x8_1_add_sse2>, TX_8X8, 1, 8, 1),
+  make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>,
+             &wrapper<aom_idct4x4_16_add_sse2>, TX_4X4, 16, 8, 1),
+  make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>,
+             &wrapper<aom_idct4x4_1_add_sse2>, TX_4X4, 1, 8, 1)
+};
+
+INSTANTIATE_TEST_CASE_P(SSE2, PartialIDctTest,
+                        ::testing::ValuesIn(sse2_partial_idct_tests));
+
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3
+const PartialInvTxfmParam ssse3_partial_idct_tests[] = {  // C ref vs. SSSE3
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_1024_add_ssse3>, TX_32X32, 1024, 8, 1),
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_135_add_ssse3>, TX_32X32, 135, 8, 1),
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_34_add_ssse3>, TX_32X32, 34, 8, 1),
+  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
+             &wrapper<aom_idct8x8_64_add_ssse3>, TX_8X8, 64, 8, 1),
+  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
+             &wrapper<aom_idct8x8_12_add_ssse3>, TX_8X8, 12, 8, 1)
+};
+
+INSTANTIATE_TEST_CASE_P(SSSE3, PartialIDctTest,
+                        ::testing::ValuesIn(ssse3_partial_idct_tests));
+#endif  // HAVE_SSSE3
+
+#if HAVE_DSPR2 && !CONFIG_HIGHBITDEPTH
+const PartialInvTxfmParam dspr2_partial_idct_tests[] = {  // 135 reuses 1024 fn
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_1024_add_dspr2>, TX_32X32, 1024, 8, 1),
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_1024_add_dspr2>, TX_32X32, 135, 8, 1),
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_34_add_dspr2>, TX_32X32, 34, 8, 1),
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_1_add_dspr2>, TX_32X32, 1, 8, 1),
+  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
+             &wrapper<aom_idct16x16_256_add_dspr2>, TX_16X16, 256, 8, 1),
+  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
+             &wrapper<aom_idct16x16_10_add_dspr2>, TX_16X16, 10, 8, 1),
+  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
+             &wrapper<aom_idct16x16_1_add_dspr2>, TX_16X16, 1, 8, 1),
+  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
+             &wrapper<aom_idct8x8_64_add_dspr2>, TX_8X8, 64, 8, 1),
+  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
+             &wrapper<aom_idct8x8_12_add_dspr2>, TX_8X8, 12, 8, 1),
+  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
+             &wrapper<aom_idct8x8_1_add_dspr2>, TX_8X8, 1, 8, 1),
+  make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>,
+             &wrapper<aom_idct4x4_16_add_dspr2>, TX_4X4, 16, 8, 1),
+  make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>,
+             &wrapper<aom_idct4x4_1_add_dspr2>, TX_4X4, 1, 8, 1)
+};
+
+INSTANTIATE_TEST_CASE_P(DSPR2, PartialIDctTest,
+                        ::testing::ValuesIn(dspr2_partial_idct_tests));
+#endif  // HAVE_DSPR2 && !CONFIG_HIGHBITDEPTH
+
+#if HAVE_MSA && !CONFIG_HIGHBITDEPTH
+const PartialInvTxfmParam msa_partial_idct_tests[] = {  // 135 reuses 1024 fn
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_1024_add_msa>, TX_32X32, 1024, 8, 1),
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_1024_add_msa>, TX_32X32, 135, 8, 1),
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_34_add_msa>, TX_32X32, 34, 8, 1),
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_1_add_msa>, TX_32X32, 1, 8, 1),
+  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
+             &wrapper<aom_idct16x16_256_add_msa>, TX_16X16, 256, 8, 1),
+  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
+             &wrapper<aom_idct16x16_10_add_msa>, TX_16X16, 10, 8, 1),
+  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
+             &wrapper<aom_idct16x16_1_add_msa>, TX_16X16, 1, 8, 1),
+  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
+             &wrapper<aom_idct8x8_64_add_msa>, TX_8X8, 64, 8, 1),
+  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
+             &wrapper<aom_idct8x8_12_add_msa>, TX_8X8, 12, 8, 1),
+  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
+             &wrapper<aom_idct8x8_1_add_msa>, TX_8X8, 1, 8, 1),
+  make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>,
+             &wrapper<aom_idct4x4_16_add_msa>, TX_4X4, 16, 8, 1),
+  make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>,
+             &wrapper<aom_idct4x4_1_add_msa>, TX_4X4, 1, 8, 1)
+};
+
+INSTANTIATE_TEST_CASE_P(MSA, PartialIDctTest,
+                        ::testing::ValuesIn(msa_partial_idct_tests));
+#endif  // HAVE_MSA && !CONFIG_HIGHBITDEPTH
+
+}  // namespace
diff --git a/third_party/aom/test/quantize_test.cc b/third_party/aom/test/quantize_test.cc
new file mode 100644
index 000000000..4f61484a2
--- /dev/null
+++ b/third_party/aom/test/quantize_test.cc
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "vp8/common/blockd.h"
+#include "vp8/common/onyx.h"
+#include "vp8/encoder/block.h"
+#include "vp8/encoder/onyx_int.h"
+#include "vp8/encoder/quantize.h"
+#include "aom/aom_integer.h"
+#include "aom_mem/aom_mem.h"
+
+namespace {
+#if !CONFIG_AOM_QM
+
+const int kNumBlocks = 25;
+const int kNumBlockEntries = 16;
+
+typedef void (*VP8Quantize)(BLOCK *b, BLOCKD *d);
+
+typedef std::tr1::tuple<VP8Quantize, VP8Quantize> VP8QuantizeParam;
+
+using libaom_test::ACMRandom;
+using std::tr1::make_tuple;
+
+// Create and populate a VP8_COMP instance which has a complete set of
+// quantization inputs as well as a second MACROBLOCKD for output.
+class QuantizeTestBase {
+ public:
+  virtual ~QuantizeTestBase() {
+    vp8_remove_compressor(&vp8_comp_);
+    vp8_comp_ = NULL;
+    aom_free(macroblockd_dst_);
+    macroblockd_dst_ = NULL;
+    libaom_test::ClearSystemState();
+  }
+
+ protected:
+  void SetupCompressor() {
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    // Quantization tables are generated from a full VP8_CONFIG (zeroed here).
+    VP8_CONFIG vp8_config;
+    memset(&vp8_config, 0, sizeof(vp8_config));
+    vp8_comp_ = vp8_create_compressor(&vp8_config);
+
+    // Build tables for quantizer 0, then set up the mb's block/blockd pointers.
+    vp8_set_quantizer(vp8_comp_, 0);
+    vp8cx_frame_init_quantizer(vp8_comp_);
+
+    // The destination macroblockd starts as a copy of the reference one so it
+    // picks up the pre-set-up dequant values.
+    macroblockd_dst_ = reinterpret_cast<MACROBLOCKD *>(
+        aom_memalign(32, sizeof(*macroblockd_dst_)));
+    CopyReferenceMacroblockd();
+  }
+
+  // Switches the compressor to quantizer |q| and refreshes the destination.
+  void UpdateQuantizer(int q) {
+    vp8_set_quantizer(vp8_comp_, q);
+    CopyReferenceMacroblockd();
+  }
+
+  void FillCoeffConstant(int16_t c) {
+    const int num_coeffs = kNumBlocks * kNumBlockEntries;
+    for (int idx = 0; idx < num_coeffs; ++idx) vp8_comp_->mb.coeff[idx] = c;
+  }
+
+  void FillCoeffRandom() {
+    const int num_coeffs = kNumBlocks * kNumBlockEntries;
+    for (int idx = 0; idx < num_coeffs; ++idx) {
+      vp8_comp_->mb.coeff[idx] = rnd_.Rand8();
+    }
+  }
+
+  // Compares qcoeff, dqcoeff and eobs of the reference and destination.
+  void CheckOutput() {
+    const size_t num_coeffs = kNumBlocks * kNumBlockEntries;
+    EXPECT_EQ(0, memcmp(vp8_comp_->mb.e_mbd.qcoeff, macroblockd_dst_->qcoeff,
+                        sizeof(*macroblockd_dst_->qcoeff) * num_coeffs))
+        << "qcoeff mismatch";
+    EXPECT_EQ(0, memcmp(vp8_comp_->mb.e_mbd.dqcoeff, macroblockd_dst_->dqcoeff,
+                        sizeof(*macroblockd_dst_->dqcoeff) * num_coeffs))
+        << "dqcoeff mismatch";
+    EXPECT_EQ(0, memcmp(vp8_comp_->mb.e_mbd.eobs, macroblockd_dst_->eobs,
+                        sizeof(*macroblockd_dst_->eobs) * kNumBlocks))
+        << "eobs mismatch";
+  }
+
+  VP8_COMP *vp8_comp_;
+  MACROBLOCKD *macroblockd_dst_;
+
+ private:
+  // Re-copies the reference macroblockd and fixes its block pointers, which
+  // would otherwise still point to the blocks in the reference structure.
+  void CopyReferenceMacroblockd() {
+    memcpy(macroblockd_dst_, &vp8_comp_->mb.e_mbd, sizeof(*macroblockd_dst_));
+    vp8_setup_block_dptrs(macroblockd_dst_);
+  }
+
+  ACMRandom rnd_;
+};
+
+// Parameterized fixture: param 0 feeds asm_quant_, param 1 feeds c_quant_.
+class QuantizeTest : public QuantizeTestBase,
+                     public ::testing::TestWithParam<VP8QuantizeParam> {
+ protected:
+  virtual void SetUp() {
+    SetupCompressor();
+    asm_quant_ = GET_PARAM(0);
+    c_quant_ = GET_PARAM(1);
+  }
+
+  // Quantizes every block with both functions, then compares the outputs.
+  void RunComparison() {
+    for (int blk = 0; blk < kNumBlocks; ++blk) {
+      BLOCK *const src = &vp8_comp_->mb.block[blk];
+      ASM_REGISTER_STATE_CHECK(c_quant_(src, &vp8_comp_->mb.e_mbd.block[blk]));
+      ASM_REGISTER_STATE_CHECK(asm_quant_(src, &macroblockd_dst_->block[blk]));
+    }
+    CheckOutput();
+  }
+
+ private:
+  VP8Quantize asm_quant_;
+  VP8Quantize c_quant_;
+};
+
+TEST_P(QuantizeTest, TestZeroInput) {  // every coefficient set to 0
+  FillCoeffConstant(0);
+  RunComparison();
+}
+
+TEST_P(QuantizeTest, TestLargeNegativeInput) {
+  FillCoeffConstant(0);
+  // Generate a qcoeff which contains 512/-512 (0x0200/0xFE00) to catch issues
+  // like BUG=883 where the constant being compared was incorrectly initialized.
+  vp8_comp_->mb.coeff[0] = -8191;
+  RunComparison();
+}
+
+TEST_P(QuantizeTest, TestRandomInput) {  // coefficients come from Rand8()
+  FillCoeffRandom();
+  RunComparison();
+}
+
+TEST_P(QuantizeTest, TestMultipleQ) {  // sweep q over [0, QINDEX_RANGE)
+  for (int q = 0; q < QINDEX_RANGE; ++q) {
+    UpdateQuantizer(q);
+    FillCoeffRandom();
+    RunComparison();
+  }
+}
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, QuantizeTest,
+    ::testing::Values(
+        make_tuple(&vp8_fast_quantize_b_sse2, &vp8_fast_quantize_b_c),
+        make_tuple(&vp8_regular_quantize_b_sse2, &vp8_regular_quantize_b_c)));
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(SSSE3, QuantizeTest,
+                        ::testing::Values(make_tuple(&vp8_fast_quantize_b_ssse3,
+                                                     &vp8_fast_quantize_b_c)));
+#endif  // HAVE_SSSE3
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, QuantizeTest,
+    ::testing::Values(make_tuple(&vp8_regular_quantize_b_sse4_1,
+                                 &vp8_regular_quantize_b_c)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, QuantizeTest,
+                        ::testing::Values(make_tuple(&vp8_fast_quantize_b_neon,
+                                                     &vp8_fast_quantize_b_c)));
+#endif  // HAVE_NEON
+
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(
+    MSA, QuantizeTest,
+    ::testing::Values(
+        make_tuple(&vp8_fast_quantize_b_msa, &vp8_fast_quantize_b_c),
+        make_tuple(&vp8_regular_quantize_b_msa, &vp8_regular_quantize_b_c)));
+#endif  // HAVE_MSA
+#endif  // !CONFIG_AOM_QM
+}  // namespace
diff --git a/third_party/aom/test/realtime_test.cc b/third_party/aom/test/realtime_test.cc
new file mode 100644
index 000000000..ffe4a3146
--- /dev/null
+++ b/third_party/aom/test/realtime_test.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/video_source.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+const int kVideoSourceWidth = 320;
+const int kVideoSourceHeight = 240;
+const int kFramesToEncode = 2;
+
+class RealtimeTest
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWithParam<libaom_test::TestMode> {
+ protected:
+  RealtimeTest() : EncoderTest(GET_PARAM(0)), frame_packets_(0) {}
+  virtual ~RealtimeTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    cfg_.g_lag_in_frames = 0;
+    SetMode(::libaom_test::kRealTime);
+  }
+
+  virtual void BeginPassHook(unsigned int /*pass*/) {
+    // TODO(tomfinegan): We're changing the pass value here to make sure
+    // we get frames when real time mode is combined with |g_pass| set to
+    // AOM_RC_FIRST_PASS. This is necessary because EncoderTest::RunLoop() sets
+    // the pass value based on the mode passed into EncoderTest::SetMode(),
+    // which overrides the one specified in SetUp() above.
+    cfg_.g_pass = AOM_RC_FIRST_PASS;
+  }
+  virtual void FramePktHook(const aom_codec_cx_pkt_t * /*pkt*/) {
+    frame_packets_++;
+  }
+
+  int frame_packets_;
+};
+
+TEST_P(RealtimeTest, RealtimeFirstPassProducesFrames) {
+  ::libaom_test::RandomVideoSource video;
+  video.SetSize(kVideoSourceWidth, kVideoSourceHeight);
+  video.set_limit(kFramesToEncode);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  EXPECT_EQ(kFramesToEncode, frame_packets_);
+}
+
+AV1_INSTANTIATE_TEST_CASE(RealtimeTest,
+                          ::testing::Values(::libaom_test::kRealTime));
+
+}  // namespace
diff --git a/third_party/aom/test/register_state_check.h b/third_party/aom/test/register_state_check.h
new file mode 100644
index 000000000..330820173
--- /dev/null
+++ b/third_party/aom/test/register_state_check.h
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef TEST_REGISTER_STATE_CHECK_H_
+#define TEST_REGISTER_STATE_CHECK_H_
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "./aom_config.h"
+#include "aom/aom_integer.h"
+
+// ASM_REGISTER_STATE_CHECK(asm_function)
+//   Minimally validates the environment pre & post function execution. This
+//   variant should be used with assembly functions which are not expected to
+//   fully restore the system state. See platform implementations of
+//   RegisterStateCheck for details.
+//
+// API_REGISTER_STATE_CHECK(api_function)
+//   Performs all the checks done by ASM_REGISTER_STATE_CHECK() and any
+//   additional checks to ensure the environment is in a consistent state pre &
+//   post function execution. This variant should be used with API functions.
+//   See platform implementations of RegisterStateCheckXXX for details.
+//
+
+#if defined(_WIN64)
+
+#undef NOMINMAX
+#define NOMINMAX
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <winnt.h>
+
+inline bool operator==(const M128A &lhs, const M128A &rhs) {
+  return (lhs.Low == rhs.Low && lhs.High == rhs.High);
+}
+
+namespace libaom_test {
+
+// Compares the state of xmm[6-15] at construction with their state at
+// destruction. These registers should be preserved by the callee on
+// Windows x64.
+class RegisterStateCheck {
+ public:
+  RegisterStateCheck() { initialized_ = StoreRegisters(&pre_context_); }
+  ~RegisterStateCheck() { EXPECT_TRUE(Check()); }
+
+ private:
+  static bool StoreRegisters(CONTEXT *const context) {
+    const HANDLE this_thread = GetCurrentThread();
+    EXPECT_TRUE(this_thread != NULL);
+    context->ContextFlags = CONTEXT_FLOATING_POINT;
+    const bool context_saved = GetThreadContext(this_thread, context) == TRUE;
+    EXPECT_TRUE(context_saved) << "GetLastError: " << GetLastError();
+    return context_saved;
+  }
+
+  // Compares the register state. Returns true if the states match.
+  bool Check() const {
+    if (!initialized_) return false;
+    CONTEXT post_context;
+    if (!StoreRegisters(&post_context)) return false;
+
+    const M128A *xmm_pre = &pre_context_.Xmm6;
+    const M128A *xmm_post = &post_context.Xmm6;
+    for (int i = 6; i <= 15; ++i) {
+      EXPECT_EQ(*xmm_pre, *xmm_post) << "xmm" << i << " has been modified!";
+      ++xmm_pre;
+      ++xmm_post;
+    }
+    return !testing::Test::HasNonfatalFailure();
+  }
+
+  bool initialized_;
+  CONTEXT pre_context_;
+};
+
+#define ASM_REGISTER_STATE_CHECK(statement)    \
+  do {                                         \
+    libaom_test::RegisterStateCheck reg_check; \
+    statement;                                 \
+  } while (false)
+
+}  // namespace libaom_test
+
+#elif defined(CONFIG_SHARED) && defined(HAVE_NEON_ASM) && !CONFIG_SHARED && \
+    HAVE_NEON_ASM && CONFIG_AV1
+
+extern "C" {
+// Save the d8-d15 registers into store.
+void aom_push_neon(int64_t *store);
+}
+
+namespace libaom_test {
+
+// Compares the state of d8-d15 at construction with their state at
+// destruction. These registers should be preserved by the callee on
+// arm platform.
+class RegisterStateCheck {
+ public:
+  RegisterStateCheck() { initialized_ = StoreRegisters(pre_store_); }
+  ~RegisterStateCheck() { EXPECT_TRUE(Check()); }
+
+ private:
+  static bool StoreRegisters(int64_t store[8]) {
+    aom_push_neon(store);
+    return true;
+  }
+
+  // Compares the register state. Returns true if the states match.
+  bool Check() const {
+    if (!initialized_) return false;
+    int64_t post_store[8];
+    aom_push_neon(post_store);
+    for (int i = 0; i < 8; ++i) {
+      EXPECT_EQ(pre_store_[i], post_store[i]) << "d" << i + 8
+                                              << " has been modified";
+    }
+    return !testing::Test::HasNonfatalFailure();
+  }
+
+  bool initialized_;
+  int64_t pre_store_[8];
+};
+
+#define ASM_REGISTER_STATE_CHECK(statement)    \
+  do {                                         \
+    libaom_test::RegisterStateCheck reg_check; \
+    statement;                                 \
+  } while (false)
+
+}  // namespace libaom_test
+
+#else
+
+namespace libaom_test {
+
+class RegisterStateCheck {};
+#define ASM_REGISTER_STATE_CHECK(statement) statement
+
+}  // namespace libaom_test
+
+#endif  // _WIN64
+
+#if ARCH_X86 || ARCH_X86_64
+#if defined(__GNUC__)
+
+namespace libaom_test {
+
+// Checks the FPU tag word pre/post execution to ensure emms has been called.
+class RegisterStateCheckMMX {
+ public:
+  RegisterStateCheckMMX() {
+    __asm__ volatile("fstenv %0" : "=rm"(pre_fpu_env_));
+  }
+  ~RegisterStateCheckMMX() { EXPECT_TRUE(Check()); }
+
+ private:
+  // Checks the FPU tag word pre/post execution, returning false if not cleared
+  // to 0xffff.
+  bool Check() const {
+    EXPECT_EQ(0xffff, pre_fpu_env_[4])
+        << "FPU was in an inconsistent state prior to call";
+
+    uint16_t post_fpu_env[14];
+    __asm__ volatile("fstenv %0" : "=rm"(post_fpu_env));
+    EXPECT_EQ(0xffff, post_fpu_env[4])
+        << "FPU was left in an inconsistent state after call";
+    return !testing::Test::HasNonfatalFailure();
+  }
+
+  uint16_t pre_fpu_env_[14];
+};
+
+#define API_REGISTER_STATE_CHECK(statement)       \
+  do {                                            \
+    libaom_test::RegisterStateCheckMMX reg_check; \
+    ASM_REGISTER_STATE_CHECK(statement);          \
+  } while (false)
+
+}  // namespace libaom_test
+
+#endif  // __GNUC__
+#endif  // ARCH_X86 || ARCH_X86_64
+
+#ifndef API_REGISTER_STATE_CHECK
+#define API_REGISTER_STATE_CHECK ASM_REGISTER_STATE_CHECK
+#endif
+
+#endif  // TEST_REGISTER_STATE_CHECK_H_
diff --git a/third_party/aom/test/resize_test.cc b/third_party/aom/test/resize_test.cc
new file mode 100644
index 000000000..994b30117
--- /dev/null
+++ b/third_party/aom/test/resize_test.cc
@@ -0,0 +1,717 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <climits>
+#include <vector>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/video_source.h"
+#include "test/util.h"
+
+// Enable(1) or Disable(0) writing of the compressed bitstream.
+#define WRITE_COMPRESSED_STREAM 0
+
+namespace {
+
+#if WRITE_COMPRESSED_STREAM
+static void mem_put_le16(char *const mem, unsigned int val) {
+  mem[0] = val;
+  mem[1] = val >> 8;
+}
+
+static void mem_put_le32(char *const mem, unsigned int val) {
+  mem[0] = val;
+  mem[1] = val >> 8;
+  mem[2] = val >> 16;
+  mem[3] = val >> 24;
+}
+
+static void write_ivf_file_header(const aom_codec_enc_cfg_t *const cfg,
+                                  int frame_cnt, FILE *const outfile) {
+  char header[32];  // 32-byte IVF file header; fields are little-endian.
+
+  header[0] = 'D';
+  header[1] = 'K';
+  header[2] = 'I';
+  header[3] = 'F';
+  mem_put_le16(header + 4, 0);                    /* version */
+  mem_put_le16(header + 6, 32);                   /* headersize */
+  mem_put_le32(header + 8, 0x31305641);           /* fourcc "AV01" (av1) */
+  mem_put_le16(header + 12, cfg->g_w);            /* width */
+  mem_put_le16(header + 14, cfg->g_h);            /* height */
+  mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */
+  mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */
+  mem_put_le32(header + 24, frame_cnt);           /* length */
+  mem_put_le32(header + 28, 0);                   /* unused */
+
+  (void)fwrite(header, 1, 32, outfile);  // Write result intentionally ignored.
+}
+
+static void write_ivf_frame_size(FILE *const outfile, const size_t size) {
+  char header[4];
+  mem_put_le32(header, static_cast<unsigned int>(size));
+  (void)fwrite(header, 1, 4, outfile);
+}
+
+static void write_ivf_frame_header(const aom_codec_cx_pkt_t *const pkt,
+                                   FILE *const outfile) {
+  char header[12];
+  aom_codec_pts_t pts;
+
+  if (pkt->kind != AOM_CODEC_CX_FRAME_PKT) return;
+
+  pts = pkt->data.frame.pts;
+  mem_put_le32(header, static_cast<unsigned int>(pkt->data.frame.sz));
+  mem_put_le32(header + 4, pts & 0xFFFFFFFF);
+  mem_put_le32(header + 8, pts >> 32);
+
+  (void)fwrite(header, 1, 12, outfile);
+}
+#endif  // WRITE_COMPRESSED_STREAM
+
+const unsigned int kInitialWidth = 320;
+const unsigned int kInitialHeight = 240;
+
+struct FrameInfo {
+  FrameInfo(aom_codec_pts_t _pts, unsigned int _w, unsigned int _h)
+      : pts(_pts), w(_w), h(_h) {}
+
+  aom_codec_pts_t pts;
+  unsigned int w;
+  unsigned int h;
+};
+
+void ScaleForFrameNumber(unsigned int frame, unsigned int initial_w,
+                         unsigned int initial_h, unsigned int *w,
+                         unsigned int *h, int flag_codec) {
+  if (frame < 10) {
+    *w = initial_w;
+    *h = initial_h;
+    return;
+  }
+  if (frame < 20) {
+    *w = initial_w * 3 / 4;
+    *h = initial_h * 3 / 4;
+    return;
+  }
+  if (frame < 30) {
+    *w = initial_w / 2;
+    *h = initial_h / 2;
+    return;
+  }
+  if (frame < 40) {
+    *w = initial_w;
+    *h = initial_h;
+    return;
+  }
+  if (frame < 50) {
+    *w = initial_w * 3 / 4;
+    *h = initial_h * 3 / 4;
+    return;
+  }
+  if (frame < 60) {
+    *w = initial_w / 2;
+    *h = initial_h / 2;
+    return;
+  }
+  if (frame < 70) {
+    *w = initial_w;
+    *h = initial_h;
+    return;
+  }
+  if (frame < 80) {
+    *w = initial_w * 3 / 4;
+    *h = initial_h * 3 / 4;
+    return;
+  }
+  if (frame < 90) {
+    *w = initial_w / 2;
+    *h = initial_h / 2;
+    return;
+  }
+  if (frame < 100) {
+    *w = initial_w * 3 / 4;
+    *h = initial_h * 3 / 4;
+    return;
+  }
+  if (frame < 110) {
+    *w = initial_w;
+    *h = initial_h;
+    return;
+  }
+  if (frame < 120) {
+    *w = initial_w * 3 / 4;
+    *h = initial_h * 3 / 4;
+    return;
+  }
+  if (frame < 130) {
+    *w = initial_w / 2;
+    *h = initial_h / 2;
+    return;
+  }
+  if (frame < 140) {
+    *w = initial_w * 3 / 4;
+    *h = initial_h * 3 / 4;
+    return;
+  }
+  if (frame < 150) {
+    *w = initial_w;
+    *h = initial_h;
+    return;
+  }
+  if (frame < 160) {
+    *w = initial_w * 3 / 4;
+    *h = initial_h * 3 / 4;
+    return;
+  }
+  if (frame < 170) {
+    *w = initial_w / 2;
+    *h = initial_h / 2;
+    return;
+  }
+  if (frame < 180) {
+    *w = initial_w * 3 / 4;
+    *h = initial_h * 3 / 4;
+    return;
+  }
+  if (frame < 190) {
+    *w = initial_w;
+    *h = initial_h;
+    return;
+  }
+  if (frame < 200) {
+    *w = initial_w * 3 / 4;
+    *h = initial_h * 3 / 4;
+    return;
+  }
+  if (frame < 210) {
+    *w = initial_w / 2;
+    *h = initial_h / 2;
+    return;
+  }
+  if (frame < 220) {
+    *w = initial_w * 3 / 4;
+    *h = initial_h * 3 / 4;
+    return;
+  }
+  if (frame < 230) {
+    *w = initial_w;
+    *h = initial_h;
+    return;
+  }
+  if (frame < 240) {
+    *w = initial_w * 3 / 4;
+    *h = initial_h * 3 / 4;
+    return;
+  }
+  if (frame < 250) {
+    *w = initial_w / 2;
+    *h = initial_h / 2;
+    return;
+  }
+  if (frame < 260) {
+    *w = initial_w;
+    *h = initial_h;
+    return;
+  }
+  // Go down very low.
+  if (frame < 270) {
+    *w = initial_w / 4;
+    *h = initial_h / 4;
+    return;
+  }
+  if (flag_codec == 1) {
+    // Cases that only works for AV1.
+    // For AV1: Swap width and height of original.
+    if (frame < 320) {
+      *w = initial_h;
+      *h = initial_w;
+      return;
+    }
+  }
+  *w = initial_w;
+  *h = initial_h;
+}
+
+class ResizingVideoSource : public ::libaom_test::DummyVideoSource {
+ public:
+  ResizingVideoSource() : flag_codec_(0) {
+    SetSize(kInitialWidth, kInitialHeight);
+    limit_ = 350;
+  }
+  int flag_codec_;  // Forwarded to ScaleForFrameNumber(); defaults to 0.
+  virtual ~ResizingVideoSource() {}
+
+ protected:
+  virtual void Next() {
+    // Advance one frame and resize the source to that frame's dimensions.
+    ++frame_;
+    unsigned int width, height;
+    ScaleForFrameNumber(frame_, kInitialWidth, kInitialHeight, &width, &height,
+                        flag_codec_);
+    SetSize(width, height);
+    FillFrame();
+  }
+};
+
+class ResizeTest
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWithParam<libaom_test::TestMode> {
+ protected:
+  ResizeTest() : EncoderTest(GET_PARAM(0)) {}
+
+  virtual ~ResizeTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+  }
+
+  virtual void DecompressedFrameHook(const aom_image_t &img,
+                                     aom_codec_pts_t pts) {
+    frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
+  }
+
+  std::vector<FrameInfo> frame_info_list_;
+};
+
+TEST_P(ResizeTest, TestExternalResizeWorks) {
+  ResizingVideoSource video;
+  video.flag_codec_ = 0;
+  cfg_.g_lag_in_frames = 0;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+       info != frame_info_list_.end(); ++info) {
+    const unsigned int frame = static_cast<unsigned>(info->pts);
+    unsigned int expected_w;
+    unsigned int expected_h;
+    ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
+                        &expected_h, 0);
+    EXPECT_EQ(expected_w, info->w) << "Frame " << frame
+                                   << " had unexpected width";
+    EXPECT_EQ(expected_h, info->h) << "Frame " << frame
+                                   << " had unexpected height";
+  }
+}
+
+const unsigned int kStepDownFrame = 3;
+const unsigned int kStepUpFrame = 6;
+
+class ResizeInternalTest : public ResizeTest {
+ protected:
+#if WRITE_COMPRESSED_STREAM
+  ResizeInternalTest() : frame0_psnr_(0.0), change_config_(false),
+                         outfile_(NULL), out_frames_(0) {}
+#else
+  ResizeInternalTest() : frame0_psnr_(0.0), change_config_(false) {}
+#endif
+
+  virtual ~ResizeInternalTest() {}
+
+  virtual void BeginPassHook(unsigned int /*pass*/) {
+#if WRITE_COMPRESSED_STREAM
+    outfile_ = fopen("av10-2-05-resize.ivf", "wb");
+#endif
+  }
+
+  virtual void EndPassHook() {
+#if WRITE_COMPRESSED_STREAM
+    if (outfile_) {
+      if (!fseek(outfile_, 0, SEEK_SET))
+        write_ivf_file_header(&cfg_, out_frames_, outfile_);
+      fclose(outfile_);
+      outfile_ = NULL;
+    }
+#endif
+  }
+
+  virtual void PreEncodeFrameHook(libaom_test::VideoSource *video,
+                                  libaom_test::Encoder *encoder) {
+    if (change_config_) {
+      int new_q = 60;
+      if (video->frame() == 0) {
+        struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
+        encoder->Control(AOME_SET_SCALEMODE, &mode);
+      }
+      if (video->frame() == 1) {
+        struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
+        encoder->Control(AOME_SET_SCALEMODE, &mode);
+        cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = new_q;
+        encoder->Config(&cfg_);
+      }
+    } else {
+      if (video->frame() == kStepDownFrame) {
+        struct aom_scaling_mode mode = { AOME_FOURFIVE, AOME_THREEFIVE };
+        encoder->Control(AOME_SET_SCALEMODE, &mode);
+      }
+      if (video->frame() == kStepUpFrame) {
+        struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
+        encoder->Control(AOME_SET_SCALEMODE, &mode);
+      }
+    }
+  }
+
+  virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
+    if (frame0_psnr_ == 0.) frame0_psnr_ = pkt->data.psnr.psnr[0];
+    EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 2.0);
+  }
+
+#if WRITE_COMPRESSED_STREAM
+  virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
+    ++out_frames_;
+
+    // Write initial file header if first frame.
+    if (pkt->data.frame.pts == 0) write_ivf_file_header(&cfg_, 0, outfile_);
+
+    // Write frame header and data.
+    write_ivf_frame_header(pkt, outfile_);
+    (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_);
+  }
+#endif
+
+  double frame0_psnr_;  // Reference PSNR taken from the first PSNR packet.
+  bool change_config_;  // Selects the schedule used by PreEncodeFrameHook().
+#if WRITE_COMPRESSED_STREAM
+  FILE *outfile_;
+  unsigned int out_frames_;
+#endif
+};
+
+TEST_P(ResizeInternalTest, TestInternalResizeWorks) {
+  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 10);
+  init_flags_ = AOM_CODEC_USE_PSNR;
+  change_config_ = false;
+
+  // q picked such that initial keyframe on this clip is ~30dB PSNR
+  cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
+
+  // If the number of frames being encoded is smaller than g_lag_in_frames
+  // the encoded frame is unavailable using the current API. Comparing
+  // frames to detect mismatch would then not be possible. Set
+  // g_lag_in_frames = 0 to get around this.
+  cfg_.g_lag_in_frames = 0;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+       info != frame_info_list_.end(); ++info) {
+    const aom_codec_pts_t pts = info->pts;
+    if (pts >= kStepDownFrame && pts < kStepUpFrame) {
+      ASSERT_EQ(282U, info->w) << "Frame " << pts << " had unexpected width";
+      ASSERT_EQ(173U, info->h) << "Frame " << pts << " had unexpected height";
+    } else {
+      EXPECT_EQ(352U, info->w) << "Frame " << pts << " had unexpected width";
+      EXPECT_EQ(288U, info->h) << "Frame " << pts << " had unexpected height";
+    }
+  }
+}
+
+TEST_P(ResizeInternalTest, TestInternalResizeChangeConfig) {
+  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 10);
+  cfg_.g_w = 352;
+  cfg_.g_h = 288;
+  change_config_ = true;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+class ResizeRealtimeTest
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int> {
+ protected:
+  ResizeRealtimeTest() : EncoderTest(GET_PARAM(0)) {}
+  virtual ~ResizeRealtimeTest() {}
+
+  virtual void PreEncodeFrameHook(libaom_test::VideoSource *video,
+                                  libaom_test::Encoder *encoder) {
+    if (video->frame() == 0) {
+      encoder->Control(AV1E_SET_AQ_MODE, 3);
+      encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
+    }
+
+    if (change_bitrate_ && video->frame() == 120) {
+      change_bitrate_ = false;
+      cfg_.rc_target_bitrate = 500;
+      encoder->Config(&cfg_);
+    }
+  }
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+    set_cpu_used_ = GET_PARAM(2);
+  }
+
+  virtual void DecompressedFrameHook(const aom_image_t &img,
+                                     aom_codec_pts_t pts) {
+    frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
+  }
+
+  virtual void MismatchHook(const aom_image_t *img1, const aom_image_t *img2) {
+    double mismatch_psnr = compute_psnr(img1, img2);
+    mismatch_psnr_ += mismatch_psnr;
+    ++mismatch_nframes_;
+  }
+
+  unsigned int GetMismatchFrames() { return mismatch_nframes_; }
+
+  void DefaultConfig() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 600;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_min_quantizer = 2;
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_undershoot_pct = 50;
+    cfg_.rc_overshoot_pct = 50;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.kf_mode = AOM_KF_AUTO;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.kf_min_dist = cfg_.kf_max_dist = 3000;
+    // Enable dropped frames.
+    cfg_.rc_dropframe_thresh = 1;
+    // Enable error_resilience mode.
+    cfg_.g_error_resilient = 1;
+    // Enable dynamic resizing.
+    cfg_.rc_resize_allowed = 1;
+    // Run at low bitrate.
+    cfg_.rc_target_bitrate = 200;
+  }
+
+  std::vector<FrameInfo> frame_info_list_;
+  int set_cpu_used_;
+  bool change_bitrate_;
+  double mismatch_psnr_;
+  int mismatch_nframes_;
+};
+
+TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) {
+  ResizingVideoSource video;
+  video.flag_codec_ = 1;
+  DefaultConfig();
+  // Disable internal resize for this test.
+  cfg_.rc_resize_allowed = 0;
+  change_bitrate_ = false;
+  mismatch_psnr_ = 0.0;
+  mismatch_nframes_ = 0;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+       info != frame_info_list_.end(); ++info) {
+    const unsigned int frame = static_cast<unsigned>(info->pts);
+    unsigned int expected_w;
+    unsigned int expected_h;
+    ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
+                        &expected_h, 1);
+    EXPECT_EQ(expected_w, info->w) << "Frame " << frame
+                                   << " had unexpected width";
+    EXPECT_EQ(expected_h, info->h) << "Frame " << frame
+                                   << " had unexpected height";
+    EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+  }
+}
+
+// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
+// Run at low bitrate, with resize_allowed = 1, and verify that we get
+// one resize down event.
+TEST_P(ResizeRealtimeTest, TestInternalResizeDown) {
+  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 299);
+  DefaultConfig();
+  cfg_.g_w = 352;
+  cfg_.g_h = 288;
+  change_bitrate_ = false;
+  mismatch_psnr_ = 0.0;
+  mismatch_nframes_ = 0;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  unsigned int last_w = cfg_.g_w;
+  unsigned int last_h = cfg_.g_h;
+  int resize_count = 0;
+  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+       info != frame_info_list_.end(); ++info) {
+    if (info->w != last_w || info->h != last_h) {
+      // Verify that resize down occurs.
+      ASSERT_LT(info->w, last_w);
+      ASSERT_LT(info->h, last_h);
+      last_w = info->w;
+      last_h = info->h;
+      resize_count++;
+    }
+  }
+
+#if CONFIG_AV1_DECODER
+  // Verify that we get 1 resize down event in this test.
+  ASSERT_EQ(1, resize_count) << "Resizing should occur.";
+  EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+#else
+  printf("Warning: AV1 decoder unavailable, unable to check resize count!\n");
+#endif
+}
+
+// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
+// Start at low target bitrate, raise the bitrate in the middle of the clip,
+// scaling-up should occur after bitrate changed.
+TEST_P(ResizeRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
+  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 359);
+  DefaultConfig();
+  cfg_.g_w = 352;
+  cfg_.g_h = 288;
+  change_bitrate_ = true;
+  mismatch_psnr_ = 0.0;
+  mismatch_nframes_ = 0;
+  // Disable dropped frames.
+  cfg_.rc_dropframe_thresh = 0;
+  // Starting bitrate low.
+  cfg_.rc_target_bitrate = 80;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  unsigned int last_w = cfg_.g_w;
+  unsigned int last_h = cfg_.g_h;
+  int resize_count = 0;
+  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+       info != frame_info_list_.end(); ++info) {
+    if (info->w != last_w || info->h != last_h) {
+      resize_count++;
+      if (resize_count == 1) {
+        // Verify that resize down occurs.
+        ASSERT_LT(info->w, last_w);
+        ASSERT_LT(info->h, last_h);
+      } else if (resize_count == 2) {
+        // Verify that resize up occurs.
+        ASSERT_GT(info->w, last_w);
+        ASSERT_GT(info->h, last_h);
+      }
+      last_w = info->w;
+      last_h = info->h;
+    }
+  }
+
+#if CONFIG_AV1_DECODER
+  // Verify that we get 2 resize events in this test (expected value first).
+  ASSERT_EQ(2, resize_count) << "Resizing should occur twice.";
+  EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+#else
+  printf("Warning: AV1 decoder unavailable, unable to check resize count!\n");
+#endif
+}
+
+aom_img_fmt_t CspForFrameNumber(int frame) {
+  if (frame < 10) return AOM_IMG_FMT_I420;
+  if (frame < 20) return AOM_IMG_FMT_I444;
+  return AOM_IMG_FMT_I420;
+}
+
+class ResizeCspTest : public ResizeTest {
+ protected:
+#if WRITE_COMPRESSED_STREAM
+  ResizeCspTest()
+      : ResizeTest(), frame0_psnr_(0.0), outfile_(NULL), out_frames_(0) {}
+#else
+  ResizeCspTest() : ResizeTest(), frame0_psnr_(0.0) {}
+#endif
+
+  virtual ~ResizeCspTest() {}
+
+  virtual void BeginPassHook(unsigned int /*pass*/) {
+#if WRITE_COMPRESSED_STREAM
+    outfile_ = fopen("av11-2-05-cspchape.ivf", "wb");
+#endif
+  }
+
+  virtual void EndPassHook() {
+#if WRITE_COMPRESSED_STREAM
+    if (outfile_) {
+      if (!fseek(outfile_, 0, SEEK_SET))
+        write_ivf_file_header(&cfg_, out_frames_, outfile_);
+      fclose(outfile_);
+      outfile_ = NULL;
+    }
+#endif
+  }
+
+  virtual void PreEncodeFrameHook(libaom_test::VideoSource *video,
+                                  libaom_test::Encoder *encoder) {
+    if (CspForFrameNumber(video->frame()) != AOM_IMG_FMT_I420 &&
+        cfg_.g_profile != 1) {
+      cfg_.g_profile = 1;
+      encoder->Config(&cfg_);
+    }
+    if (CspForFrameNumber(video->frame()) == AOM_IMG_FMT_I420 &&
+        cfg_.g_profile != 0) {
+      cfg_.g_profile = 0;
+      encoder->Config(&cfg_);
+    }
+  }
+
+  virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
+    if (frame0_psnr_ == 0.) frame0_psnr_ = pkt->data.psnr.psnr[0];
+    EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 2.0);
+  }
+
+#if WRITE_COMPRESSED_STREAM
+  virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
+    ++out_frames_;
+
+    // Write initial file header if first frame.
+    if (pkt->data.frame.pts == 0) write_ivf_file_header(&cfg_, 0, outfile_);
+
+    // Write frame header and data.
+    write_ivf_frame_header(pkt, outfile_);
+    (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_);
+  }
+#endif
+
+  double frame0_psnr_;
+#if WRITE_COMPRESSED_STREAM
+  FILE *outfile_;
+  unsigned int out_frames_;
+#endif
+};
+
+class ResizingCspVideoSource : public ::libaom_test::DummyVideoSource {
+ public:
+  ResizingCspVideoSource() {
+    SetSize(kInitialWidth, kInitialHeight);
+    limit_ = 30;
+  }
+
+  virtual ~ResizingCspVideoSource() {}
+
+ protected:
+  virtual void Next() {
+    ++frame_;
+    SetImageFormat(CspForFrameNumber(frame_));
+    FillFrame();
+  }
+};
+
+TEST_P(ResizeCspTest, TestResizeCspWorks) {
+  ResizingCspVideoSource video;
+  init_flags_ = AOM_CODEC_USE_PSNR;
+  cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
+  cfg_.g_lag_in_frames = 0;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+AV1_INSTANTIATE_TEST_CASE(ResizeTest,
+                          ::testing::Values(::libaom_test::kRealTime));
+AV1_INSTANTIATE_TEST_CASE(ResizeInternalTest,
+                          ::testing::Values(::libaom_test::kOnePassBest));
+AV1_INSTANTIATE_TEST_CASE(ResizeRealtimeTest,
+                          ::testing::Values(::libaom_test::kRealTime),
+                          ::testing::Range(5, 9));
+AV1_INSTANTIATE_TEST_CASE(ResizeCspTest,
+                          ::testing::Values(::libaom_test::kRealTime));
+}  // namespace
diff --git a/third_party/aom/test/sad_test.cc b/third_party/aom/test/sad_test.cc
new file mode 100644
index 000000000..c3b5dac42
--- /dev/null
+++ b/third_party/aom/test/sad_test.cc
@@ -0,0 +1,1172 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <string.h>
+#include <limits.h>
+#include <stdio.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "./aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "aom/aom_codec.h"
+#include "aom_mem/aom_mem.h"
+#include "aom_ports/mem.h"
+
+typedef unsigned int (*SadMxNFunc)(const uint8_t *src_ptr, int src_stride,
+                                   const uint8_t *ref_ptr, int ref_stride);
+typedef std::tr1::tuple<int, int, SadMxNFunc, int> SadMxNParam;  // w, h, fn, bd
+
+typedef uint32_t (*SadMxNAvgFunc)(const uint8_t *src_ptr, int src_stride,
+                                  const uint8_t *ref_ptr, int ref_stride,
+                                  const uint8_t *second_pred);
+typedef std::tr1::tuple<int, int, SadMxNAvgFunc, int> SadMxNAvgParam;  // w, h, fn, bd
+
+typedef void (*SadMxNx4Func)(const uint8_t *src_ptr, int src_stride,
+                             const uint8_t *const ref_ptr[], int ref_stride,
+                             uint32_t *sad_array);
+typedef std::tr1::tuple<int, int, SadMxNx4Func, int> SadMxNx4Param;  // w, h, fn, bd
+
+using libaom_test::ACMRandom;
+
+namespace {
+class SADTestBase : public ::testing::Test {  // shared fixture for the SAD tests
+ public:
+  SADTestBase(int width, int height, int bit_depth)  // bit_depth -1: 8-bit path
+      : width_(width), height_(height), bd_(bit_depth) {}
+
+  static void SetUpTestCase() {  // allocates the shared, aligned buffers
+    source_data8_ = reinterpret_cast<uint8_t *>(
+        aom_memalign(kDataAlignment, kDataBlockSize));
+    reference_data8_ = reinterpret_cast<uint8_t *>(
+        aom_memalign(kDataAlignment, kDataBufferSize));
+    second_pred8_ =
+        reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128));
+    source_data16_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(kDataAlignment, kDataBlockSize * sizeof(uint16_t)));
+    reference_data16_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(kDataAlignment, kDataBufferSize * sizeof(uint16_t)));
+    second_pred16_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t)));
+  }
+
+  static void TearDownTestCase() {  // frees the buffers and nulls the pointers
+    aom_free(source_data8_);
+    source_data8_ = NULL;
+    aom_free(reference_data8_);
+    reference_data8_ = NULL;
+    aom_free(second_pred8_);
+    second_pred8_ = NULL;
+    aom_free(source_data16_);
+    source_data16_ = NULL;
+    aom_free(reference_data16_);
+    reference_data16_ = NULL;
+    aom_free(second_pred16_);
+    second_pred16_ = NULL;
+  }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  // Handle up to 4 128x128 blocks, with stride up to 256
+  static const int kDataAlignment = 16;
+  static const int kDataBlockSize = 128 * 256;
+  static const int kDataBufferSize = 4 * kDataBlockSize;
+
+  virtual void SetUp() {  // selects the 8-bit or 16-bit buffers from bd_
+    if (bd_ == -1) {
+      use_high_bit_depth_ = false;
+      bit_depth_ = AOM_BITS_8;
+      source_data_ = source_data8_;
+      reference_data_ = reference_data8_;
+      second_pred_ = second_pred8_;
+#if CONFIG_HIGHBITDEPTH
+    } else {
+      use_high_bit_depth_ = true;
+      bit_depth_ = static_cast<aom_bit_depth_t>(bd_);
+      source_data_ = CONVERT_TO_BYTEPTR(source_data16_);
+      reference_data_ = CONVERT_TO_BYTEPTR(reference_data16_);
+      second_pred_ = CONVERT_TO_BYTEPTR(second_pred16_);
+#endif  // CONFIG_HIGHBITDEPTH
+    }
+    mask_ = (1 << bit_depth_) - 1;  // largest sample value at this bit depth
+    source_stride_ = (width_ + 31) & ~31;  // width rounded up to multiple of 32
+    reference_stride_ = width_ * 2;
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+  }
+
+  virtual uint8_t *GetReference(int block_idx) {  // start of block |block_idx|
+#if CONFIG_HIGHBITDEPTH
+    if (use_high_bit_depth_)
+      return CONVERT_TO_BYTEPTR(CONVERT_TO_SHORTPTR(reference_data_) +
+                                block_idx * kDataBlockSize);
+#endif  // CONFIG_HIGHBITDEPTH
+    return reference_data_ + block_idx * kDataBlockSize;
+  }
+
+  // Reference Sum of Absolute Differences between the source block and
+  // reference block |block_idx|: per-pixel absolute differences, accumulated.
+  unsigned int ReferenceSAD(int block_idx) {
+    unsigned int sad = 0;
+    const uint8_t *const reference8 = GetReference(block_idx);
+    const uint8_t *const source8 = source_data_;
+#if CONFIG_HIGHBITDEPTH
+    const uint16_t *const reference16 =
+        CONVERT_TO_SHORTPTR(GetReference(block_idx));
+    const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
+#endif  // CONFIG_HIGHBITDEPTH
+    for (int h = 0; h < height_; ++h) {
+      for (int w = 0; w < width_; ++w) {
+        if (!use_high_bit_depth_) {
+          sad += abs(source8[h * source_stride_ + w] -
+                     reference8[h * reference_stride_ + w]);
+#if CONFIG_HIGHBITDEPTH
+        } else {
+          sad += abs(source16[h * source_stride_ + w] -
+                     reference16[h * reference_stride_ + w]);
+#endif  // CONFIG_HIGHBITDEPTH
+        }
+      }
+    }
+    return sad;
+  }
+
+  // Reference SAD-average for block |block_idx|: each reference pixel is
+  // averaged (rounding via ROUND_POWER_OF_TWO) with the matching second_pred_
+  // pixel, and the absolute difference from the source pixel is accumulated.
+  unsigned int ReferenceSADavg(int block_idx) {
+    unsigned int sad = 0;
+    const uint8_t *const reference8 = GetReference(block_idx);
+    const uint8_t *const source8 = source_data_;
+    const uint8_t *const second_pred8 = second_pred_;
+#if CONFIG_HIGHBITDEPTH
+    const uint16_t *const reference16 =
+        CONVERT_TO_SHORTPTR(GetReference(block_idx));
+    const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
+    const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_);
+#endif  // CONFIG_HIGHBITDEPTH
+    for (int h = 0; h < height_; ++h) {
+      for (int w = 0; w < width_; ++w) {
+        if (!use_high_bit_depth_) {
+          const int tmp = second_pred8[h * width_ + w] +  // stride is width_
+                          reference8[h * reference_stride_ + w];
+          const uint8_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
+          sad += abs(source8[h * source_stride_ + w] - comp_pred);
+#if CONFIG_HIGHBITDEPTH
+        } else {
+          const int tmp = second_pred16[h * width_ + w] +  // stride is width_
+                          reference16[h * reference_stride_ + w];
+          const uint16_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
+          sad += abs(source16[h * source_stride_ + w] - comp_pred);
+#endif  // CONFIG_HIGHBITDEPTH
+        }
+      }
+    }
+    return sad;
+  }
+
+  void FillConstant(uint8_t *data, int stride, uint16_t fill_constant) {
+    uint8_t *data8 = data;  // fills a width_ x height_ block with one value
+#if CONFIG_HIGHBITDEPTH
+    uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
+#endif  // CONFIG_HIGHBITDEPTH
+    for (int h = 0; h < height_; ++h) {
+      for (int w = 0; w < width_; ++w) {
+        if (!use_high_bit_depth_) {
+          data8[h * stride + w] = static_cast<uint8_t>(fill_constant);
+#if CONFIG_HIGHBITDEPTH
+        } else {
+          data16[h * stride + w] = fill_constant;
+#endif  // CONFIG_HIGHBITDEPTH
+        }
+      }
+    }
+  }
+
+  void FillRandom(uint8_t *data, int stride) {
+    uint8_t *data8 = data;  // fills a width_ x height_ block from rnd_
+#if CONFIG_HIGHBITDEPTH
+    uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
+#endif  // CONFIG_HIGHBITDEPTH
+    for (int h = 0; h < height_; ++h) {
+      for (int w = 0; w < width_; ++w) {
+        if (!use_high_bit_depth_) {
+          data8[h * stride + w] = rnd_.Rand8();
+#if CONFIG_HIGHBITDEPTH
+        } else {
+          data16[h * stride + w] = rnd_.Rand16() & mask_;  // clamp to bit depth
+#endif  // CONFIG_HIGHBITDEPTH
+        }
+      }
+    }
+  }
+
+  int width_, height_, mask_, bd_;  // bd_ == -1 selects the 8-bit path
+  aom_bit_depth_t bit_depth_;
+  static uint8_t *source_data_;     // active buffers, assigned in SetUp()
+  static uint8_t *reference_data_;
+  static uint8_t *second_pred_;
+  int source_stride_;
+  bool use_high_bit_depth_;
+  static uint8_t *source_data8_;    // backing storage from SetUpTestCase()
+  static uint8_t *reference_data8_;
+  static uint8_t *second_pred8_;
+  static uint16_t *source_data16_;
+  static uint16_t *reference_data16_;
+  static uint16_t *second_pred16_;
+  int reference_stride_;
+
+  ACMRandom rnd_;
+};
+
+class SADx4Test : public SADTestBase,
+                  public ::testing::WithParamInterface<SadMxNx4Param> {
+ public:
+  SADx4Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
+
+ protected:
+  // Runs the function under test (parameter 2) once; it receives pointers to
+  // reference blocks 0..3 and writes its four outputs into |results|.
+  void SADs(unsigned int *results) {
+    const uint8_t *refs[4];
+    for (int i = 0; i < 4; ++i) refs[i] = GetReference(i);
+    ASM_REGISTER_STATE_CHECK(GET_PARAM(2)(
+        source_data_, source_stride_, refs, reference_stride_, results));
+  }
+
+  // Expects each output to equal ReferenceSAD() for the same block index.
+  void CheckSADs() {
+    unsigned int actual[4];
+    SADs(actual);
+    for (int idx = 0; idx < 4; ++idx) {
+      const unsigned int expected = ReferenceSAD(idx);
+      EXPECT_EQ(expected, actual[idx]) << "block " << idx;
+    }
+  }
+};
+
+class SADTest : public SADTestBase,
+                public ::testing::WithParamInterface<SadMxNParam> {
+ public:
+  SADTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
+
+ protected:
+  // Calls the function under test (parameter 2) on the source block and on
+  // reference block |block_idx|, inside the register-state check.
+  unsigned int SAD(int block_idx) {
+    const uint8_t *const ref = GetReference(block_idx);
+    unsigned int sad;
+    ASM_REGISTER_STATE_CHECK(sad = GET_PARAM(2)(
+        source_data_, source_stride_, ref, reference_stride_));
+    return sad;
+  }
+
+  // Asserts that the function under test and ReferenceSAD() agree on
+  // block 0.
+  void CheckSAD() {
+    const unsigned int expected = ReferenceSAD(0);
+    const unsigned int actual = SAD(0);
+    ASSERT_EQ(expected, actual);
+  }
+
+  // Benchmark helper: evaluates SAD(0) twenty million times and discards
+  // every result.
+  void SpeedSAD() {
+    for (int remaining = 20000000; remaining > 0; --remaining) SAD(0);
+  }
+};
+
+class SADavgTest : public SADTestBase,
+                   public ::testing::WithParamInterface<SadMxNAvgParam> {
+ public:
+  SADavgTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
+
+ protected:
+  // Calls the function under test (parameter 2) on the source block, reference
+  // block |block_idx| and second_pred_, inside the register-state check.
+  unsigned int SAD_avg(int block_idx) {
+    const uint8_t *const ref = GetReference(block_idx);
+    unsigned int sad;
+    ASM_REGISTER_STATE_CHECK(sad = GET_PARAM(2)(
+        source_data_, source_stride_, ref, reference_stride_, second_pred_));
+    return sad;
+  }
+
+  // Asserts that the function under test matches ReferenceSADavg() on block 0.
+  void CheckSAD() {
+    const unsigned int expected = ReferenceSADavg(0);
+    const unsigned int actual = SAD_avg(0);
+    ASSERT_EQ(expected, actual);
+  }
+};
+
+uint8_t *SADTestBase::source_data_ = NULL;  // assigned in SetUp()
+uint8_t *SADTestBase::reference_data_ = NULL;
+uint8_t *SADTestBase::second_pred_ = NULL;
+uint8_t *SADTestBase::source_data8_ = NULL;  // allocated in SetUpTestCase()
+uint8_t *SADTestBase::reference_data8_ = NULL;
+uint8_t *SADTestBase::second_pred8_ = NULL;
+uint16_t *SADTestBase::source_data16_ = NULL;
+uint16_t *SADTestBase::reference_data16_ = NULL;
+uint16_t *SADTestBase::second_pred16_ = NULL;
+
+TEST_P(SADTest, MaxRef) {  // zero source against a maximum-valued reference
+  FillConstant(reference_data_, reference_stride_, mask_);
+  FillConstant(source_data_, source_stride_, 0);
+  CheckSAD();
+}
+
+TEST_P(SADTest, MaxSrc) {  // maximum-valued source against a zero reference
+  FillConstant(reference_data_, reference_stride_, 0);
+  FillConstant(source_data_, source_stride_, mask_);
+  CheckSAD();
+}
+
+TEST_P(SADTest, ShortRef) {  // reference stride halved before the check
+  const int saved_stride = reference_stride_;
+  reference_stride_ = saved_stride >> 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  CheckSAD();
+  reference_stride_ = saved_stride;
+}
+
+TEST_P(SADTest, UnalignedRef) {
+  // Certain searches use an unaligned reference frame (the source frame stays
+  // aligned), so run the check with the reference stride reduced by one.
+  const int saved_stride = reference_stride_;
+  --reference_stride_;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  CheckSAD();
+  reference_stride_ = saved_stride;
+}
+
+TEST_P(SADTest, ShortSrc) {
+  // Halve the source stride, then check 2000 freshly randomized
+  // source/reference pairs.
+  const int saved_stride = source_stride_;
+  source_stride_ = saved_stride >> 1;
+  for (int iter = 0; iter < 2000; ++iter) {
+    FillRandom(source_data_, source_stride_);
+    FillRandom(reference_data_, reference_stride_);
+    CheckSAD();
+  }
+  source_stride_ = saved_stride;
+}
+
+#define SPEED_TEST (0)
+#if SPEED_TEST
+TEST_P(SADTest, Speed) {  // timing run; compiled only when SPEED_TEST != 0
+  const int saved_stride = source_stride_;
+  source_stride_ = saved_stride >> 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  SpeedSAD();
+  source_stride_ = saved_stride;
+}
+#endif
+
+TEST_P(SADavgTest, MaxRef) {  // zero source and second_pred, max reference
+  FillConstant(second_pred_, width_, 0);
+  FillConstant(reference_data_, reference_stride_, mask_);
+  FillConstant(source_data_, source_stride_, 0);
+  CheckSAD();
+}
+TEST_P(SADavgTest, MaxSrc) {  // max source, zero reference and second_pred
+  FillConstant(second_pred_, width_, 0);
+  FillConstant(reference_data_, reference_stride_, 0);
+  FillConstant(source_data_, source_stride_, mask_);
+  CheckSAD();
+}
+
+TEST_P(SADavgTest, ShortRef) {  // reference stride halved before the check
+  const int saved_stride = reference_stride_;
+  reference_stride_ = saved_stride >> 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  FillRandom(second_pred_, width_);
+  CheckSAD();
+  reference_stride_ = saved_stride;
+}
+
+TEST_P(SADavgTest, UnalignedRef) {
+  // Certain searches use an unaligned reference frame (the source frame stays
+  // aligned), so run the check with the reference stride reduced by one.
+  const int saved_stride = reference_stride_;
+  --reference_stride_;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  FillRandom(second_pred_, width_);
+  CheckSAD();
+  reference_stride_ = saved_stride;
+}
+
+TEST_P(SADavgTest, ShortSrc) {
+  // Halve the source stride, then check 2000 freshly randomized
+  // source/reference/second_pred triples.
+  const int saved_stride = source_stride_;
+  source_stride_ = saved_stride >> 1;
+  for (int iter = 0; iter < 2000; ++iter) {
+    FillRandom(source_data_, source_stride_);
+    FillRandom(reference_data_, reference_stride_);
+    FillRandom(second_pred_, width_);
+    CheckSAD();
+  }
+  source_stride_ = saved_stride;
+}
+
+TEST_P(SADx4Test, MaxRef) {  // zero source, four maximum-valued references
+  FillConstant(source_data_, source_stride_, 0);
+  for (int block = 0; block < 4; ++block) {
+    FillConstant(GetReference(block), reference_stride_, mask_);
+  }
+  CheckSADs();
+}
+
+TEST_P(SADx4Test, MaxSrc) {  // maximum-valued source, four zero references
+  FillConstant(source_data_, source_stride_, mask_);
+  for (int block = 0; block < 4; ++block) {
+    FillConstant(GetReference(block), reference_stride_, 0);
+  }
+  CheckSADs();
+}
+
+TEST_P(SADx4Test, ShortRef) {
+  // Halve the reference stride, then randomize the source and all four blocks.
+  const int saved_stride = reference_stride_;
+  reference_stride_ = saved_stride >> 1;
+  FillRandom(source_data_, source_stride_);
+  for (int block = 0; block < 4; ++block) {
+    FillRandom(GetReference(block), reference_stride_);
+  }
+  CheckSADs();
+  reference_stride_ = saved_stride;
+}
+
+TEST_P(SADx4Test, UnalignedRef) {
+  // Certain searches use an unaligned reference frame (the source frame stays
+  // aligned), so run the check with the reference stride reduced by one.
+  const int saved_stride = reference_stride_;
+  --reference_stride_;
+  FillRandom(source_data_, source_stride_);
+  for (int block = 0; block < 4; ++block) {
+    FillRandom(GetReference(block), reference_stride_);
+  }
+  CheckSADs();
+  reference_stride_ = saved_stride;
+}
+
+TEST_P(SADx4Test, ShortSrc) {
+  // Halve the source stride, then check 1000 freshly randomized inputs
+  // (source first, then reference blocks 0..3 in order).
+  const int saved_stride = source_stride_;
+  source_stride_ = saved_stride >> 1;
+  for (int iter = 0; iter < 1000; ++iter) {
+    FillRandom(source_data_, source_stride_);
+    for (int block = 0; block < 4; ++block) {
+      FillRandom(GetReference(block), reference_stride_);
+    }
+    CheckSADs();
+  }
+  // Put the original source stride back.
+  source_stride_ = saved_stride;
+}
+
+TEST_P(SADx4Test, SrcAlignedByWidth) {
+  // Advance source_data_ by width_ before filling and checking; restore after.
+  uint8_t *const saved_source = source_data_;
+  source_data_ = saved_source + width_;
+  FillRandom(source_data_, source_stride_);
+  for (int block = 0; block < 4; ++block) {
+    FillRandom(GetReference(block), reference_stride_);
+  }
+  CheckSADs();
+  source_data_ = saved_source;
+}
+
+using std::tr1::make_tuple;
+
+//------------------------------------------------------------------------------
+// C functions
+const SadMxNParam c_tests[] = {  // (width, height, fn, bit depth; -1 = 8-bit)
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(128, 128, &aom_sad128x128_c, -1),
+  make_tuple(128, 64, &aom_sad128x64_c, -1),
+  make_tuple(64, 128, &aom_sad64x128_c, -1),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(64, 64, &aom_sad64x64_c, -1),
+  make_tuple(64, 32, &aom_sad64x32_c, -1),
+  make_tuple(32, 64, &aom_sad32x64_c, -1),
+  make_tuple(32, 32, &aom_sad32x32_c, -1),
+  make_tuple(32, 16, &aom_sad32x16_c, -1),
+  make_tuple(16, 32, &aom_sad16x32_c, -1),
+  make_tuple(16, 16, &aom_sad16x16_c, -1),
+  make_tuple(16, 8, &aom_sad16x8_c, -1),
+  make_tuple(8, 16, &aom_sad8x16_c, -1),
+  make_tuple(8, 8, &aom_sad8x8_c, -1),
+  make_tuple(8, 4, &aom_sad8x4_c, -1),
+  make_tuple(4, 8, &aom_sad4x8_c, -1),
+  make_tuple(4, 4, &aom_sad4x4_c, -1),
+#if CONFIG_HIGHBITDEPTH
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(128, 128, &aom_highbd_sad128x128_c, 8),
+  make_tuple(128, 64, &aom_highbd_sad128x64_c, 8),
+  make_tuple(64, 128, &aom_highbd_sad64x128_c, 8),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(64, 64, &aom_highbd_sad64x64_c, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32_c, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64_c, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32_c, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16_c, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32_c, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16_c, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8_c, 8),
+  make_tuple(8, 16, &aom_highbd_sad8x16_c, 8),
+  make_tuple(8, 8, &aom_highbd_sad8x8_c, 8),
+  make_tuple(8, 4, &aom_highbd_sad8x4_c, 8),
+  make_tuple(4, 8, &aom_highbd_sad4x8_c, 8),
+  make_tuple(4, 4, &aom_highbd_sad4x4_c, 8),
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(128, 128, &aom_highbd_sad128x128_c, 10),
+  make_tuple(128, 64, &aom_highbd_sad128x64_c, 10),
+  make_tuple(64, 128, &aom_highbd_sad64x128_c, 10),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(64, 64, &aom_highbd_sad64x64_c, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32_c, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64_c, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32_c, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16_c, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32_c, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16_c, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8_c, 10),
+  make_tuple(8, 16, &aom_highbd_sad8x16_c, 10),
+  make_tuple(8, 8, &aom_highbd_sad8x8_c, 10),
+  make_tuple(8, 4, &aom_highbd_sad8x4_c, 10),
+  make_tuple(4, 8, &aom_highbd_sad4x8_c, 10),
+  make_tuple(4, 4, &aom_highbd_sad4x4_c, 10),
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(128, 128, &aom_highbd_sad128x128_c, 12),
+  make_tuple(128, 64, &aom_highbd_sad128x64_c, 12),
+  make_tuple(64, 128, &aom_highbd_sad64x128_c, 12),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(64, 64, &aom_highbd_sad64x64_c, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32_c, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64_c, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32_c, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16_c, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32_c, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16_c, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8_c, 12),
+  make_tuple(8, 16, &aom_highbd_sad8x16_c, 12),
+  make_tuple(8, 8, &aom_highbd_sad8x8_c, 12),
+  make_tuple(8, 4, &aom_highbd_sad8x4_c, 12),
+  make_tuple(4, 8, &aom_highbd_sad4x8_c, 12),
+  make_tuple(4, 4, &aom_highbd_sad4x4_c, 12),
+#endif  // CONFIG_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::ValuesIn(c_tests));
+
+const SadMxNAvgParam avg_c_tests[] = {  // (width, height, fn, bit depth; -1 = 8-bit)
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(128, 128, &aom_sad128x128_avg_c, -1),
+  make_tuple(128, 64, &aom_sad128x64_avg_c, -1),
+  make_tuple(64, 128, &aom_sad64x128_avg_c, -1),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(64, 64, &aom_sad64x64_avg_c, -1),
+  make_tuple(64, 32, &aom_sad64x32_avg_c, -1),
+  make_tuple(32, 64, &aom_sad32x64_avg_c, -1),
+  make_tuple(32, 32, &aom_sad32x32_avg_c, -1),
+  make_tuple(32, 16, &aom_sad32x16_avg_c, -1),
+  make_tuple(16, 32, &aom_sad16x32_avg_c, -1),
+  make_tuple(16, 16, &aom_sad16x16_avg_c, -1),
+  make_tuple(16, 8, &aom_sad16x8_avg_c, -1),
+  make_tuple(8, 16, &aom_sad8x16_avg_c, -1),
+  make_tuple(8, 8, &aom_sad8x8_avg_c, -1),
+  make_tuple(8, 4, &aom_sad8x4_avg_c, -1),
+  make_tuple(4, 8, &aom_sad4x8_avg_c, -1),
+  make_tuple(4, 4, &aom_sad4x4_avg_c, -1),
+#if CONFIG_HIGHBITDEPTH
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(128, 128, &aom_highbd_sad128x128_avg_c, 8),
+  make_tuple(128, 64, &aom_highbd_sad128x64_avg_c, 8),
+  make_tuple(64, 128, &aom_highbd_sad64x128_avg_c, 8),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(64, 64, &aom_highbd_sad64x64_avg_c, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avg_c, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avg_c, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avg_c, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avg_c, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avg_c, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avg_c, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avg_c, 8),
+  make_tuple(8, 16, &aom_highbd_sad8x16_avg_c, 8),
+  make_tuple(8, 8, &aom_highbd_sad8x8_avg_c, 8),
+  make_tuple(8, 4, &aom_highbd_sad8x4_avg_c, 8),
+  make_tuple(4, 8, &aom_highbd_sad4x8_avg_c, 8),
+  make_tuple(4, 4, &aom_highbd_sad4x4_avg_c, 8),
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(128, 128, &aom_highbd_sad128x128_avg_c, 10),
+  make_tuple(128, 64, &aom_highbd_sad128x64_avg_c, 10),
+  make_tuple(64, 128, &aom_highbd_sad64x128_avg_c, 10),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(64, 64, &aom_highbd_sad64x64_avg_c, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avg_c, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avg_c, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avg_c, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avg_c, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avg_c, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avg_c, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avg_c, 10),
+  make_tuple(8, 16, &aom_highbd_sad8x16_avg_c, 10),
+  make_tuple(8, 8, &aom_highbd_sad8x8_avg_c, 10),
+  make_tuple(8, 4, &aom_highbd_sad8x4_avg_c, 10),
+  make_tuple(4, 8, &aom_highbd_sad4x8_avg_c, 10),
+  make_tuple(4, 4, &aom_highbd_sad4x4_avg_c, 10),
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(128, 128, &aom_highbd_sad128x128_avg_c, 12),
+  make_tuple(128, 64, &aom_highbd_sad128x64_avg_c, 12),
+  make_tuple(64, 128, &aom_highbd_sad64x128_avg_c, 12),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(64, 64, &aom_highbd_sad64x64_avg_c, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avg_c, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avg_c, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avg_c, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avg_c, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avg_c, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avg_c, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avg_c, 12),
+  make_tuple(8, 16, &aom_highbd_sad8x16_avg_c, 12),
+  make_tuple(8, 8, &aom_highbd_sad8x8_avg_c, 12),
+  make_tuple(8, 4, &aom_highbd_sad8x4_avg_c, 12),
+  make_tuple(4, 8, &aom_highbd_sad4x8_avg_c, 12),
+  make_tuple(4, 4, &aom_highbd_sad4x4_avg_c, 12),
+#endif  // CONFIG_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_CASE_P(C, SADavgTest, ::testing::ValuesIn(avg_c_tests));
+
+const SadMxNx4Param x4d_c_tests[] = {  // (width, height, fn, bit depth; -1 = 8-bit)
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(128, 128, &aom_sad128x128x4d_c, -1),
+  make_tuple(128, 64, &aom_sad128x64x4d_c, -1),
+  make_tuple(64, 128, &aom_sad64x128x4d_c, -1),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(64, 64, &aom_sad64x64x4d_c, -1),
+  make_tuple(64, 32, &aom_sad64x32x4d_c, -1),
+  make_tuple(32, 64, &aom_sad32x64x4d_c, -1),
+  make_tuple(32, 32, &aom_sad32x32x4d_c, -1),
+  make_tuple(32, 16, &aom_sad32x16x4d_c, -1),
+  make_tuple(16, 32, &aom_sad16x32x4d_c, -1),
+  make_tuple(16, 16, &aom_sad16x16x4d_c, -1),
+  make_tuple(16, 8, &aom_sad16x8x4d_c, -1),
+  make_tuple(8, 16, &aom_sad8x16x4d_c, -1),
+  make_tuple(8, 8, &aom_sad8x8x4d_c, -1),
+  make_tuple(8, 4, &aom_sad8x4x4d_c, -1),
+  make_tuple(4, 8, &aom_sad4x8x4d_c, -1),
+  make_tuple(4, 4, &aom_sad4x4x4d_c, -1),
+#if CONFIG_HIGHBITDEPTH
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(128, 128, &aom_highbd_sad128x128x4d_c, 8),
+  make_tuple(128, 64, &aom_highbd_sad128x64x4d_c, 8),
+  make_tuple(64, 128, &aom_highbd_sad64x128x4d_c, 8),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(64, 64, &aom_highbd_sad64x64x4d_c, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32x4d_c, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64x4d_c, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32x4d_c, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16x4d_c, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32x4d_c, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16x4d_c, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8x4d_c, 8),
+  make_tuple(8, 16, &aom_highbd_sad8x16x4d_c, 8),
+  make_tuple(8, 8, &aom_highbd_sad8x8x4d_c, 8),
+  make_tuple(8, 4, &aom_highbd_sad8x4x4d_c, 8),
+  make_tuple(4, 8, &aom_highbd_sad4x8x4d_c, 8),
+  make_tuple(4, 4, &aom_highbd_sad4x4x4d_c, 8),
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(128, 128, &aom_highbd_sad128x128x4d_c, 10),
+  make_tuple(128, 64, &aom_highbd_sad128x64x4d_c, 10),
+  make_tuple(64, 128, &aom_highbd_sad64x128x4d_c, 10),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(64, 64, &aom_highbd_sad64x64x4d_c, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32x4d_c, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64x4d_c, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32x4d_c, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16x4d_c, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32x4d_c, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16x4d_c, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8x4d_c, 10),
+  make_tuple(8, 16, &aom_highbd_sad8x16x4d_c, 10),
+  make_tuple(8, 8, &aom_highbd_sad8x8x4d_c, 10),
+  make_tuple(8, 4, &aom_highbd_sad8x4x4d_c, 10),
+  make_tuple(4, 8, &aom_highbd_sad4x8x4d_c, 10),
+  make_tuple(4, 4, &aom_highbd_sad4x4x4d_c, 10),
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(128, 128, &aom_highbd_sad128x128x4d_c, 12),
+  make_tuple(128, 64, &aom_highbd_sad128x64x4d_c, 12),
+  make_tuple(64, 128, &aom_highbd_sad64x128x4d_c, 12),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(64, 64, &aom_highbd_sad64x64x4d_c, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32x4d_c, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64x4d_c, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32x4d_c, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16x4d_c, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32x4d_c, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16x4d_c, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8x4d_c, 12),
+  make_tuple(8, 16, &aom_highbd_sad8x16x4d_c, 12),
+  make_tuple(8, 8, &aom_highbd_sad8x8x4d_c, 12),
+  make_tuple(8, 4, &aom_highbd_sad8x4x4d_c, 12),
+  make_tuple(4, 8, &aom_highbd_sad4x8x4d_c, 12),
+  make_tuple(4, 4, &aom_highbd_sad4x4x4d_c, 12),
+#endif  // CONFIG_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::ValuesIn(x4d_c_tests));
+
+//------------------------------------------------------------------------------
+// ARM functions
+#if HAVE_MEDIA
+const SadMxNParam media_tests[] = {  // (width, height, fn, bit depth; -1 = 8-bit)
+  make_tuple(16, 16, &aom_sad16x16_media, -1),
+};
+INSTANTIATE_TEST_CASE_P(MEDIA, SADTest, ::testing::ValuesIn(media_tests));
+#endif  // HAVE_MEDIA
+
+#if HAVE_NEON
+const SadMxNParam neon_tests[] = {  // (width, height, fn, bit depth; -1 = 8-bit)
+  make_tuple(64, 64, &aom_sad64x64_neon, -1),
+  make_tuple(32, 32, &aom_sad32x32_neon, -1),
+  make_tuple(16, 16, &aom_sad16x16_neon, -1),
+  make_tuple(16, 8, &aom_sad16x8_neon, -1),
+  make_tuple(8, 16, &aom_sad8x16_neon, -1),
+  make_tuple(8, 8, &aom_sad8x8_neon, -1),
+  make_tuple(4, 4, &aom_sad4x4_neon, -1),
+};
+INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::ValuesIn(neon_tests));
+
+const SadMxNx4Param x4d_neon_tests[] = {  // (width, height, fn, bit depth; -1 = 8-bit)
+  make_tuple(64, 64, &aom_sad64x64x4d_neon, -1),
+  make_tuple(32, 32, &aom_sad32x32x4d_neon, -1),
+  make_tuple(16, 16, &aom_sad16x16x4d_neon, -1),
+};
+INSTANTIATE_TEST_CASE_P(NEON, SADx4Test, ::testing::ValuesIn(x4d_neon_tests));
+#endif  // HAVE_NEON
+
+//------------------------------------------------------------------------------
+// x86 functions
+#if HAVE_SSE2
+const SadMxNParam sse2_tests[] = {  // (width, height, fn, bit depth; -1 = 8-bit)
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(128, 128, &aom_sad128x128_sse2, -1),
+  make_tuple(128, 64, &aom_sad128x64_sse2, -1),
+  make_tuple(64, 128, &aom_sad64x128_sse2, -1),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(64, 64, &aom_sad64x64_sse2, -1),
+  make_tuple(64, 32, &aom_sad64x32_sse2, -1),
+  make_tuple(32, 64, &aom_sad32x64_sse2, -1),
+  make_tuple(32, 32, &aom_sad32x32_sse2, -1),
+  make_tuple(32, 16, &aom_sad32x16_sse2, -1),
+  make_tuple(16, 32, &aom_sad16x32_sse2, -1),
+  make_tuple(16, 16, &aom_sad16x16_sse2, -1),
+  make_tuple(16, 8, &aom_sad16x8_sse2, -1),
+  make_tuple(8, 16, &aom_sad8x16_sse2, -1),
+  make_tuple(8, 8, &aom_sad8x8_sse2, -1),
+  make_tuple(8, 4, &aom_sad8x4_sse2, -1),
+  make_tuple(4, 8, &aom_sad4x8_sse2, -1),
+  make_tuple(4, 4, &aom_sad4x4_sse2, -1),
+#if CONFIG_HIGHBITDEPTH
+  make_tuple(64, 64, &aom_highbd_sad64x64_sse2, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32_sse2, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64_sse2, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32_sse2, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16_sse2, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32_sse2, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16_sse2, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8_sse2, 8),
+  make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 8),
+  make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 8),
+  make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 8),
+  make_tuple(64, 64, &aom_highbd_sad64x64_sse2, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32_sse2, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64_sse2, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32_sse2, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16_sse2, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32_sse2, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16_sse2, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8_sse2, 10),
+  make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 10),
+  make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 10),
+  make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 10),
+  make_tuple(64, 64, &aom_highbd_sad64x64_sse2, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32_sse2, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64_sse2, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32_sse2, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16_sse2, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32_sse2, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16_sse2, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8_sse2, 12),
+  make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 12),
+  make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 12),
+  make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 12),
+#endif  // CONFIG_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
+
+const SadMxNAvgParam avg_sse2_tests[] = {  // SSE2 aom_*sadWxH_avg functions
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION  // block sizes with a 128 dimension
+  make_tuple(128, 128, &aom_sad128x128_avg_sse2, -1),
+  make_tuple(128, 64, &aom_sad128x64_avg_sse2, -1),
+  make_tuple(64, 128, &aom_sad64x128_avg_sse2, -1),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(64, 64, &aom_sad64x64_avg_sse2, -1),
+  make_tuple(64, 32, &aom_sad64x32_avg_sse2, -1),
+  make_tuple(32, 64, &aom_sad32x64_avg_sse2, -1),
+  make_tuple(32, 32, &aom_sad32x32_avg_sse2, -1),
+  make_tuple(32, 16, &aom_sad32x16_avg_sse2, -1),
+  make_tuple(16, 32, &aom_sad16x32_avg_sse2, -1),
+  make_tuple(16, 16, &aom_sad16x16_avg_sse2, -1),
+  make_tuple(16, 8, &aom_sad16x8_avg_sse2, -1),
+  make_tuple(8, 16, &aom_sad8x16_avg_sse2, -1),
+  make_tuple(8, 8, &aom_sad8x8_avg_sse2, -1),
+  make_tuple(8, 4, &aom_sad8x4_avg_sse2, -1),
+  make_tuple(4, 8, &aom_sad4x8_avg_sse2, -1),
+  make_tuple(4, 4, &aom_sad4x4_avg_sse2, -1),
+#if CONFIG_HIGHBITDEPTH  // highbd functions: last field is 8, 10 or 12
+  make_tuple(64, 64, &aom_highbd_sad64x64_avg_sse2, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avg_sse2, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avg_sse2, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avg_sse2, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avg_sse2, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avg_sse2, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avg_sse2, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avg_sse2, 8),
+  make_tuple(8, 16, &aom_highbd_sad8x16_avg_sse2, 8),
+  make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 8),
+  make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 8),
+  make_tuple(64, 64, &aom_highbd_sad64x64_avg_sse2, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avg_sse2, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avg_sse2, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avg_sse2, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avg_sse2, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avg_sse2, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avg_sse2, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avg_sse2, 10),
+  make_tuple(8, 16, &aom_highbd_sad8x16_avg_sse2, 10),
+  make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 10),
+  make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 10),
+  make_tuple(64, 64, &aom_highbd_sad64x64_avg_sse2, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avg_sse2, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avg_sse2, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avg_sse2, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avg_sse2, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avg_sse2, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avg_sse2, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avg_sse2, 12),
+  make_tuple(8, 16, &aom_highbd_sad8x16_avg_sse2, 12),
+  make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 12),
+  make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 12),
+#endif  // CONFIG_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_CASE_P(SSE2, SADavgTest, ::testing::ValuesIn(avg_sse2_tests));
+
+const SadMxNx4Param x4d_sse2_tests[] = {  // SSE2 aom_*sadWxHx4d functions
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION  // block sizes with a 128 dimension
+  make_tuple(128, 128, &aom_sad128x128x4d_sse2, -1),
+  make_tuple(128, 64, &aom_sad128x64x4d_sse2, -1),
+  make_tuple(64, 128, &aom_sad64x128x4d_sse2, -1),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(64, 64, &aom_sad64x64x4d_sse2, -1),
+  make_tuple(64, 32, &aom_sad64x32x4d_sse2, -1),
+  make_tuple(32, 64, &aom_sad32x64x4d_sse2, -1),
+  make_tuple(32, 32, &aom_sad32x32x4d_sse2, -1),
+  make_tuple(32, 16, &aom_sad32x16x4d_sse2, -1),
+  make_tuple(16, 32, &aom_sad16x32x4d_sse2, -1),
+  make_tuple(16, 16, &aom_sad16x16x4d_sse2, -1),
+  make_tuple(16, 8, &aom_sad16x8x4d_sse2, -1),
+  make_tuple(8, 16, &aom_sad8x16x4d_sse2, -1),
+  make_tuple(8, 8, &aom_sad8x8x4d_sse2, -1),
+  make_tuple(8, 4, &aom_sad8x4x4d_sse2, -1),
+  make_tuple(4, 8, &aom_sad4x8x4d_sse2, -1),
+  make_tuple(4, 4, &aom_sad4x4x4d_sse2, -1),
+#if CONFIG_HIGHBITDEPTH  // highbd functions: last field is 8, 10 or 12
+  make_tuple(64, 64, &aom_highbd_sad64x64x4d_sse2, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32x4d_sse2, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64x4d_sse2, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32x4d_sse2, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16x4d_sse2, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32x4d_sse2, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16x4d_sse2, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8x4d_sse2, 8),
+  make_tuple(8, 16, &aom_highbd_sad8x16x4d_sse2, 8),
+  make_tuple(8, 8, &aom_highbd_sad8x8x4d_sse2, 8),
+  make_tuple(8, 4, &aom_highbd_sad8x4x4d_sse2, 8),
+  make_tuple(4, 8, &aom_highbd_sad4x8x4d_sse2, 8),
+  make_tuple(4, 4, &aom_highbd_sad4x4x4d_sse2, 8),
+  make_tuple(64, 64, &aom_highbd_sad64x64x4d_sse2, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32x4d_sse2, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64x4d_sse2, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32x4d_sse2, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16x4d_sse2, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32x4d_sse2, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16x4d_sse2, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8x4d_sse2, 10),
+  make_tuple(8, 16, &aom_highbd_sad8x16x4d_sse2, 10),
+  make_tuple(8, 8, &aom_highbd_sad8x8x4d_sse2, 10),
+  make_tuple(8, 4, &aom_highbd_sad8x4x4d_sse2, 10),
+  make_tuple(4, 8, &aom_highbd_sad4x8x4d_sse2, 10),
+  make_tuple(4, 4, &aom_highbd_sad4x4x4d_sse2, 10),
+  make_tuple(64, 64, &aom_highbd_sad64x64x4d_sse2, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32x4d_sse2, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64x4d_sse2, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32x4d_sse2, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16x4d_sse2, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32x4d_sse2, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16x4d_sse2, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8x4d_sse2, 12),
+  make_tuple(8, 16, &aom_highbd_sad8x16x4d_sse2, 12),
+  make_tuple(8, 8, &aom_highbd_sad8x8x4d_sse2, 12),
+  make_tuple(8, 4, &aom_highbd_sad8x4x4d_sse2, 12),
+  make_tuple(4, 8, &aom_highbd_sad4x8x4d_sse2, 12),
+  make_tuple(4, 4, &aom_highbd_sad4x4x4d_sse2, 12),
+#endif  // CONFIG_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests));
+#endif  // HAVE_SSE2
+
+#if HAVE_SSE3
+// Only functions are x3, which do not have tests.
+#endif  // HAVE_SSE3
+
+#if HAVE_SSSE3
+// Only functions are x3, which do not have tests.
+#endif  // HAVE_SSSE3
+
+#if HAVE_SSE4_1
+// Only functions are x8, which do not have tests.
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+const SadMxNParam avx2_tests[] = {  // AVX2 aom_*sadWxH functions
+#if CONFIG_EXT_PARTITION  // block sizes with a 128 dimension
+  make_tuple(64, 128, &aom_sad64x128_avx2, -1),
+  make_tuple(128, 64, &aom_sad128x64_avx2, -1),
+  make_tuple(128, 128, &aom_sad128x128_avx2, -1),
+#endif  // CONFIG_EXT_PARTITION
+  make_tuple(64, 64, &aom_sad64x64_avx2, -1),
+  make_tuple(64, 32, &aom_sad64x32_avx2, -1),
+  make_tuple(32, 64, &aom_sad32x64_avx2, -1),
+  make_tuple(32, 32, &aom_sad32x32_avx2, -1),
+  make_tuple(32, 16, &aom_sad32x16_avx2, -1),
+#if CONFIG_HIGHBITDEPTH  // highbd functions: last field is 8, 10 or 12
+#if CONFIG_EXT_PARTITION
+  make_tuple(128, 128, &aom_highbd_sad128x128_avx2, 8),
+  make_tuple(128, 128, &aom_highbd_sad128x128_avx2, 10),
+  make_tuple(128, 128, &aom_highbd_sad128x128_avx2, 12),
+  make_tuple(128, 64, &aom_highbd_sad128x64_avx2, 8),
+  make_tuple(128, 64, &aom_highbd_sad128x64_avx2, 10),
+  make_tuple(128, 64, &aom_highbd_sad128x64_avx2, 12),
+  make_tuple(64, 128, &aom_highbd_sad64x128_avx2, 8),
+  make_tuple(64, 128, &aom_highbd_sad64x128_avx2, 10),
+  make_tuple(64, 128, &aom_highbd_sad64x128_avx2, 12),
+#endif  // CONFIG_EXT_PARTITION
+  make_tuple(64, 64, &aom_highbd_sad64x64_avx2, 8),
+  make_tuple(64, 64, &aom_highbd_sad64x64_avx2, 10),
+  make_tuple(64, 64, &aom_highbd_sad64x64_avx2, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avx2, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avx2, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avx2, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avx2, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avx2, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avx2, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avx2, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avx2, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avx2, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avx2, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avx2, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avx2, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avx2, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avx2, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avx2, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avx2, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avx2, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avx2, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avx2, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avx2, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avx2, 12),
+#endif  // CONFIG_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_CASE_P(AVX2, SADTest, ::testing::ValuesIn(avx2_tests));
+
+const SadMxNAvgParam avg_avx2_tests[] = {  // AVX2 aom_*sadWxH_avg functions
+#if CONFIG_EXT_PARTITION  // block sizes with a 128 dimension
+  make_tuple(64, 128, &aom_sad64x128_avg_avx2, -1),
+  make_tuple(128, 64, &aom_sad128x64_avg_avx2, -1),
+  make_tuple(128, 128, &aom_sad128x128_avg_avx2, -1),
+#endif  // CONFIG_EXT_PARTITION
+  make_tuple(64, 64, &aom_sad64x64_avg_avx2, -1),
+  make_tuple(64, 32, &aom_sad64x32_avg_avx2, -1),
+  make_tuple(32, 64, &aom_sad32x64_avg_avx2, -1),
+  make_tuple(32, 32, &aom_sad32x32_avg_avx2, -1),
+  make_tuple(32, 16, &aom_sad32x16_avg_avx2, -1),
+#if CONFIG_HIGHBITDEPTH  // highbd functions: last field is 8, 10 or 12
+#if CONFIG_EXT_PARTITION
+  make_tuple(128, 128, &aom_highbd_sad128x128_avg_avx2, 8),
+  make_tuple(128, 128, &aom_highbd_sad128x128_avg_avx2, 10),
+  make_tuple(128, 128, &aom_highbd_sad128x128_avg_avx2, 12),
+  make_tuple(128, 64, &aom_highbd_sad128x64_avg_avx2, 8),
+  make_tuple(128, 64, &aom_highbd_sad128x64_avg_avx2, 10),
+  make_tuple(128, 64, &aom_highbd_sad128x64_avg_avx2, 12),
+  make_tuple(64, 128, &aom_highbd_sad64x128_avg_avx2, 8),
+  make_tuple(64, 128, &aom_highbd_sad64x128_avg_avx2, 10),
+  make_tuple(64, 128, &aom_highbd_sad64x128_avg_avx2, 12),
+#endif  // CONFIG_EXT_PARTITION
+  make_tuple(64, 64, &aom_highbd_sad64x64_avg_avx2, 8),
+  make_tuple(64, 64, &aom_highbd_sad64x64_avg_avx2, 10),
+  make_tuple(64, 64, &aom_highbd_sad64x64_avg_avx2, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avg_avx2, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avg_avx2, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32_avg_avx2, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avg_avx2, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avg_avx2, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64_avg_avx2, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avg_avx2, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avg_avx2, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32_avg_avx2, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avg_avx2, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avg_avx2, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16_avg_avx2, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avg_avx2, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avg_avx2, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32_avg_avx2, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avg_avx2, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avg_avx2, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16_avg_avx2, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avg_avx2, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avg_avx2, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8_avg_avx2, 12),
+#endif  // CONFIG_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_CASE_P(AVX2, SADavgTest, ::testing::ValuesIn(avg_avx2_tests));
+
+const SadMxNx4Param x4d_avx2_tests[] = {  // AVX2 aom_*sadWxHx4d functions
+#if CONFIG_EXT_PARTITION  // block sizes with a 128 dimension
+  make_tuple(64, 128, &aom_sad64x128x4d_avx2, -1),
+  make_tuple(128, 64, &aom_sad128x64x4d_avx2, -1),
+  make_tuple(128, 128, &aom_sad128x128x4d_avx2, -1),
+#endif  // CONFIG_EXT_PARTITION
+  make_tuple(64, 64, &aom_sad64x64x4d_avx2, -1),
+  make_tuple(32, 64, &aom_sad32x64x4d_avx2, -1),
+  make_tuple(64, 32, &aom_sad64x32x4d_avx2, -1),
+  make_tuple(32, 32, &aom_sad32x32x4d_avx2, -1),
+#if CONFIG_HIGHBITDEPTH  // highbd functions: last field is 8, 10 or 12
+#if CONFIG_EXT_PARTITION
+  make_tuple(128, 128, &aom_highbd_sad128x128x4d_avx2, 8),
+  make_tuple(128, 128, &aom_highbd_sad128x128x4d_avx2, 10),
+  make_tuple(128, 128, &aom_highbd_sad128x128x4d_avx2, 12),
+  make_tuple(128, 64, &aom_highbd_sad128x64x4d_avx2, 8),
+  make_tuple(128, 64, &aom_highbd_sad128x64x4d_avx2, 10),
+  make_tuple(128, 64, &aom_highbd_sad128x64x4d_avx2, 12),
+  make_tuple(64, 128, &aom_highbd_sad64x128x4d_avx2, 8),
+  make_tuple(64, 128, &aom_highbd_sad64x128x4d_avx2, 10),
+  make_tuple(64, 128, &aom_highbd_sad64x128x4d_avx2, 12),
+#endif  // CONFIG_EXT_PARTITION
+  make_tuple(64, 64, &aom_highbd_sad64x64x4d_avx2, 8),
+  make_tuple(64, 64, &aom_highbd_sad64x64x4d_avx2, 10),
+  make_tuple(64, 64, &aom_highbd_sad64x64x4d_avx2, 12),
+  make_tuple(64, 32, &aom_highbd_sad64x32x4d_avx2, 8),
+  make_tuple(64, 32, &aom_highbd_sad64x32x4d_avx2, 10),
+  make_tuple(64, 32, &aom_highbd_sad64x32x4d_avx2, 12),
+  make_tuple(32, 64, &aom_highbd_sad32x64x4d_avx2, 8),
+  make_tuple(32, 64, &aom_highbd_sad32x64x4d_avx2, 10),
+  make_tuple(32, 64, &aom_highbd_sad32x64x4d_avx2, 12),
+  make_tuple(32, 32, &aom_highbd_sad32x32x4d_avx2, 8),
+  make_tuple(32, 32, &aom_highbd_sad32x32x4d_avx2, 10),
+  make_tuple(32, 32, &aom_highbd_sad32x32x4d_avx2, 12),
+  make_tuple(32, 16, &aom_highbd_sad32x16x4d_avx2, 8),
+  make_tuple(32, 16, &aom_highbd_sad32x16x4d_avx2, 10),
+  make_tuple(32, 16, &aom_highbd_sad32x16x4d_avx2, 12),
+  make_tuple(16, 32, &aom_highbd_sad16x32x4d_avx2, 8),
+  make_tuple(16, 32, &aom_highbd_sad16x32x4d_avx2, 10),
+  make_tuple(16, 32, &aom_highbd_sad16x32x4d_avx2, 12),
+  make_tuple(16, 16, &aom_highbd_sad16x16x4d_avx2, 8),
+  make_tuple(16, 16, &aom_highbd_sad16x16x4d_avx2, 10),
+  make_tuple(16, 16, &aom_highbd_sad16x16x4d_avx2, 12),
+  make_tuple(16, 8, &aom_highbd_sad16x8x4d_avx2, 8),
+  make_tuple(16, 8, &aom_highbd_sad16x8x4d_avx2, 10),
+  make_tuple(16, 8, &aom_highbd_sad16x8x4d_avx2, 12),
+#endif  // CONFIG_HIGHBITDEPTH
+};
+INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::ValuesIn(x4d_avx2_tests));
+#endif  // HAVE_AVX2
+
+//------------------------------------------------------------------------------
+// MIPS functions
+#if HAVE_MSA
+const SadMxNParam msa_tests[] = {  // MSA aom_sadWxH functions, 64x64 to 4x4
+  make_tuple(64, 64, &aom_sad64x64_msa, -1),
+  make_tuple(64, 32, &aom_sad64x32_msa, -1),
+  make_tuple(32, 64, &aom_sad32x64_msa, -1),
+  make_tuple(32, 32, &aom_sad32x32_msa, -1),
+  make_tuple(32, 16, &aom_sad32x16_msa, -1),
+  make_tuple(16, 32, &aom_sad16x32_msa, -1),
+  make_tuple(16, 16, &aom_sad16x16_msa, -1),
+  make_tuple(16, 8, &aom_sad16x8_msa, -1),
+  make_tuple(8, 16, &aom_sad8x16_msa, -1),
+  make_tuple(8, 8, &aom_sad8x8_msa, -1),
+  make_tuple(8, 4, &aom_sad8x4_msa, -1),
+  make_tuple(4, 8, &aom_sad4x8_msa, -1),
+  make_tuple(4, 4, &aom_sad4x4_msa, -1),
+};
+INSTANTIATE_TEST_CASE_P(MSA, SADTest, ::testing::ValuesIn(msa_tests));
+
+const SadMxNAvgParam avg_msa_tests[] = {  // MSA aom_sadWxH_avg functions
+  make_tuple(64, 64, &aom_sad64x64_avg_msa, -1),
+  make_tuple(64, 32, &aom_sad64x32_avg_msa, -1),
+  make_tuple(32, 64, &aom_sad32x64_avg_msa, -1),
+  make_tuple(32, 32, &aom_sad32x32_avg_msa, -1),
+  make_tuple(32, 16, &aom_sad32x16_avg_msa, -1),
+  make_tuple(16, 32, &aom_sad16x32_avg_msa, -1),
+  make_tuple(16, 16, &aom_sad16x16_avg_msa, -1),
+  make_tuple(16, 8, &aom_sad16x8_avg_msa, -1),
+  make_tuple(8, 16, &aom_sad8x16_avg_msa, -1),
+  make_tuple(8, 8, &aom_sad8x8_avg_msa, -1),
+  make_tuple(8, 4, &aom_sad8x4_avg_msa, -1),
+  make_tuple(4, 8, &aom_sad4x8_avg_msa, -1),
+  make_tuple(4, 4, &aom_sad4x4_avg_msa, -1),
+};
+INSTANTIATE_TEST_CASE_P(MSA, SADavgTest, ::testing::ValuesIn(avg_msa_tests));
+
+const SadMxNx4Param x4d_msa_tests[] = {  // MSA aom_sadWxHx4d functions
+  make_tuple(64, 64, &aom_sad64x64x4d_msa, -1),
+  make_tuple(64, 32, &aom_sad64x32x4d_msa, -1),
+  make_tuple(32, 64, &aom_sad32x64x4d_msa, -1),
+  make_tuple(32, 32, &aom_sad32x32x4d_msa, -1),
+  make_tuple(32, 16, &aom_sad32x16x4d_msa, -1),
+  make_tuple(16, 32, &aom_sad16x32x4d_msa, -1),
+  make_tuple(16, 16, &aom_sad16x16x4d_msa, -1),
+  make_tuple(16, 8, &aom_sad16x8x4d_msa, -1),
+  make_tuple(8, 16, &aom_sad8x16x4d_msa, -1),
+  make_tuple(8, 8, &aom_sad8x8x4d_msa, -1),
+  make_tuple(8, 4, &aom_sad8x4x4d_msa, -1),
+  make_tuple(4, 8, &aom_sad4x8x4d_msa, -1),
+  make_tuple(4, 4, &aom_sad4x4x4d_msa, -1),
+};
+INSTANTIATE_TEST_CASE_P(MSA, SADx4Test, ::testing::ValuesIn(x4d_msa_tests));
+#endif  // HAVE_MSA
+
+}  // namespace
diff --git a/third_party/aom/test/scan_test.cc b/third_party/aom/test/scan_test.cc
new file mode 100644
index 000000000..16c831c8e
--- /dev/null
+++ b/third_party/aom/test/scan_test.cc
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "av1/common/common_data.h"
+#include "av1/common/scan.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+TEST(ScanTest, av1_augment_prob) {
+  // After augmentation the upper 16 bits of every entry must still hold the
+  // input value, and the lower 16 bits (complemented) must be a position in
+  // the default scan that maps back to the entry's own raster index.
+  const TX_SIZE tx_size = TX_4X4;
+  const TX_TYPE tx_type = DCT_DCT;
+  const uint32_t num_coeffs = tx_size_wide[tx_size] * tx_size_wide[tx_size];
+  const uint32_t ref_prob[16] = {
+    8, 8, 7, 7, 8, 8, 4, 2, 3, 3, 2, 2, 2, 2, 2, 2
+  };
+  uint32_t prob[16];
+  for (int i = 0; i < 16; ++i) prob[i] = ref_prob[i];
+
+  av1_augment_prob(tx_size, tx_type, prob);
+  for (uint32_t idx = 0; idx < num_coeffs; ++idx) {
+    EXPECT_EQ(ref_prob[idx], prob[idx] >> 16);
+  }
+
+  const SCAN_ORDER *sc = get_default_scan(tx_size, tx_type, 0);
+  const uint32_t mask = (1 << 16) - 1;
+  for (uint32_t ref_idx = 0; ref_idx < num_coeffs; ++ref_idx) {
+    // Undo the complement to recover the scan position stored in the entry.
+    const uint32_t scan_idx = mask ^ (prob[ref_idx] & mask);
+    const uint32_t idx = sc->scan[scan_idx];
+    EXPECT_EQ(ref_idx, idx);
+  }
+}
+
+TEST(ScanTest, av1_update_sort_order) {
+  // A fixed set of 4x4 DCT_DCT counts and the sort order expected for them.
+  const uint32_t prob[16] = { 15, 14, 11, 10, 13, 12, 9, 5,
+                              8,  7,  4,  2,  6,  3,  1, 0 };
+  const int16_t ref_sort_order[16] = { 0, 1,  4, 5,  2,  3,  6,  8,
+                                       9, 12, 7, 10, 13, 11, 14, 15 };
+  int16_t sort_order[16];
+  av1_update_sort_order(TX_4X4, DCT_DCT, prob, sort_order);
+  // Non-fatal checks, so every mismatching position is reported.
+  for (int i = 0; i < 16; ++i) EXPECT_EQ(ref_sort_order[i], sort_order[i]);
+}
+
+TEST(ScanTest, av1_update_scan_order) {
+  // Derive a sort order from fixed 4x4 counts, build scan/iscan from it, and
+  // expect iscan to equal ref_iscan with scan as its inverse mapping.
+  const uint32_t prob[16] = { 10, 12, 14, 9, 11, 13, 15, 5,
+                              8,  7,  4,  2, 6,  3,  1,  0 };
+  const int16_t ref_iscan[16] = { 0, 1, 2,  6,  3, 4,  5,  10,
+                                  7, 8, 11, 13, 9, 12, 14, 15 };
+  int16_t sort_order[16];
+  int16_t iscan[16];
+  int16_t scan[16];
+
+  av1_update_sort_order(TX_4X4, DCT_DCT, prob, sort_order);
+  av1_update_scan_order(TX_4X4, sort_order, scan, iscan);
+
+  for (int i = 0; i < 16; ++i) {
+    EXPECT_EQ(ref_iscan[i], iscan[i]);
+    EXPECT_EQ(i, scan[ref_iscan[i]]);
+  }
+}
+
+TEST(ScanTest, av1_update_neighbors) {
+  const int kNumNbValues = (16 + 1) * 2;  // length of both neighbor tables
+  // Identity (raster) scan, which is therefore also its own inverse scan.
+  const int16_t scan[16] = { 0, 1, 2,  3,  4,  5,  6,  7,
+                             8, 9, 10, 11, 12, 13, 14, 15 };
+  const int16_t ref_nb[kNumNbValues] = { 0,  0,  0,  0,  1,  1,  2, 2, 0,
+                                         1,  1,  4,  2,  5,  3,  6, 4, 5,
+                                         5,  8,  6,  9,  7,  10, 8, 9, 9,
+                                         12, 10, 13, 11, 14, 0,  0 };
+  int16_t nb[kNumNbValues];
+
+  // The same table is passed as both the scan and the iscan argument.
+  av1_update_neighbors(TX_4X4, scan, scan, nb);
+
+  for (int i = 0; i < kNumNbValues; ++i) {
+    EXPECT_EQ(ref_nb[i], nb[i]);
+  }
+}
+
+}  // namespace
diff --git a/third_party/aom/test/selfguided_filter_test.cc b/third_party/aom/test/selfguided_filter_test.cc
new file mode 100644
index 000000000..e87fe339a
--- /dev/null
+++ b/third_party/aom/test/selfguided_filter_test.cc
@@ -0,0 +1,274 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <ctime>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./av1_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+#include "av1/common/mv.h"
+#include "av1/common/restoration.h"
+
+namespace {
+
+using std::tr1::tuple;
+using std::tr1::make_tuple;
+using libaom_test::ACMRandom;
+
+typedef tuple<> FilterTestParam;  // empty tuple: no per-instance parameters
+
+class AV1SelfguidedFilterTest  // tests for apply_selfguided_restoration
+    : public ::testing::TestWithParam<FilterTestParam> {
+ public:
+  virtual ~AV1SelfguidedFilterTest() {}
+  virtual void SetUp() {}
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunSpeedTest() {  // times NUM_ITERS calls; makes no assertions
+    const int w = 256, h = 256;  // block size; w is also used as the stride
+    const int NUM_ITERS = 2000;
+    int i, j;
+
+    uint8_t *input = (uint8_t *)aom_memalign(16, w * h * sizeof(uint8_t));
+    uint8_t *output = (uint8_t *)aom_memalign(16, w * h * sizeof(uint8_t));
+    int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE);
+    memset(tmpbuf, 0, RESTORATION_TMPBUF_SIZE);
+
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+    for (i = 0; i < h; ++i)  // fill the block with random 8-bit samples
+      for (j = 0; j < w; ++j) input[i * w + j] = rnd.Rand16() & 0xFF;
+
+    int xqd[2] = {  // each: SGRPROJ_PRJ_MINn plus a random offset
+      SGRPROJ_PRJ_MIN0 +
+          rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0),
+      SGRPROJ_PRJ_MIN1 +
+          rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1)
+    };
+    // Fix a parameter set, since the speed depends slightly on r.
+    // Change this to test different combinations of values of r.
+    int eps = 15;
+
+    av1_loop_restoration_precal();
+
+    std::clock_t start = std::clock();  // processor time, not wall-clock
+    for (i = 0; i < NUM_ITERS; ++i) {
+      apply_selfguided_restoration(input, w, h, w, eps, xqd, output, w, tmpbuf);
+    }
+    std::clock_t end = std::clock();
+    double elapsed = ((end - start) / (double)CLOCKS_PER_SEC);  // seconds
+
+    printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, w, h,
+           elapsed, elapsed * 1000000. / NUM_ITERS);
+
+    aom_free(input);
+    aom_free(output);
+    aom_free(tmpbuf);
+  }
+
+  void RunCorrectnessTest() {  // output must match the _c implementation
+    // Set the maximum width/height to test here. We actually test a small
+    // range of sizes *up to* this size, so that we can check, eg.,
+    // the behaviour on tiles which are not a multiple of 4 wide.
+    const int max_w = 260, max_h = 260, stride = 672, out_stride = 672;
+    const int NUM_ITERS = 81;  // 9 widths x 9 heights (see test_w/test_h)
+    int i, j, k;
+
+    uint8_t *input =
+        (uint8_t *)aom_memalign(16, stride * max_h * sizeof(uint8_t));
+    uint8_t *output =
+        (uint8_t *)aom_memalign(16, out_stride * max_h * sizeof(uint8_t));
+    uint8_t *output2 =
+        (uint8_t *)aom_memalign(16, out_stride * max_h * sizeof(uint8_t));
+    int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE);
+    memset(tmpbuf, 0, RESTORATION_TMPBUF_SIZE);
+
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+    av1_loop_restoration_precal();
+
+    for (i = 0; i < NUM_ITERS; ++i) {  // fresh input and parameters each time
+      for (j = 0; j < max_h; ++j)
+        for (k = 0; k < max_w; ++k) input[j * stride + k] = rnd.Rand16() & 0xFF;
+
+      int xqd[2] = {  // each: SGRPROJ_PRJ_MINn plus a random offset
+        SGRPROJ_PRJ_MIN0 +
+            rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0),
+        SGRPROJ_PRJ_MIN1 +
+            rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1)
+      };
+      int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS);
+
+      // Test various tile sizes around 256x256
+      int test_w = max_w - (i / 9);  // 260 down to 252
+      int test_h = max_h - (i % 9);  // 260 down to 252
+
+      apply_selfguided_restoration(input, test_w, test_h, stride, eps, xqd,
+                                   output, out_stride, tmpbuf);
+      apply_selfguided_restoration_c(input, test_w, test_h, stride, eps, xqd,
+                                     output2, out_stride, tmpbuf);
+      for (j = 0; j < test_h; ++j)  // compare only the test_w x test_h area
+        for (k = 0; k < test_w; ++k)
+          ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]);
+    }
+
+    aom_free(input);  // NOTE(review): not reached if an ASSERT_EQ fails
+    aom_free(output);
+    aom_free(output2);
+    aom_free(tmpbuf);
+  }
+};
+
+// Timing only, no assertions: needs --gtest_also_run_disabled_tests to run.
+TEST_P(AV1SelfguidedFilterTest, DISABLED_SpeedTest) { RunSpeedTest(); }
+TEST_P(AV1SelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); }
+
+const FilterTestParam params[] = { make_tuple() };
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(SSE4_1, AV1SelfguidedFilterTest,
+                        ::testing::ValuesIn(params));
+#endif  // HAVE_SSE4_1
+
+#if CONFIG_HIGHBITDEPTH
+
+typedef tuple<int> HighbdFilterTestParam;  // the int is the bit depth
+
+class AV1HighbdSelfguidedFilterTest  // tests for the _highbd restoration
+    : public ::testing::TestWithParam<HighbdFilterTestParam> {
+ public:
+  virtual ~AV1HighbdSelfguidedFilterTest() {}
+  virtual void SetUp() {}
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunSpeedTest() {  // times NUM_ITERS calls; makes no assertions
+    const int w = 256, h = 256;  // block size; w is also used as the stride
+    const int NUM_ITERS = 2000;
+    int i, j;
+    int bit_depth = GET_PARAM(0);
+    int mask = (1 << bit_depth) - 1;  // keeps samples within bit_depth bits
+
+    uint16_t *input = (uint16_t *)aom_memalign(16, w * h * sizeof(uint16_t));
+    uint16_t *output = (uint16_t *)aom_memalign(16, w * h * sizeof(uint16_t));
+    int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE);
+    memset(tmpbuf, 0, RESTORATION_TMPBUF_SIZE);
+
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+    for (i = 0; i < h; ++i)  // fill the block with random masked samples
+      for (j = 0; j < w; ++j) input[i * w + j] = rnd.Rand16() & mask;
+
+    int xqd[2] = {  // each: SGRPROJ_PRJ_MINn plus a random offset
+      SGRPROJ_PRJ_MIN0 +
+          rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0),
+      SGRPROJ_PRJ_MIN1 +
+          rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1)
+    };
+    // Fix a parameter set, since the speed depends slightly on r.
+    // Change this to test different combinations of values of r.
+    int eps = 15;
+
+    av1_loop_restoration_precal();
+
+    std::clock_t start = std::clock();  // processor time, not wall-clock
+    for (i = 0; i < NUM_ITERS; ++i) {
+      apply_selfguided_restoration_highbd(input, w, h, w, bit_depth, eps, xqd,
+                                          output, w, tmpbuf);
+    }
+    std::clock_t end = std::clock();
+    double elapsed = ((end - start) / (double)CLOCKS_PER_SEC);  // seconds
+
+    printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, w, h,
+           elapsed, elapsed * 1000000. / NUM_ITERS);
+
+    aom_free(input);
+    aom_free(output);
+    aom_free(tmpbuf);
+  }
+
+  void RunCorrectnessTest() {  // output must match the _c implementation
+    // Set the maximum width/height to test here. We actually test a small
+    // range of sizes *up to* this size, so that we can check, eg.,
+    // the behaviour on tiles which are not a multiple of 4 wide.
+    const int max_w = 260, max_h = 260, stride = 672, out_stride = 672;
+    const int NUM_ITERS = 81;  // 9 widths x 9 heights (see test_w/test_h)
+    int i, j, k;
+    int bit_depth = GET_PARAM(0);
+    int mask = (1 << bit_depth) - 1;  // keeps samples within bit_depth bits
+
+    uint16_t *input =
+        (uint16_t *)aom_memalign(16, stride * max_h * sizeof(uint16_t));
+    uint16_t *output =
+        (uint16_t *)aom_memalign(16, out_stride * max_h * sizeof(uint16_t));
+    uint16_t *output2 =
+        (uint16_t *)aom_memalign(16, out_stride * max_h * sizeof(uint16_t));
+    int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE);
+    memset(tmpbuf, 0, RESTORATION_TMPBUF_SIZE);
+
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+    av1_loop_restoration_precal();
+
+    for (i = 0; i < NUM_ITERS; ++i) {  // fresh input and parameters each time
+      for (j = 0; j < max_h; ++j)
+        for (k = 0; k < max_w; ++k) input[j * stride + k] = rnd.Rand16() & mask;
+
+      int xqd[2] = {  // each: SGRPROJ_PRJ_MINn plus a random offset
+        SGRPROJ_PRJ_MIN0 +
+            rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0),
+        SGRPROJ_PRJ_MIN1 +
+            rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1)
+      };
+      int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS);
+
+      // Test various tile sizes around 256x256
+      int test_w = max_w - (i / 9);  // 260 down to 252
+      int test_h = max_h - (i % 9);  // 260 down to 252
+
+      apply_selfguided_restoration_highbd(input, test_w, test_h, stride,
+                                          bit_depth, eps, xqd, output,
+                                          out_stride, tmpbuf);
+      apply_selfguided_restoration_highbd_c(input, test_w, test_h, stride,
+                                            bit_depth, eps, xqd, output2,
+                                            out_stride, tmpbuf);
+      for (j = 0; j < test_h; ++j)  // compare only the test_w x test_h area
+        for (k = 0; k < test_w; ++k)
+          ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]);
+    }
+
+    aom_free(input);  // NOTE(review): not reached if an ASSERT_EQ fails
+    aom_free(output);
+    aom_free(output2);
+    aom_free(tmpbuf);
+  }
+};
+
+// Timing only, no assertions: needs --gtest_also_run_disabled_tests to run.
+TEST_P(AV1HighbdSelfguidedFilterTest, DISABLED_SpeedTest) { RunSpeedTest(); }
+TEST_P(AV1HighbdSelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); }
+
+const HighbdFilterTestParam highbd_params[] = { make_tuple(8), make_tuple(10),
+                                                make_tuple(12) };
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdSelfguidedFilterTest,
+                        ::testing::ValuesIn(highbd_params));
+#endif  // HAVE_SSE4_1
+#endif
+
+}  // namespace
diff --git a/third_party/aom/test/set_maps.sh b/third_party/aom/test/set_maps.sh
new file mode 100755
index 000000000..4f59b06d6
--- /dev/null
+++ b/third_party/aom/test/set_maps.sh
@@ -0,0 +1,52 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file tests the libaom set_maps example. To add new tests to this file,
+## do the following:
+##   1. Write a shell function (this is your test).
+##   2. Add the function to set_maps_tests (on a new line).
+##
+. "$(dirname "$0")"/tools_common.sh
+
+# Environment check: the $YUV_RAW_INPUT file must exist and aom_tool_path must
+# be able to locate set_maps. Failures are reported via elog; returns 1.
+set_maps_verify_environment() {
+  if [ ! -e "${YUV_RAW_INPUT}" ]; then
+    elog "Libaom test data must exist in LIBAOM_TEST_DATA_PATH."
+    return 1
+  fi
+  if [ -z "$(aom_tool_path set_maps)" ]; then
+    elog "set_maps not found. It must exist in LIBAOM_BIN_PATH or its parent."
+    return 1
+  fi
+}
+
+# Runs set_maps using the codec specified by $1.
+set_maps() {
+  local encoder="$(aom_tool_path set_maps)"
+  local codec="$1"
+  local output_file="${AOM_TEST_OUTPUT_DIR}/set_maps_${codec}.ivf"
+
+  eval "${AOM_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
+      "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \
+      ${devnull}
+
+  [ -e "${output_file}" ] || return 1
+}
+
+set_maps_av1() {
+  if [ "$(av1_encode_available)" = "yes" ]; then
+    set_maps av1 || return 1
+  fi
+}
+
+set_maps_tests="set_maps_av1"  # test functions in this file (see header)
+
+run_tests set_maps_verify_environment "${set_maps_tests}"  # env check + tests
diff --git a/third_party/aom/test/simd_cmp_impl.h b/third_party/aom/test/simd_cmp_impl.h
new file mode 100644
index 000000000..28bd64a5b
--- /dev/null
+++ b/third_party/aom/test/simd_cmp_impl.h
@@ -0,0 +1,1212 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <assert.h>
+#include <string>
+#include "./aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "aom_dsp/aom_simd.h"
+#undef SIMD_INLINE
+#define SIMD_INLINE static  // Don't enforce inlining
+#include "aom_dsp/simd/v128_intrinsics_c.h"
+
+// Machine tuned code goes into this file. This file is included from
+// simd_cmp_sse2.cc, simd_cmp_ssse3.cc etc which define the macros
+// ARCH (=neon, sse2, ssse3, etc), SIMD_NAMESPACE and ARCH_POSTFIX().
+
+using libaom_test::ACMRandom;
+
+namespace SIMD_NAMESPACE {
+
+// Wrap templates around intrinsics using immediate values
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v64 imm_v64_shl_n_byte(v64 vec) {
+  return v64_shl_n_byte(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v64 imm_v64_shr_n_byte(v64 vec) {
+  return v64_shr_n_byte(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v64 imm_v64_shl_n_8(v64 vec) {
+  return v64_shl_n_8(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v64 imm_v64_shr_n_u8(v64 vec) {
+  return v64_shr_n_u8(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v64 imm_v64_shr_n_s8(v64 vec) {
+  return v64_shr_n_s8(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v64 imm_v64_shl_n_16(v64 vec) {
+  return v64_shl_n_16(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v64 imm_v64_shr_n_u16(v64 vec) {
+  return v64_shr_n_u16(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v64 imm_v64_shr_n_s16(v64 vec) {
+  return v64_shr_n_s16(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v64 imm_v64_shl_n_32(v64 vec) {
+  return v64_shl_n_32(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v64 imm_v64_shr_n_u32(v64 vec) {
+  return v64_shr_n_u32(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v64 imm_v64_shr_n_s32(v64 vec) {
+  return v64_shr_n_s32(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v64 imm_v64_align(v64 x, v64 y) {
+  return v64_align(x, y, imm);
+}
+
+// Wrap templates around corresponding C implementations of the above
+template <int imm>  // imm: immediate operand of the C implementation
+c_v64 c_imm_v64_shl_n_byte(c_v64 vec) {
+  return c_v64_shl_n_byte(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v64 c_imm_v64_shr_n_byte(c_v64 vec) {
+  return c_v64_shr_n_byte(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v64 c_imm_v64_shl_n_8(c_v64 vec) {
+  return c_v64_shl_n_8(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v64 c_imm_v64_shr_n_u8(c_v64 vec) {
+  return c_v64_shr_n_u8(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v64 c_imm_v64_shr_n_s8(c_v64 vec) {
+  return c_v64_shr_n_s8(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v64 c_imm_v64_shl_n_16(c_v64 vec) {
+  return c_v64_shl_n_16(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v64 c_imm_v64_shr_n_u16(c_v64 vec) {
+  return c_v64_shr_n_u16(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v64 c_imm_v64_shr_n_s16(c_v64 vec) {
+  return c_v64_shr_n_s16(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v64 c_imm_v64_shl_n_32(c_v64 vec) {
+  return c_v64_shl_n_32(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v64 c_imm_v64_shr_n_u32(c_v64 vec) {
+  return c_v64_shr_n_u32(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v64 c_imm_v64_shr_n_s32(c_v64 vec) {
+  return c_v64_shr_n_s32(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v64 c_imm_v64_align(c_v64 x, c_v64 y) {
+  return c_v64_align(x, y, imm);
+}
+
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v128 imm_v128_shl_n_byte(v128 vec) {
+  return v128_shl_n_byte(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v128 imm_v128_shr_n_byte(v128 vec) {
+  return v128_shr_n_byte(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v128 imm_v128_shl_n_8(v128 vec) {
+  return v128_shl_n_8(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v128 imm_v128_shr_n_u8(v128 vec) {
+  return v128_shr_n_u8(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v128 imm_v128_shr_n_s8(v128 vec) {
+  return v128_shr_n_s8(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v128 imm_v128_shl_n_16(v128 vec) {
+  return v128_shl_n_16(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v128 imm_v128_shr_n_u16(v128 vec) {
+  return v128_shr_n_u16(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v128 imm_v128_shr_n_s16(v128 vec) {
+  return v128_shr_n_s16(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v128 imm_v128_shl_n_32(v128 vec) {
+  return v128_shl_n_32(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v128 imm_v128_shr_n_u32(v128 vec) {
+  return v128_shr_n_u32(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v128 imm_v128_shr_n_s32(v128 vec) {
+  return v128_shr_n_s32(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the wrapped intrinsic
+v128 imm_v128_align(v128 x, v128 y) {
+  return v128_align(x, y, imm);
+}
+
+template <int imm>  // imm: immediate operand of the C implementation
+c_v128 c_imm_v128_shl_n_byte(c_v128 vec) {
+  return c_v128_shl_n_byte(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v128 c_imm_v128_shr_n_byte(c_v128 vec) {
+  return c_v128_shr_n_byte(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v128 c_imm_v128_shl_n_8(c_v128 vec) {
+  return c_v128_shl_n_8(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v128 c_imm_v128_shr_n_u8(c_v128 vec) {
+  return c_v128_shr_n_u8(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v128 c_imm_v128_shr_n_s8(c_v128 vec) {
+  return c_v128_shr_n_s8(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v128 c_imm_v128_shl_n_16(c_v128 vec) {
+  return c_v128_shl_n_16(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v128 c_imm_v128_shr_n_u16(c_v128 vec) {
+  return c_v128_shr_n_u16(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v128 c_imm_v128_shr_n_s16(c_v128 vec) {
+  return c_v128_shr_n_s16(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v128 c_imm_v128_shl_n_32(c_v128 vec) {
+  return c_v128_shl_n_32(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v128 c_imm_v128_shr_n_u32(c_v128 vec) {
+  return c_v128_shr_n_u32(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v128 c_imm_v128_shr_n_s32(c_v128 vec) {
+  return c_v128_shr_n_s32(vec, imm);
+}
+template <int imm>  // imm: immediate operand of the C implementation
+c_v128 c_imm_v128_align(c_v128 x, c_v128 y) {
+  return c_v128_align(x, y, imm);
+}
+
+// Wrappers around the SAD and SSD functions
+uint32_t v64_sad_u8(v64 lhs, v64 rhs) {  // init, accumulate once, then sum
+  return v64_sad_u8_sum(::v64_sad_u8(v64_sad_u8_init(), lhs, rhs));
+}
+uint32_t v64_ssd_u8(v64 lhs, v64 rhs) {  // init, accumulate once, then sum
+  return v64_ssd_u8_sum(::v64_ssd_u8(v64_ssd_u8_init(), lhs, rhs));
+}
+
+uint32_t c_v64_sad_u8(c_v64 lhs, c_v64 rhs) {  // init, accumulate, sum
+  return c_v64_sad_u8_sum(::c_v64_sad_u8(c_v64_sad_u8_init(), lhs, rhs));
+}
+uint32_t c_v64_ssd_u8(c_v64 lhs, c_v64 rhs) {  // init, accumulate, sum
+  return c_v64_ssd_u8_sum(::c_v64_ssd_u8(c_v64_ssd_u8_init(), lhs, rhs));
+}
+uint32_t v128_sad_u8(v128 lhs, v128 rhs) {  // init, accumulate, sum
+  return v128_sad_u8_sum(::v128_sad_u8(v128_sad_u8_init(), lhs, rhs));
+}
+uint32_t v128_ssd_u8(v128 lhs, v128 rhs) {  // init, accumulate, sum
+  return v128_ssd_u8_sum(::v128_ssd_u8(v128_ssd_u8_init(), lhs, rhs));
+}
+uint32_t c_v128_sad_u8(c_v128 lhs, c_v128 rhs) {  // init, accumulate, sum
+  return c_v128_sad_u8_sum(::c_v128_sad_u8(c_v128_sad_u8_init(), lhs, rhs));
+}
+uint32_t c_v128_ssd_u8(c_v128 lhs, c_v128 rhs) {  // init, accumulate, sum
+  return c_v128_ssd_u8_sum(::c_v128_ssd_u8(c_v128_ssd_u8_init(), lhs, rhs));
+}
+
+namespace {
+
+typedef void (*fptr)();  // Generic function pointer; cast back before calling
+
+struct mapping {
+  const char *name;  // Intrinsic name, the lookup key
+  fptr ref;          // C reference implementation
+  fptr simd;         // Machine tuned implementation
+};
+// MAP(f) builds the table entry { "f", c_f, f } with both cast to fptr.
+#define MAP(name)                                                      \
+  {                                                                    \
+    #name, reinterpret_cast < fptr > (c_##name),                       \
+                                      reinterpret_cast < fptr > (name) \
+  }
+// Name -> (C reference, SIMD) table searched by Map(); the NULL entry ends it.
+const mapping m[] = { MAP(v64_sad_u8),
+                      MAP(v64_ssd_u8),
+                      MAP(v64_add_8),
+                      MAP(v64_add_16),
+                      MAP(v64_sadd_s16),
+                      MAP(v64_add_32),
+                      MAP(v64_sub_8),
+                      MAP(v64_ssub_u8),
+                      MAP(v64_ssub_s8),
+                      MAP(v64_sub_16),
+                      MAP(v64_ssub_s16),
+                      MAP(v64_ssub_u16),
+                      MAP(v64_sub_32),
+                      MAP(v64_ziplo_8),
+                      MAP(v64_ziphi_8),
+                      MAP(v64_ziplo_16),
+                      MAP(v64_ziphi_16),
+                      MAP(v64_ziplo_32),
+                      MAP(v64_ziphi_32),
+                      MAP(v64_pack_s32_s16),
+                      MAP(v64_pack_s16_u8),
+                      MAP(v64_pack_s16_s8),
+                      MAP(v64_unziphi_8),
+                      MAP(v64_unziplo_8),
+                      MAP(v64_unziphi_16),
+                      MAP(v64_unziplo_16),
+                      MAP(v64_or),
+                      MAP(v64_xor),
+                      MAP(v64_and),
+                      MAP(v64_andn),
+                      MAP(v64_mullo_s16),
+                      MAP(v64_mulhi_s16),
+                      MAP(v64_mullo_s32),
+                      MAP(v64_madd_s16),
+                      MAP(v64_madd_us8),
+                      MAP(v64_avg_u8),
+                      MAP(v64_rdavg_u8),
+                      MAP(v64_avg_u16),
+                      MAP(v64_min_u8),
+                      MAP(v64_max_u8),
+                      MAP(v64_min_s8),
+                      MAP(v64_max_s8),
+                      MAP(v64_min_s16),
+                      MAP(v64_max_s16),
+                      MAP(v64_cmpgt_s8),
+                      MAP(v64_cmplt_s8),
+                      MAP(v64_cmpeq_8),
+                      MAP(v64_cmpgt_s16),
+                      MAP(v64_cmplt_s16),
+                      MAP(v64_cmpeq_16),
+                      MAP(v64_shuffle_8),
+                      MAP(imm_v64_align<1>),
+                      MAP(imm_v64_align<2>),
+                      MAP(imm_v64_align<3>),
+                      MAP(imm_v64_align<4>),
+                      MAP(imm_v64_align<5>),
+                      MAP(imm_v64_align<6>),
+                      MAP(imm_v64_align<7>),
+                      MAP(v64_abs_s8),
+                      MAP(v64_abs_s16),
+                      MAP(v64_unpacklo_u8_s16),
+                      MAP(v64_unpackhi_u8_s16),
+                      MAP(v64_unpacklo_s8_s16),
+                      MAP(v64_unpackhi_s8_s16),
+                      MAP(v64_unpacklo_u16_s32),
+                      MAP(v64_unpacklo_s16_s32),
+                      MAP(v64_unpackhi_u16_s32),
+                      MAP(v64_unpackhi_s16_s32),
+                      MAP(imm_v64_shr_n_byte<1>),
+                      MAP(imm_v64_shr_n_byte<2>),
+                      MAP(imm_v64_shr_n_byte<3>),
+                      MAP(imm_v64_shr_n_byte<4>),
+                      MAP(imm_v64_shr_n_byte<5>),
+                      MAP(imm_v64_shr_n_byte<6>),
+                      MAP(imm_v64_shr_n_byte<7>),
+                      MAP(imm_v64_shl_n_byte<1>),
+                      MAP(imm_v64_shl_n_byte<2>),
+                      MAP(imm_v64_shl_n_byte<3>),
+                      MAP(imm_v64_shl_n_byte<4>),
+                      MAP(imm_v64_shl_n_byte<5>),
+                      MAP(imm_v64_shl_n_byte<6>),
+                      MAP(imm_v64_shl_n_byte<7>),
+                      MAP(imm_v64_shl_n_8<1>),
+                      MAP(imm_v64_shl_n_8<2>),
+                      MAP(imm_v64_shl_n_8<3>),
+                      MAP(imm_v64_shl_n_8<4>),
+                      MAP(imm_v64_shl_n_8<5>),
+                      MAP(imm_v64_shl_n_8<6>),
+                      MAP(imm_v64_shl_n_8<7>),
+                      MAP(imm_v64_shr_n_u8<1>),
+                      MAP(imm_v64_shr_n_u8<2>),
+                      MAP(imm_v64_shr_n_u8<3>),
+                      MAP(imm_v64_shr_n_u8<4>),
+                      MAP(imm_v64_shr_n_u8<5>),
+                      MAP(imm_v64_shr_n_u8<6>),
+                      MAP(imm_v64_shr_n_u8<7>),
+                      MAP(imm_v64_shr_n_s8<1>),
+                      MAP(imm_v64_shr_n_s8<2>),
+                      MAP(imm_v64_shr_n_s8<3>),
+                      MAP(imm_v64_shr_n_s8<4>),
+                      MAP(imm_v64_shr_n_s8<5>),
+                      MAP(imm_v64_shr_n_s8<6>),
+                      MAP(imm_v64_shr_n_s8<7>),
+                      MAP(imm_v64_shl_n_16<1>),
+                      MAP(imm_v64_shl_n_16<2>),
+                      MAP(imm_v64_shl_n_16<4>),
+                      MAP(imm_v64_shl_n_16<6>),
+                      MAP(imm_v64_shl_n_16<8>),
+                      MAP(imm_v64_shl_n_16<10>),
+                      MAP(imm_v64_shl_n_16<12>),
+                      MAP(imm_v64_shl_n_16<14>),
+                      MAP(imm_v64_shr_n_u16<1>),
+                      MAP(imm_v64_shr_n_u16<2>),
+                      MAP(imm_v64_shr_n_u16<4>),
+                      MAP(imm_v64_shr_n_u16<6>),
+                      MAP(imm_v64_shr_n_u16<8>),
+                      MAP(imm_v64_shr_n_u16<10>),
+                      MAP(imm_v64_shr_n_u16<12>),
+                      MAP(imm_v64_shr_n_u16<14>),
+                      MAP(imm_v64_shr_n_s16<1>),
+                      MAP(imm_v64_shr_n_s16<2>),
+                      MAP(imm_v64_shr_n_s16<4>),
+                      MAP(imm_v64_shr_n_s16<6>),
+                      MAP(imm_v64_shr_n_s16<8>),
+                      MAP(imm_v64_shr_n_s16<10>),
+                      MAP(imm_v64_shr_n_s16<12>),
+                      MAP(imm_v64_shr_n_s16<14>),
+                      MAP(imm_v64_shl_n_32<1>),
+                      MAP(imm_v64_shl_n_32<4>),
+                      MAP(imm_v64_shl_n_32<8>),
+                      MAP(imm_v64_shl_n_32<12>),
+                      MAP(imm_v64_shl_n_32<16>),
+                      MAP(imm_v64_shl_n_32<20>),
+                      MAP(imm_v64_shl_n_32<24>),
+                      MAP(imm_v64_shl_n_32<28>),
+                      MAP(imm_v64_shr_n_u32<1>),
+                      MAP(imm_v64_shr_n_u32<4>),
+                      MAP(imm_v64_shr_n_u32<8>),
+                      MAP(imm_v64_shr_n_u32<12>),
+                      MAP(imm_v64_shr_n_u32<16>),
+                      MAP(imm_v64_shr_n_u32<20>),
+                      MAP(imm_v64_shr_n_u32<24>),
+                      MAP(imm_v64_shr_n_u32<28>),
+                      MAP(imm_v64_shr_n_s32<1>),
+                      MAP(imm_v64_shr_n_s32<4>),
+                      MAP(imm_v64_shr_n_s32<8>),
+                      MAP(imm_v64_shr_n_s32<12>),
+                      MAP(imm_v64_shr_n_s32<16>),
+                      MAP(imm_v64_shr_n_s32<20>),
+                      MAP(imm_v64_shr_n_s32<24>),
+                      MAP(imm_v64_shr_n_s32<28>),
+                      MAP(v64_shl_8),
+                      MAP(v64_shr_u8),
+                      MAP(v64_shr_s8),
+                      MAP(v64_shl_16),
+                      MAP(v64_shr_u16),
+                      MAP(v64_shr_s16),
+                      MAP(v64_shl_32),
+                      MAP(v64_shr_u32),
+                      MAP(v64_shr_s32),
+                      MAP(v64_hadd_u8),
+                      MAP(v64_hadd_s16),
+                      MAP(v64_dotp_s16),
+                      MAP(v64_dotp_su8),
+                      MAP(v64_u64),
+                      MAP(v64_low_u32),
+                      MAP(v64_high_u32),
+                      MAP(v64_low_s32),
+                      MAP(v64_high_s32),
+                      MAP(v64_dup_8),
+                      MAP(v64_dup_16),
+                      MAP(v64_dup_32),
+                      MAP(v64_from_32),
+                      MAP(v64_zero),
+                      MAP(v64_from_16),
+                      MAP(v128_sad_u8),
+                      MAP(v128_ssd_u8),
+                      MAP(v128_add_8),
+                      MAP(v128_add_16),
+                      MAP(v128_sadd_s16),
+                      MAP(v128_add_32),
+                      MAP(v128_sub_8),
+                      MAP(v128_ssub_u8),
+                      MAP(v128_ssub_s8),
+                      MAP(v128_sub_16),
+                      MAP(v128_ssub_s16),
+                      MAP(v128_ssub_u16),
+                      MAP(v128_sub_32),
+                      MAP(v128_ziplo_8),
+                      MAP(v128_ziphi_8),
+                      MAP(v128_ziplo_16),
+                      MAP(v128_ziphi_16),
+                      MAP(v128_ziplo_32),
+                      MAP(v128_ziphi_32),
+                      MAP(v128_ziplo_64),
+                      MAP(v128_ziphi_64),
+                      MAP(v128_unziphi_8),
+                      MAP(v128_unziplo_8),
+                      MAP(v128_unziphi_16),
+                      MAP(v128_unziplo_16),
+                      MAP(v128_unziphi_32),
+                      MAP(v128_unziplo_32),
+                      MAP(v128_pack_s32_s16),
+                      MAP(v128_pack_s16_u8),
+                      MAP(v128_pack_s16_s8),
+                      MAP(v128_or),
+                      MAP(v128_xor),
+                      MAP(v128_and),
+                      MAP(v128_andn),
+                      MAP(v128_mullo_s16),
+                      MAP(v128_mulhi_s16),
+                      MAP(v128_mullo_s32),
+                      MAP(v128_madd_s16),
+                      MAP(v128_madd_us8),
+                      MAP(v128_avg_u8),
+                      MAP(v128_rdavg_u8),
+                      MAP(v128_avg_u16),
+                      MAP(v128_min_u8),
+                      MAP(v128_max_u8),
+                      MAP(v128_min_s8),
+                      MAP(v128_max_s8),
+                      MAP(v128_min_s16),
+                      MAP(v128_max_s16),
+                      MAP(v128_cmpgt_s8),
+                      MAP(v128_cmplt_s8),
+                      MAP(v128_cmpeq_8),
+                      MAP(v128_cmpgt_s16),
+                      MAP(v128_cmpeq_16),
+                      MAP(v128_cmplt_s16),
+                      MAP(v128_shuffle_8),
+                      MAP(imm_v128_align<1>),
+                      MAP(imm_v128_align<2>),
+                      MAP(imm_v128_align<3>),
+                      MAP(imm_v128_align<4>),
+                      MAP(imm_v128_align<5>),
+                      MAP(imm_v128_align<6>),
+                      MAP(imm_v128_align<7>),
+                      MAP(imm_v128_align<8>),
+                      MAP(imm_v128_align<9>),
+                      MAP(imm_v128_align<10>),
+                      MAP(imm_v128_align<11>),
+                      MAP(imm_v128_align<12>),
+                      MAP(imm_v128_align<13>),
+                      MAP(imm_v128_align<14>),
+                      MAP(imm_v128_align<15>),
+                      MAP(v128_abs_s8),
+                      MAP(v128_abs_s16),
+                      MAP(v128_padd_s16),
+                      MAP(v128_unpacklo_u16_s32),
+                      MAP(v128_unpacklo_s16_s32),
+                      MAP(v128_unpackhi_u16_s32),
+                      MAP(v128_unpackhi_s16_s32),
+                      MAP(imm_v128_shr_n_byte<1>),
+                      MAP(imm_v128_shr_n_byte<2>),
+                      MAP(imm_v128_shr_n_byte<3>),
+                      MAP(imm_v128_shr_n_byte<4>),
+                      MAP(imm_v128_shr_n_byte<5>),
+                      MAP(imm_v128_shr_n_byte<6>),
+                      MAP(imm_v128_shr_n_byte<7>),
+                      MAP(imm_v128_shr_n_byte<8>),
+                      MAP(imm_v128_shr_n_byte<9>),
+                      MAP(imm_v128_shr_n_byte<10>),
+                      MAP(imm_v128_shr_n_byte<11>),
+                      MAP(imm_v128_shr_n_byte<12>),
+                      MAP(imm_v128_shr_n_byte<13>),
+                      MAP(imm_v128_shr_n_byte<14>),
+                      MAP(imm_v128_shr_n_byte<15>),
+                      MAP(imm_v128_shl_n_byte<1>),
+                      MAP(imm_v128_shl_n_byte<2>),
+                      MAP(imm_v128_shl_n_byte<3>),
+                      MAP(imm_v128_shl_n_byte<4>),
+                      MAP(imm_v128_shl_n_byte<5>),
+                      MAP(imm_v128_shl_n_byte<6>),
+                      MAP(imm_v128_shl_n_byte<7>),
+                      MAP(imm_v128_shl_n_byte<8>),
+                      MAP(imm_v128_shl_n_byte<9>),
+                      MAP(imm_v128_shl_n_byte<10>),
+                      MAP(imm_v128_shl_n_byte<11>),
+                      MAP(imm_v128_shl_n_byte<12>),
+                      MAP(imm_v128_shl_n_byte<13>),
+                      MAP(imm_v128_shl_n_byte<14>),
+                      MAP(imm_v128_shl_n_byte<15>),
+                      MAP(imm_v128_shl_n_8<1>),
+                      MAP(imm_v128_shl_n_8<2>),
+                      MAP(imm_v128_shl_n_8<3>),
+                      MAP(imm_v128_shl_n_8<4>),
+                      MAP(imm_v128_shl_n_8<5>),
+                      MAP(imm_v128_shl_n_8<6>),
+                      MAP(imm_v128_shl_n_8<7>),
+                      MAP(imm_v128_shr_n_u8<1>),
+                      MAP(imm_v128_shr_n_u8<2>),
+                      MAP(imm_v128_shr_n_u8<3>),
+                      MAP(imm_v128_shr_n_u8<4>),
+                      MAP(imm_v128_shr_n_u8<5>),
+                      MAP(imm_v128_shr_n_u8<6>),
+                      MAP(imm_v128_shr_n_u8<7>),
+                      MAP(imm_v128_shr_n_s8<1>),
+                      MAP(imm_v128_shr_n_s8<2>),
+                      MAP(imm_v128_shr_n_s8<3>),
+                      MAP(imm_v128_shr_n_s8<4>),
+                      MAP(imm_v128_shr_n_s8<5>),
+                      MAP(imm_v128_shr_n_s8<6>),
+                      MAP(imm_v128_shr_n_s8<7>),
+                      MAP(imm_v128_shl_n_16<1>),
+                      MAP(imm_v128_shl_n_16<2>),
+                      MAP(imm_v128_shl_n_16<4>),
+                      MAP(imm_v128_shl_n_16<6>),
+                      MAP(imm_v128_shl_n_16<8>),
+                      MAP(imm_v128_shl_n_16<10>),
+                      MAP(imm_v128_shl_n_16<12>),
+                      MAP(imm_v128_shl_n_16<14>),
+                      MAP(imm_v128_shr_n_u16<1>),
+                      MAP(imm_v128_shr_n_u16<2>),
+                      MAP(imm_v128_shr_n_u16<4>),
+                      MAP(imm_v128_shr_n_u16<6>),
+                      MAP(imm_v128_shr_n_u16<8>),
+                      MAP(imm_v128_shr_n_u16<10>),
+                      MAP(imm_v128_shr_n_u16<12>),
+                      MAP(imm_v128_shr_n_u16<14>),
+                      MAP(imm_v128_shr_n_s16<1>),
+                      MAP(imm_v128_shr_n_s16<2>),
+                      MAP(imm_v128_shr_n_s16<4>),
+                      MAP(imm_v128_shr_n_s16<6>),
+                      MAP(imm_v128_shr_n_s16<8>),
+                      MAP(imm_v128_shr_n_s16<10>),
+                      MAP(imm_v128_shr_n_s16<12>),
+                      MAP(imm_v128_shr_n_s16<14>),
+                      MAP(imm_v128_shl_n_32<1>),
+                      MAP(imm_v128_shl_n_32<4>),
+                      MAP(imm_v128_shl_n_32<8>),
+                      MAP(imm_v128_shl_n_32<12>),
+                      MAP(imm_v128_shl_n_32<16>),
+                      MAP(imm_v128_shl_n_32<20>),
+                      MAP(imm_v128_shl_n_32<24>),
+                      MAP(imm_v128_shl_n_32<28>),
+                      MAP(imm_v128_shr_n_u32<1>),
+                      MAP(imm_v128_shr_n_u32<4>),
+                      MAP(imm_v128_shr_n_u32<8>),
+                      MAP(imm_v128_shr_n_u32<12>),
+                      MAP(imm_v128_shr_n_u32<16>),
+                      MAP(imm_v128_shr_n_u32<20>),
+                      MAP(imm_v128_shr_n_u32<24>),
+                      MAP(imm_v128_shr_n_u32<28>),
+                      MAP(imm_v128_shr_n_s32<1>),
+                      MAP(imm_v128_shr_n_s32<4>),
+                      MAP(imm_v128_shr_n_s32<8>),
+                      MAP(imm_v128_shr_n_s32<12>),
+                      MAP(imm_v128_shr_n_s32<16>),
+                      MAP(imm_v128_shr_n_s32<20>),
+                      MAP(imm_v128_shr_n_s32<24>),
+                      MAP(imm_v128_shr_n_s32<28>),
+                      MAP(v128_from_v64),
+                      MAP(v128_zip_8),
+                      MAP(v128_zip_16),
+                      MAP(v128_zip_32),
+                      MAP(v128_mul_s16),
+                      MAP(v128_unpack_u8_s16),
+                      MAP(v128_unpack_s8_s16),
+                      MAP(v128_unpack_u16_s32),
+                      MAP(v128_unpack_s16_s32),
+                      MAP(v128_shl_8),
+                      MAP(v128_shr_u8),
+                      MAP(v128_shr_s8),
+                      MAP(v128_shl_16),
+                      MAP(v128_shr_u16),
+                      MAP(v128_shr_s16),
+                      MAP(v128_shl_32),
+                      MAP(v128_shr_u32),
+                      MAP(v128_shr_s32),
+                      MAP(v128_hadd_u8),
+                      MAP(v128_dotp_s16),
+                      MAP(v128_low_u32),
+                      MAP(v128_low_v64),
+                      MAP(v128_high_v64),
+                      MAP(v128_from_64),
+                      MAP(v128_from_32),
+                      MAP(v128_zero),
+                      MAP(v128_dup_8),
+                      MAP(v128_dup_16),
+                      MAP(v128_dup_32),
+                      MAP(v128_unpacklo_u8_s16),
+                      MAP(v128_unpackhi_u8_s16),
+                      MAP(v128_unpacklo_s8_s16),
+                      MAP(v128_unpackhi_s8_s16),
+                      MAP(u32_load_unaligned),
+                      MAP(u32_store_unaligned),
+                      MAP(v64_load_unaligned),
+                      MAP(v64_store_unaligned),
+                      MAP(v128_load_unaligned),
+                      MAP(v128_store_unaligned),
+                      { NULL, NULL, NULL } };
+#undef MAP
+
+// Map reference functions to machine tuned functions. Since the
+// functions depend on machine tuned types, the non-machine tuned
+// instantiations of the test can't refer to these functions directly,
+// so we refer to them by name and do the mapping here.
+void Map(const char *name, fptr *ref, fptr *simd) {
+  // Stop at the matching entry or at the NULL-named sentinel, whichever
+  // comes first; the sentinel yields NULL for both outputs.
+  const mapping *entry = m;
+  while (entry->name && strcmp(name, entry->name) != 0) entry++;
+  *ref = entry->ref;
+  *simd = entry->simd;
+}
+
+// Used for printing errors in TestSimd1Arg and TestSimd2Args
+std::string Print(const uint8_t *a, int size) {
+  static const char kHexDigits[] = "0123456789abcdef";
+  std::string text = "0x";
+  for (int i = 0; i < size; i++) {
+    // Most significant byte first: walk backwards on little-endian builds.
+    const int pos = CONFIG_BIG_ENDIAN ? i : size - 1 - i;
+    text += kHexDigits[a[pos] >> 4];
+    text += kHexDigits[a[pos] & 15];
+  }
+  return text;
+}
+
+// Used in TestSimd1Arg and TestSimd2Args to restrict argument ranges
+void SetMask(uint8_t *s, int size, uint32_t mask, uint32_t maskwidth) {
+  // maskwidth selects the lane size in bits; every lane of the size-byte
+  // buffer s is ANDed with mask. The wider lanes require s to be aligned
+  // to the lane size. Width 0 means "no masking".
+  if (maskwidth == 0) return;
+
+  if (maskwidth == 8) {
+    for (int i = 0; i < size; i++) s[i] &= mask;
+    return;
+  }
+  if (maskwidth == 16) {
+    assert(!(reinterpret_cast<uintptr_t>(s) & 1));
+    uint16_t *const lanes = reinterpret_cast<uint16_t *>(s);
+    for (int i = 0; i < size / 2; i++) lanes[i] &= mask;
+    return;
+  }
+  if (maskwidth == 32) {
+    assert(!(reinterpret_cast<uintptr_t>(s) & 3));
+    uint32_t *const lanes = reinterpret_cast<uint32_t *>(s);
+    for (int i = 0; i < size / 4; i++) lanes[i] &= mask;
+    return;
+  }
+  if (maskwidth == 64) {
+    assert(!(reinterpret_cast<uintptr_t>(s) & 7));
+    // mask is 32 bits wide, so the upper half of each 64-bit lane is cleared.
+    uint64_t *const lanes = reinterpret_cast<uint64_t *>(s);
+    for (int i = 0; i < size / 8; i++) lanes[i] &= mask;
+    return;
+  }
+
+  // Any other width is reported as a test failure.
+  FAIL() << "Unsupported mask width";
+}
+
+// We need some extra load/store functions
+void u64_store_aligned(void *dst, uint64_t value) {  // stored via a v64
+  v64_store_aligned(dst, v64_from_64(value));
+}
+void s32_store_aligned(void *dst, int32_t value) {  // converted to uint32_t
+  u32_store_aligned(dst, static_cast<uint32_t>(value));
+}
+void s64_store_aligned(void *dst, int64_t value) {  // converted to uint64_t
+  v64_store_aligned(dst, v64_from_64(static_cast<uint64_t>(value)));
+}
+
+void c_u64_store_aligned(void *dst, uint64_t value) {  // stored via a c_v64
+  c_v64_store_aligned(dst, c_v64_from_64(value));
+}
+
+void c_s32_store_aligned(void *dst, int32_t value) {  // converted to uint32_t
+  c_u32_store_aligned(dst, static_cast<uint32_t>(value));
+}
+
+void c_s64_store_aligned(void *dst, int64_t value) {  // converted to uint64_t
+  c_v64_store_aligned(dst, c_v64_from_64(static_cast<uint64_t>(value)));
+}
+
+uint64_t u64_load_aligned(const void *src) {  // loaded via a v64
+  return v64_u64(v64_load_aligned(src));
+}
+uint16_t u16_load_aligned(const void *src) {  // plain typed dereference
+  return *static_cast<const uint16_t *>(src);
+}
+uint8_t u8_load_aligned(const void *src) {  // plain typed dereference
+  return *static_cast<const uint8_t *>(src);
+}
+
+uint64_t c_u64_load_aligned(const void *src) {  // loaded via a c_v64
+  return c_v64_u64(c_v64_load_aligned(src));
+}
+uint16_t c_u16_load_aligned(const void *src) {  // plain typed dereference
+  return *static_cast<const uint16_t *>(src);
+}
+uint8_t c_u8_load_aligned(const void *src) {  // plain typed dereference
+  return *static_cast<const uint8_t *>(src);
+}
+
+// CompareSimd1Arg and CompareSimd2Args compare intrinsics taking 1 or
+// 2 arguments respectively with their corresponding C reference.
+// Ideally, the loads and stores should have gone into the template
+// parameter list, but v64 and v128 could be typedef'ed to the same
+// type (which is the case on x86) and then we can't instantiate both
+// v64 and v128, so the function return and argument types, including
+// the always differing types in the C equivalent are used instead.
+// The function arguments must be void pointers and then go through a
+// cast to avoid matching errors in the branches eliminated by the
+// typeid tests in the calling function.
+template <typename Ret, typename Arg, typename CRet, typename CArg>
+int CompareSimd1Arg(fptr store, fptr load, fptr simd, void *d, fptr c_store,
+                    fptr c_load, fptr c_simd, void *ref_d, const void *a) {
+  typedef void (*StoreFn)(void *, Ret);
+  typedef Arg (*LoadFn)(const void *);
+  typedef Ret (*SimdFn)(Arg);
+  typedef void (*CStoreFn)(void *, CRet);
+  typedef CArg (*CLoadFn)(const void *);
+  typedef CRet (*CSimdFn)(CArg);
+  // Run the reference, then the intrinsic; 0 is returned when outputs match.
+  reinterpret_cast<CStoreFn>(c_store)(
+      ref_d, reinterpret_cast<CSimdFn>(c_simd)(
+                 reinterpret_cast<CLoadFn>(c_load)(a)));
+  reinterpret_cast<StoreFn>(store)(
+      d, reinterpret_cast<SimdFn>(simd)(reinterpret_cast<LoadFn>(load)(a)));
+  return memcmp(ref_d, d, sizeof(CRet));
+}
+
+template <typename Ret, typename Arg1, typename Arg2, typename CRet,
+          typename CArg1, typename CArg2>
+int CompareSimd2Args(fptr store, fptr load1, fptr load2, fptr simd, void *d,
+                     fptr c_store, fptr c_load1, fptr c_load2, fptr c_simd,
+                     void *ref_d, const void *a, const void *b) {
+  typedef void (*StoreFn)(void *, Ret);
+  typedef Arg1 (*Load1Fn)(const void *);
+  typedef Arg2 (*Load2Fn)(const void *);
+  typedef Ret (*SimdFn)(Arg1, Arg2);
+  typedef void (*CStoreFn)(void *, CRet);
+  typedef CArg1 (*CLoad1Fn)(const void *);
+  typedef CArg2 (*CLoad2Fn)(const void *);
+  typedef CRet (*CSimdFn)(CArg1, CArg2);
+  // Run the reference, then the intrinsic; 0 is returned when outputs match.
+  reinterpret_cast<CStoreFn>(c_store)(
+      ref_d, reinterpret_cast<CSimdFn>(c_simd)(
+                 reinterpret_cast<CLoad1Fn>(c_load1)(a),
+                 reinterpret_cast<CLoad2Fn>(c_load2)(b)));
+  reinterpret_cast<StoreFn>(store)(
+      d, reinterpret_cast<SimdFn>(simd)(reinterpret_cast<Load1Fn>(load1)(a),
+                                        reinterpret_cast<Load2Fn>(load2)(b)));
+  return memcmp(ref_d, d, sizeof(CRet));
+}
+
+}  // namespace
+
+template <typename CRet, typename CArg>
+void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
+                  const char *name) {  // Checks name against its C reference.
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  fptr ref_simd;
+  fptr simd;
+  int error = 0;
+  DECLARE_ALIGNED(32, uint8_t, s[sizeof(CArg)]);
+  DECLARE_ALIGNED(32, uint8_t, d[sizeof(CRet)]);
+  DECLARE_ALIGNED(32, uint8_t, ref_d[sizeof(CRet)]);
+  memset(ref_d, 0, sizeof(ref_d));
+  memset(d, 0, sizeof(d));
+  // Resolve name to the C reference (ref_simd) and intrinsic (simd) pointers.
+  Map(name, &ref_simd, &simd);
+  if (simd == NULL || ref_simd == NULL) {
+    FAIL() << "Internal error: Unknown intrinsic function " << name;
+  }
+  for (unsigned int count = 0;
+       count < iterations && !error && !testing::Test::HasFailure(); count++) {
+    for (unsigned int c = 0; c < sizeof(CArg); c++) s[c] = rnd.Rand8();
+
+    if (maskwidth) {
+      SetMask(s, sizeof(CArg), mask, maskwidth);
+    }
+    // Pick typed load/store helpers for this (CRet, CArg) pair and compare.
+    if (typeid(CRet) == typeid(c_v64) && typeid(CArg) == typeid(c_v64)) {
+      // V64_V64 (tags below read <return type>_<argument type>)
+      error = CompareSimd1Arg<v64, v64, CRet, CArg>(
+          reinterpret_cast<fptr>(v64_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v64_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v64) &&
+               typeid(CArg) == typeid(uint8_t)) {
+      // V64_U8
+      error = CompareSimd1Arg<v64, uint8_t, CRet, CArg>(
+          reinterpret_cast<fptr>(v64_store_aligned),
+          reinterpret_cast<fptr>(u8_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v64_store_aligned),
+          reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v64) &&
+               typeid(CArg) == typeid(uint16_t)) {
+      // V64_U16
+      error = CompareSimd1Arg<v64, uint16_t, CRet, CArg>(
+          reinterpret_cast<fptr>(v64_store_aligned),
+          reinterpret_cast<fptr>(u16_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v64_store_aligned),
+          reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v64) &&
+               typeid(CArg) == typeid(uint32_t)) {
+      // V64_U32
+      error = CompareSimd1Arg<v64, uint32_t, CRet, CArg>(
+          reinterpret_cast<fptr>(v64_store_aligned),
+          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v64_store_aligned),
+          reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(uint64_t) &&
+               typeid(CArg) == typeid(c_v64)) {
+      // U64_V64
+      error = CompareSimd1Arg<uint64_t, v64, CRet, CArg>(
+          reinterpret_cast<fptr>(u64_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u64_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(int64_t) &&
+               typeid(CArg) == typeid(c_v64)) {
+      // S64_V64
+      error = CompareSimd1Arg<int64_t, v64, CRet, CArg>(
+          reinterpret_cast<fptr>(s64_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_s64_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(uint32_t) &&
+               typeid(CArg) == typeid(c_v64)) {
+      // U32_V64
+      error = CompareSimd1Arg<uint32_t, v64, CRet, CArg>(
+          reinterpret_cast<fptr>(u32_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u32_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(int32_t) &&
+               typeid(CArg) == typeid(c_v64)) {
+      // S32_V64
+      error = CompareSimd1Arg<int32_t, v64, CRet, CArg>(
+          reinterpret_cast<fptr>(s32_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_s32_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(uint32_t) &&
+               typeid(CArg) == typeid(c_v128)) {
+      // U32_V128
+      error = CompareSimd1Arg<uint32_t, v128, CRet, CArg>(
+          reinterpret_cast<fptr>(u32_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u32_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(uint64_t) &&
+               typeid(CArg) == typeid(c_v128)) {
+      // U64_V128
+      error = CompareSimd1Arg<uint64_t, v128, CRet, CArg>(
+          reinterpret_cast<fptr>(u64_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u64_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v64) &&
+               typeid(CArg) == typeid(c_v128)) {
+      // V64_V128
+      error = CompareSimd1Arg<v64, v128, CRet, CArg>(
+          reinterpret_cast<fptr>(v64_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v64_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v128) &&
+               typeid(CArg) == typeid(c_v128)) {
+      // V128_V128
+      error = CompareSimd1Arg<v128, v128, CRet, CArg>(
+          reinterpret_cast<fptr>(v128_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v128_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v128) &&
+               typeid(CArg) == typeid(c_v64)) {
+      // V128_V64
+      error = CompareSimd1Arg<v128, v64, CRet, CArg>(
+          reinterpret_cast<fptr>(v128_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v128_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v128) &&
+               typeid(CArg) == typeid(uint8_t)) {
+      // V128_U8
+      error = CompareSimd1Arg<v128, uint8_t, CRet, CArg>(
+          reinterpret_cast<fptr>(v128_store_aligned),
+          reinterpret_cast<fptr>(u8_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v128_store_aligned),
+          reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v128) &&
+               typeid(CArg) == typeid(uint16_t)) {
+      // V128_U16
+      error = CompareSimd1Arg<v128, uint16_t, CRet, CArg>(
+          reinterpret_cast<fptr>(v128_store_aligned),
+          reinterpret_cast<fptr>(u16_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v128_store_aligned),
+          reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v128) &&
+               typeid(CArg) == typeid(uint32_t)) {
+      // V128_U32
+      error = CompareSimd1Arg<v128, uint32_t, CRet, CArg>(
+          reinterpret_cast<fptr>(v128_store_aligned),
+          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v128_store_aligned),
+          reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
+    } else {
+      FAIL() << "Internal error: Unknown intrinsic function "
+             << typeid(CRet).name() << " " << name << "(" << typeid(CArg).name()
+             << ")";
+    }
+  }
+  // The loop stops at the first mismatch, so s, d and ref_d show that case.
+  EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
+                      << Print(s, sizeof(s)) << ") -> " << Print(d, sizeof(d))
+                      << " (simd), " << Print(ref_d, sizeof(ref_d)) << " (ref)";
+}
+
+template <typename CRet, typename CArg1, typename CArg2>
+void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
+                   const char *name) {  // Checks name against its C reference.
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  fptr ref_simd;
+  fptr simd;
+  int error = 0;
+  DECLARE_ALIGNED(32, uint8_t, s1[sizeof(CArg1)]);
+  DECLARE_ALIGNED(32, uint8_t, s2[sizeof(CArg2)]);
+  DECLARE_ALIGNED(32, uint8_t, d[sizeof(CRet)]);
+  DECLARE_ALIGNED(32, uint8_t, ref_d[sizeof(CRet)]);
+  memset(ref_d, 0, sizeof(ref_d));
+  memset(d, 0, sizeof(d));
+  // Resolve name to the C reference (ref_simd) and intrinsic (simd) pointers.
+  Map(name, &ref_simd, &simd);
+  if (simd == NULL || ref_simd == NULL) {
+    FAIL() << "Internal error: Unknown intrinsic function " << name;
+  }
+
+  for (unsigned int count = 0;
+       count < iterations && !error && !testing::Test::HasFailure(); count++) {
+    for (unsigned int c = 0; c < sizeof(CArg1); c++) s1[c] = rnd.Rand8();
+
+    for (unsigned int c = 0; c < sizeof(CArg2); c++) s2[c] = rnd.Rand8();
+    // Only the second operand is masked.
+    if (maskwidth) SetMask(s2, sizeof(CArg2), mask, maskwidth);
+    // Pick typed load/store helpers for this signature and compare.
+    if (typeid(CRet) == typeid(c_v64) && typeid(CArg1) == typeid(c_v64) &&
+        typeid(CArg2) == typeid(c_v64)) {
+      // V64_V64V64 (tags below read <return type>_<arg1 type><arg2 type>)
+      error = CompareSimd2Args<v64, v64, v64, CRet, CArg1, CArg2>(
+          reinterpret_cast<fptr>(v64_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v64_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(c_v64) &&
+               typeid(CArg1) == typeid(uint32_t) &&
+               typeid(CArg2) == typeid(uint32_t)) {
+      // V64_U32U32
+      error = CompareSimd2Args<v64, uint32_t, uint32_t, CRet, CArg1, CArg2>(
+          reinterpret_cast<fptr>(v64_store_aligned),
+          reinterpret_cast<fptr>(u32_load_aligned),
+          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v64_store_aligned),
+          reinterpret_cast<fptr>(c_u32_load_aligned),
+          reinterpret_cast<fptr>(c_u32_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(uint32_t) &&
+               typeid(CArg1) == typeid(c_v64) &&
+               typeid(CArg2) == typeid(c_v64)) {
+      // U32_V64V64
+      error = CompareSimd2Args<uint32_t, v64, v64, CRet, CArg1, CArg2>(
+          reinterpret_cast<fptr>(u32_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u32_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(int64_t) &&
+               typeid(CArg1) == typeid(c_v64) &&
+               typeid(CArg2) == typeid(c_v64)) {
+      // S64_V64V64
+      error = CompareSimd2Args<int64_t, v64, v64, CRet, CArg1, CArg2>(
+          reinterpret_cast<fptr>(s64_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_s64_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(c_v64) &&
+               typeid(CArg1) == typeid(c_v64) &&
+               typeid(CArg2) == typeid(uint32_t)) {
+      // V64_V64U32
+      error = CompareSimd2Args<v64, v64, uint32_t, CRet, CArg1, CArg2>(
+          reinterpret_cast<fptr>(v64_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned),
+          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v64_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned),
+          reinterpret_cast<fptr>(c_u32_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(c_v128) &&
+               typeid(CArg1) == typeid(c_v128) &&
+               typeid(CArg2) == typeid(c_v128)) {
+      // V128_V128V128
+      error = CompareSimd2Args<v128, v128, v128, CRet, CArg1, CArg2>(
+          reinterpret_cast<fptr>(v128_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v128_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(uint32_t) &&
+               typeid(CArg1) == typeid(c_v128) &&
+               typeid(CArg2) == typeid(c_v128)) {
+      // U32_V128V128
+      error = CompareSimd2Args<uint32_t, v128, v128, CRet, CArg1, CArg2>(
+          reinterpret_cast<fptr>(u32_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u32_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(int64_t) &&
+               typeid(CArg1) == typeid(c_v128) &&
+               typeid(CArg2) == typeid(c_v128)) {
+      // S64_V128V128
+      error = CompareSimd2Args<int64_t, v128, v128, CRet, CArg1, CArg2>(
+          reinterpret_cast<fptr>(s64_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_s64_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(c_v128) &&
+               typeid(CArg1) == typeid(uint64_t) &&
+               typeid(CArg2) == typeid(uint64_t)) {
+      // V128_U64U64
+      error = CompareSimd2Args<v128, uint64_t, uint64_t, CRet, CArg1, CArg2>(
+          reinterpret_cast<fptr>(v128_store_aligned),
+          reinterpret_cast<fptr>(u64_load_aligned),
+          reinterpret_cast<fptr>(u64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v128_store_aligned),
+          reinterpret_cast<fptr>(c_u64_load_aligned),
+          reinterpret_cast<fptr>(c_u64_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(c_v128) &&
+               typeid(CArg1) == typeid(c_v64) &&
+               typeid(CArg2) == typeid(c_v64)) {
+      // V128_V64V64
+      error = CompareSimd2Args<v128, v64, v64, CRet, CArg1, CArg2>(
+          reinterpret_cast<fptr>(v128_store_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned),
+          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v128_store_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned),
+          reinterpret_cast<fptr>(c_v64_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(c_v128) &&
+               typeid(CArg1) == typeid(c_v128) &&
+               typeid(CArg2) == typeid(uint32_t)) {
+      // V128_V128U32
+      error = CompareSimd2Args<v128, v128, uint32_t, CRet, CArg1, CArg2>(
+          reinterpret_cast<fptr>(v128_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned),
+          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v128_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(c_u32_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else {
+      FAIL() << "Internal error: Unknown intrinsic function "
+             << typeid(CRet).name() << " " << name << "("
+             << typeid(CArg1).name() << ", " << typeid(CArg2).name() << ")";
+    }
+  }
+  // The loop stops at the first mismatch, so the buffers show that case.
+  EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
+                      << Print(s1, sizeof(s1)) << ", " << Print(s2, sizeof(s2))
+                      << ") -> " << Print(d, sizeof(d)) << " (simd), "
+                      << Print(ref_d, sizeof(ref_d)) << " (ref)";
+}
+
+// Explicit instantiations so these templates can be called from other files.
+template void TestSimd1Arg<c_v64, uint8_t>(uint32_t, uint32_t, uint32_t,
+                                           const char *);
+template void TestSimd1Arg<c_v64, uint16_t>(uint32_t, uint32_t, uint32_t,
+                                            const char *);
+template void TestSimd1Arg<c_v64, uint32_t>(uint32_t, uint32_t, uint32_t,
+                                            const char *);
+template void TestSimd1Arg<c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
+                                         const char *);
+template void TestSimd1Arg<uint32_t, c_v64>(uint32_t, uint32_t, uint32_t,
+                                            const char *);
+template void TestSimd1Arg<int32_t, c_v64>(uint32_t, uint32_t, uint32_t,
+                                           const char *);
+template void TestSimd1Arg<uint64_t, c_v64>(uint32_t, uint32_t, uint32_t,
+                                            const char *);
+template void TestSimd1Arg<int64_t, c_v64>(uint32_t, uint32_t, uint32_t,
+                                           const char *);
+template void TestSimd2Args<c_v64, uint32_t, uint32_t>(uint32_t, uint32_t,
+                                                       uint32_t, const char *);
+template void TestSimd2Args<c_v64, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
+                                                 const char *);
+template void TestSimd2Args<c_v64, c_v64, uint32_t>(uint32_t, uint32_t,
+                                                    uint32_t, const char *);
+template void TestSimd2Args<int64_t, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
+                                                   const char *);
+template void TestSimd2Args<uint32_t, c_v64, c_v64>(uint32_t, uint32_t,
+                                                    uint32_t, const char *);
+template void TestSimd1Arg<c_v128, c_v128>(uint32_t, uint32_t, uint32_t,
+                                           const char *);
+template void TestSimd1Arg<c_v128, uint8_t>(uint32_t, uint32_t, uint32_t,
+                                            const char *);
+template void TestSimd1Arg<c_v128, uint16_t>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<c_v128, uint32_t>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<c_v128, c_v64>(uint32_t, uint32_t, uint32_t,
+                                          const char *);
+template void TestSimd1Arg<uint32_t, c_v128>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<uint64_t, c_v128>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<c_v64, c_v128>(uint32_t, uint32_t, uint32_t,
+                                          const char *);
+template void TestSimd2Args<c_v128, c_v128, c_v128>(uint32_t, uint32_t,
+                                                    uint32_t, const char *);
+template void TestSimd2Args<c_v128, c_v128, uint32_t>(uint32_t, uint32_t,
+                                                      uint32_t, const char *);
+template void TestSimd2Args<c_v128, uint64_t, uint64_t>(uint32_t, uint32_t,
+                                                        uint32_t, const char *);
+template void TestSimd2Args<c_v128, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
+                                                  const char *);
+template void TestSimd2Args<int64_t, c_v128, c_v128>(uint32_t, uint32_t,
+                                                     uint32_t, const char *);
+template void TestSimd2Args<uint32_t, c_v128, c_v128>(uint32_t, uint32_t,
+                                                      uint32_t, const char *);
+
+}  // namespace SIMD_NAMESPACE
diff --git a/third_party/aom/test/simd_cmp_neon.cc b/third_party/aom/test/simd_cmp_neon.cc
new file mode 100644
index 000000000..c8004cc8b
--- /dev/null
+++ b/third_party/aom/test/simd_cmp_neon.cc
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#if defined(__OPTIMIZE__) && __OPTIMIZE__  // Compile only when optimizing.
+#define ARCH NEON
+#define ARCH_POSTFIX(name) name##_neon  // Appends _neon to name.
+#define SIMD_NAMESPACE simd_test_neon
+#include "./simd_cmp_impl.h"
+#endif  // defined(__OPTIMIZE__) && __OPTIMIZE__
diff --git a/third_party/aom/test/simd_cmp_sse2.cc b/third_party/aom/test/simd_cmp_sse2.cc
new file mode 100644
index 000000000..67cb43c10
--- /dev/null
+++ b/third_party/aom/test/simd_cmp_sse2.cc
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \
+    (!defined(__GNUC__) && !defined(_DEBUG))  // Skip unoptimized/debug builds.
+#define ARCH SSE2
+#define ARCH_POSTFIX(name) name##_sse2  // Appends _sse2 to name.
+#define SIMD_NAMESPACE simd_test_sse2
+#include "./simd_cmp_impl.h"
+#endif  // optimizing, or non-GNUC build without _DEBUG
diff --git a/third_party/aom/test/simd_cmp_sse4.cc b/third_party/aom/test/simd_cmp_sse4.cc
new file mode 100644
index 000000000..ba826d898
--- /dev/null
+++ b/third_party/aom/test/simd_cmp_sse4.cc
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \
+    (!defined(__GNUC__) && !defined(_DEBUG))  // Skip unoptimized/debug builds.
+#define ARCH SSE4_1
+#define ARCH_POSTFIX(name) name##_sse4_1  // Appends _sse4_1 to name.
+#define SIMD_NAMESPACE simd_test_sse4_1
+#include "./simd_cmp_impl.h"
+#endif  // optimizing, or non-GNUC build without _DEBUG
diff --git a/third_party/aom/test/simd_cmp_ssse3.cc b/third_party/aom/test/simd_cmp_ssse3.cc
new file mode 100644
index 000000000..a6c7000fd
--- /dev/null
+++ b/third_party/aom/test/simd_cmp_ssse3.cc
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \
+    (!defined(__GNUC__) && !defined(_DEBUG))  // Skip unoptimized/debug builds.
+#define ARCH SSSE3
+#define ARCH_POSTFIX(name) name##_ssse3  // Appends _ssse3 to name.
+#define SIMD_NAMESPACE simd_test_ssse3
+#include "./simd_cmp_impl.h"
+#endif  // optimizing, or non-GNUC build without _DEBUG
diff --git a/third_party/aom/test/simd_impl.h b/third_party/aom/test/simd_impl.h
new file mode 100644
index 000000000..5cfda675d
--- /dev/null
+++ b/third_party/aom/test/simd_impl.h
@@ -0,0 +1,594 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#define SIMD_CHECK 1
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "aom_dsp/aom_simd_inline.h"
+#include "aom_dsp/simd/v128_intrinsics_c.h"
+
+namespace SIMD_NAMESPACE {
+
+template <typename param_signature>  // Fixture base for intrinsic tests.
+class TestIntrinsic : public ::testing::TestWithParam<param_signature> {
+ public:
+  virtual ~TestIntrinsic() {}
+  virtual void SetUp() {  // Unpack (mask, maskwidth, name).
+    mask = std::tr1::get<0>(this->GetParam());
+    maskwidth = std::tr1::get<1>(this->GetParam());
+    name = std::tr1::get<2>(this->GetParam());
+  }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  uint32_t mask, maskwidth;  // Passed on to the TestSimd* helpers.
+  const char *name;          // Intrinsic name string from SIMD_TUPLE.
+};
+
+// One fixture typedef per signature, named <return type>_<argument types>.
+#define TYPEDEF_SIMD(name)                                                  \
+  typedef TestIntrinsic<std::tr1::tuple<uint32_t, uint32_t, const char *> > \
+      ARCH_POSTFIX(name)
+
+TYPEDEF_SIMD(V64_U8);
+TYPEDEF_SIMD(V64_U16);
+TYPEDEF_SIMD(V64_U32);
+TYPEDEF_SIMD(V64_V64);
+TYPEDEF_SIMD(U32_V64);
+TYPEDEF_SIMD(S32_V64);
+TYPEDEF_SIMD(U64_V64);
+TYPEDEF_SIMD(S64_V64);
+TYPEDEF_SIMD(V64_U32U32);
+TYPEDEF_SIMD(V64_V64V64);
+TYPEDEF_SIMD(S64_V64V64);
+TYPEDEF_SIMD(V64_V64U32);
+TYPEDEF_SIMD(U32_V64V64);
+TYPEDEF_SIMD(V128_V64);
+TYPEDEF_SIMD(V128_V128);
+TYPEDEF_SIMD(U32_V128);
+TYPEDEF_SIMD(U64_V128);
+TYPEDEF_SIMD(V64_V128);
+TYPEDEF_SIMD(V128_U8);
+TYPEDEF_SIMD(V128_U16);
+TYPEDEF_SIMD(V128_U32);
+TYPEDEF_SIMD(V128_U64U64);
+TYPEDEF_SIMD(V128_V64V64);
+TYPEDEF_SIMD(V128_V128V128);
+TYPEDEF_SIMD(S64_V128V128);
+TYPEDEF_SIMD(V128_V128U32);
+TYPEDEF_SIMD(U32_V128V128);
+
+// Google Test allows up to 50 tests per case, so split the largest
+typedef ARCH_POSTFIX(V64_V64) ARCH_POSTFIX(V64_V64_Part2);
+typedef ARCH_POSTFIX(V64_V64V64) ARCH_POSTFIX(V64_V64V64_Part2);
+typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part2);
+typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part3);
+typedef ARCH_POSTFIX(V128_V128V128) ARCH_POSTFIX(V128_V128V128_Part2);
+
+// These functions are machine tuned and located elsewhere.
+template <typename c_ret, typename c_arg>
+void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
+                  const char *name);
+
+template <typename c_ret, typename c_arg1, typename c_arg2>
+void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
+                   const char *name);
+
+const int kIterations = 65536;  // Iterations passed to each TestSimd* call.
+
+// TEST_P pastes and stringifies its arguments unexpanded, so this extra macro
+// layer is needed to expand ARCH_POSTFIX(...) before TEST_P sees the name.
+#define MY_TEST_P(name, test) TEST_P(name, test)
+
+MY_TEST_P(ARCH_POSTFIX(V64_U8), TestIntrinsics) {
+  TestSimd1Arg<c_v64, uint8_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V64_U16), TestIntrinsics) {
+  TestSimd1Arg<c_v64, uint16_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V64_U32), TestIntrinsics) {
+  TestSimd1Arg<c_v64, uint32_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V64_V64), TestIntrinsics) {
+  TestSimd1Arg<c_v64, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(U64_V64), TestIntrinsics) {
+  TestSimd1Arg<uint64_t, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(S64_V64), TestIntrinsics) {
+  TestSimd1Arg<int64_t, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(U32_V64), TestIntrinsics) {
+  TestSimd1Arg<uint32_t, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(S32_V64), TestIntrinsics) {
+  TestSimd1Arg<int32_t, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V64_U32U32), TestIntrinsics) {
+  TestSimd2Args<c_v64, uint32_t, uint32_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V64_V64V64), TestIntrinsics) {
+  TestSimd2Args<c_v64, c_v64, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(S64_V64V64), TestIntrinsics) {
+  TestSimd2Args<int64_t, c_v64, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(U32_V64V64), TestIntrinsics) {
+  TestSimd2Args<uint32_t, c_v64, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V64_V64U32), TestIntrinsics) {
+  TestSimd2Args<c_v64, c_v64, uint32_t>(kIterations, mask, maskwidth, name);
+}
+
+// Split-off parts (Google Test allows up to 50 tests per case); same bodies.
+MY_TEST_P(ARCH_POSTFIX(V64_V64_Part2), TestIntrinsics) {
+  TestSimd1Arg<c_v64, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V64_V64V64_Part2), TestIntrinsics) {
+  TestSimd2Args<c_v64, c_v64, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(U32_V128), TestIntrinsics) {
+  TestSimd1Arg<uint32_t, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(U64_V128), TestIntrinsics) {
+  TestSimd1Arg<uint64_t, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V64_V128), TestIntrinsics) {
+  TestSimd1Arg<c_v64, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_V128), TestIntrinsics) {
+  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_U8), TestIntrinsics) {
+  TestSimd1Arg<c_v128, uint8_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_U16), TestIntrinsics) {
+  TestSimd1Arg<c_v128, uint16_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_U32), TestIntrinsics) {
+  TestSimd1Arg<c_v128, uint32_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_V64), TestIntrinsics) {
+  TestSimd1Arg<c_v128, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_V128V128), TestIntrinsics) {
+  TestSimd2Args<c_v128, c_v128, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(U32_V128V128), TestIntrinsics) {
+  TestSimd2Args<uint32_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(S64_V128V128), TestIntrinsics) {
+  TestSimd2Args<int64_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_U64U64), TestIntrinsics) {
+  TestSimd2Args<c_v128, uint64_t, uint64_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_V64V64), TestIntrinsics) {
+  TestSimd2Args<c_v128, c_v64, c_v64>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_V128U32), TestIntrinsics) {
+  TestSimd2Args<c_v128, c_v128, uint32_t>(kIterations, mask, maskwidth, name);
+}
+// Split-off parts of the V128 cases; bodies match the unsplit fixtures.
+MY_TEST_P(ARCH_POSTFIX(V128_V128V128_Part2), TestIntrinsics) {
+  TestSimd2Args<c_v128, c_v128, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_V128_Part2), TestIntrinsics) {
+  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V128_V128_Part3), TestIntrinsics) {
+  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+// INSTANTIATE_TEST_CASE_P stringifies its name arguments unexpanded, so this
+// extra macro layer expands ARCH and ARCH_POSTFIX(...) first.
+#define INSTANTIATE(name, type, ...) \
+  INSTANTIATE_TEST_CASE_P(name, type, ::testing::Values(__VA_ARGS__))
+// Builds one (mask, maskwidth, "name") parameter; #name stringifies the name.
+#define SIMD_TUPLE(name, mask, maskwidth) \
+  std::tr1::make_tuple(mask, maskwidth, static_cast<const char *>(#name))
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V64V64), SIMD_TUPLE(v64_sad_u8, 0U, 0U),
+            SIMD_TUPLE(v64_ssd_u8, 0U, 0U));
+
+INSTANTIATE(
+    ARCH, ARCH_POSTFIX(V64_V64V64), SIMD_TUPLE(v64_add_8, 0U, 0U),
+    SIMD_TUPLE(v64_add_16, 0U, 0U), SIMD_TUPLE(v64_sadd_s16, 0U, 0U),
+    SIMD_TUPLE(v64_add_32, 0U, 0U), SIMD_TUPLE(v64_sub_8, 0U, 0U),
+    SIMD_TUPLE(v64_ssub_u8, 0U, 0U), SIMD_TUPLE(v64_ssub_s8, 0U, 0U),
+    SIMD_TUPLE(v64_sub_16, 0U, 0U), SIMD_TUPLE(v64_ssub_s16, 0U, 0U),
+    SIMD_TUPLE(v64_ssub_u16, 0U, 0U), SIMD_TUPLE(v64_sub_32, 0U, 0U),
+    SIMD_TUPLE(v64_ziplo_8, 0U, 0U), SIMD_TUPLE(v64_ziphi_8, 0U, 0U),
+    SIMD_TUPLE(v64_ziplo_16, 0U, 0U), SIMD_TUPLE(v64_ziphi_16, 0U, 0U),
+    SIMD_TUPLE(v64_ziplo_32, 0U, 0U), SIMD_TUPLE(v64_ziphi_32, 0U, 0U),
+    SIMD_TUPLE(v64_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v64_pack_s16_u8, 0U, 0U),
+    SIMD_TUPLE(v64_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v64_unziphi_8, 0U, 0U),
+    SIMD_TUPLE(v64_unziplo_8, 0U, 0U), SIMD_TUPLE(v64_unziphi_16, 0U, 0U),
+    SIMD_TUPLE(v64_unziplo_16, 0U, 0U), SIMD_TUPLE(v64_or, 0U, 0U),
+    SIMD_TUPLE(v64_xor, 0U, 0U), SIMD_TUPLE(v64_and, 0U, 0U),
+    SIMD_TUPLE(v64_andn, 0U, 0U), SIMD_TUPLE(v64_mullo_s16, 0U, 0U),
+    SIMD_TUPLE(v64_mulhi_s16, 0U, 0U), SIMD_TUPLE(v64_mullo_s32, 0U, 0U),
+    SIMD_TUPLE(v64_madd_s16, 0U, 0U), SIMD_TUPLE(v64_madd_us8, 0U, 0U),
+    SIMD_TUPLE(v64_avg_u8, 0U, 0U), SIMD_TUPLE(v64_rdavg_u8, 0U, 0U),
+    SIMD_TUPLE(v64_avg_u16, 0U, 0U), SIMD_TUPLE(v64_min_u8, 0U, 0U),
+    SIMD_TUPLE(v64_max_u8, 0U, 0U), SIMD_TUPLE(v64_min_s8, 0U, 0U),
+    SIMD_TUPLE(v64_max_s8, 0U, 0U), SIMD_TUPLE(v64_min_s16, 0U, 0U),
+    SIMD_TUPLE(v64_max_s16, 0U, 0U), SIMD_TUPLE(v64_cmpgt_s8, 0U, 0U),
+    SIMD_TUPLE(v64_cmplt_s8, 0U, 0U), SIMD_TUPLE(v64_cmpeq_8, 0U, 0U),
+    SIMD_TUPLE(v64_cmpgt_s16, 0U, 0U), SIMD_TUPLE(v64_cmplt_s16, 0U, 0U),
+    SIMD_TUPLE(v64_cmpeq_16, 0U, 0U));
+
+INSTANTIATE(
+    ARCH, ARCH_POSTFIX(V64_V64V64_Part2), SIMD_TUPLE(v64_shuffle_8, 7U, 8U),
+    SIMD_TUPLE(imm_v64_align<1>, 0U, 0U), SIMD_TUPLE(imm_v64_align<2>, 0U, 0U),
+    SIMD_TUPLE(imm_v64_align<3>, 0U, 0U), SIMD_TUPLE(imm_v64_align<4>, 0U, 0U),
+    SIMD_TUPLE(imm_v64_align<5>, 0U, 0U), SIMD_TUPLE(imm_v64_align<6>, 0U, 0U),
+    SIMD_TUPLE(imm_v64_align<7>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64), SIMD_TUPLE(v64_abs_s8, 0U, 0U),
+            SIMD_TUPLE(v64_abs_s16, 0U, 0U),
+            SIMD_TUPLE(v64_unpacklo_u8_s16, 0U, 0U),
+            SIMD_TUPLE(v64_unpackhi_u8_s16, 0U, 0U),
+            SIMD_TUPLE(v64_unpacklo_s8_s16, 0U, 0U),
+            SIMD_TUPLE(v64_unpackhi_s8_s16, 0U, 0U),
+            SIMD_TUPLE(v64_unpacklo_u16_s32, 0U, 0U),
+            SIMD_TUPLE(v64_unpacklo_s16_s32, 0U, 0U),
+            SIMD_TUPLE(v64_unpackhi_u16_s32, 0U, 0U),
+            SIMD_TUPLE(v64_unpackhi_s16_s32, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_byte<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_byte<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_byte<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_byte<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_byte<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_byte<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_byte<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_byte<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_byte<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_byte<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_byte<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_byte<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_byte<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_byte<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_8<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_8<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_8<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_8<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_8<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_8<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_8<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u8<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u8<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u8<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u8<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u8<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u8<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u8<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s8<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s8<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s8<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s8<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s8<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s8<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s8<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_16<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_16<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_16<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_16<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_16<8>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64_Part2),
+            SIMD_TUPLE(imm_v64_shl_n_16<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_16<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_16<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u16<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u16<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u16<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u16<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u16<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u16<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u16<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u16<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s16<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s16<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s16<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s16<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s16<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s16<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s16<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s16<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_32<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_32<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_32<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_32<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_32<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_32<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_32<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_32<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u32<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u32<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u32<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u32<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u32<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u32<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u32<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_u32<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s32<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s32<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s32<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s32<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s32<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s32<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s32<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shr_n_s32<28>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64U32), SIMD_TUPLE(v64_shl_8, 7U, 32U),
+            SIMD_TUPLE(v64_shr_u8, 7U, 32U), SIMD_TUPLE(v64_shr_s8, 7U, 32U),
+            SIMD_TUPLE(v64_shl_16, 15U, 32U), SIMD_TUPLE(v64_shr_u16, 15U, 32U),
+            SIMD_TUPLE(v64_shr_s16, 15U, 32U), SIMD_TUPLE(v64_shl_32, 31U, 32U),
+            SIMD_TUPLE(v64_shr_u32, 31U, 32U),
+            SIMD_TUPLE(v64_shr_s32, 31U, 32U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V64), SIMD_TUPLE(v64_hadd_u8, 0U, 0U),
+            SIMD_TUPLE(v64_u64, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V64), SIMD_TUPLE(v64_hadd_s16, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V64), SIMD_TUPLE(v64_low_u32, 0U, 0U),
+            SIMD_TUPLE(v64_high_u32, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(S32_V64), SIMD_TUPLE(v64_low_s32, 0U, 0U),
+            SIMD_TUPLE(v64_high_s32, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V64V64), SIMD_TUPLE(v64_dotp_s16, 0U, 0U),
+            SIMD_TUPLE(v64_dotp_su8, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U8), SIMD_TUPLE(v64_dup_8, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U16), SIMD_TUPLE(v64_dup_16, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U32), SIMD_TUPLE(v64_dup_32, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U32U32), SIMD_TUPLE(v64_from_32, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V128V128), SIMD_TUPLE(v128_sad_u8, 0U, 0U),
+            SIMD_TUPLE(v128_ssd_u8, 0U, 0U));
+
+INSTANTIATE(
+    ARCH, ARCH_POSTFIX(V128_V128V128), SIMD_TUPLE(v128_add_8, 0U, 0U),
+    SIMD_TUPLE(v128_add_16, 0U, 0U), SIMD_TUPLE(v128_sadd_s16, 0U, 0U),
+    SIMD_TUPLE(v128_add_32, 0U, 0U), SIMD_TUPLE(v128_sub_8, 0U, 0U),
+    SIMD_TUPLE(v128_ssub_u8, 0U, 0U), SIMD_TUPLE(v128_ssub_s8, 0U, 0U),
+    SIMD_TUPLE(v128_sub_16, 0U, 0U), SIMD_TUPLE(v128_ssub_s16, 0U, 0U),
+    SIMD_TUPLE(v128_ssub_u16, 0U, 0U), SIMD_TUPLE(v128_sub_32, 0U, 0U),
+    SIMD_TUPLE(v128_ziplo_8, 0U, 0U), SIMD_TUPLE(v128_ziphi_8, 0U, 0U),
+    SIMD_TUPLE(v128_ziplo_16, 0U, 0U), SIMD_TUPLE(v128_ziphi_16, 0U, 0U),
+    SIMD_TUPLE(v128_ziplo_32, 0U, 0U), SIMD_TUPLE(v128_ziphi_32, 0U, 0U),
+    SIMD_TUPLE(v128_ziplo_64, 0U, 0U), SIMD_TUPLE(v128_ziphi_64, 0U, 0U),
+    SIMD_TUPLE(v128_unziphi_8, 0U, 0U), SIMD_TUPLE(v128_unziplo_8, 0U, 0U),
+    SIMD_TUPLE(v128_unziphi_16, 0U, 0U), SIMD_TUPLE(v128_unziplo_16, 0U, 0U),
+    SIMD_TUPLE(v128_unziphi_32, 0U, 0U), SIMD_TUPLE(v128_unziplo_32, 0U, 0U),
+    SIMD_TUPLE(v128_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v128_pack_s16_u8, 0U, 0U),
+    SIMD_TUPLE(v128_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v128_or, 0U, 0U),
+    SIMD_TUPLE(v128_xor, 0U, 0U), SIMD_TUPLE(v128_and, 0U, 0U),
+    SIMD_TUPLE(v128_andn, 0U, 0U), SIMD_TUPLE(v128_mullo_s16, 0U, 0U),
+    SIMD_TUPLE(v128_mulhi_s16, 0U, 0U), SIMD_TUPLE(v128_mullo_s32, 0U, 0U),
+    SIMD_TUPLE(v128_madd_s16, 0U, 0U), SIMD_TUPLE(v128_madd_us8, 0U, 0U),
+    SIMD_TUPLE(v128_avg_u8, 0U, 0U), SIMD_TUPLE(v128_rdavg_u8, 0U, 0U),
+    SIMD_TUPLE(v128_avg_u16, 0U, 0U), SIMD_TUPLE(v128_min_u8, 0U, 0U),
+    SIMD_TUPLE(v128_max_u8, 0U, 0U), SIMD_TUPLE(v128_min_s8, 0U, 0U),
+    SIMD_TUPLE(v128_max_s8, 0U, 0U), SIMD_TUPLE(v128_min_s16, 0U, 0U),
+    SIMD_TUPLE(v128_max_s16, 0U, 0U), SIMD_TUPLE(v128_cmpgt_s8, 0U, 0U),
+    SIMD_TUPLE(v128_cmplt_s8, 0U, 0U), SIMD_TUPLE(v128_cmpeq_8, 0U, 0U),
+    SIMD_TUPLE(v128_cmpgt_s16, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128V128_Part2),
+            SIMD_TUPLE(v128_cmpeq_16, 0U, 0U),
+            SIMD_TUPLE(v128_cmplt_s16, 0U, 0U),
+            SIMD_TUPLE(v128_shuffle_8, 15U, 8U),
+            SIMD_TUPLE(imm_v128_align<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<9>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<11>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<13>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_align<15>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128), SIMD_TUPLE(v128_abs_s8, 0U, 0U),
+            SIMD_TUPLE(v128_abs_s16, 0U, 0U), SIMD_TUPLE(v128_padd_s16, 0U, 0U),
+            SIMD_TUPLE(v128_unpacklo_u8_s16, 0U, 0U),
+            SIMD_TUPLE(v128_unpacklo_s8_s16, 0U, 0U),
+            SIMD_TUPLE(v128_unpacklo_u16_s32, 0U, 0U),
+            SIMD_TUPLE(v128_unpacklo_s16_s32, 0U, 0U),
+            SIMD_TUPLE(v128_unpackhi_u8_s16, 0U, 0U),
+            SIMD_TUPLE(v128_unpackhi_s8_s16, 0U, 0U),
+            SIMD_TUPLE(v128_unpackhi_u16_s32, 0U, 0U),
+            SIMD_TUPLE(v128_unpackhi_s16_s32, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<9>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<11>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<13>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_byte<15>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<9>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<11>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<13>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_byte<15>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_8<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_8<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_8<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_8<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_8<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_8<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_8<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u8<1>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part2),
+            SIMD_TUPLE(imm_v128_shr_n_u8<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u8<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u8<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u8<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u8<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u8<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s8<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s8<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s8<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s8<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s8<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s8<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s8<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_16<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_16<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_16<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_16<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_16<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_16<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_16<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_16<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u16<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u16<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u16<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u16<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u16<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u16<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u16<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u16<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s16<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s16<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s16<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s16<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s16<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s16<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s16<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s16<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_32<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_32<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_32<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_32<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_32<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_32<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_32<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_32<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u32<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u32<4>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part3),
+            SIMD_TUPLE(imm_v128_shr_n_u32<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u32<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u32<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u32<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u32<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u32<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s32<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s32<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s32<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s32<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s32<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s32<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s32<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s32<28>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V64V64), SIMD_TUPLE(v128_from_v64, 0U, 0U),
+            SIMD_TUPLE(v128_zip_8, 0U, 0U), SIMD_TUPLE(v128_zip_16, 0U, 0U),
+            SIMD_TUPLE(v128_zip_32, 0U, 0U), SIMD_TUPLE(v128_mul_s16, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U64U64), SIMD_TUPLE(v128_from_64, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V64),
+            SIMD_TUPLE(v128_unpack_u8_s16, 0U, 0U),
+            SIMD_TUPLE(v128_unpack_s8_s16, 0U, 0U),
+            SIMD_TUPLE(v128_unpack_u16_s32, 0U, 0U),
+            SIMD_TUPLE(v128_unpack_s16_s32, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128U32), SIMD_TUPLE(v128_shl_8, 7U, 32U),
+            SIMD_TUPLE(v128_shr_u8, 7U, 32U), SIMD_TUPLE(v128_shr_s8, 7U, 32U),
+            SIMD_TUPLE(v128_shl_16, 15U, 32U),
+            SIMD_TUPLE(v128_shr_u16, 15U, 32U),
+            SIMD_TUPLE(v128_shr_s16, 15U, 32U),
+            SIMD_TUPLE(v128_shl_32, 31U, 32U),
+            SIMD_TUPLE(v128_shr_u32, 31U, 32U),
+            SIMD_TUPLE(v128_shr_s32, 31U, 32U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V128), SIMD_TUPLE(v128_low_u32, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V128), SIMD_TUPLE(v128_hadd_u8, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V128), SIMD_TUPLE(v128_low_v64, 0U, 0U),
+            SIMD_TUPLE(v128_high_v64, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U8), SIMD_TUPLE(v128_dup_8, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U16), SIMD_TUPLE(v128_dup_16, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U32), SIMD_TUPLE(v128_dup_32, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V128V128),
+            SIMD_TUPLE(v128_dotp_s16, 0U, 0U));
+
+}  // namespace SIMD_NAMESPACE
diff --git a/third_party/aom/test/simd_neon_test.cc b/third_party/aom/test/simd_neon_test.cc
new file mode 100644
index 000000000..0565fb4e2
--- /dev/null
+++ b/third_party/aom/test/simd_neon_test.cc
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#if defined(__OPTIMIZE__) && __OPTIMIZE__  // skip unoptimized builds
+#define ARCH NEON
+#define ARCH_POSTFIX(name) name##_neon
+#define SIMD_NAMESPACE simd_test_neon
+#include "./simd_impl.h"  // test bodies are parameterized by the macros above
+#endif  // __OPTIMIZE__
diff --git a/third_party/aom/test/simd_sse2_test.cc b/third_party/aom/test/simd_sse2_test.cc
new file mode 100644
index 000000000..a0b49d77e
--- /dev/null
+++ b/third_party/aom/test/simd_sse2_test.cc
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \
+    (!defined(__GNUC__) && !defined(_DEBUG))  // skip unoptimized/debug builds
+#define ARCH SSE2
+#define ARCH_POSTFIX(name) name##_sse2
+#define SIMD_NAMESPACE simd_test_sse2
+#include "./simd_impl.h"  // test bodies are parameterized by the macros above
+#endif  // optimized or non-debug build
diff --git a/third_party/aom/test/simd_sse4_test.cc b/third_party/aom/test/simd_sse4_test.cc
new file mode 100644
index 000000000..73c96427f
--- /dev/null
+++ b/third_party/aom/test/simd_sse4_test.cc
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \
+    (!defined(__GNUC__) && !defined(_DEBUG))  // skip unoptimized/debug builds
+#define ARCH SSE4_1
+#define ARCH_POSTFIX(name) name##_sse4_1
+#define SIMD_NAMESPACE simd_test_sse4_1
+#include "./simd_impl.h"  // test bodies are parameterized by the macros above
+#endif  // optimized or non-debug build
diff --git a/third_party/aom/test/simd_ssse3_test.cc b/third_party/aom/test/simd_ssse3_test.cc
new file mode 100644
index 000000000..9ebeeef1b
--- /dev/null
+++ b/third_party/aom/test/simd_ssse3_test.cc
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \
+    (!defined(__GNUC__) && !defined(_DEBUG))  // skip unoptimized/debug builds
+#define ARCH SSSE3
+#define ARCH_POSTFIX(name) name##_ssse3
+#define SIMD_NAMESPACE simd_test_ssse3
+#include "./simd_impl.h"  // test bodies are parameterized by the macros above
+#endif  // optimized or non-debug build
diff --git a/third_party/aom/test/simple_decoder.sh b/third_party/aom/test/simple_decoder.sh
new file mode 100755
index 000000000..ac3a07b18
--- /dev/null
+++ b/third_party/aom/test/simple_decoder.sh
@@ -0,0 +1,58 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file tests the libaom simple_decoder example code. To add new tests to
+## this file, do the following:
+##   1. Write a shell function (this is your test).
+##   2. Add the function to simple_decoder_tests (on a new line).
+##
+. "$(dirname "$0")/tools_common.sh"  # quoted: script path may contain spaces
+
+# Environment check: Make sure input is available:
+simple_decoder_verify_environment() {
+  if [ ! "$(av1_encode_available)" = "yes" ] && [ ! -e "${AV1_IVF_FILE}" ]; then
+    return 1
+  fi
+}
+
+# Runs simple_decoder using $1 as input file. $2 is the codec name, and is used
+# solely to name the output file.
+simple_decoder() {
+  local decoder="${LIBAOM_BIN_PATH}/simple_decoder${AOM_TEST_EXE_SUFFIX}"
+  local input_file="$1"
+  local codec="$2"  # used only to build output_file below
+  local output_file="${AOM_TEST_OUTPUT_DIR}/simple_decoder_${codec}.raw"
+
+  if [ ! -x "${decoder}" ]; then
+    elog "${decoder} does not exist or is not executable."
+    return 1
+  fi
+
+  eval "${AOM_TEST_PREFIX}" "${decoder}" "${input_file}" "${output_file}" \
+      ${devnull}
+
+  [ -e "${output_file}" ] || return 1  # pass == the output file now exists
+}
+
+simple_decoder_av1() {
+  if [ "$(av1_decode_available)" = "yes" ]; then
+    local file="${AV1_IVF_FILE}"
+    if [ ! -e "${file}" ]; then
+      # No ready-made input: encode one, and stop if the encode fails.
+      file="${AOM_TEST_OUTPUT_DIR}/test_encode.ivf"
+      encode_yuv_raw_input_av1 "${file}" --ivf || return 1
+    fi
+    simple_decoder "${file}" av1 || return 1
+  fi
+}
+
+simple_decoder_tests="simple_decoder_av1"
+
+run_tests simple_decoder_verify_environment "${simple_decoder_tests}"
diff --git a/third_party/aom/test/simple_encoder.sh b/third_party/aom/test/simple_encoder.sh
new file mode 100755
index 000000000..5cd6b46a1
--- /dev/null
+++ b/third_party/aom/test/simple_encoder.sh
@@ -0,0 +1,53 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file tests the libaom simple_encoder example. To add new tests to this
+## file, do the following:
+##   1. Write a shell function (this is your test).
+##   2. Add the function to simple_encoder_tests (on a new line).
+##
+. "$(dirname "$0")/tools_common.sh"  # quoted: script path may contain spaces
+
+# Environment check: $YUV_RAW_INPUT is required.
+simple_encoder_verify_environment() {
+  if [ ! -e "${YUV_RAW_INPUT}" ]; then
+    echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH."
+    return 1
+  fi
+}
+
+# Runs simple_encoder using the codec specified by $1 with a frame limit of 5.
+simple_encoder() {
+  local encoder="${LIBAOM_BIN_PATH}/simple_encoder${AOM_TEST_EXE_SUFFIX}"
+  local codec="$1"
+  local output_file="${AOM_TEST_OUTPUT_DIR}/simple_encoder_${codec}.ivf"
+
+  if [ ! -x "${encoder}" ]; then
+    elog "${encoder} does not exist or is not executable."
+    return 1
+  fi
+
+  eval "${AOM_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
+      "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 9999 0 5 \
+      ${devnull}  # presumably: keyframe interval, error-resilient, frame limit
+
+  [ -e "${output_file}" ] || return 1  # pass == the output file now exists
+}
+
+
+simple_encoder_av1() {
+  if [ "$(av1_encode_available)" = "yes" ]; then
+    simple_encoder av1 || return 1
+  fi
+}
+
+simple_encoder_tests="simple_encoder_av1"
+
+run_tests simple_encoder_verify_environment "${simple_encoder_tests}"
diff --git a/third_party/aom/test/subtract_test.cc b/third_party/aom/test/subtract_test.cc
new file mode 100644
index 000000000..c90ca8d56
--- /dev/null
+++ b/third_party/aom/test/subtract_test.cc
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "./aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#if CONFIG_AV1
+#include "av1/common/blockd.h"
+#endif
+#include "aom_mem/aom_mem.h"
+#include "aom_ports/mem.h"
+
+#define USE_SPEED_TEST (0)  // Non-zero also compiles the CheckSpeed test.
+// Signature of the subtract kernels exercised by AV1SubtractBlockTest.
+typedef void (*SubtractFunc)(int rows, int cols, int16_t *diff_ptr,
+                             ptrdiff_t diff_stride, const uint8_t *src_ptr,
+                             ptrdiff_t src_stride, const uint8_t *pred_ptr,
+                             ptrdiff_t pred_stride);
+
+namespace {
+
+class AV1SubtractBlockTest : public ::testing::TestWithParam<SubtractFunc> {
+ public:  // Parameterized on the subtract kernel under test.
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+};
+
+using libaom_test::ACMRandom;
+
+TEST_P(AV1SubtractBlockTest, SimpleSubtract) {
+  // Compares the kernel under test with a plain per-pixel subtraction for
+  // every block size, first with packed rows and then with a doubled stride.
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  // FIXME(rbultje) split in its own file
+  for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES;
+       bsize = static_cast<BLOCK_SIZE>(static_cast<int>(bsize) + 1)) {
+    const int block_width = block_size_wide[bsize];
+    const int block_height = block_size_high[bsize];
+    const int wide_stride = block_width * 2;
+    // Every buffer holds block_height rows of wide_stride samples.
+    const int num_samples = wide_stride * block_height;
+    int16_t *diff = reinterpret_cast<int16_t *>(
+        aom_memalign(16, sizeof(*diff) * num_samples));
+    uint8_t *pred = reinterpret_cast<uint8_t *>(aom_memalign(16, num_samples));
+    uint8_t *src = reinterpret_cast<uint8_t *>(aom_memalign(16, num_samples));
+    for (int pass = 0; pass < 100; ++pass) {
+      // A flat walk visits the samples in the same order as a row/column walk.
+      for (int i = 0; i < num_samples; ++i) {
+        src[i] = rnd.Rand8();
+        pred[i] = rnd.Rand8();
+      }
+
+      // Packed layout: all three strides equal the block width.
+      GetParam()(block_height, block_width, diff, block_width, src, block_width,
+                 pred, block_width);
+      for (int r = 0; r < block_height; ++r) {
+        for (int c = 0; c < block_width; ++c) {
+          EXPECT_EQ(diff[r * block_width + c],
+                    (src[r * block_width + c] - pred[r * block_width + c]))
+              << "r = " << r << ", c = " << c << ", bs = " << bsize;
+        }
+      }
+
+      // Padded layout: rows are spaced two block widths apart.
+      GetParam()(block_height, block_width, diff, wide_stride, src, wide_stride,
+                 pred, wide_stride);
+      for (int r = 0; r < block_height; ++r) {
+        for (int c = 0; c < block_width; ++c) {
+          EXPECT_EQ(
+              diff[r * block_width * 2 + c],
+              (src[r * block_width * 2 + c] - pred[r * block_width * 2 + c]))
+              << "r = " << r << ", c = " << c << ", bs = " << bsize;
+        }
+      }
+    }
+    aom_free(diff);
+    aom_free(pred);
+    aom_free(src);
+  }
+}
+
+INSTANTIATE_TEST_CASE_P(C, AV1SubtractBlockTest,
+                        ::testing::Values(aom_subtract_block_c));
+// Optimized variants are instantiated only when their HAVE_* flag is set.
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, AV1SubtractBlockTest,
+                        ::testing::Values(aom_subtract_block_sse2));
+#endif
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, AV1SubtractBlockTest,
+                        ::testing::Values(aom_subtract_block_neon));
+#endif
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(MSA, AV1SubtractBlockTest,
+                        ::testing::Values(aom_subtract_block_msa));
+#endif
+
+typedef void (*HBDSubtractFunc)(int rows, int cols, int16_t *diff_ptr,
+                                ptrdiff_t diff_stride, const uint8_t *src_ptr,
+                                ptrdiff_t src_stride, const uint8_t *pred_ptr,
+                                ptrdiff_t pred_stride, int bd);
+// Tuple helpers used for the high bit depth test parameters below.
+using ::std::tr1::get;
+using ::std::tr1::make_tuple;
+using ::std::tr1::tuple;
+
+// <width, height, bit_depth, subtract>
+typedef tuple<int, int, int, HBDSubtractFunc> Params;
+
+#if CONFIG_HIGHBITDEPTH
+class AV1HBDSubtractBlockTest : public ::testing::TestWithParam<Params> {
+ public:
+  virtual void SetUp() {  // Unpack the tuple, reseed rnd_, allocate buffers.
+    block_width_ = GET_PARAM(0);
+    block_height_ = GET_PARAM(1);
+    bit_depth_ = static_cast<aom_bit_depth_t>(GET_PARAM(2));
+    func_ = GET_PARAM(3);
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    // All buffers hold 128x128 samples, whatever block size is under test.
+    const size_t num_samples = 128 * 128;
+    uint16_t *const src16 = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, num_samples * sizeof(*src16)));
+    uint16_t *const pred16 = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, num_samples * sizeof(*pred16)));
+    diff_ = reinterpret_cast<int16_t *>(
+        aom_memalign(16, num_samples * sizeof(*diff_)));
+    src_ = CONVERT_TO_BYTEPTR(src16);
+    pred_ = CONVERT_TO_BYTEPTR(pred16);
+  }
+
+  virtual void TearDown() {  // Frees via CONVERT_TO_SHORTPTR, mirroring SetUp.
+    aom_free(CONVERT_TO_SHORTPTR(src_));
+    aom_free(CONVERT_TO_SHORTPTR(pred_));
+    aom_free(diff_);
+  }
+
+ protected:
+  void RunForSpeed();  // Timing loop behind the CheckSpeed test.
+  void CheckResult();  // Compares func_ output with a plain subtraction.
+
+ private:
+  ACMRandom rnd_;
+  int block_height_;
+  int block_width_;
+  aom_bit_depth_t bit_depth_;
+  HBDSubtractFunc func_;
+  uint8_t *src_;   // 16-bit samples passed through CONVERT_TO_BYTEPTR.
+  uint8_t *pred_;  // 16-bit samples passed through CONVERT_TO_BYTEPTR.
+  int16_t *diff_;
+};
+
+void AV1HBDSubtractBlockTest::RunForSpeed() {
+  const int kRuns = 200000;
+  const int kNumSamples = 128 * 128;
+  const int mask = (1 << bit_depth_) - 1;
+  uint16_t *const src16 = CONVERT_TO_SHORTPTR(src_);
+  uint16_t *const pred16 = CONVERT_TO_SHORTPTR(pred_);
+  // Fill both inputs once with random samples masked to bit_depth_ bits.
+  for (int j = 0; j < kNumSamples; ++j) {
+    src16[j] = rnd_.Rand16() & mask;
+    pred16[j] = rnd_.Rand16() & mask;
+  }
+  // Call the kernel repeatedly on the same data; results are not checked.
+  for (int i = 0; i < kRuns; ++i) {
+    func_(block_height_, block_width_, diff_, block_width_, src_, block_width_,
+          pred_, block_width_, bit_depth_);
+  }
+}
+
+void AV1HBDSubtractBlockTest::CheckResult() {
+  const int kRuns = 100;
+  const int kNumSamples = 128 * 128;
+  const int mask = (1 << bit_depth_) - 1;
+  uint16_t *const src16 = CONVERT_TO_SHORTPTR(src_);
+  uint16_t *const pred16 = CONVERT_TO_SHORTPTR(pred_);
+
+  for (int i = 0; i < kRuns; ++i) {
+    // Fresh random inputs, masked to bit_depth_ bits, for every run.
+    for (int j = 0; j < kNumSamples; ++j) {
+      src16[j] = rnd_.Rand16() & mask;
+      pred16[j] = rnd_.Rand16() & mask;
+    }
+    func_(block_height_, block_width_, diff_, block_width_, src_, block_width_,
+          pred_, block_width_, bit_depth_);
+    // Each output sample must equal the plain difference of the inputs.
+    for (int r = 0; r < block_height_; ++r) {
+      for (int c = 0; c < block_width_; ++c) {
+        EXPECT_EQ(diff_[r * block_width_ + c],
+                  (CONVERT_TO_SHORTPTR(src_)[r * block_width_ + c] -
+                   CONVERT_TO_SHORTPTR(pred_)[r * block_width_ + c]))
+            << "r = " << r << ", c = " << c << ", test: " << i;
+      }
+    }
+  }
+}
+
+TEST_P(AV1HBDSubtractBlockTest, CheckResult) { CheckResult(); }
+// The timing test is compiled only when USE_SPEED_TEST is non-zero.
+#if USE_SPEED_TEST
+TEST_P(AV1HBDSubtractBlockTest, CheckSpeed) { RunForSpeed(); }
+#endif  // USE_SPEED_TEST
+
+#if HAVE_SSE2  // Each block size runs with both the SSE2 and the C kernel.
+INSTANTIATE_TEST_CASE_P(
+    SSE2, AV1HBDSubtractBlockTest,
+    ::testing::Values(make_tuple(4, 4, 12, &aom_highbd_subtract_block_sse2),
+                      make_tuple(4, 4, 12, &aom_highbd_subtract_block_c),
+                      make_tuple(4, 8, 12, &aom_highbd_subtract_block_sse2),
+                      make_tuple(4, 8, 12, &aom_highbd_subtract_block_c),
+                      make_tuple(8, 4, 12, &aom_highbd_subtract_block_sse2),
+                      make_tuple(8, 4, 12, &aom_highbd_subtract_block_c),
+                      make_tuple(8, 8, 12, &aom_highbd_subtract_block_sse2),
+                      make_tuple(8, 8, 12, &aom_highbd_subtract_block_c),
+                      make_tuple(8, 16, 12, &aom_highbd_subtract_block_sse2),
+                      make_tuple(8, 16, 12, &aom_highbd_subtract_block_c),
+                      make_tuple(16, 8, 12, &aom_highbd_subtract_block_sse2),
+                      make_tuple(16, 8, 12, &aom_highbd_subtract_block_c),
+                      make_tuple(16, 16, 12, &aom_highbd_subtract_block_sse2),
+                      make_tuple(16, 16, 12, &aom_highbd_subtract_block_c),
+                      make_tuple(16, 32, 12, &aom_highbd_subtract_block_sse2),
+                      make_tuple(16, 32, 12, &aom_highbd_subtract_block_c),
+                      make_tuple(32, 16, 12, &aom_highbd_subtract_block_sse2),
+                      make_tuple(32, 16, 12, &aom_highbd_subtract_block_c),
+                      make_tuple(32, 32, 12, &aom_highbd_subtract_block_sse2),
+                      make_tuple(32, 32, 12, &aom_highbd_subtract_block_c),
+                      make_tuple(32, 64, 12, &aom_highbd_subtract_block_sse2),
+                      make_tuple(32, 64, 12, &aom_highbd_subtract_block_c),
+                      make_tuple(64, 32, 12, &aom_highbd_subtract_block_sse2),
+                      make_tuple(64, 32, 12, &aom_highbd_subtract_block_c),
+                      make_tuple(64, 64, 12, &aom_highbd_subtract_block_sse2),
+                      make_tuple(64, 64, 12, &aom_highbd_subtract_block_c),
+                      make_tuple(64, 128, 12, &aom_highbd_subtract_block_sse2),
+                      make_tuple(64, 128, 12, &aom_highbd_subtract_block_c),
+                      make_tuple(128, 64, 12, &aom_highbd_subtract_block_sse2),
+                      make_tuple(128, 64, 12, &aom_highbd_subtract_block_c),
+                      make_tuple(128, 128, 12, &aom_highbd_subtract_block_sse2),
+                      make_tuple(128, 128, 12, &aom_highbd_subtract_block_c)));
+#endif  // HAVE_SSE2
+#endif  // CONFIG_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/sum_squares_test.cc b/third_party/aom/test/sum_squares_test.cc
new file mode 100644
index 000000000..b8701c196
--- /dev/null
+++ b/third_party/aom/test/sum_squares_test.cc
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cmath>
+#include <cstdlib>
+#include <string>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "./aom_dsp_rtcd.h"
+#include "aom_ports/mem.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "test/function_equivalence_test.h"
+
+using libaom_test::ACMRandom;
+using libaom_test::FunctionEquivalenceTest;
+
+namespace {
+const int kNumIterations = 10000;
+// Largest sample magnitude used by the 1D tests: 2^12 - 1.
+static const int16_t kInt13Max = (1 << 12) - 1;
+// 2D kernel signature, and the reference/test pair handed to the fixture.
+typedef uint64_t (*SSI16Func)(const int16_t *src, int stride, int width,
+                              int height);
+typedef libaom_test::FuncParam<SSI16Func> TestFuncs;
+
+class SumSquaresTest : public ::testing::TestWithParam<TestFuncs> {
+ public:
+  virtual ~SumSquaresTest() {}
+  virtual void SetUp() { params_ = this->GetParam(); }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  TestFuncs params_;  // Reference/test pair, copied from GetParam() in SetUp.
+};
+
+TEST_P(SumSquaresTest, OperationCheck) {
+  // Random signed inputs: both implementations must return the same sum.
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, int16_t, src[256 * 256]);
+  int failed = 0;
+  const int msb = 11;  // Up to 12 bit input
+  const int limit = 1 << (msb + 1);
+
+  for (int k = 0; k < kNumIterations; k++) {
+    // Dimensions are non-zero multiples of 4; an empty block checks nothing.
+    int width = 4 * (rnd(31) + 1);
+    int height = 4 * (rnd(31) + 1);
+    int stride = 4 << rnd(7);  // Up to 256 stride
+    while (stride < width) {   // Make sure it's valid
+      stride = 4 << rnd(7);
+    }
+
+    for (int ii = 0; ii < height; ii++) {
+      for (int jj = 0; jj < width; jj++) {
+        src[ii * stride + jj] = rnd(2) ? rnd(limit) : -rnd(limit);
+      }
+    }
+
+    const uint64_t res_ref = params_.ref_func(src, stride, width, height);
+    uint64_t res_tst;
+    ASM_REGISTER_STATE_CHECK(res_tst =
+                                 params_.tst_func(src, stride, width, height));
+
+    if (!failed) {
+      failed = res_ref != res_tst;
+      EXPECT_EQ(res_ref, res_tst)
+          << "Error: Sum Squares Test"
+          << " C output does not match optimized output.";
+    }
+  }
+}
+
+TEST_P(SumSquaresTest, ExtremeValues) {
+  // Every sample is +/-(limit - 1): both implementations must still agree.
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, int16_t, src[256 * 256]);
+  int failed = 0;
+  const int msb = 11;  // Up to 12 bit input
+  const int limit = 1 << (msb + 1);
+
+  for (int k = 0; k < kNumIterations; k++) {
+    // Dimensions are non-zero multiples of 4; an empty block checks nothing.
+    int width = 4 * (rnd(31) + 1);
+    int height = 4 * (rnd(31) + 1);
+    int stride = 4 << rnd(7);  // Up to 256 stride
+    while (stride < width) {   // Make sure it's valid
+      stride = 4 << rnd(7);
+    }
+
+    int val = rnd(2) ? limit - 1 : -(limit - 1);
+    for (int ii = 0; ii < height; ii++) {
+      for (int jj = 0; jj < width; jj++) {
+        src[ii * stride + jj] = val;
+      }
+    }
+
+    const uint64_t res_ref = params_.ref_func(src, stride, width, height);
+    uint64_t res_tst;
+    ASM_REGISTER_STATE_CHECK(res_tst =
+                                 params_.tst_func(src, stride, width, height));
+
+    if (!failed) {
+      failed = res_ref != res_tst;
+      EXPECT_EQ(res_ref, res_tst)
+          << "Error: Sum Squares Test"
+          << " C output does not match optimized output.";
+    }
+  }
+}
+
+#if HAVE_SSE2
+// Checks the SSE2 2D kernel against the C implementation.
+INSTANTIATE_TEST_CASE_P(
+    SSE2, SumSquaresTest,
+    ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c,
+                                &aom_sum_squares_2d_i16_sse2)));
+
+#endif  // HAVE_SSE2
+
+//////////////////////////////////////////////////////////////////////////////
+// 1D version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef uint64_t (*F1D)(const int16_t *src, uint32_t N);  // 1D kernel type.
+typedef libaom_test::FuncParam<F1D> TestFuncs1D;  // Reference/test pair.
+
+class SumSquares1DTest : public FunctionEquivalenceTest<F1D> {
+ protected:
+  static const int kIterations = 1000;  // Loop count of each test body.
+  static const int kMaxSize = 256;  // Test buffers hold kMaxSize^2 samples.
+};
+
+TEST_P(SumSquares1DTest, RandomValues) {
+  DECLARE_ALIGNED(16, int16_t, src[kMaxSize * kMaxSize]);
+  const int kNumSamples = kMaxSize * kMaxSize;
+  // Random signed samples; N comes from either a long or a short range.
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    for (int i = 0; i < kNumSamples; ++i) {
+      src[i] = rng_(kInt13Max * 2 + 1) - kInt13Max;
+    }
+    const bool long_run = rng_(2) != 0;
+    const int N = long_run ? rng_(kNumSamples + 1 - kMaxSize) + kMaxSize
+                           : rng_(kMaxSize) + 1;
+    const uint64_t ref_res = params_.ref_func(src, N);
+    uint64_t tst_res;
+    ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(src, N));
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+TEST_P(SumSquares1DTest, ExtremeValues) {
+  DECLARE_ALIGNED(16, int16_t, src[kMaxSize * kMaxSize]);
+  const int kNumSamples = kMaxSize * kMaxSize;
+
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    // Fill the whole buffer with one extreme, chosen at random per run.
+    const int fill = rng_(2) ? kInt13Max : -kInt13Max;
+    for (int i = 0; i < kNumSamples; ++i) src[i] = fill;
+
+    const bool long_run = rng_(2) != 0;
+    const int N = long_run ? rng_(kNumSamples + 1 - kMaxSize) + kMaxSize
+                           : rng_(kMaxSize) + 1;
+
+    const uint64_t ref_res = params_.ref_func(src, N);
+    uint64_t tst_res;
+    ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(src, N));
+
+    ASSERT_EQ(ref_res, tst_res);
+  }
+}
+
+#if HAVE_SSE2  // Checks the SSE2 1D kernel against the C implementation.
+INSTANTIATE_TEST_CASE_P(SSE2, SumSquares1DTest,
+                        ::testing::Values(TestFuncs1D(
+                            aom_sum_squares_i16_c, aom_sum_squares_i16_sse2)));
+
+#endif  // HAVE_SSE2
+}  // namespace
diff --git a/third_party/aom/test/superframe_test.cc b/third_party/aom/test/superframe_test.cc
new file mode 100644
index 000000000..0f54baeaf
--- /dev/null
+++ b/third_party/aom/test/superframe_test.cc
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <climits>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+const int kTestMode = 0;  // Tuple index of the test mode.
+const int kTileCols = 1;  // Tuple index of the tile column setting.
+const int kTileRows = 2;  // Tuple index of the tile row setting.
+// <test mode, tile columns, tile rows>
+typedef std::tr1::tuple<libaom_test::TestMode, int, int> SuperframeTestParam;
+
+class SuperframeTest
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWithParam<SuperframeTestParam> {
+ protected:
+  SuperframeTest()
+      : EncoderTest(GET_PARAM(0)), modified_buf_(NULL), last_sf_pts_(0) {}
+  virtual ~SuperframeTest() {}
+
+  virtual void SetUp() {  // Applies the mode and tile layout from the param.
+    InitializeConfig();
+    const SuperframeTestParam input = GET_PARAM(1);
+    SetMode(std::tr1::get<kTestMode>(input));
+    sf_count_ = 0;
+    sf_count_max_ = INT_MAX;
+    n_tile_cols_ = std::tr1::get<kTileCols>(input);
+    n_tile_rows_ = std::tr1::get<kTileRows>(input);
+  }
+
+  virtual void TearDown() { delete[] modified_buf_; }
+
+  virtual void PreEncodeFrameHook(libaom_test::VideoSource *video,
+                                  libaom_test::Encoder *encoder) {
+    if (video->frame() != 1) return;  // Controls are only set at frame 1.
+    encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+    encoder->Control(AOME_SET_CPUUSED, 2);
+    encoder->Control(AV1E_SET_TILE_COLUMNS, n_tile_cols_);
+    encoder->Control(AV1E_SET_TILE_ROWS, n_tile_rows_);
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+    encoder->Control(AV1E_SET_TILE_LOOPFILTER, 0);
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
+  }
+
+  // Strips the superframe index from frame packets that carry one.
+  virtual const aom_codec_cx_pkt_t *MutateEncoderOutputHook(
+      const aom_codec_cx_pkt_t *pkt) {
+    if (pkt->kind != AOM_CODEC_CX_FRAME_PKT) return pkt;
+    const size_t frame_sz = pkt->data.frame.sz;
+    const uint8_t *buffer = reinterpret_cast<uint8_t *>(pkt->data.frame.buf);
+    // An empty packet has no trailing marker byte; 0 never matches below.
+    const uint8_t marker = frame_sz > 0 ? buffer[frame_sz - 1] : 0;
+    const int frames = (marker & 0x7) + 1;
+    const int mag = ((marker >> 3) & 3) + 1;
+    const unsigned int index_sz = 2 + mag * (frames - 1);
+    if ((marker & 0xe0) == 0xc0 && frame_sz >= index_sz &&
+        buffer[frame_sz - index_sz] == marker) {
+      // frame is a superframe. strip off the index.
+      const size_t payload_sz = frame_sz - index_sz;
+      delete[] modified_buf_;  // No NULL check needed for delete[].
+      modified_buf_ = new uint8_t[payload_sz];
+      memcpy(modified_buf_, buffer, payload_sz);
+      modified_pkt_ = *pkt;
+      modified_pkt_.data.frame.buf = modified_buf_;
+      modified_pkt_.data.frame.sz = payload_sz;
+      sf_count_++;
+      last_sf_pts_ = pkt->data.frame.pts;
+      return &modified_pkt_;
+    }
+
+    // Make sure we do a few frames after the last SF
+    abort_ |=
+        sf_count_ > sf_count_max_ && pkt->data.frame.pts - last_sf_pts_ >= 5;
+    return pkt;
+  }
+
+  int sf_count_;
+  int sf_count_max_;
+  aom_codec_cx_pkt_t modified_pkt_;
+  uint8_t *modified_buf_;
+  aom_codec_pts_t last_sf_pts_;
+
+ private:
+  int n_tile_cols_;
+  int n_tile_rows_;
+};
+
+TEST_P(SuperframeTest, TestSuperframeIndexIsOptional) {
+  sf_count_max_ = 0;  // early exit on successful test.
+  cfg_.g_lag_in_frames = 25;
+  // Input clip for the encode loop below.
+  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 40);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+#if CONFIG_EXT_REFS
+  // NOTE: The use of BWDREF_FRAME will enable the coding of more non-show
+  //       frames besides ALTREF_FRAME.
+  EXPECT_GE(sf_count_, 1);
+#else
+  EXPECT_EQ(sf_count_, 1);
+#endif  // CONFIG_EXT_REFS
+}
+
+// The superframe index is currently mandatory with both ANS and DAALA_EC due
+// to the decoder starting at the end of the buffer.
+#if CONFIG_EXT_TILE
+// Single tile does not work with ANS (see comment above).
+#if CONFIG_ANS || CONFIG_DAALA_EC
+const int tile_col_values[] = { 1, 2 };
+#else
+const int tile_col_values[] = { 1, 2, 32 };
+#endif  // CONFIG_ANS || CONFIG_DAALA_EC
+const int tile_row_values[] = { 1, 2, 32 };
+AV1_INSTANTIATE_TEST_CASE(
+    SuperframeTest,
+    ::testing::Combine(::testing::Values(::libaom_test::kTwoPassGood),
+                       ::testing::ValuesIn(tile_col_values),
+                       ::testing::ValuesIn(tile_row_values)));
+#else
+#if !CONFIG_ANS && !CONFIG_DAALA_EC
+AV1_INSTANTIATE_TEST_CASE(
+    SuperframeTest,
+    ::testing::Combine(::testing::Values(::libaom_test::kTwoPassGood),
+                       ::testing::Values(0), ::testing::Values(0)));
+#endif  // !CONFIG_ANS && !CONFIG_DAALA_EC
+#endif  // CONFIG_EXT_TILE
+}  // namespace
diff --git a/third_party/aom/test/test-data.mk b/third_party/aom/test/test-data.mk
new file mode 100644
index 000000000..168144a00
--- /dev/null
+++ b/third_party/aom/test/test-data.mk
@@ -0,0 +1,45 @@
+LIBAOM_TEST_SRCS-yes += test-data.mk
+
+# Encoder test source
+LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.yuv
+LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv
+# park_joy clips; the file names suggest bit depth and chroma format.
+LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_420.y4m
+LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_422.y4m
+LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_444.y4m
+LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_440.yuv
+LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_420.y4m
+LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_422.y4m
+LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_444.y4m
+LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_440.yuv
+LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420_a10-1.y4m
+LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420.y4m
+LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_422.y4m
+LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_444.y4m
+LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_440.yuv
+# Clips listed only when the AV1 encoder is configured.
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += desktop_credits.y4m
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += niklas_1280_720_30.y4m
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += rush_hour_444.y4m
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += screendata.y4m
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += niklas_640_480_30.yuv
+
+ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
+# Encode / Decode test
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += niklas_1280_720_30.yuv
+endif  # CONFIG_DECODE_PERF_TESTS
+# Clips added only when CONFIG_ENCODE_PERF_TESTS is yes.
+ifeq ($(CONFIG_ENCODE_PERF_TESTS),yes)
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += desktop_640_360_30.yuv
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += kirland_640_480_30.yuv
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += macmarcomoving_640_480_30.yuv
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += macmarcostationary_640_480_30.yuv
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += niklas_1280_720_30.yuv
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += tacomanarrows_640_480_30.yuv
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += tacomasmallcameramovement_640_480_30.yuv
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += thaloundeskmtg_640_480_30.yuv
+endif  # CONFIG_ENCODE_PERF_TESTS
+
+# sort and remove duplicates (niklas_1280_720_30.yuv can be listed twice)
+LIBAOM_TEST_DATA-yes := $(sort $(LIBAOM_TEST_DATA-yes))
+
diff --git a/third_party/aom/test/test-data.sha1 b/third_party/aom/test/test-data.sha1
new file mode 100644
index 000000000..3d9bfc7c4
--- /dev/null
+++ b/third_party/aom/test/test-data.sha1
@@ -0,0 +1,28 @@
+d5dfb0151c9051f8c85999255645d7a23916d3c0 *hantro_collage_w352h288.yuv
+b87815bf86020c592ccc7a846ba2e28ec8043902 *hantro_odd.yuv
+a432f96ff0a787268e2f94a8092ab161a18d1b06 *park_joy_90p_10_420.y4m
+0b194cc312c3a2e84d156a221b0a5eb615dfddc5 *park_joy_90p_10_422.y4m
+ff0e0a21dc2adc95b8c1b37902713700655ced17 *park_joy_90p_10_444.y4m
+c934da6fb8cc54ee2a8c17c54cf6076dac37ead0 *park_joy_90p_10_440.yuv
+614c32ae1eca391e867c70d19974f0d62664dd99 *park_joy_90p_12_420.y4m
+c92825f1ea25c5c37855083a69faac6ac4641a9e *park_joy_90p_12_422.y4m
+b592189b885b6cc85db55cc98512a197d73d3b34 *park_joy_90p_12_444.y4m
+82c1bfcca368c2f22bad7d693d690d5499ecdd11 *park_joy_90p_12_440.yuv
+b9e1e90aece2be6e2c90d89e6ab2372d5f8c792d *park_joy_90p_8_420_a10-1.y4m
+4e0eb61e76f0684188d9bc9f3ce61f6b6b77bb2c *park_joy_90p_8_420.y4m
+7a193ff7dfeb96ba5f82b2afd7afa9e1fe83d947 *park_joy_90p_8_422.y4m
+bdb7856e6bc93599bdda05c2e773a9f22b6c6d03 *park_joy_90p_8_444.y4m
+81e1f3843748438b8f2e71db484eb22daf72e939 *park_joy_90p_8_440.yuv
+b1f1c3ec79114b9a0651af24ce634afb44a9a419 *rush_hour_444.y4m
+eb438c6540eb429f74404eedfa3228d409c57874 *desktop_640_360_30.yuv
+89e70ebd22c27d275fe14dc2f1a41841a6d8b9ab *kirland_640_480_30.yuv
+33c533192759e5bb4f07abfbac389dc259db4686 *macmarcomoving_640_480_30.yuv
+8bfaab121080821b8f03b23467911e59ec59b8fe *macmarcostationary_640_480_30.yuv
+70894878d916a599842d9ad0dcd24e10c13e5467 *niklas_640_480_30.yuv
+8784b6df2d8cc946195a90ac00540500d2e522e4 *tacomanarrows_640_480_30.yuv
+edd86a1f5e62fd9da9a9d46078247759c2638009 *tacomasmallcameramovement_640_480_30.yuv
+9a70e8b7d14fba9234d0e51dce876635413ce444 *thaloundeskmtg_640_480_30.yuv
+e7d315dbf4f3928779e0dc624311196d44491d32 *niklas_1280_720_30.yuv
+717da707afcaa1f692ff1946f291054eb75a4f06 *screendata.y4m
+9cfc855459e7549fd015c79e8eca512b2f2cb7e3 *niklas_1280_720_30.y4m
+5b5763b388b1b52a81bb82b39f7ec25c4bd3d0e1 *desktop_credits.y4m
diff --git a/third_party/aom/test/test.cmake b/third_party/aom/test/test.cmake
new file mode 100644
index 000000000..8d3ab7059
--- /dev/null
+++ b/third_party/aom/test/test.cmake
@@ -0,0 +1,315 @@
+##
+## Copyright (c) 2017, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+include("${AOM_ROOT}/test/test_data_util.cmake")  # Test data helper functions.
+
+set(AOM_UNIT_TEST_WRAPPER_SOURCES  # Compiled directly into test_libaom.
+    "${AOM_CONFIG_DIR}/usage_exit.c"  # Stub written under CONFIG_UNIT_TESTS.
+    "${AOM_ROOT}/test/test_libaom.cc")
+
+set(AOM_UNIT_TEST_COMMON_SOURCES  # Built as object library test_aom_common.
+    "${AOM_ROOT}/test/acm_random.h"
+    "${AOM_ROOT}/test/clear_system_state.h"
+    "${AOM_ROOT}/test/codec_factory.h"
+    "${AOM_ROOT}/test/convolve_test.cc"
+    "${AOM_ROOT}/test/function_equivalence_test.h"
+    "${AOM_ROOT}/test/md5_helper.h"
+    "${AOM_ROOT}/test/register_state_check.h"
+    "${AOM_ROOT}/test/transform_test_base.h"
+    "${AOM_ROOT}/test/util.h"
+    "${AOM_ROOT}/test/video_source.h")
+
+# accounting_test.cc is only built with CONFIG_ACCOUNTING.
+if (CONFIG_ACCOUNTING)
+  list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+      "${AOM_ROOT}/test/accounting_test.cc")
+endif ()
+
+# scan_test.cc is only built with CONFIG_ADAPT_SCAN.
+if (CONFIG_ADAPT_SCAN)
+  list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+      "${AOM_ROOT}/test/scan_test.cc")
+endif ()
+
+# The warp filter tests are only built when HAVE_SSE2 is set and at least one
+# of CONFIG_GLOBAL_MOTION and CONFIG_WARPED_MOTION is enabled.
+if (HAVE_SSE2 AND (CONFIG_GLOBAL_MOTION OR CONFIG_WARPED_MOTION))
+  list(APPEND
+      AOM_UNIT_TEST_COMMON_SOURCES
+      "${AOM_ROOT}/test/warp_filter_test.cc"
+      "${AOM_ROOT}/test/warp_filter_test_util.cc"
+      "${AOM_ROOT}/test/warp_filter_test_util.h")
+endif ()
+
+set(AOM_UNIT_TEST_DECODER_SOURCES  # Built as object library test_aom_decoder.
+    "${AOM_ROOT}/test/decode_api_test.cc"
+    "${AOM_ROOT}/test/decode_test_driver.cc"
+    "${AOM_ROOT}/test/decode_test_driver.h"
+    "${AOM_ROOT}/test/ivf_video_source.h")
+
+set(AOM_UNIT_TEST_ENCODER_SOURCES  # Built as object library test_aom_encoder.
+    "${AOM_ROOT}/test/altref_test.cc"
+    "${AOM_ROOT}/test/aq_segment_test.cc"
+    "${AOM_ROOT}/test/datarate_test.cc"
+    "${AOM_ROOT}/test/dct16x16_test.cc"
+    "${AOM_ROOT}/test/dct32x32_test.cc"
+    "${AOM_ROOT}/test/encode_api_test.cc"
+    "${AOM_ROOT}/test/encode_test_driver.cc"
+    "${AOM_ROOT}/test/encode_test_driver.h"
+    "${AOM_ROOT}/test/error_resilience_test.cc"
+    "${AOM_ROOT}/test/i420_video_source.h"
+    "${AOM_ROOT}/test/sad_test.cc"
+    "${AOM_ROOT}/test/y4m_test.cc"
+    "${AOM_ROOT}/test/y4m_video_source.h"
+    "${AOM_ROOT}/test/yuv_video_source.h")
+
+set(AOM_DECODE_PERF_TEST_SOURCES "${AOM_ROOT}/test/decode_perf_test.cc")
+set(AOM_ENCODE_PERF_TEST_SOURCES "${AOM_ROOT}/test/encode_perf_test.cc")
+set(AOM_UNIT_TEST_WEBM_SOURCES "${AOM_ROOT}/test/webm_video_source.h")
+
+set(AOM_TEST_INTRA_PRED_SPEED_SOURCES  # Sources of test_intra_pred_speed.
+    "${AOM_CONFIG_DIR}/usage_exit.c"
+    "${AOM_ROOT}/test/test_intra_pred_speed.cc")
+
+if (CONFIG_AV1)
+  # Tests added to the common list for every build with CONFIG_AV1.
+  list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+      "${AOM_ROOT}/test/av1_convolve_optimz_test.cc"
+      "${AOM_ROOT}/test/av1_convolve_test.cc"
+      "${AOM_ROOT}/test/av1_fwd_txfm1d_test.cc"
+      "${AOM_ROOT}/test/av1_fwd_txfm2d_test.cc"
+      "${AOM_ROOT}/test/av1_inv_txfm1d_test.cc"
+      "${AOM_ROOT}/test/av1_inv_txfm2d_test.cc"
+      "${AOM_ROOT}/test/av1_txfm_test.cc"
+      "${AOM_ROOT}/test/av1_txfm_test.h"
+      "${AOM_ROOT}/test/intrapred_test.cc"
+      "${AOM_ROOT}/test/lpf_8_test.cc"
+      "${AOM_ROOT}/test/simd_cmp_impl.h")
+
+  # clpf_test.cc is only built together with CONFIG_CDEF.
+  if (CONFIG_CDEF)
+    list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+        "${AOM_ROOT}/test/clpf_test.cc")
+  endif ()
+
+  if (CONFIG_FILTER_INTRA)
+    if (HAVE_SSE4_1)
+      # TODO: not sure if this is intrinsics or a wrapper calling intrin/asm.
+      # The only source of this group is commented out, so nothing is appended.
+      list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+          #"${AOM_ROOT}/test/filterintra_predictors_test.cc")
+          )
+    endif ()
+  endif ()
+
+  # SIMD sources are kept in one list per instruction set.
+  # setup_aom_test_targets() hands each list name, along with the matching
+  # compiler flag, to add_intrinsics_source_to_target().
+  # NOTE(review): only the NEON list carries its simd_*_test.cc; confirm intent.
+  list(APPEND AOM_UNIT_TEST_COMMON_INTRIN_NEON
+      "${AOM_ROOT}/test/simd_cmp_neon.cc"
+      "${AOM_ROOT}/test/simd_neon_test.cc")
+  list(APPEND AOM_UNIT_TEST_COMMON_INTRIN_SSE2
+      "${AOM_ROOT}/test/simd_cmp_sse2.cc")
+  list(APPEND AOM_UNIT_TEST_COMMON_INTRIN_SSSE3
+      "${AOM_ROOT}/test/simd_cmp_ssse3.cc")
+  list(APPEND AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1
+      "${AOM_ROOT}/test/simd_cmp_sse4.cc")
+endif ()
+
+if (CONFIG_AV1_ENCODER)
+  # Tests added to the encoder list for every build with the AV1 encoder.
+  list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES
+      "${AOM_ROOT}/test/active_map_test.cc"
+      "${AOM_ROOT}/test/arf_freq_test.cc"
+      "${AOM_ROOT}/test/av1_dct_test.cc"
+      "${AOM_ROOT}/test/av1_fht16x16_test.cc"
+      "${AOM_ROOT}/test/av1_fht8x8_test.cc"
+      "${AOM_ROOT}/test/av1_inv_txfm_test.cc"
+      "${AOM_ROOT}/test/avg_test.cc"
+      "${AOM_ROOT}/test/blend_a64_mask_1d_test.cc"
+      "${AOM_ROOT}/test/blend_a64_mask_test.cc"
+      "${AOM_ROOT}/test/borders_test.cc"
+      "${AOM_ROOT}/test/cpu_speed_test.cc"
+      "${AOM_ROOT}/test/end_to_end_test.cc"
+      "${AOM_ROOT}/test/error_block_test.cc"
+      "${AOM_ROOT}/test/fdct4x4_test.cc"
+      "${AOM_ROOT}/test/fdct8x8_test.cc"
+      "${AOM_ROOT}/test/frame_size_tests.cc"
+      "${AOM_ROOT}/test/hadamard_test.cc"
+      "${AOM_ROOT}/test/lossless_test.cc"
+      "${AOM_ROOT}/test/minmax_test.cc"
+      "${AOM_ROOT}/test/subtract_test.cc"
+      "${AOM_ROOT}/test/sum_squares_test.cc"
+      "${AOM_ROOT}/test/variance_test.cc")
+
+  # The wedge and masked SAD/variance tests are only built with CONFIG_EXT_INTER.
+  if (CONFIG_EXT_INTER)
+    list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES
+        "${AOM_ROOT}/test/av1_wedge_utils_test.cc"
+        "${AOM_ROOT}/test/masked_sad_test.cc"
+        "${AOM_ROOT}/test/masked_variance_test.cc")
+  endif ()
+
+  # These transform tests are only built with CONFIG_EXT_TX.
+  if (CONFIG_EXT_TX)
+    list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES
+        "${AOM_ROOT}/test/av1_fht16x32_test.cc"
+        "${AOM_ROOT}/test/av1_fht16x8_test.cc"
+        "${AOM_ROOT}/test/av1_fht32x16_test.cc"
+        "${AOM_ROOT}/test/av1_fht4x4_test.cc"
+        "${AOM_ROOT}/test/av1_fht4x8_test.cc"
+        "${AOM_ROOT}/test/av1_fht8x16_test.cc"
+        "${AOM_ROOT}/test/av1_fht8x4_test.cc"
+        "${AOM_ROOT}/test/fht32x32_test.cc")
+  endif ()
+
+  # The OBMC SAD/variance tests are only built with CONFIG_MOTION_VAR.
+  if (CONFIG_MOTION_VAR)
+    list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES
+        "${AOM_ROOT}/test/obmc_sad_test.cc"
+        "${AOM_ROOT}/test/obmc_variance_test.cc")
+  endif ()
+endif ()
+
+if (CONFIG_AV1_DECODER AND CONFIG_AV1_ENCODER)
+  # These tests are only built when both the AV1 encoder and decoder are.
+  list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+      "${AOM_ROOT}/test/divu_small_test.cc"
+      "${AOM_ROOT}/test/ethread_test.cc"
+      "${AOM_ROOT}/test/idct8x8_test.cc"
+      "${AOM_ROOT}/test/partial_idct_test.cc"
+      "${AOM_ROOT}/test/superframe_test.cc"
+      "${AOM_ROOT}/test/binary_codes_test.cc"
+      "${AOM_ROOT}/test/tile_independence_test.cc")
+
+  # Coder tests: the ANS tests are built with CONFIG_ANS, the bool coder test
+  # is built without it.
+  if (CONFIG_ANS)
+    list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+        "${AOM_ROOT}/test/ans_codec_test.cc"
+        "${AOM_ROOT}/test/ans_test.cc")
+  else ()
+    list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+        "${AOM_ROOT}/test/boolcoder_test.cc")
+  endif ()
+
+  # av1_ext_tile_test.cc is only built with CONFIG_EXT_TILE.
+  if (CONFIG_EXT_TILE)
+    list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+        "${AOM_ROOT}/test/av1_ext_tile_test.cc")
+  endif ()
+endif ()
+
+if (CONFIG_HIGHBITDEPTH)
+  # With CONFIG_AV1 these two tests are added to the SSE4.1 intrinsics list.
+  if (CONFIG_AV1)
+    list(APPEND AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1
+        "${AOM_ROOT}/test/av1_highbd_iht_test.cc"
+        "${AOM_ROOT}/test/av1_quantize_test.cc")
+  endif ()
+
+  # hbd_metrics_test.cc additionally requires CONFIG_INTERNAL_STATS.
+  if (CONFIG_INTERNAL_STATS)
+    list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+        "${AOM_ROOT}/test/hbd_metrics_test.cc")
+  endif ()
+endif ()
+
+if (CONFIG_UNIT_TESTS)  # Pull in googletest and write the usage_exit() stub.
+  if (MSVC)
+    # Force the static runtime to avoid collisions with googletest.
+    include("${AOM_ROOT}/build/cmake/msvc_runtime.cmake")
+  endif ()  # MSVC
+  include_directories(
+    "${AOM_ROOT}/third_party/googletest/src/googletest/src"
+    "${AOM_ROOT}/third_party/googletest/src/googletest/include")
+  add_subdirectory("${AOM_ROOT}/third_party/googletest/src/googletest"
+                   EXCLUDE_FROM_ALL)  # Keep googletest out of the "all" target.
+
+  # Generate a stub file containing the C function usage_exit(); this is
+  # required because of the test dependency on aom_common_app_util.
+  # Specifically, the function die() in tools_common.c calls usage_exit() to
+  # terminate the program on the caller's behalf.
+  file(WRITE "${AOM_CONFIG_DIR}/usage_exit.c" "void usage_exit(void) {}")
+endif ()  # CONFIG_UNIT_TESTS
+
+# Sets up the targets for CONFIG_UNIT_TESTS. The libaom and app util targets
+# must exist before this function is called.
+function (setup_aom_test_targets)
+  add_library(test_aom_common OBJECT ${AOM_UNIT_TEST_COMMON_SOURCES})
+  add_library(test_aom_decoder OBJECT ${AOM_UNIT_TEST_DECODER_SOURCES})
+  add_library(test_aom_encoder OBJECT ${AOM_UNIT_TEST_ENCODER_SOURCES})
+
+  set(AOM_LIB_TARGETS ${AOM_LIB_TARGETS} test_aom_common test_aom_decoder
+      test_aom_encoder PARENT_SCOPE)  # Extends the caller's AOM_LIB_TARGETS.
+
+  add_executable(test_libaom ${AOM_UNIT_TEST_WRAPPER_SOURCES}
+                 $<TARGET_OBJECTS:aom_common_app_util>
+                 $<TARGET_OBJECTS:test_aom_common>)
+
+  if (CONFIG_DECODERS)  # Decoder tests and the decoder app utilities.
+    target_sources(test_libaom PUBLIC
+                   $<TARGET_OBJECTS:aom_decoder_app_util>
+                   $<TARGET_OBJECTS:test_aom_decoder>)
+
+    if (CONFIG_DECODE_PERF_TESTS AND CONFIG_WEBM_IO)
+      target_sources(test_libaom PUBLIC ${AOM_DECODE_PERF_TEST_SOURCES})
+    endif ()
+  endif ()  # CONFIG_DECODERS
+
+  if (CONFIG_ENCODERS)  # Encoder tests and the encoder app utilities.
+    target_sources(test_libaom PUBLIC
+                   $<TARGET_OBJECTS:test_aom_encoder>
+                   $<TARGET_OBJECTS:aom_encoder_app_util>)
+
+    if (CONFIG_ENCODE_PERF_TESTS)
+      target_sources(test_libaom PUBLIC ${AOM_ENCODE_PERF_TEST_SOURCES})
+    endif ()
+  endif ()  # CONFIG_ENCODERS
+
+  target_link_libraries(test_libaom PUBLIC aom gtest)
+
+  add_executable(test_intra_pred_speed
+                 ${AOM_TEST_INTRA_PRED_SPEED_SOURCES}
+                 $<TARGET_OBJECTS:aom_common_app_util>)
+  target_link_libraries(test_intra_pred_speed PUBLIC aom gtest)
+
+  if (CONFIG_LIBYUV)
+    target_sources(test_libaom PUBLIC $<TARGET_OBJECTS:yuv>)
+  endif ()
+  if (CONFIG_WEBM_IO)
+    target_sources(test_libaom PUBLIC ${AOM_UNIT_TEST_WEBM_SOURCES}
+                   $<TARGET_OBJECTS:webm>)
+  endif ()
+  if (HAVE_SSE2)  # Each intrinsics list is passed by name with its flag.
+    add_intrinsics_source_to_target("-msse2" "test_libaom"
+                                    "AOM_UNIT_TEST_COMMON_INTRIN_SSE2")
+  endif ()
+  if (HAVE_SSSE3)
+    add_intrinsics_source_to_target("-mssse3" "test_libaom"
+                                    "AOM_UNIT_TEST_COMMON_INTRIN_SSSE3")
+  endif ()
+  if (HAVE_SSE4_1)
+    add_intrinsics_source_to_target("-msse4.1" "test_libaom"
+                                    "AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1")
+  endif ()
+  if (HAVE_NEON)
+    add_intrinsics_source_to_target("${AOM_NEON_INTRIN_FLAG}" "test_libaom"
+                                    "AOM_UNIT_TEST_COMMON_INTRIN_NEON")
+  endif ()
+
+  add_custom_target(testdata  # Runs test_worker.cmake in script mode (-P).
+                    COMMAND ${CMAKE_COMMAND}
+                      -DAOM_CONFIG_DIR="${AOM_CONFIG_DIR}"
+                      -DAOM_ROOT="${AOM_ROOT}"
+                      -P "${AOM_ROOT}/test/test_worker.cmake"
+                    SOURCES ${AOM_TEST_DATA_LIST})
+endfunction ()
diff --git a/third_party/aom/test/test.mk b/third_party/aom/test/test.mk
new file mode 100644
index 000000000..fb0ab371e
--- /dev/null
+++ b/third_party/aom/test/test.mk
@@ -0,0 +1,241 @@
+##
+## Copyright (c) 2017, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+
+LIBAOM_TEST_SRCS-yes += acm_random.h
+LIBAOM_TEST_SRCS-yes += clear_system_state.h
+LIBAOM_TEST_SRCS-yes += codec_factory.h
+LIBAOM_TEST_SRCS-yes += md5_helper.h
+LIBAOM_TEST_SRCS-yes += register_state_check.h
+LIBAOM_TEST_SRCS-yes += test.mk
+LIBAOM_TEST_SRCS-yes += test_libaom.cc
+LIBAOM_TEST_SRCS-yes += util.h
+LIBAOM_TEST_SRCS-yes += video_source.h
+LIBAOM_TEST_SRCS-yes += transform_test_base.h
+LIBAOM_TEST_SRCS-yes += function_equivalence_test.h
+LIBAOM_TEST_SRCS-yes += warp_filter_test_util.h
+
+##
+## BLACK BOX TESTS
+##
+## Black box tests only use the public API.
+##
+LIBAOM_TEST_SRCS-yes                   += ../md5_utils.h ../md5_utils.c
+LIBAOM_TEST_SRCS-$(CONFIG_DECODERS)    += ivf_video_source.h
+LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += ../y4minput.h ../y4minput.c
+LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += altref_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += aq_segment_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += datarate_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += encode_api_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += error_resilience_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += i420_video_source.h
+#LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += realtime_test.cc
+#LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += resize_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += y4m_video_source.h
+LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += yuv_video_source.h
+
+#LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += level_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += active_map_refresh_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += active_map_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += borders_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += cpu_speed_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += frame_size_tests.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += lossless_test.cc
+
+LIBAOM_TEST_SRCS-yes                   += decode_test_driver.cc
+LIBAOM_TEST_SRCS-yes                   += decode_test_driver.h
+LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += encode_test_driver.cc
+LIBAOM_TEST_SRCS-yes                   += encode_test_driver.h
+
+## IVF writing.
+LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += ../ivfenc.c ../ivfenc.h
+
+## Y4m parsing.
+LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += y4m_test.cc ../y4menc.c ../y4menc.h
+
+## WebM parsing.
+ifeq ($(CONFIG_WEBM_IO), yes)
+LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvparser.cc
+LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvreader.cc
+LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvparser.h
+LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvreader.h
+LIBAOM_TEST_SRCS-$(CONFIG_DECODERS)    += $(LIBWEBM_PARSER_SRCS)
+LIBAOM_TEST_SRCS-$(CONFIG_DECODERS)    += ../tools_common.h
+LIBAOM_TEST_SRCS-$(CONFIG_DECODERS)    += ../webmdec.cc
+LIBAOM_TEST_SRCS-$(CONFIG_DECODERS)    += ../webmdec.h
+LIBAOM_TEST_SRCS-$(CONFIG_DECODERS)    += webm_video_source.h
+endif # CONFIG_WEBM_IO
+
+LIBAOM_TEST_SRCS-$(CONFIG_DECODERS)    += decode_api_test.cc
+
+# Currently we only support decoder perf tests for av1. Also they read from WebM
+# files, so WebM IO is required.
+ifeq ($(CONFIG_DECODE_PERF_TESTS)$(CONFIG_AV1_DECODER)$(CONFIG_WEBM_IO), \
+      yesyesyes)
+LIBAOM_TEST_SRCS-yes                   += decode_perf_test.cc
+endif # CONFIG_DECODE_PERF_TESTS, CONFIG_AV1_DECODER and CONFIG_WEBM_IO
+
+# Encode perf tests are av1 only.
+ifeq ($(CONFIG_ENCODE_PERF_TESTS)$(CONFIG_AV1_ENCODER), yesyes)
+LIBAOM_TEST_SRCS-yes += encode_perf_test.cc
+endif # CONFIG_ENCODE_PERF_TESTS and CONFIG_AV1_ENCODER
+
+## Encoder black box tests. NOTE(review): active_map tests are listed twice.
+ifeq ($(findstring yes,$(CONFIG_AV1_ENCODER)),yes)
+LIBAOM_TEST_SRCS-yes += active_map_refresh_test.cc
+LIBAOM_TEST_SRCS-yes += active_map_test.cc
+LIBAOM_TEST_SRCS-yes += end_to_end_test.cc
+endif # CONFIG_AV1_ENCODER
+
+##
+## WHITE BOX TESTS
+##
+## Whitebox tests invoke functions not exposed via the public API. Certain
+## shared library builds don't make these functions accessible.
+##
+ifeq ($(CONFIG_SHARED),)
+
+## AV1
+ifeq ($(CONFIG_AV1),yes)
+
+# These tests require both the encoder and decoder to be built.
+ifeq ($(CONFIG_AV1_ENCODER)$(CONFIG_AV1_DECODER),yesyes)
+# The IDCT test currently depends on the FDCT function.
+LIBAOM_TEST_SRCS-yes                   += idct8x8_test.cc
+LIBAOM_TEST_SRCS-yes                   += partial_idct_test.cc
+LIBAOM_TEST_SRCS-yes                   += superframe_test.cc
+LIBAOM_TEST_SRCS-yes                   += tile_independence_test.cc
+LIBAOM_TEST_SRCS-yes                   += ethread_test.cc
+LIBAOM_TEST_SRCS-yes                   += motion_vector_test.cc
+ifneq ($(CONFIG_ANS),yes)
+LIBAOM_TEST_SRCS-yes                   += binary_codes_test.cc
+endif # !CONFIG_ANS
+ifeq ($(CONFIG_EXT_TILE),yes)
+LIBAOM_TEST_SRCS-yes                   += av1_ext_tile_test.cc
+endif # CONFIG_EXT_TILE
+ifeq ($(CONFIG_ANS),yes)
+LIBAOM_TEST_SRCS-yes                   += ans_test.cc
+LIBAOM_TEST_SRCS-yes                   += ans_codec_test.cc
+else
+LIBAOM_TEST_SRCS-yes                   += boolcoder_test.cc
+ifeq ($(CONFIG_ACCOUNTING),yes)
+LIBAOM_TEST_SRCS-yes                   += accounting_test.cc
+endif # CONFIG_ACCOUNTING
+endif # CONFIG_ANS
+LIBAOM_TEST_SRCS-yes                   += divu_small_test.cc
+#LIBAOM_TEST_SRCS-yes                   += encoder_parms_get_to_decoder.cc
+endif # CONFIG_AV1_ENCODER and CONFIG_AV1_DECODER
+
+LIBAOM_TEST_SRCS-$(CONFIG_ADAPT_SCAN)  += scan_test.cc
+LIBAOM_TEST_SRCS-yes                   += convolve_test.cc
+LIBAOM_TEST_SRCS-yes                   += lpf_8_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_CDEF)        += dering_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_CDEF)        += clpf_test.cc
+LIBAOM_TEST_SRCS-yes                   += simd_cmp_impl.h
+LIBAOM_TEST_SRCS-$(HAVE_SSE2)          += simd_cmp_sse2.cc
+LIBAOM_TEST_SRCS-$(HAVE_SSSE3)         += simd_cmp_ssse3.cc
+LIBAOM_TEST_SRCS-$(HAVE_SSE4_1)        += simd_cmp_sse4.cc
+LIBAOM_TEST_SRCS-$(HAVE_NEON)          += simd_cmp_neon.cc
+LIBAOM_TEST_SRCS-yes                   += simd_impl.h
+LIBAOM_TEST_SRCS-$(HAVE_SSE2)          += simd_sse2_test.cc
+LIBAOM_TEST_SRCS-$(HAVE_SSSE3)         += simd_ssse3_test.cc
+LIBAOM_TEST_SRCS-$(HAVE_SSE4_1)        += simd_sse4_test.cc
+LIBAOM_TEST_SRCS-$(HAVE_NEON)          += simd_neon_test.cc
+LIBAOM_TEST_SRCS-yes                   += intrapred_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_INTRABC)     += intrabc_test.cc
+#LIBAOM_TEST_SRCS-$(CONFIG_AV1_DECODER) += av1_thread_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += dct16x16_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += dct32x32_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += fdct4x4_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += fdct8x8_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += hadamard_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += minmax_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += variance_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += error_block_test.cc
+#LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_quantize_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += subtract_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += arf_freq_test.cc
+
+
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_inv_txfm_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_dct_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht4x4_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht8x8_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht16x16_test.cc
+ifeq ($(CONFIG_EXT_TX),yes)
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht4x8_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht8x4_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht8x16_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht16x8_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht16x32_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht32x16_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += fht32x32_test.cc
+endif # CONFIG_EXT_TX
+
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += sum_squares_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += subtract_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += blend_a64_mask_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += blend_a64_mask_1d_test.cc
+
+ifeq ($(CONFIG_EXT_INTER),yes)
+LIBAOM_TEST_SRCS-$(HAVE_SSSE3) += masked_variance_test.cc
+LIBAOM_TEST_SRCS-$(HAVE_SSSE3) += masked_sad_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_wedge_utils_test.cc
+endif # CONFIG_EXT_INTER
+
+## Skip the unit test written for the 4-tap filter intra predictor, because we
+## reverted to the 3-tap filter.
+## ifeq ($(CONFIG_FILTER_INTRA),yes)
+## LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += filterintra_predictors_test.cc
+## endif
+
+ifeq ($(CONFIG_MOTION_VAR),yes)
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += obmc_sad_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += obmc_variance_test.cc
+endif # CONFIG_MOTION_VAR
+
+ifeq ($(CONFIG_HIGHBITDEPTH),yes)
+ifeq ($(CONFIG_AV1_ENCODER),yes)
+LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += av1_quantize_test.cc
+LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += av1_highbd_iht_test.cc
+endif # CONFIG_AV1_ENCODER
+endif # CONFIG_HIGHBITDEPTH
+endif # AV1
+
+## Multi-codec / unconditional whitebox tests.
+
+ifeq ($(CONFIG_AV1_ENCODER),yes)
+LIBAOM_TEST_SRCS-yes += avg_test.cc
+endif # CONFIG_AV1_ENCODER
+ifeq ($(CONFIG_INTERNAL_STATS),yes)
+LIBAOM_TEST_SRCS-$(CONFIG_HIGHBITDEPTH) += hbd_metrics_test.cc
+endif # CONFIG_INTERNAL_STATS
+LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1) += av1_txfm_test.h
+LIBAOM_TEST_SRCS-$(CONFIG_AV1) += av1_txfm_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fwd_txfm1d_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_inv_txfm1d_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fwd_txfm2d_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_inv_txfm2d_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1) += av1_convolve_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1) += av1_convolve_optimz_test.cc
+ifneq ($(findstring yes,$(CONFIG_GLOBAL_MOTION) $(CONFIG_WARPED_MOTION)),)
+LIBAOM_TEST_SRCS-$(HAVE_SSE2) += warp_filter_test.cc warp_filter_test_util.cc
+endif # CONFIG_GLOBAL_MOTION or CONFIG_WARPED_MOTION
+ifeq ($(CONFIG_LOOP_RESTORATION),yes)
+LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += selfguided_filter_test.cc
+endif # CONFIG_LOOP_RESTORATION
+
+TEST_INTRA_PRED_SPEED_SRCS-yes := test_intra_pred_speed.cc
+TEST_INTRA_PRED_SPEED_SRCS-yes += ../md5_utils.h ../md5_utils.c
+
+endif # CONFIG_SHARED
+
+include $(SRC_PATH_BARE)/test/test-data.mk
diff --git a/third_party/aom/test/test_data_util.cmake b/third_party/aom/test/test_data_util.cmake
new file mode 100644
index 000000000..f096e4e12
--- /dev/null
+++ b/third_party/aom/test/test_data_util.cmake
@@ -0,0 +1,76 @@
+##
+## Copyright (c) 2017, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+
+# Parses test/test-data.sha1 into the lists $out_files and $out_checksums.
+function (make_test_data_lists out_files out_checksums)
+  if (NOT AOM_TEST_DATA_LIST OR NOT EXISTS "${AOM_TEST_DATA_LIST}")
+    message(FATAL_ERROR "AOM_TEST_DATA_LIST (${AOM_TEST_DATA_LIST}) missing or "
+            "variable empty.")
+  endif ()
+
+  # Read $AOM_TEST_DATA_LIST one line per list entry and split each entry of the
+  # form "<checksum> *<file name>"; anything else is reported as malformed.
+  file(STRINGS "${AOM_TEST_DATA_LIST}" files_and_checksums)
+  set(checksums "")  # Reset: variables of the caller are visible in here.
+  set(filenames "")
+  foreach (line ${files_and_checksums})
+    string(FIND "${line}" " *" delim_pos)
+    if (delim_pos LESS 1)  # Delimiter missing, or no checksum in front of it.
+      message(FATAL_ERROR "Malformed line in ${AOM_TEST_DATA_LIST}: ${line}")
+    endif ()
+    math(EXPR filename_pos "${delim_pos} + 2")
+    string(SUBSTRING "${line}" 0 ${delim_pos} checksum)
+    string(SUBSTRING "${line}" ${filename_pos} -1 filename)
+    list(APPEND checksums ${checksum})
+    list(APPEND filenames ${filename})
+  endforeach ()
+
+  if (NOT checksums OR NOT filenames)
+    message(FATAL_ERROR "Parsing of ${AOM_TEST_DATA_LIST} failed.")
+  endif ()
+
+  set(${out_checksums} ${checksums} PARENT_SCOPE)
+  set(${out_files} ${filenames} PARENT_SCOPE)
+endfunction ()
+
+# Sets $out_path_list to "$test_dir/<name>" for every name in list $test_files.
+function (expand_test_file_paths test_files test_dir out_path_list)
+  set(path_list "")  # Reset: a caller's $path_list would be visible in here.
+  foreach (filename ${${test_files}})
+    list(APPEND path_list "${test_dir}/${filename}")
+  endforeach ()
+  set(${out_path_list} ${path_list} PARENT_SCOPE)
+endfunction ()
+
+# Sets $out_needs_update to 1 in the caller's scope when $local_path is missing
+# or its SHA1 differs from $expected_checksum; unsets it when the file matches.
+function (check_file local_path expected_checksum out_needs_update)
+  if (NOT EXISTS "${local_path}")
+    set(${out_needs_update} 1 PARENT_SCOPE)
+    return ()
+  endif ()
+  file(SHA1 "${local_path}" actual_checksum)
+  if ("${actual_checksum}" STREQUAL "${expected_checksum}")
+    unset(${out_needs_update} PARENT_SCOPE)
+  else ()
+    set(${out_needs_update} 1 PARENT_SCOPE)
+  endif ()
+endfunction ()
+
+# Fetches $file_url into $local_path. The download is checked against the SHA1
+# in $file_checksum through the EXPECTED_HASH option of file(DOWNLOAD).
+function (download_test_file file_url file_checksum local_path)
+  message("Downloading ${file_url} ...")
+  file(DOWNLOAD
+       "${file_url}" "${local_path}"
+       SHOW_PROGRESS EXPECTED_HASH SHA1=${file_checksum})
+  message("Download of ${file_url} complete.")
+endfunction ()
diff --git a/third_party/aom/test/test_intra_pred_speed.cc b/third_party/aom/test/test_intra_pred_speed.cc
new file mode 100644
index 000000000..c4253628e
--- /dev/null
+++ b/third_party/aom/test/test_intra_pred_speed.cc
@@ -0,0 +1,515 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+//  Test and time AOM intra-predictor functions
+
+#include <stdio.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/md5_helper.h"
+#include "aom/aom_integer.h"
+#include "aom_ports/mem.h"
+#include "aom_ports/aom_timer.h"
+
+// -----------------------------------------------------------------------------
+
+namespace {
+
+// Pointer type shared by every predictor function placed in the tables below.
+typedef void (*AvxPredFunc)(uint8_t *dst, ptrdiff_t y_stride,
+                            const uint8_t *above, const uint8_t *left);
+
+// Number of predictor slots per table; CONFIG_ALT_INTRA adds SMOOTH_PRED.
+#if CONFIG_ALT_INTRA
+const int kNumAv1IntraFuncs = 14;
+#else
+const int kNumAv1IntraFuncs = 13;
+#endif  // CONFIG_ALT_INTRA
+
+// Printable name of each slot, indexed the same way as the predictor tables.
+const char *kAv1IntraPredNames[kNumAv1IntraFuncs] = {
+  "DC_PRED",
+  "DC_LEFT_PRED",
+  "DC_TOP_PRED",
+  "DC_128_PRED",
+  "V_PRED",
+  "H_PRED",
+  "D45_PRED",
+  "D135_PRED",
+  "D117_PRED",
+  "D153_PRED",
+  "D207_PRED",
+  "D63_PRED",
+  "TM_PRED",
+#if CONFIG_ALT_INTRA
+  "SMOOTH_PRED",
+#endif  // CONFIG_ALT_INTRA
+};
+
+// Times every non-NULL predictor in |pred_funcs| and checks the MD5 of the
+// buffer it wrote against the matching entry of |signatures|.
+//
+// name                 label printed in the "Mode ..." report line.
+// pred_funcs           table of |num_funcs| predictors; NULL slots are skipped.
+// pred_func_names      printable name for each slot of |pred_funcs|.
+// signatures           expected MD5 string for each slot of |pred_funcs|.
+// (block_size)         unnamed and unused.
+// num_pixels_per_test  divisor used to scale the number of timed iterations.
+//
+// The random inputs are drawn from a deterministically seeded generator, so
+// the order of the Rand8() calls below determines the data every predictor
+// sees and therefore the expected signatures.
+void TestIntraPred(const char name[], AvxPredFunc const *pred_funcs,
+                   const char *const pred_func_names[], int num_funcs,
+                   const char *const signatures[], int /*block_size*/,
+                   int num_pixels_per_test) {
+  libaom_test::ACMRandom rnd(libaom_test::ACMRandom::DeterministicSeed());
+  // kBPS is the stride handed to the predictors; buffers hold 32 rows of it.
+  const int kBPS = 32;
+  const int kTotalPixels = 32 * kBPS;
+  DECLARE_ALIGNED(16, uint8_t, src[kTotalPixels]);
+  DECLARE_ALIGNED(16, uint8_t, ref_src[kTotalPixels]);
+  DECLARE_ALIGNED(16, uint8_t, left[2 * kBPS]);
+  DECLARE_ALIGNED(16, uint8_t, above_mem[2 * kBPS + 16]);
+  // |above| starts 16 bytes into its backing store so that above[-1], written
+  // by the loop below, is a valid element.
+  uint8_t *const above = above_mem + 16;
+  for (int i = 0; i < kTotalPixels; ++i) ref_src[i] = rnd.Rand8();
+  for (int i = 0; i < kBPS; ++i) left[i] = rnd.Rand8();
+  for (int i = -1; i < kBPS; ++i) above[i] = rnd.Rand8();
+  // Iteration count shrinks as the per-test pixel count grows.
+  const int kNumTests = static_cast<int>(2.e10 / num_pixels_per_test);
+
+  // Fill up bottom-left and top-right pixels.
+  for (int i = kBPS; i < 2 * kBPS; ++i) {
+    left[i] = rnd.Rand8();
+    above[i] = rnd.Rand8();
+  }
+
+  for (int k = 0; k < num_funcs; ++k) {
+    // A NULL slot means no implementation is being tested for this predictor.
+    if (pred_funcs[k] == NULL) continue;
+    // Every predictor starts from the same reference contents.
+    memcpy(src, ref_src, sizeof(src));
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int num_tests = 0; num_tests < kNumTests; ++num_tests) {
+      pred_funcs[k](src, kBPS, above, left);
+    }
+    libaom_test::ClearSystemState();
+    aom_usec_timer_mark(&timer);
+    // The elapsed value is divided by 1000 and reported as "ms" below.
+    const int elapsed_time =
+        static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
+    // The signature covers the whole |src| buffer, not only the written block.
+    libaom_test::MD5 md5;
+    md5.Add(src, sizeof(src));
+    printf("Mode %s[%12s]: %5d ms     MD5: %s\n", name, pred_func_names[k],
+           elapsed_time, md5.Get());
+    EXPECT_STREQ(signatures[k], md5.Get());
+  }
+}
+
+// Runs TestIntraPred over the 4x4 predictor table |pred_funcs|. The expected
+// MD5 strings are listed in kAv1IntraPredNames order.
+void TestIntraPred4(AvxPredFunc const *pred_funcs) {
+  const int kBlockSize = 4;
+  const int kPixelsPerTest = kBlockSize * kBlockSize * kNumAv1IntraFuncs;
+  static const char *const kExpectedMd5[kNumAv1IntraFuncs] = {
+    "4334156168b34ab599d9b5b30f522fe9",  // DC_PRED
+    "bc4649d5ba47c7ff178d92e475960fb0",  // DC_LEFT_PRED
+    "8d316e5933326dcac24e1064794b5d12",  // DC_TOP_PRED
+    "a27270fed024eafd762c95de85f4da51",  // DC_128_PRED
+    "c33dff000d4256c2b8f3bf9e9bab14d2",  // V_PRED
+    "44d8cddc2ad8f79b8ed3306051722b4f",  // H_PRED
+    "df62e96dfcb25d8a435482756a6fa990",  // D45_PRED
+    "ecb0d56ae5f677ea45127ce9d5c058e4",  // D135_PRED
+    "0b7936841f6813da818275944895b574",  // D117_PRED
+    "9117972ef64f91a58ff73e1731c81db2",  // D153_PRED
+    "46d493dccf6e5356c6f3c0c73b7dd141",  // D207_PRED
+    "b852f42e6c4991d415400332d567872f",  // D63_PRED
+#if CONFIG_ALT_INTRA
+    "828c49a4248993cce4876fa26eab697f",  // TM_PRED
+    "718c8cee9011f92ef31f77a9a7560010",  // SMOOTH_PRED
+#else
+    "309a618577b27c648f9c5ee45252bc8f",  // TM_PRED
+#endif  // CONFIG_ALT_INTRA
+  };
+  TestIntraPred("Intra4", pred_funcs, kAv1IntraPredNames, kNumAv1IntraFuncs,
+                kExpectedMd5, kBlockSize, kPixelsPerTest);
+}
+
+// Runs TestIntraPred over the 8x8 predictor table |pred_funcs|. The expected
+// MD5 strings are listed in kAv1IntraPredNames order.
+void TestIntraPred8(AvxPredFunc const *pred_funcs) {
+  const int kBlockSize = 8;
+  const int kPixelsPerTest = kBlockSize * kBlockSize * kNumAv1IntraFuncs;
+  static const char *const kExpectedMd5[kNumAv1IntraFuncs] = {
+    "7694ddeeefed887faf9d339d18850928",  // DC_PRED
+    "7d726b1213591b99f736be6dec65065b",  // DC_LEFT_PRED
+    "19c5711281357a485591aaf9c96c0a67",  // DC_TOP_PRED
+    "ba6b66877a089e71cd938e3b8c40caac",  // DC_128_PRED
+    "802440c93317e0f8ba93fab02ef74265",  // V_PRED
+    "9e09a47a15deb0b9d8372824f9805080",  // H_PRED
+    "a2fd4b66e1a667a3e582588934a7e4bd",  // D45_PRED
+    "78339c1c60bb1d67d248ab8c4da08b7f",  // D135_PRED
+    "5c97d70f7d47de1882a6cd86c165c8a9",  // D117_PRED
+    "8182bf60688b42205acd95e59e967157",  // D153_PRED
+    "9d69fcaf12398e67242d3fcf5cf2267e",  // D207_PRED
+    "7a09adb0fa6c2bf889a99dd816622feb",  // D63_PRED
+#if CONFIG_ALT_INTRA
+    "f6ade499c626d38eb70661184b79bc57",  // TM_PRED
+    "1ad5b106c79b792e514ba25e87139b5e",  // SMOOTH_PRED
+#else
+    "815b75c8e0d91cc1ae766dc5d3e445a3",  // TM_PRED
+#endif  // CONFIG_ALT_INTRA
+  };
+  TestIntraPred("Intra8", pred_funcs, kAv1IntraPredNames, kNumAv1IntraFuncs,
+                kExpectedMd5, kBlockSize, kPixelsPerTest);
+}
+
+// Runs TestIntraPred over the 16x16 predictor table |pred_funcs|. The expected
+// MD5 strings are listed in kAv1IntraPredNames order.
+void TestIntraPred16(AvxPredFunc const *pred_funcs) {
+  const int kBlockSize = 16;
+  const int kPixelsPerTest = kBlockSize * kBlockSize * kNumAv1IntraFuncs;
+  static const char *const kExpectedMd5[kNumAv1IntraFuncs] = {
+    "b40dbb555d5d16a043dc361e6694fe53",  // DC_PRED
+    "fb08118cee3b6405d64c1fd68be878c6",  // DC_LEFT_PRED
+    "6c190f341475c837cc38c2e566b64875",  // DC_TOP_PRED
+    "db5c34ccbe2c7f595d9b08b0dc2c698c",  // DC_128_PRED
+    "a62cbfd153a1f0b9fed13e62b8408a7a",  // V_PRED
+    "143df5b4c89335e281103f610f5052e4",  // H_PRED
+    "404944b521d16f6edd160feeeb31ff35",  // D45_PRED
+    "7841fae7d4d47b519322e6a03eeed9dc",  // D135_PRED
+    "f6ebed3f71cbcf8d6d0516ce87e11093",  // D117_PRED
+    "3cc480297dbfeed01a1c2d78dd03d0c5",  // D153_PRED
+    "fbd607f15da218c5390a5b183b634a10",  // D207_PRED
+    "f7063ccbc29f87303d5c3d0555b08944",  // D63_PRED
+#if CONFIG_ALT_INTRA
+    "7adcaaa3554eb71a81fc48cb9043984b",  // TM_PRED
+    "c0acea4397c1b4d54a21bbcec5731dff",  // SMOOTH_PRED
+#else
+    "b8a41aa968ec108af447af4217cba91b",  // TM_PRED
+#endif  // CONFIG_ALT_INTRA
+  };
+  TestIntraPred("Intra16", pred_funcs, kAv1IntraPredNames, kNumAv1IntraFuncs,
+                kExpectedMd5, kBlockSize, kPixelsPerTest);
+}
+
+// Runs TestIntraPred over the 32x32 predictor table |pred_funcs|. The expected
+// MD5 strings are listed in kAv1IntraPredNames order.
+void TestIntraPred32(AvxPredFunc const *pred_funcs) {
+  const int kBlockSize = 32;
+  const int kPixelsPerTest = kBlockSize * kBlockSize * kNumAv1IntraFuncs;
+  static const char *const kExpectedMd5[kNumAv1IntraFuncs] = {
+    "558541656d84f9ae7896db655826febe",  // DC_PRED
+    "b3587a1f9a01495fa38c8cd3c8e2a1bf",  // DC_LEFT_PRED
+    "4c6501e64f25aacc55a2a16c7e8f0255",  // DC_TOP_PRED
+    "b3b01379ba08916ef6b1b35f7d9ad51c",  // DC_128_PRED
+    "0f1eb38b6cbddb3d496199ef9f329071",  // V_PRED
+    "911c06efb9ed1c3b4c104b232b55812f",  // H_PRED
+    "b4f9f177a8a259514f039cfb403a61e3",  // D45_PRED
+    "0a6d584a44f8db9aa7ade2e2fdb9fc9e",  // D135_PRED
+    "b01c9076525216925f3456f034fb6eee",  // D117_PRED
+    "d267e20ad9e5cd2915d1a47254d3d149",  // D153_PRED
+    "3c45418137114cb6cef4c7a7baf4855c",  // D207_PRED
+    "d520125ebd512c63c301bf67fea8e059",  // D63_PRED
+#if CONFIG_ALT_INTRA
+    "297e8fbb5d33c29b12b228fa9d7c40a4",  // TM_PRED
+    "31b9296d70dd82238c87173e6d5e65fd",  // SMOOTH_PRED
+#else
+    "9e1370c6d42e08d357d9612c93a71cfc",  // TM_PRED
+#endif  // CONFIG_ALT_INTRA
+  };
+  TestIntraPred("Intra32", pred_funcs, kAv1IntraPredNames, kNumAv1IntraFuncs,
+                kExpectedMd5, kBlockSize, kPixelsPerTest);
+}
+
+}  // namespace
+
+// Defines a test case for |arch| (e.g., C, SSE2, ...) passing the predictors
+// to |test_func|. The test name is 'arch.test_func', e.g., C.TestIntraPred4.
+//
+// The fourteen predictor arguments are stored in the order of
+// kAv1IntraPredNames. Pass NULL for a predictor that has no implementation for
+// |arch|; TestIntraPred skips NULL slots. The table always contains the
+// |smooth| slot, but the TestIntraPredN helpers only read the first
+// kNumAv1IntraFuncs entries, so it is ignored when that count is 13.
+#define INTRA_PRED_TEST(arch, test_func, dc, dc_left, dc_top, dc_128, v, h, \
+                        d45e, d135, d117, d153, d207e, d63e, tm, smooth)    \
+  TEST(arch, test_func) {                                                   \
+    static const AvxPredFunc aom_intra_pred[] = {                           \
+      dc,   dc_left, dc_top, dc_128, v,    h,  d45e,                        \
+      d135, d117,    d153,   d207e,  d63e, tm, smooth                       \
+    };                                                                      \
+    test_func(aom_intra_pred);                                              \
+  }
+
+// -----------------------------------------------------------------------------
+// 4x4
+//
+// One INTRA_PRED_TEST per instruction set. A NULL argument leaves that
+// predictor slot without an implementation for the instruction set.
+
+// tm_pred_func and smooth_pred_func are temporary macros that pick the last
+// two table slots for the current configuration; they are #undef'd after each
+// use. With CONFIG_ALT_INTRA the TM slot holds the paeth predictor.
+#if CONFIG_ALT_INTRA
+#define tm_pred_func aom_paeth_predictor_4x4_c
+#define smooth_pred_func aom_smooth_predictor_4x4_c
+#else
+#define tm_pred_func aom_tm_predictor_4x4_c
+#define smooth_pred_func NULL
+#endif  // CONFIG_ALT_INTRA
+
+INTRA_PRED_TEST(C, TestIntraPred4, aom_dc_predictor_4x4_c,
+                aom_dc_left_predictor_4x4_c, aom_dc_top_predictor_4x4_c,
+                aom_dc_128_predictor_4x4_c, aom_v_predictor_4x4_c,
+                aom_h_predictor_4x4_c, aom_d45e_predictor_4x4_c,
+                aom_d135_predictor_4x4_c, aom_d117_predictor_4x4_c,
+                aom_d153_predictor_4x4_c, aom_d207e_predictor_4x4_c,
+                aom_d63e_predictor_4x4_c, tm_pred_func, smooth_pred_func)
+
+#undef tm_pred_func
+#undef smooth_pred_func
+
+// For the SIMD variants below the TM slot is NULL when CONFIG_ALT_INTRA is
+// set, and the smooth slot is always NULL.
+#if HAVE_SSE2
+#if CONFIG_ALT_INTRA
+#define tm_pred_func NULL
+#else
+#define tm_pred_func aom_tm_predictor_4x4_sse2
+#endif  // CONFIG_ALT_INTRA
+
+INTRA_PRED_TEST(SSE2, TestIntraPred4, aom_dc_predictor_4x4_sse2,
+                aom_dc_left_predictor_4x4_sse2, aom_dc_top_predictor_4x4_sse2,
+                aom_dc_128_predictor_4x4_sse2, aom_v_predictor_4x4_sse2,
+                aom_h_predictor_4x4_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
+                tm_pred_func, NULL)
+
+#undef tm_pred_func
+#endif  // HAVE_SSE2
+
+// SSSE3 provides only the D153 and D63 slots at this size.
+#if HAVE_SSSE3
+INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+                NULL, NULL, aom_d153_predictor_4x4_ssse3, NULL,
+                aom_d63e_predictor_4x4_ssse3, NULL, NULL)
+#endif  // HAVE_SSSE3
+
+#if HAVE_DSPR2
+#if CONFIG_ALT_INTRA
+#define tm_pred_func NULL
+#else
+#define tm_pred_func aom_tm_predictor_4x4_dspr2
+#endif  // CONFIG_ALT_INTRA
+INTRA_PRED_TEST(DSPR2, TestIntraPred4, aom_dc_predictor_4x4_dspr2, NULL, NULL,
+                NULL, NULL, aom_h_predictor_4x4_dspr2, NULL, NULL, NULL, NULL,
+                NULL, NULL, tm_pred_func, NULL)
+#undef tm_pred_func
+#endif  // HAVE_DSPR2
+
+#if HAVE_NEON
+#if CONFIG_ALT_INTRA
+#define tm_pred_func NULL
+#else
+#define tm_pred_func aom_tm_predictor_4x4_neon
+#endif  // CONFIG_ALT_INTRA
+INTRA_PRED_TEST(NEON, TestIntraPred4, aom_dc_predictor_4x4_neon,
+                aom_dc_left_predictor_4x4_neon, aom_dc_top_predictor_4x4_neon,
+                aom_dc_128_predictor_4x4_neon, aom_v_predictor_4x4_neon,
+                aom_h_predictor_4x4_neon, NULL, aom_d135_predictor_4x4_neon,
+                NULL, NULL, NULL, NULL, tm_pred_func, NULL)
+#undef tm_pred_func
+#endif  // HAVE_NEON
+
+#if HAVE_MSA
+#if CONFIG_ALT_INTRA
+#define tm_pred_func NULL
+#else
+#define tm_pred_func aom_tm_predictor_4x4_msa
+#endif  // CONFIG_ALT_INTRA
+INTRA_PRED_TEST(MSA, TestIntraPred4, aom_dc_predictor_4x4_msa,
+                aom_dc_left_predictor_4x4_msa, aom_dc_top_predictor_4x4_msa,
+                aom_dc_128_predictor_4x4_msa, aom_v_predictor_4x4_msa,
+                aom_h_predictor_4x4_msa, NULL, NULL, NULL, NULL, NULL, NULL,
+                tm_pred_func, NULL)
+#undef tm_pred_func
+#endif  // HAVE_MSA
+
+// -----------------------------------------------------------------------------
+// 8x8
+//
+// One INTRA_PRED_TEST per instruction set. A NULL argument leaves that
+// predictor slot without an implementation for the instruction set.
+
+// tm_pred_func and smooth_pred_func are temporary macros that pick the last
+// two table slots for the current configuration; they are #undef'd after each
+// use. With CONFIG_ALT_INTRA the TM slot holds the paeth predictor.
+#if CONFIG_ALT_INTRA
+#define tm_pred_func aom_paeth_predictor_8x8_c
+#define smooth_pred_func aom_smooth_predictor_8x8_c
+#else
+#define tm_pred_func aom_tm_predictor_8x8_c
+#define smooth_pred_func NULL
+#endif  // CONFIG_ALT_INTRA
+INTRA_PRED_TEST(C, TestIntraPred8, aom_dc_predictor_8x8_c,
+                aom_dc_left_predictor_8x8_c, aom_dc_top_predictor_8x8_c,
+                aom_dc_128_predictor_8x8_c, aom_v_predictor_8x8_c,
+                aom_h_predictor_8x8_c, aom_d45e_predictor_8x8_c,
+                aom_d135_predictor_8x8_c, aom_d117_predictor_8x8_c,
+                aom_d153_predictor_8x8_c, aom_d207e_predictor_8x8_c,
+                aom_d63e_predictor_8x8_c, tm_pred_func, smooth_pred_func)
+#undef tm_pred_func
+#undef smooth_pred_func
+
+// For the SIMD variants below the TM slot is NULL when CONFIG_ALT_INTRA is
+// set, and the smooth slot is always NULL.
+#if HAVE_SSE2
+#if CONFIG_ALT_INTRA
+#define tm_pred_func NULL
+#else
+#define tm_pred_func aom_tm_predictor_8x8_sse2
+#endif  // CONFIG_ALT_INTRA
+INTRA_PRED_TEST(SSE2, TestIntraPred8, aom_dc_predictor_8x8_sse2,
+                aom_dc_left_predictor_8x8_sse2, aom_dc_top_predictor_8x8_sse2,
+                aom_dc_128_predictor_8x8_sse2, aom_v_predictor_8x8_sse2,
+                aom_h_predictor_8x8_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
+                tm_pred_func, NULL)
+#undef tm_pred_func
+#endif  // HAVE_SSE2
+
+// SSSE3 provides only the D153 slot at this size.
+#if HAVE_SSSE3
+INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+                NULL, NULL, aom_d153_predictor_8x8_ssse3, NULL, NULL, NULL,
+                NULL)
+#endif  // HAVE_SSSE3
+
+#if HAVE_DSPR2
+#if CONFIG_ALT_INTRA
+#define tm_pred_func NULL
+#else
+#define tm_pred_func aom_tm_predictor_8x8_dspr2
+#endif  // CONFIG_ALT_INTRA
+INTRA_PRED_TEST(DSPR2, TestIntraPred8, aom_dc_predictor_8x8_dspr2, NULL, NULL,
+                NULL, NULL, aom_h_predictor_8x8_dspr2, NULL, NULL, NULL, NULL,
+                NULL, NULL, tm_pred_func, NULL)
+#undef tm_pred_func
+#endif  // HAVE_DSPR2
+
+#if HAVE_NEON
+#if CONFIG_ALT_INTRA
+#define tm_pred_func NULL
+#else
+#define tm_pred_func aom_tm_predictor_8x8_neon
+#endif  // CONFIG_ALT_INTRA
+INTRA_PRED_TEST(NEON, TestIntraPred8, aom_dc_predictor_8x8_neon,
+                aom_dc_left_predictor_8x8_neon, aom_dc_top_predictor_8x8_neon,
+                aom_dc_128_predictor_8x8_neon, aom_v_predictor_8x8_neon,
+                aom_h_predictor_8x8_neon, NULL, NULL, NULL, NULL, NULL, NULL,
+                tm_pred_func, NULL)
+#undef tm_pred_func
+#endif  // HAVE_NEON
+
+#if HAVE_MSA
+#if CONFIG_ALT_INTRA
+#define tm_pred_func NULL
+#else
+#define tm_pred_func aom_tm_predictor_8x8_msa
+#endif  // CONFIG_ALT_INTRA
+INTRA_PRED_TEST(MSA, TestIntraPred8, aom_dc_predictor_8x8_msa,
+                aom_dc_left_predictor_8x8_msa, aom_dc_top_predictor_8x8_msa,
+                aom_dc_128_predictor_8x8_msa, aom_v_predictor_8x8_msa,
+                aom_h_predictor_8x8_msa, NULL, NULL, NULL, NULL, NULL, NULL,
+                tm_pred_func, NULL)
+#undef tm_pred_func
+#endif  // HAVE_MSA
+
+// -----------------------------------------------------------------------------
+// 16x16
+//
+// One INTRA_PRED_TEST per instruction set. A NULL argument leaves that
+// predictor slot without an implementation for the instruction set.
+
+// tm_pred_func and smooth_pred_func are temporary macros that pick the last
+// two table slots for the current configuration; they are #undef'd after each
+// use. With CONFIG_ALT_INTRA the TM slot holds the paeth predictor.
+#if CONFIG_ALT_INTRA
+#define tm_pred_func aom_paeth_predictor_16x16_c
+#define smooth_pred_func aom_smooth_predictor_16x16_c
+#else
+#define tm_pred_func aom_tm_predictor_16x16_c
+#define smooth_pred_func NULL
+#endif  // CONFIG_ALT_INTRA
+INTRA_PRED_TEST(C, TestIntraPred16, aom_dc_predictor_16x16_c,
+                aom_dc_left_predictor_16x16_c, aom_dc_top_predictor_16x16_c,
+                aom_dc_128_predictor_16x16_c, aom_v_predictor_16x16_c,
+                aom_h_predictor_16x16_c, aom_d45e_predictor_16x16_c,
+                aom_d135_predictor_16x16_c, aom_d117_predictor_16x16_c,
+                aom_d153_predictor_16x16_c, aom_d207e_predictor_16x16_c,
+                aom_d63e_predictor_16x16_c, tm_pred_func, smooth_pred_func)
+#undef tm_pred_func
+#undef smooth_pred_func
+
+// For the SIMD variants below the TM slot is NULL when CONFIG_ALT_INTRA is
+// set, and the smooth slot is always NULL.
+#if HAVE_SSE2
+#if CONFIG_ALT_INTRA
+#define tm_pred_func NULL
+#else
+#define tm_pred_func aom_tm_predictor_16x16_sse2
+#endif  // CONFIG_ALT_INTRA
+INTRA_PRED_TEST(SSE2, TestIntraPred16, aom_dc_predictor_16x16_sse2,
+                aom_dc_left_predictor_16x16_sse2,
+                aom_dc_top_predictor_16x16_sse2,
+                aom_dc_128_predictor_16x16_sse2, aom_v_predictor_16x16_sse2,
+                aom_h_predictor_16x16_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
+                tm_pred_func, NULL)
+#undef tm_pred_func
+#endif  // HAVE_SSE2
+
+// SSSE3 provides only the D153 slot at this size.
+#if HAVE_SSSE3
+INTRA_PRED_TEST(SSSE3, TestIntraPred16, NULL, NULL, NULL, NULL, NULL, NULL,
+                NULL, NULL, NULL, aom_d153_predictor_16x16_ssse3, NULL, NULL,
+                NULL, NULL)
+#endif  // HAVE_SSSE3
+
+// DSPR2 supplies only the DC and H slots here; the TM slot is NULL in every
+// configuration.
+#if HAVE_DSPR2
+INTRA_PRED_TEST(DSPR2, TestIntraPred16, aom_dc_predictor_16x16_dspr2, NULL,
+                NULL, NULL, NULL, aom_h_predictor_16x16_dspr2, NULL, NULL, NULL,
+                NULL, NULL, NULL, NULL, NULL)
+#endif  // HAVE_DSPR2
+
+#if HAVE_NEON
+#if CONFIG_ALT_INTRA
+#define tm_pred_func NULL
+#else
+#define tm_pred_func aom_tm_predictor_16x16_neon
+#endif  // CONFIG_ALT_INTRA
+INTRA_PRED_TEST(NEON, TestIntraPred16, aom_dc_predictor_16x16_neon,
+                aom_dc_left_predictor_16x16_neon,
+                aom_dc_top_predictor_16x16_neon,
+                aom_dc_128_predictor_16x16_neon, aom_v_predictor_16x16_neon,
+                aom_h_predictor_16x16_neon, NULL, NULL, NULL, NULL, NULL, NULL,
+                tm_pred_func, NULL)
+#undef tm_pred_func
+#endif  // HAVE_NEON
+
+#if HAVE_MSA
+#if CONFIG_ALT_INTRA
+#define tm_pred_func NULL
+#else
+#define tm_pred_func aom_tm_predictor_16x16_msa
+#endif  // CONFIG_ALT_INTRA
+INTRA_PRED_TEST(MSA, TestIntraPred16, aom_dc_predictor_16x16_msa,
+                aom_dc_left_predictor_16x16_msa, aom_dc_top_predictor_16x16_msa,
+                aom_dc_128_predictor_16x16_msa, aom_v_predictor_16x16_msa,
+                aom_h_predictor_16x16_msa, NULL, NULL, NULL, NULL, NULL, NULL,
+                tm_pred_func, NULL)
+#undef tm_pred_func
+#endif  // HAVE_MSA
+
+// -----------------------------------------------------------------------------
+// 32x32
+//
+// One INTRA_PRED_TEST per instruction set. A NULL argument leaves that
+// predictor slot without an implementation for the instruction set.
+
+// tm_pred_func and smooth_pred_func are temporary macros that pick the last
+// two table slots for the current configuration; they are #undef'd after each
+// use. With CONFIG_ALT_INTRA the TM slot holds the paeth predictor.
+#if CONFIG_ALT_INTRA
+#define tm_pred_func aom_paeth_predictor_32x32_c
+#define smooth_pred_func aom_smooth_predictor_32x32_c
+#else
+#define tm_pred_func aom_tm_predictor_32x32_c
+#define smooth_pred_func NULL
+#endif  // CONFIG_ALT_INTRA
+INTRA_PRED_TEST(C, TestIntraPred32, aom_dc_predictor_32x32_c,
+                aom_dc_left_predictor_32x32_c, aom_dc_top_predictor_32x32_c,
+                aom_dc_128_predictor_32x32_c, aom_v_predictor_32x32_c,
+                aom_h_predictor_32x32_c, aom_d45e_predictor_32x32_c,
+                aom_d135_predictor_32x32_c, aom_d117_predictor_32x32_c,
+                aom_d153_predictor_32x32_c, aom_d207e_predictor_32x32_c,
+                aom_d63e_predictor_32x32_c, tm_pred_func, smooth_pred_func)
+#undef tm_pred_func
+#undef smooth_pred_func
+
+// For the SIMD variants below the TM slot is NULL when CONFIG_ALT_INTRA is
+// set, and the smooth slot is always NULL.
+#if HAVE_SSE2
+#if CONFIG_ALT_INTRA
+#define tm_pred_func NULL
+#else
+#define tm_pred_func aom_tm_predictor_32x32_sse2
+#endif  // CONFIG_ALT_INTRA
+INTRA_PRED_TEST(SSE2, TestIntraPred32, aom_dc_predictor_32x32_sse2,
+                aom_dc_left_predictor_32x32_sse2,
+                aom_dc_top_predictor_32x32_sse2,
+                aom_dc_128_predictor_32x32_sse2, aom_v_predictor_32x32_sse2,
+                aom_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
+                tm_pred_func, NULL)
+#undef tm_pred_func
+#endif  // HAVE_SSE2
+
+// SSSE3 provides only the D153 slot at this size.
+#if HAVE_SSSE3
+INTRA_PRED_TEST(SSSE3, TestIntraPred32, NULL, NULL, NULL, NULL, NULL, NULL,
+                NULL, NULL, NULL, aom_d153_predictor_32x32_ssse3, NULL, NULL,
+                NULL, NULL)
+#endif  // HAVE_SSSE3
+
+#if HAVE_NEON
+#if CONFIG_ALT_INTRA
+#define tm_pred_func NULL
+#else
+#define tm_pred_func aom_tm_predictor_32x32_neon
+#endif  // CONFIG_ALT_INTRA
+INTRA_PRED_TEST(NEON, TestIntraPred32, aom_dc_predictor_32x32_neon,
+                aom_dc_left_predictor_32x32_neon,
+                aom_dc_top_predictor_32x32_neon,
+                aom_dc_128_predictor_32x32_neon, aom_v_predictor_32x32_neon,
+                aom_h_predictor_32x32_neon, NULL, NULL, NULL, NULL, NULL, NULL,
+                tm_pred_func, NULL)
+#undef tm_pred_func
+#endif  // HAVE_NEON
+
+#if HAVE_MSA
+#if CONFIG_ALT_INTRA
+#define tm_pred_func NULL
+#else
+#define tm_pred_func aom_tm_predictor_32x32_msa
+#endif  // CONFIG_ALT_INTRA
+INTRA_PRED_TEST(MSA, TestIntraPred32, aom_dc_predictor_32x32_msa,
+                aom_dc_left_predictor_32x32_msa, aom_dc_top_predictor_32x32_msa,
+                aom_dc_128_predictor_32x32_msa, aom_v_predictor_32x32_msa,
+                aom_h_predictor_32x32_msa, NULL, NULL, NULL, NULL, NULL, NULL,
+                tm_pred_func, NULL)
+#undef tm_pred_func
+#endif  // HAVE_MSA
+
+#include "test/test_libaom.cc"
diff --git a/third_party/aom/test/test_libaom.cc b/third_party/aom/test/test_libaom.cc
new file mode 100644
index 000000000..6d83ce66e
--- /dev/null
+++ b/third_party/aom/test/test_libaom.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <string>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#if ARCH_X86 || ARCH_X86_64
+#include "aom_ports/x86.h"
+#endif
+extern "C" {
+#if CONFIG_AV1
+extern void av1_rtcd();
+#endif  // CONFIG_AV1
+extern void aom_dsp_rtcd();
+extern void aom_scale_rtcd();
+}
+
+#if ARCH_X86 || ARCH_X86_64
+// Appends |str| to the negative part of the gtest filter flag. |str| is
+// expected to be a ':'-separated pattern list with a leading ':'.
+static void append_negative_gtest_filter(const char *str) {
+  std::string updated(::testing::FLAGS_gtest_filter);
+  // The negative section is introduced by a single '-'; add it only when the
+  // current filter does not contain one yet.
+  const bool has_negative_section = updated.find('-') != std::string::npos;
+  if (!has_negative_section) updated.push_back('-');
+  updated.append(str);
+  ::testing::FLAGS_gtest_filter = updated;
+}
+#endif  // ARCH_X86 || ARCH_X86_64
+
+// Test runner entry point: initializes gtest, filters out tests for
+// instruction sets the CPU lacks (x86 only), runs the run-time CPU detection
+// setup for static builds, and executes all tests.
+int main(int argc, char **argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+
+#if ARCH_X86 || ARCH_X86_64
+  // Capability bit paired with the gtest patterns of the tests that need it.
+  // The entries are processed in this order.
+  static const struct {
+    int capability;
+    const char *patterns;
+  } kSimdFilters[] = {
+    { HAS_MMX, ":MMX.*:MMX/*" },
+    { HAS_SSE, ":SSE.*:SSE/*" },
+    { HAS_SSE2, ":SSE2.*:SSE2/*" },
+    { HAS_SSE3, ":SSE3.*:SSE3/*" },
+    { HAS_SSSE3, ":SSSE3.*:SSSE3/*" },
+    { HAS_SSE4_1, ":SSE4_1.*:SSE4_1/*" },
+    { HAS_AVX, ":AVX.*:AVX/*" },
+    { HAS_AVX2, ":AVX2.*:AVX2/*" },
+  };
+  const int kNumSimdFilters =
+      static_cast<int>(sizeof(kSimdFilters) / sizeof(kSimdFilters[0]));
+
+  const int simd_caps = x86_simd_caps();
+  for (int i = 0; i < kNumSimdFilters; ++i) {
+    // Exclude the tests of every instruction set the CPU does not report.
+    if ((simd_caps & kSimdFilters[i].capability) == 0) {
+      append_negative_gtest_filter(kSimdFilters[i].patterns);
+    }
+  }
+#endif  // ARCH_X86 || ARCH_X86_64
+
+#if !CONFIG_SHARED
+// Shared library builds don't support whitebox tests
+// that exercise internal symbols.
+
+#if CONFIG_AV1
+  av1_rtcd();
+#endif  // CONFIG_AV1
+  aom_dsp_rtcd();
+  aom_scale_rtcd();
+#endif  // !CONFIG_SHARED
+
+  return RUN_ALL_TESTS();
+}
diff --git a/third_party/aom/test/test_worker.cmake b/third_party/aom/test/test_worker.cmake
new file mode 100644
index 000000000..fa1d58130
--- /dev/null
+++ b/third_party/aom/test/test_worker.cmake
@@ -0,0 +1,49 @@
+##
+## Copyright (c) 2017, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+# Script that makes sure every file listed in test/test-data.sha1 is present in
+# the test data directory with the expected SHA1, downloading the ones that are
+# missing or stale. AOM_ROOT and AOM_CONFIG_DIR must be provided by the caller.
+if (NOT AOM_ROOT OR NOT AOM_CONFIG_DIR)
+  message(FATAL_ERROR "AOM_ROOT AND AOM_CONFIG_DIR must be defined.")
+endif ()
+
+set(AOM_TEST_DATA_LIST "${AOM_ROOT}/test/test-data.sha1")
+set(AOM_TEST_DATA_URL "http://downloads.webmproject.org/test_data/libvpx")
+set(AOM_TEST_DATA_PATH "$ENV{LIBAOM_TEST_DATA_PATH}")
+
+include("${AOM_ROOT}/test/test_data_util.cmake")
+
+# The expansion must be quoted: when $LIBAOM_TEST_DATA_PATH is not set the
+# unquoted form expands to nothing and leaves if() without a left operand,
+# which is an error in exactly the case this fallback is meant to handle.
+if ("${AOM_TEST_DATA_PATH}" STREQUAL "")
+  message(WARNING "Writing test data to ${AOM_CONFIG_DIR}, set "
+          "$LIBAOM_TEST_DATA_PATH in your environment to avoid this warning.")
+  set(AOM_TEST_DATA_PATH "${AOM_CONFIG_DIR}")
+endif ()
+
+if (NOT EXISTS "${AOM_TEST_DATA_PATH}")
+  file(MAKE_DIRECTORY "${AOM_TEST_DATA_PATH}")
+endif ()
+
+# Build four parallel lists: file names, checksums, local paths and URLs.
+make_test_data_lists("AOM_TEST_DATA_FILES" "AOM_TEST_DATA_CHECKSUMS")
+expand_test_file_paths("AOM_TEST_DATA_FILES" "${AOM_TEST_DATA_PATH}"
+                       "AOM_TEST_DATA_FILE_PATHS")
+expand_test_file_paths("AOM_TEST_DATA_FILES" "${AOM_TEST_DATA_URL}"
+                       "AOM_TEST_DATA_URLS")
+
+# foreach(RANGE n) iterates 0..n inclusive, hence the last index is length - 1.
+list(LENGTH AOM_TEST_DATA_FILES num_files)
+math(EXPR num_files "${num_files} - 1")
+
+foreach (file_num RANGE ${num_files})
+  list(GET AOM_TEST_DATA_FILES ${file_num} filename)
+  list(GET AOM_TEST_DATA_CHECKSUMS ${file_num} checksum)
+  list(GET AOM_TEST_DATA_FILE_PATHS ${file_num} filepath)
+  list(GET AOM_TEST_DATA_URLS ${file_num} url)
+
+  # Download only the files that are absent or fail the checksum comparison.
+  check_file("${filepath}" "${checksum}" "needs_download")
+  if (needs_download)
+    download_test_file("${url}" "${checksum}" "${filepath}")
+  endif ()
+endforeach ()
diff --git a/third_party/aom/test/tile_independence_test.cc b/third_party/aom/test/tile_independence_test.cc
new file mode 100644
index 000000000..a29051f2f
--- /dev/null
+++ b/third_party/aom/test/tile_independence_test.cc
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+#include "test/md5_helper.h"
+#include "aom_mem/aom_mem.h"
+
+namespace {
+// Encoder test fixture that decodes every compressed frame twice -- once in
+// the normal tile order and once with AV1_INVERT_TILE_DECODE_ORDER enabled --
+// and accumulates one MD5 per decoder over the decoded output. Test parameters
+// 1 and 2 are the tile column and tile row settings passed to the encoder.
+class TileIndependenceTest
+    : public ::libaom_test::EncoderTest,
+      public ::libaom_test::CodecTestWith2Params<int, int> {
+ protected:
+  TileIndependenceTest()
+      : EncoderTest(GET_PARAM(0)), md5_fw_order_(), md5_inv_order_(),
+        n_tile_cols_(GET_PARAM(1)), n_tile_rows_(GET_PARAM(2)) {
+    init_flags_ = AOM_CODEC_USE_PSNR;
+
+    // Both decoders are created from the same single-threaded configuration.
+    aom_codec_dec_cfg_t dec_cfg = aom_codec_dec_cfg_t();
+    dec_cfg.threads = 1;
+    dec_cfg.w = 704;
+    dec_cfg.h = 144;
+    fw_dec_ = codec_->CreateDecoder(dec_cfg, 0);
+    inv_dec_ = codec_->CreateDecoder(dec_cfg, 0);
+    // Only the second decoder walks the tiles in inverted order.
+    inv_dec_->Control(AV1_INVERT_TILE_DECODE_ORDER, 1);
+
+#if CONFIG_AV1 && CONFIG_EXT_TILE
+    if (fw_dec_->IsAV1() && inv_dec_->IsAV1()) {
+      fw_dec_->Control(AV1_SET_DECODE_TILE_ROW, -1);
+      fw_dec_->Control(AV1_SET_DECODE_TILE_COL, -1);
+      inv_dec_->Control(AV1_SET_DECODE_TILE_ROW, -1);
+      inv_dec_->Control(AV1_SET_DECODE_TILE_COL, -1);
+    }
+#endif
+  }
+
+  // The fixture owns both decoders.
+  virtual ~TileIndependenceTest() {
+    delete fw_dec_;
+    delete inv_dec_;
+  }
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(libaom_test::kTwoPassGood);
+  }
+
+  // Applies the tile layout and speed setting when the source is at frame 1.
+  virtual void PreEncodeFrameHook(libaom_test::VideoSource *video,
+                                  libaom_test::Encoder *encoder) {
+    if (video->frame() != 1) return;
+
+    encoder->Control(AV1E_SET_TILE_COLUMNS, n_tile_cols_);
+    encoder->Control(AV1E_SET_TILE_ROWS, n_tile_rows_);
+#if CONFIG_EXT_TILE
+    encoder->Control(AV1E_SET_TILE_ENCODING_MODE, 0);  // TILE_NORMAL
+#endif  // CONFIG_EXT_TILE
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+    encoder->Control(AV1E_SET_TILE_LOOPFILTER, 0);
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
+    SetCpuUsed(encoder);
+  }
+
+  // Speed setting hook; subclasses override it to choose another cpu-used.
+  virtual void SetCpuUsed(libaom_test::Encoder *encoder) {
+    static const int kCpuUsed = 3;
+    encoder->Control(AOME_SET_CPUUSED, kCpuUsed);
+  }
+
+  // Decodes the frame carried by |pkt| with |dec| and folds the first decoded
+  // image into |md5|. A decode error sets abort_ and fails the test.
+  void UpdateMD5(::libaom_test::Decoder *dec, const aom_codec_cx_pkt_t *pkt,
+                 ::libaom_test::MD5 *md5) {
+    uint8_t *const frame_data =
+        reinterpret_cast<uint8_t *>(pkt->data.frame.buf);
+    const aom_codec_err_t res =
+        dec->DecodeFrame(frame_data, pkt->data.frame.sz);
+    if (res != AOM_CODEC_OK) {
+      abort_ = true;
+      ASSERT_EQ(AOM_CODEC_OK, res);
+    }
+    md5->Add(dec->GetDxData().Next());
+  }
+
+  // Feeds every compressed frame to both decoders, forward order first.
+  virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
+    UpdateMD5(fw_dec_, pkt, &md5_fw_order_);
+    UpdateMD5(inv_dec_, pkt, &md5_inv_order_);
+  }
+
+  // Encodes the clip and requires the two accumulated MD5 strings to match.
+  void DoTest() {
+    const aom_rational timebase = { 33333333, 1000000000 };
+    cfg_.g_timebase = timebase;
+    cfg_.rc_end_usage = AOM_VBR;
+    cfg_.rc_target_bitrate = 500;
+    cfg_.g_lag_in_frames = 12;
+
+    libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 704, 576,
+                                       timebase.den, timebase.num, 0, 5);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+    const char *md5_fw_str = md5_fw_order_.Get();
+    const char *md5_inv_str = md5_inv_order_.Get();
+    ASSERT_STREQ(md5_fw_str, md5_inv_str);
+  }
+
+  ::libaom_test::MD5 md5_fw_order_, md5_inv_order_;
+  ::libaom_test::Decoder *fw_dec_, *inv_dec_;
+
+ private:
+  int n_tile_cols_;
+  int n_tile_rows_;
+};
+
+// Runs an encode with the tile column and tile row settings of the current
+// test parameters and decodes every frame in both normal and inverted tile
+// order. The MD5 of the decoded output must be identical in both cases; if
+// so, the tiles are considered independent and the test passes.
+TEST_P(TileIndependenceTest, MD5Match) { DoTest(); }
+
+// Same test as TileIndependenceTest, except that the encoder is run with
+// cpu-used 0.
+class TileIndependenceTestLarge : public TileIndependenceTest {
+  virtual void SetCpuUsed(libaom_test::Encoder *encoder) {
+    static const int kLargeTestCpuUsed = 0;
+    encoder->Control(AOME_SET_CPUUSED, kLargeTestCpuUsed);
+  }
+};
+
+TEST_P(TileIndependenceTestLarge, MD5Match) { DoTest(); }
+
+// Instantiates both fixtures over the cross product of the two value lists.
+// The fixture reads the first list as n_tile_cols_ (GET_PARAM(1)) and the
+// second as n_tile_rows_ (GET_PARAM(2)); CONFIG_EXT_TILE selects a different
+// set of values.
+// NOTE(review): whether these values are tile counts or log2 tile counts is
+// not visible here -- confirm against AV1E_SET_TILE_COLUMNS / _ROWS.
+#if CONFIG_EXT_TILE
+AV1_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Values(1, 2, 32),
+                          ::testing::Values(1, 2, 32));
+AV1_INSTANTIATE_TEST_CASE(TileIndependenceTestLarge,
+                          ::testing::Values(1, 2, 32),
+                          ::testing::Values(1, 2, 32));
+#else
+AV1_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Values(0, 1),
+                          ::testing::Values(0, 1));
+AV1_INSTANTIATE_TEST_CASE(TileIndependenceTestLarge, ::testing::Values(0, 1),
+                          ::testing::Values(0, 1));
+#endif  // CONFIG_EXT_TILE
+}  // namespace
diff --git a/third_party/aom/test/tools_common.sh b/third_party/aom/test/tools_common.sh
new file mode 100755
index 000000000..254e6b296
--- /dev/null
+++ b/third_party/aom/test/tools_common.sh
@@ -0,0 +1,454 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+##  This file contains shell code shared by test scripts for libaom tools.
+
+# Use $AOM_TEST_TOOLS_COMMON_SH as a pseudo include guard.
+if [ -z "${AOM_TEST_TOOLS_COMMON_SH}" ]; then
+AOM_TEST_TOOLS_COMMON_SH=included
+
+set -e
+devnull='> /dev/null 2>&1'
+AOM_TEST_PREFIX=""
+
+elog() {
+  echo "$@" >&2  # Error log: all arguments go to stderr.
+}
+
+vlog() {
+  if [ "${AOM_TEST_VERBOSE_OUTPUT}" = "yes" ]; then
+    echo "$@"
+  fi
+}
+
+# Marks a test as active by storing positional parameter one in $AOM_TOOL_TEST.
+test_begin() {
+  AOM_TOOL_TEST="$1"
+}
+
+# Ends the active test: resets $AOM_TOOL_TEST to '<unset>' when it matches
+# positional parameter one; otherwise reports the mismatch and returns 1.
+test_end() {
+  if [ "${AOM_TOOL_TEST}" != "$1" ]; then
+    echo "FAIL completed test mismatch!."
+    echo "  completed test: $1"
+    echo "  active test: ${AOM_TOOL_TEST}."
+    return 1
+  fi
+  AOM_TOOL_TEST='<unset>'
+}
+
+# Echoes the target configuration being tested.
+test_configuration_target() {
+  aom_config_mk="${LIBAOM_CONFIG_PATH}/config.mk"
+  # Find the TOOLCHAIN line(s), split on ':=' and print the last field, which
+  # is the value. tr then deletes every space character from that value before
+  # it is written to stdout.
+  awk -F ':=' '/TOOLCHAIN/ { print $NF }' "${aom_config_mk}" | tr -d ' '
+}
+
+# EXIT trap handler. Reports a failure for the test named in $AOM_TOOL_TEST
+# when that variable is non-empty and is not the string '<unset>', then removes
+# the $AOM_TEST_OUTPUT_DIR directory if it exists.
+cleanup() {
+  if [ -n "${AOM_TOOL_TEST}" ] && [ "${AOM_TOOL_TEST}" != '<unset>' ]; then
+    echo "FAIL: $AOM_TOOL_TEST"
+  fi
+  if [ -n "${AOM_TEST_OUTPUT_DIR}" ] && [ -d "${AOM_TEST_OUTPUT_DIR}" ]; then
+    rm -rf "${AOM_TEST_OUTPUT_DIR}"
+  fi
+}
+
+# Echoes the git hash portion of the VERSION_STRING variable defined in
+# $LIBAOM_CONFIG_PATH/config.mk to stdout, or the version number string when
+# no git hash is contained in VERSION_STRING.
+config_hash() {
+  aom_config_mk="${LIBAOM_CONFIG_PATH}/config.mk"
+  # Find the VERSION_STRING line, split it on "-g" and capture the last field
+  # (the git hash when one is present) in $aom_version.
+  aom_version=$(awk -F -g '/VERSION_STRING/ {print $NF}' "${aom_config_mk}")
+  # Handle two situations here:
+  # 1. The default case: $aom_version is a git hash, so echo it unchanged.
+  # 2. When run from a non-dev tree, the -g portion is not present in the
+  #    version string: it is only the version number.
+  #    In this case $aom_version is something like 'VERSION_STRING=v1.3.0', so
+  #    we echo only what is after the '='.
+  echo "${aom_version##*=}"
+}
+
+# Echoes the short git hash of HEAD, or the output of config_hash without git.
+current_hash() {
+  if git --version > /dev/null 2>&1; then
+    (cd "$(dirname "${0}")"  # Subshell: the cd does not affect the caller.
+    git rev-parse --short HEAD)
+  else
+    # Return the config hash if git is unavailable: Fail silently, git hashes
+    # are used only for warnings.
+    config_hash
+  fi
+}
+
+# Echoes a warning to stdout when the hash reported by config_hash (read from
+# config.mk) does not match the hash reported by current_hash.
+check_git_hashes() {
+  hash_at_configure_time=$(config_hash)
+  hash_now=$(current_hash)
+
+  if [ "${hash_at_configure_time}" != "${hash_now}" ]; then
+    echo "Warning: git hash has changed since last configure."
+  fi
+}
+
+# Fails unless the environment variable named by $1 holds a directory path.
+test_env_var_dir() {
+  local dir
+  eval "dir=\${$1}"  # Indirect read; an assignment never word-splits or globs.
+  if [ ! -d "${dir}" ]; then
+    elog "'${dir}': No such directory"
+    elog "The $1 environment variable must be set to a valid directory."
+    return 1
+  fi
+}
+
+# Confirms that LIBAOM_BIN_PATH, LIBAOM_CONFIG_PATH and LIBAOM_TEST_DATA_PATH
+# each evaluate to a directory path, stopping with a failure at the first one
+# that does not.
+verify_aom_test_environment() {
+  test_env_var_dir "LIBAOM_BIN_PATH" || return 1
+  test_env_var_dir "LIBAOM_CONFIG_PATH" || return 1
+  test_env_var_dir "LIBAOM_TEST_DATA_PATH"
+}
+
+# Greps aom_config.h in LIBAOM_CONFIG_PATH for positional parameter one, which
+# should be a LIBAOM preprocessor flag. Echoes yes to stdout when a matching
+# line ends in 1. Plain grep is used: egrep is obsolescent and '1$' needs no ERE.
+aom_config_option_enabled() {
+  aom_config_option="${1}"
+  aom_config_file="${LIBAOM_CONFIG_PATH}/aom_config.h"
+  config_line=$(grep "${aom_config_option}" "${aom_config_file}")
+  if echo "${config_line}" | grep -q '1$'; then
+    echo yes
+  fi
+}
+
+# Echoes yes when the output of test_configuration_target() contains win32 or
+# win64; echoes nothing otherwise.
+is_windows_target() {
+  case "$(test_configuration_target)" in
+    *win32*|*win64*) echo yes ;;
+  esac
+}
+
+# Echoes path to $1 when it's executable and exists in ${LIBAOM_BIN_PATH}, or an
+# empty string. Caller is responsible for testing the string once the function
+# returns.
+aom_tool_path() {
+  local readonly tool_name="$1"
+  local tool_path="${LIBAOM_BIN_PATH}/${tool_name}${AOM_TEST_EXE_SUFFIX}"
+  if [ ! -x "${tool_path}" ]; then
+    # Try one directory up: when running via examples.sh the tool could be in
+    # the parent directory of $LIBAOM_BIN_PATH.
+    tool_path="${LIBAOM_BIN_PATH}/../${tool_name}${AOM_TEST_EXE_SUFFIX}"
+  fi
+
+  if [ ! -x "${tool_path}" ]; then
+    tool_path=""
+  fi
+  echo "${tool_path}"
+}
+
+# Echoes yes to stdout when the file named by positional parameter one exists
+# in LIBAOM_BIN_PATH, and is executable.
+aom_tool_available() {
+  local tool_name="$1"
+  local tool="${LIBAOM_BIN_PATH}/${tool_name}${AOM_TEST_EXE_SUFFIX}"
+  [ -x "${tool}" ] && echo yes
+}
+
+# Echoes yes to stdout when aom_config_option_enabled() reports yes for
+# CONFIG_AV1_DECODER; otherwise echoes nothing and returns non-zero.
+av1_decode_available() {
+  test "$(aom_config_option_enabled CONFIG_AV1_DECODER)" = "yes" && echo yes
+}
+
+# Echoes yes to stdout when aom_config_option_enabled() reports yes for
+# CONFIG_AV1_ENCODER; otherwise echoes nothing and returns non-zero.
+av1_encode_available() {
+  test "$(aom_config_option_enabled CONFIG_AV1_ENCODER)" = "yes" && echo yes
+}
+
+# Echoes yes to stdout when aom_config_option_enabled() reports yes for
+# CONFIG_WEBM_IO; otherwise echoes nothing and returns non-zero.
+webm_io_available() {
+  test "$(aom_config_option_enabled CONFIG_WEBM_IO)" = "yes" && echo yes
+}
+
+# Filters strings from $1 using the filter specified by $2. Filter behavior
+# depends on the presence of $3. When $3 is present, strings that match the
+# filter are excluded. When $3 is omitted, strings matching the filter are
+# included. $2 is an extended regular expression; an empty $2 keeps all of $1.
+# The filtered result is echoed to stdout.
+filter_strings() {
+  strings=${1}
+  filter=${2}
+  exclude=${3}
+  filtered_strings=""  # Reset: a previous call must not leak into this one.
+
+  if [ -n "${exclude}" ]; then
+    # $3 present: have grep invert matches (-v) to drop matching strings.
+    exclude='-v'
+  else
+    unset exclude
+  fi
+
+  if [ -n "${filter}" ]; then
+    for s in ${strings}; do
+      if echo "${s}" | grep -E -q ${exclude} "${filter}" > /dev/null 2>&1; then
+        filtered_strings="${filtered_strings} ${s}"
+      fi
+    done
+  else
+    filtered_strings="${strings}"
+  fi
+  echo "${filtered_strings}"
+}
+
+# Runs user test functions passed via positional parameters one and two.
+# Functions in positional parameter one are environment verification functions;
+# they run first whenever any test remains. Functions in positional parameter
+# two are run according to the rules specified in aom_test_usage().
+run_tests() {
+  local env_tests="verify_aom_test_environment $1"
+  local tests_to_filter="$2"
+  local test_name="${AOM_TEST_NAME}"
+
+  if [ -z "${test_name}" ]; then
+    test_name="$(basename "${0%.*}")"
+  fi
+
+  if [ "${AOM_TEST_RUN_DISABLED_TESTS}" != "yes" ]; then
+    # Filter out DISABLED tests.
+    tests_to_filter=$(filter_strings "${tests_to_filter}" ^DISABLED exclude)
+  fi
+
+  if [ -n "${AOM_TEST_FILTER}" ]; then
+    # Keep only the user's matches; quoted so the filter stays one argument.
+    tests_to_filter=$(filter_strings "${tests_to_filter}" "${AOM_TEST_FILTER}")
+  fi
+
+  # User requested test listing: Dump test names and return.
+  if [ "${AOM_TEST_LIST_TESTS}" = "yes" ]; then
+    for test_name in $tests_to_filter; do
+      echo "${test_name}"
+    done
+    return
+  fi
+
+  # Don't bother with the environment tests if everything else was disabled.
+  [ -z "${tests_to_filter}" ] && return
+
+  # Combine environment and actual tests.
+  local tests_to_run="${env_tests} ${tests_to_filter}"
+
+  check_git_hashes
+
+  # Run tests. A failing test aborts the script via `set -e` before PASS prints.
+  for test in ${tests_to_run}; do
+    test_begin "${test}"
+    vlog "  RUN  ${test}"
+    "${test}"
+    vlog "  PASS ${test}"
+    test_end "${test}"
+  done
+
+  local tested_config="$(test_configuration_target) @ $(current_hash)"
+  echo "${test_name}: Done, all tests pass for ${tested_config}."
+}
+
+aom_test_usage() {  # Prints the option summary below to stdout.
+cat << EOF
+  Usage: ${0##*/} [arguments]
+    --bin-path <path to libaom binaries directory>
+    --config-path <path to libaom config directory>
+    --filter <filter>: User test filter. Only tests matching filter are run.
+    --run-disabled-tests: Run disabled tests.
+    --help: Display this message and exit.
+    --test-data-path <path to libaom test data directory>
+    --show-program-output: Shows output from all programs being tested.
+    --prefix: Allows for a user specified prefix to be inserted before all test
+              programs. Grants the ability, for example, to run test programs
+              within valgrind.
+    --list-tests: List all test names and exit without actually running tests.
+    --verbose: Verbose output.
+
+    When the --bin-path option is not specified the script attempts to use
+    \$LIBAOM_BIN_PATH and then the current directory.
+
+    When the --config-path option is not specified the script attempts to use
+    \$LIBAOM_CONFIG_PATH and then the current directory.
+
+    When the --test-data-path option is not specified the script attempts to use
+    \$LIBAOM_TEST_DATA_PATH and then the current directory.
+EOF
+}
+
+# Returns non-zero (failure) when required environment variables are empty
+# strings.
+aom_test_check_environment() {
+  if [ -z "${LIBAOM_BIN_PATH}" ] || \
+     [ -z "${LIBAOM_CONFIG_PATH}" ] || \
+     [ -z "${LIBAOM_TEST_DATA_PATH}" ]; then
+    return 1
+  fi
+}
+
+# Echo aomenc command line parameters allowing use of a raw yuv file as input
+# to aomenc. NOTE(review): the expansions sit outside the double quotes here.
+yuv_raw_input() {
+  echo ""${YUV_RAW_INPUT}"
+       --width="${YUV_RAW_INPUT_WIDTH}"
+       --height="${YUV_RAW_INPUT_HEIGHT}""
+}
+
+# Do a small encode for testing decoders.
+encode_yuv_raw_input_av1() {
+  if [ "$(av1_encode_available)" = "yes" ]; then
+    local readonly output="$1"
+    local readonly encoder="$(aom_tool_path aomenc)"
+    shift
+    eval "${encoder}" $(yuv_raw_input) \
+      --codec=av1 \
+      $@ \
+      --limit=5 \
+      --output="${output}" \
+      ${devnull}
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+# Parse the command line; options that take a value consume it via `shift`.
+while [ -n "$1" ]; do
+  case "$1" in
+    --bin-path)
+      LIBAOM_BIN_PATH="$2"
+      shift
+      ;;
+    --config-path)
+      LIBAOM_CONFIG_PATH="$2"
+      shift
+      ;;
+    --filter)
+      AOM_TEST_FILTER="$2"
+      shift
+      ;;
+    --run-disabled-tests)
+      AOM_TEST_RUN_DISABLED_TESTS=yes
+      ;;
+    --help)
+      aom_test_usage
+      exit
+      ;;
+    --test-data-path)
+      LIBAOM_TEST_DATA_PATH="$2"
+      shift
+      ;;
+    --prefix)
+      AOM_TEST_PREFIX="$2"
+      shift
+      ;;
+    --verbose)
+      AOM_TEST_VERBOSE_OUTPUT=yes
+      ;;
+    --show-program-output)
+      devnull=  # Clears the redirection that tests append to tool commands.
+      ;;
+    --list-tests)
+      AOM_TEST_LIST_TESTS=yes
+      ;;
+    *)
+      aom_test_usage  # Unknown argument: print usage and fail.
+      exit 1
+      ;;
+  esac
+  shift
+done
+
+# Default all three paths to the current directory so the tests can be run
+# from a build directory without arguments.
+LIBAOM_BIN_PATH="${LIBAOM_BIN_PATH:-.}"
+LIBAOM_CONFIG_PATH="${LIBAOM_CONFIG_PATH:-.}"
+LIBAOM_TEST_DATA_PATH="${LIBAOM_TEST_DATA_PATH:-.}"
+
+# Create the output directory under the first set of $TMPDIR, $TEMPDIR or /tmp.
+if [ -n "${TMPDIR}" ]; then
+  AOM_TEST_TEMP_ROOT="${TMPDIR}"
+elif [ -n "${TEMPDIR}" ]; then
+  AOM_TEST_TEMP_ROOT="${TEMPDIR}"
+else
+  AOM_TEST_TEMP_ROOT=/tmp
+fi
+
+AOM_TEST_OUTPUT_DIR="${AOM_TEST_TEMP_ROOT}/aom_test_$$"
+
+if ! mkdir -p "${AOM_TEST_OUTPUT_DIR}" || \
+   [ ! -d "${AOM_TEST_OUTPUT_DIR}" ]; then
+  echo "${0##*/}: Cannot create output directory, giving up."
+  echo "${0##*/}:   AOM_TEST_OUTPUT_DIR=${AOM_TEST_OUTPUT_DIR}"
+  exit 1
+fi
+
+if [ "$(is_windows_target)" = "yes" ]; then
+  AOM_TEST_EXE_SUFFIX=".exe"  # Appended to tool names when building tool paths.
+fi
+
+# Variables shared by tests.
+VP8_IVF_FILE="${LIBAOM_TEST_DATA_PATH}/vp80-00-comprehensive-001.ivf"
+AV1_IVF_FILE="${LIBAOM_TEST_DATA_PATH}/vp90-2-09-subpixel-00.ivf"
+
+AV1_WEBM_FILE="${LIBAOM_TEST_DATA_PATH}/vp90-2-00-quantizer-00.webm"
+AV1_FPM_WEBM_FILE="${LIBAOM_TEST_DATA_PATH}/vp90-2-07-frame_parallel-1.webm"
+AV1_LT_50_FRAMES_WEBM_FILE="${LIBAOM_TEST_DATA_PATH}/vp90-2-02-size-32x08.webm"
+
+YUV_RAW_INPUT="${LIBAOM_TEST_DATA_PATH}/hantro_collage_w352h288.yuv"
+YUV_RAW_INPUT_WIDTH=352
+YUV_RAW_INPUT_HEIGHT=288
+
+Y4M_NOSQ_PAR_INPUT="${LIBAOM_TEST_DATA_PATH}/park_joy_90p_8_420_a10-1.y4m"
+Y4M_720P_INPUT="${LIBAOM_TEST_DATA_PATH}/niklas_1280_720_30.y4m"
+
+# Setup a trap function to clean up after tests complete.
+trap cleanup EXIT
+
+vlog "$(basename "${0%.*}") test configuration:
+  LIBAOM_BIN_PATH=${LIBAOM_BIN_PATH}
+  LIBAOM_CONFIG_PATH=${LIBAOM_CONFIG_PATH}
+  LIBAOM_TEST_DATA_PATH=${LIBAOM_TEST_DATA_PATH}
+  AOM_IVF_FILE=${AOM_IVF_FILE}
+  AV1_IVF_FILE=${AV1_IVF_FILE}
+  AV1_WEBM_FILE=${AV1_WEBM_FILE}
+  AOM_TEST_EXE_SUFFIX=${AOM_TEST_EXE_SUFFIX}
+  AOM_TEST_FILTER=${AOM_TEST_FILTER}
+  AOM_TEST_LIST_TESTS=${AOM_TEST_LIST_TESTS}
+  AOM_TEST_OUTPUT_DIR=${AOM_TEST_OUTPUT_DIR}
+  AOM_TEST_PREFIX=${AOM_TEST_PREFIX}
+  AOM_TEST_RUN_DISABLED_TESTS=${AOM_TEST_RUN_DISABLED_TESTS}
+  AOM_TEST_SHOW_PROGRAM_OUTPUT=${AOM_TEST_SHOW_PROGRAM_OUTPUT}
+  AOM_TEST_TEMP_ROOT=${AOM_TEST_TEMP_ROOT}
+  AOM_TEST_VERBOSE_OUTPUT=${AOM_TEST_VERBOSE_OUTPUT}
+  YUV_RAW_INPUT=${YUV_RAW_INPUT}
+  YUV_RAW_INPUT_WIDTH=${YUV_RAW_INPUT_WIDTH}
+  YUV_RAW_INPUT_HEIGHT=${YUV_RAW_INPUT_HEIGHT}
+  Y4M_NOSQ_PAR_INPUT=${Y4M_NOSQ_PAR_INPUT}"
+
+fi  # End $AOM_TEST_TOOLS_COMMON_SH pseudo include guard.
diff --git a/third_party/aom/test/transform_test_base.h b/third_party/aom/test/transform_test_base.h
new file mode 100644
index 000000000..4c1a55496
--- /dev/null
+++ b/third_party/aom/test/transform_test_base.h
@@ -0,0 +1,367 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef TEST_TRANSFORM_TEST_BASE_H_
+#define TEST_TRANSFORM_TEST_BASE_H_
+
+#include "./aom_config.h"
+#include "aom_mem/aom_mem.h"
+#include "aom/aom_codec.h"
+
+namespace libaom_test {
+
+//  Note:
+//   The same constant is defined in av1/common/av1_entropy.h and
+//   av1/common/entropy.h.  The goal is for this base class to be
+//   usable for transform testing of future codecs, but including
+//   either of those headers would cause compile errors when unit
+//   testing another codec.  Suggestion: move the definition into
+//   an aom header file.
+const int kDctMaxValue = 16384;
+
+typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
+                        int tx_type);
+
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                        int tx_type);
+
+class TransformTestBase {
+ public:
+  virtual ~TransformTestBase() {}
+
+ protected:
+  virtual void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) = 0;
+
+  virtual void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) = 0;
+
+  void RunAccuracyCheck(uint32_t ref_max_error, double ref_avg_error) {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    uint32_t max_error = 0;
+    int64_t total_error = 0;
+    const int count_test_block = 10000;
+    // Round-trip check: RunFwdTxfm then RunInvTxfm on random residual blocks.
+    int16_t *test_input_block = reinterpret_cast<int16_t *>(
+        aom_memalign(16, sizeof(int16_t) * num_coeffs_));
+    tran_low_t *test_temp_block = reinterpret_cast<tran_low_t *>(
+        aom_memalign(16, sizeof(tran_low_t) * num_coeffs_));
+    uint8_t *dst = reinterpret_cast<uint8_t *>(
+        aom_memalign(16, sizeof(uint8_t) * num_coeffs_));
+    uint8_t *src = reinterpret_cast<uint8_t *>(
+        aom_memalign(16, sizeof(uint8_t) * num_coeffs_));
+#if CONFIG_HIGHBITDEPTH
+    uint16_t *dst16 = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, sizeof(uint16_t) * num_coeffs_));
+    uint16_t *src16 = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, sizeof(uint16_t) * num_coeffs_));
+#endif
+
+    for (int i = 0; i < count_test_block; ++i) {
+      // Initialize a test block as src - dst; the range is [-255, 255] at 8 bits.
+      for (int j = 0; j < num_coeffs_; ++j) {
+        if (bit_depth_ == AOM_BITS_8) {
+          src[j] = rnd.Rand8();
+          dst[j] = rnd.Rand8();
+          test_input_block[j] = src[j] - dst[j];
+#if CONFIG_HIGHBITDEPTH
+        } else {
+          src16[j] = rnd.Rand16() & mask_;
+          dst16[j] = rnd.Rand16() & mask_;
+          test_input_block[j] = src16[j] - dst16[j];
+#endif
+        }
+      }
+
+      ASM_REGISTER_STATE_CHECK(
+          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
+      if (bit_depth_ == AOM_BITS_8) {
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
+#if CONFIG_HIGHBITDEPTH
+      } else {
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
+#endif
+      }
+      // Squared difference between dst (as left by RunInvTxfm) and src.
+      for (int j = 0; j < num_coeffs_; ++j) {
+#if CONFIG_HIGHBITDEPTH
+        const int diff =
+            bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+#else
+        ASSERT_EQ(AOM_BITS_8, bit_depth_);
+        const int diff = dst[j] - src[j];
+#endif
+        const uint32_t error = diff * diff;
+        if (max_error < error) max_error = error;
+        total_error += error;
+      }
+    }
+    // Mean squared error per coefficient across all test blocks.
+    double avg_error = total_error * 1. / count_test_block / num_coeffs_;
+
+    EXPECT_GE(ref_max_error, max_error)
+        << "Error: FHT/IHT has an individual round trip error > "
+        << ref_max_error;
+
+    EXPECT_GE(ref_avg_error, avg_error)
+        << "Error: FHT/IHT has average round trip error > " << ref_avg_error
+        << " per block";
+
+    aom_free(test_input_block);
+    aom_free(test_temp_block);
+    aom_free(dst);
+    aom_free(src);
+#if CONFIG_HIGHBITDEPTH
+    aom_free(dst16);
+    aom_free(src16);
+#endif
+  }
+
+  void RunCoeffCheck() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 5000;
+    // Checks that RunFwdTxfm matches fwd_txfm_ref exactly on random input.
+    // Use a stride value which is not the width of any transform, to catch
+    // cases where the transforms use the stride incorrectly.
+    int stride = 96;
+
+    int16_t *input_block = reinterpret_cast<int16_t *>(
+        aom_memalign(16, sizeof(int16_t) * stride * height_));
+    tran_low_t *output_ref_block = reinterpret_cast<tran_low_t *>(
+        aom_memalign(16, sizeof(tran_low_t) * num_coeffs_));
+    tran_low_t *output_block = reinterpret_cast<tran_low_t *>(
+        aom_memalign(16, sizeof(tran_low_t) * num_coeffs_));
+
+    for (int i = 0; i < count_test_block; ++i) {
+      int j, k;
+      for (j = 0; j < height_; ++j) {
+        for (k = 0; k < pitch_; ++k) {
+          int in_idx = j * stride + k;
+          int out_idx = j * pitch_ + k;
+          input_block[in_idx] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+          if (bit_depth_ == AOM_BITS_8) {
+            output_block[out_idx] = output_ref_block[out_idx] = rnd.Rand8();
+#if CONFIG_HIGHBITDEPTH
+          } else {
+            output_block[out_idx] = output_ref_block[out_idx] =
+                rnd.Rand16() & mask_;
+#endif
+          }
+        }
+      }
+
+      fwd_txfm_ref(input_block, output_ref_block, stride, tx_type_);
+      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, stride));
+
+      // Every output coefficient must equal the reference output.
+      for (j = 0; j < height_; ++j) {
+        for (k = 0; k < pitch_; ++k) {
+          int out_idx = j * pitch_ + k;
+          ASSERT_EQ(output_block[out_idx], output_ref_block[out_idx])
+              << "Error: not bit-exact result at index: " << out_idx
+              << " at test block: " << i;
+        }
+      }
+    }
+    aom_free(input_block);
+    aom_free(output_ref_block);
+    aom_free(output_block);
+  }
+
+  void RunInvCoeffCheck() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 5000;
+    // Checks that RunInvTxfm matches inv_txfm_ref exactly on transformed input.
+    // Use a stride value which is not the width of any transform, to catch
+    // cases where the transforms use the stride incorrectly.
+    int stride = 96;
+
+    int16_t *input_block = reinterpret_cast<int16_t *>(
+        aom_memalign(16, sizeof(int16_t) * num_coeffs_));
+    tran_low_t *trans_block = reinterpret_cast<tran_low_t *>(
+        aom_memalign(16, sizeof(tran_low_t) * num_coeffs_));
+    uint8_t *output_block = reinterpret_cast<uint8_t *>(
+        aom_memalign(16, sizeof(uint8_t) * stride * height_));
+    uint8_t *output_ref_block = reinterpret_cast<uint8_t *>(
+        aom_memalign(16, sizeof(uint8_t) * stride * height_));
+
+    for (int i = 0; i < count_test_block; ++i) {
+      // Initialize a test block with input range [-mask_, mask_].
+      int j, k;
+      for (j = 0; j < height_; ++j) {
+        for (k = 0; k < pitch_; ++k) {
+          int in_idx = j * pitch_ + k;
+          int out_idx = j * stride + k;
+          input_block[in_idx] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+          output_ref_block[out_idx] = rnd.Rand16() & mask_;
+          output_block[out_idx] = output_ref_block[out_idx];
+        }
+      }
+
+      fwd_txfm_ref(input_block, trans_block, pitch_, tx_type_);
+      // Both inverse transforms start from identical output buffers.
+      inv_txfm_ref(trans_block, output_ref_block, stride, tx_type_);
+      ASM_REGISTER_STATE_CHECK(RunInvTxfm(trans_block, output_block, stride));
+
+      for (j = 0; j < height_; ++j) {
+        for (k = 0; k < pitch_; ++k) {
+          int out_idx = j * stride + k;
+          ASSERT_EQ(output_block[out_idx], output_ref_block[out_idx])
+              << "Error: not bit-exact result at index: " << out_idx
+              << " j = " << j << " k = " << k << " at test block: " << i;
+        }
+      }
+    }
+    aom_free(input_block);
+    aom_free(trans_block);
+    aom_free(output_ref_block);
+    aom_free(output_block);
+  }
+
+  void RunMemCheck() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 5000;
+    // Extreme-input check of RunFwdTxfm against fwd_txfm_ref.
+    int16_t *input_extreme_block = reinterpret_cast<int16_t *>(
+        aom_memalign(16, sizeof(int16_t) * num_coeffs_));
+    tran_low_t *output_ref_block = reinterpret_cast<tran_low_t *>(
+        aom_memalign(16, sizeof(tran_low_t) * num_coeffs_));
+    tran_low_t *output_block = reinterpret_cast<tran_low_t *>(
+        aom_memalign(16, sizeof(tran_low_t) * num_coeffs_));
+
+    for (int i = 0; i < count_test_block; ++i) {
+      // Initialize a test block whose samples are each +mask_ or -mask_.
+      for (int j = 0; j < num_coeffs_; ++j) {
+        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
+      }
+      if (i == 0) {
+        for (int j = 0; j < num_coeffs_; ++j) input_extreme_block[j] = mask_;
+      } else if (i == 1) {
+        for (int j = 0; j < num_coeffs_; ++j) input_extreme_block[j] = -mask_;
+      }
+
+      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
+      ASM_REGISTER_STATE_CHECK(
+          RunFwdTxfm(input_extreme_block, output_block, pitch_));
+
+      int row_length = FindRowLength();
+      // Outputs must match the reference and stay within the magnitude bound.
+      for (int j = 0; j < num_coeffs_; ++j) {
+        EXPECT_EQ(output_block[j], output_ref_block[j])
+            << "Not bit-exact at test index: " << i << ", "
+            << "j = " << j << std::endl;
+        EXPECT_GE(row_length * kDctMaxValue << (bit_depth_ - 8),
+                  abs(output_block[j]))
+            << "Error: NxN FDCT has coefficient larger than N*DCT_MAX_VALUE";
+      }
+    }
+    aom_free(input_extreme_block);
+    aom_free(output_ref_block);
+    aom_free(output_block);
+  }
+
+  void RunInvAccuracyCheck(int limit) {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 1000;
+    // Checks RunInvTxfm on fwd_txfm_ref output: squared error must be <= limit.
+    int16_t *in = reinterpret_cast<int16_t *>(
+        aom_memalign(16, sizeof(int16_t) * num_coeffs_));
+    tran_low_t *coeff = reinterpret_cast<tran_low_t *>(
+        aom_memalign(16, sizeof(tran_low_t) * num_coeffs_));
+    uint8_t *dst = reinterpret_cast<uint8_t *>(
+        aom_memalign(16, sizeof(uint8_t) * num_coeffs_));
+    uint8_t *src = reinterpret_cast<uint8_t *>(
+        aom_memalign(16, sizeof(uint8_t) * num_coeffs_));
+
+#if CONFIG_HIGHBITDEPTH
+    uint16_t *dst16 = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, sizeof(uint16_t) * num_coeffs_));
+    uint16_t *src16 = reinterpret_cast<uint16_t *>(
+        aom_memalign(16, sizeof(uint16_t) * num_coeffs_));
+#endif
+
+    for (int i = 0; i < count_test_block; ++i) {
+      // Initialize a test block as the difference src - dst of random samples.
+      for (int j = 0; j < num_coeffs_; ++j) {
+        if (bit_depth_ == AOM_BITS_8) {
+          src[j] = rnd.Rand8();
+          dst[j] = rnd.Rand8();
+          in[j] = src[j] - dst[j];
+#if CONFIG_HIGHBITDEPTH
+        } else {
+          src16[j] = rnd.Rand16() & mask_;
+          dst16[j] = rnd.Rand16() & mask_;
+          in[j] = src16[j] - dst16[j];
+#endif
+        }
+      }
+
+      fwd_txfm_ref(in, coeff, pitch_, tx_type_);
+
+      if (bit_depth_ == AOM_BITS_8) {
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
+#if CONFIG_HIGHBITDEPTH
+      } else {
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
+#endif
+      }
+      // NOTE(review): the failure message below says 4x4 for every block size.
+      for (int j = 0; j < num_coeffs_; ++j) {
+#if CONFIG_HIGHBITDEPTH
+        const int diff =
+            bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+#else
+        const int diff = dst[j] - src[j];
+#endif
+        const uint32_t error = diff * diff;
+        EXPECT_GE(static_cast<uint32_t>(limit), error)
+            << "Error: 4x4 IDCT has error " << error << " at index " << j;
+      }
+    }
+    aom_free(in);
+    aom_free(coeff);
+    aom_free(dst);
+    aom_free(src);
+#if CONFIG_HIGHBITDEPTH
+    aom_free(src16);
+    aom_free(dst16);
+#endif
+  }
+
+  int pitch_;    // Samples per block row; also passed as the transform stride.
+  int height_;   // Number of rows in a block.
+  int tx_type_;  // Passed through to the reference transforms.
+  FhtFunc fwd_txfm_ref;  // Reference forward transform.
+  IhtFunc inv_txfm_ref;  // Reference inverse transform.
+  aom_bit_depth_t bit_depth_;  // AOM_BITS_8 selects the 8-bit buffers.
+  int mask_;                   // Bit mask applied to random samples.
+  int num_coeffs_;             // Number of coefficients per block.
+
+ private:
+  //  Maps the coefficient count of a square 8x8, 16x16 or 32x32 transform to
+  //  its row length; every other count (including 4x4's 16) yields 4.
+  int FindRowLength() const {
+    switch (num_coeffs_) {
+      case 64:
+        return 8;
+      case 256:
+        return 16;
+      case 1024:
+        return 32;
+      default:
+        return 4;
+    }
+  }
+};
+
+}  // namespace libaom_test
+
+#endif  // TEST_TRANSFORM_TEST_BASE_H_
diff --git a/third_party/aom/test/twopass_encoder.sh b/third_party/aom/test/twopass_encoder.sh
new file mode 100755
index 000000000..3abb7628b
--- /dev/null
+++ b/third_party/aom/test/twopass_encoder.sh
@@ -0,0 +1,54 @@
+#!/bin/sh
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file tests the libaom twopass_encoder example. To add new tests to this
+## file, do the following:
+##   1. Write a shell function (this is your test).
+##   2. Add the function to twopass_encoder_tests (on a new line).
+##
+. "$(dirname "$0")/tools_common.sh"  # Quoted so paths with spaces work.
+
+# Environment check: $YUV_RAW_INPUT is required.
+twopass_encoder_verify_environment() {
+  if [ ! -e "${YUV_RAW_INPUT}" ]; then
+    echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH."
+    return 1
+  fi
+}
+
+# Runs twopass_encoder using the codec specified by $1 with a frame limit of
+# 7 (see $limit below). Fails when the tool or its output file is missing.
+twopass_encoder() {
+  local encoder="${LIBAOM_BIN_PATH}/twopass_encoder${AOM_TEST_EXE_SUFFIX}"
+  local codec="$1"
+  local output_file="${AOM_TEST_OUTPUT_DIR}/twopass_encoder_${codec}.ivf"
+  local limit=7
+
+  if [ ! -x "${encoder}" ]; then
+    elog "${encoder} does not exist or is not executable."
+    return 1
+  fi
+
+  eval "${AOM_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
+      "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" "${limit}" \
+      ${devnull}
+
+  [ -e "${output_file}" ] || return 1
+}
+
+twopass_encoder_av1() {
+  if [ "$(av1_encode_available)" = "yes" ]; then
+    twopass_encoder av1 || return 1
+  fi
+}
+
+twopass_encoder_tests="twopass_encoder_av1"
+
+run_tests twopass_encoder_verify_environment "${twopass_encoder_tests}"
diff --git a/third_party/aom/test/user_priv_test.cc b/third_party/aom/test/user_priv_test.cc
new file mode 100644
index 000000000..3052b27b1
--- /dev/null
+++ b/third_party/aom/test/user_priv_test.cc
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "./aom_config.h"
+#include "test/acm_random.h"
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/ivf_video_source.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#if CONFIG_WEBM_IO
+#include "test/webm_video_source.h"
+#endif
+#include "aom_mem/aom_mem.h"
+#include "aom/aom.h"
+
+namespace {
+
+using std::string;
+using libaom_test::ACMRandom;
+
+#if CONFIG_WEBM_IO
+
+void CheckUserPrivateData(void *user_priv, int *target) {
+  // Only the address is compared; the pointee is never dereferenced.
+  void *const expected = static_cast<void *>(target);
+  EXPECT_EQ(expected, user_priv) << "user_priv pointer value does not match.";
+}
+
+// Decodes |filename|. Every frame but the first is passed &frame_num as
+// user_priv (the first gets NULL) and each returned img must carry that same
+// pointer; only the pointer value is compared, never what it points at.
+// Returns the MD5 accumulated over all decoded images.
+string DecodeFile(const string &filename) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  libaom_test::WebMVideoSource video(filename);
+  video.Init();
+
+  aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
+  libaom_test::AV1Decoder decoder(cfg, 0);
+
+  libaom_test::MD5 md5;
+  int frame_num = 0;
+  for (video.Begin(); !::testing::Test::HasFailure() && video.cxdata();
+       video.Next()) {
+    void *user_priv = reinterpret_cast<void *>(&frame_num);
+    const aom_codec_err_t res =
+        decoder.DecodeFrame(video.cxdata(), video.frame_size(),
+                            (frame_num == 0) ? NULL : user_priv);
+    if (res != AOM_CODEC_OK) {
+      EXPECT_EQ(AOM_CODEC_OK, res) << decoder.DecodeError();
+      break;
+    }
+    libaom_test::DxDataIterator dec_iter = decoder.GetDxData();
+    const aom_image_t *img = NULL;
+
+    // Get decompressed data.
+    while ((img = dec_iter.Next())) {
+      if (frame_num == 0) {
+        CheckUserPrivateData(img->user_priv, NULL);
+      } else {
+        CheckUserPrivateData(img->user_priv, &frame_num);
+
+        // Also test ctrl_get_reference api.
+        struct av1_ref_frame ref;
+        // Randomly fetch a reference frame (idx in 0..2).
+        ref.idx = rnd.Rand8() % 3;
+        decoder.Control(AV1_GET_REFERENCE, &ref);
+        // The fetched reference image is expected to carry a NULL user_priv.
+        CheckUserPrivateData(ref.img.user_priv, NULL);
+      }
+      md5.Add(img);
+    }
+
+    frame_num++;
+  }
+  return string(md5.Get());
+}
+
+TEST(UserPrivTest, VideoDecode) {
+  // no tiles or frame parallel; decoding this clip exercises the user_priv
+  // checks in DecodeFile, whose returned MD5 is compared to a fixed digest.
+  const string digest = DecodeFile("av10-2-03-size-226x226.webm");
+  EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc", digest.c_str());
+}
+
+#endif  // CONFIG_WEBM_IO
+
+}  // namespace
diff --git a/third_party/aom/test/util.h b/third_party/aom/test/util.h
new file mode 100644
index 000000000..a20fab65c
--- /dev/null
+++ b/third_party/aom/test/util.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef TEST_UTIL_H_
+#define TEST_UTIL_H_
+
+#include <stdio.h>
+#include <math.h>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "aom/aom_image.h"
+
+// Macros
+#define GET_PARAM(k) std::tr1::get<k>(GetParam())
+
+inline double compute_psnr(const aom_image_t *img1, const aom_image_t *img2) {
+  // Y-plane PSNR of two images that agree in fmt, d_w and d_h.
+  assert((img1->fmt == img2->fmt) && (img1->d_w == img2->d_w) &&
+         (img1->d_h == img2->d_h));
+
+  const unsigned int cols = img1->d_w;
+  const unsigned int rows = img1->d_h;
+
+  // Accumulate the squared Y-plane error over the d_w x d_h area.
+  int64_t sum_sq = 0;
+  for (unsigned int r = 0; r < rows; ++r) {
+    for (unsigned int c = 0; c < cols; ++c) {
+      const int64_t delta =
+          img1->planes[AOM_PLANE_Y][r * img1->stride[AOM_PLANE_Y] + c] -
+          img2->planes[AOM_PLANE_Y][r * img2->stride[AOM_PLANE_Y] + c];
+      sum_sq += delta * delta;
+    }
+  }
+  const double mse = static_cast<double>(sum_sq) / (cols * rows);
+  // A mean squared error that is not positive is reported as 100.0.
+  return (mse > 0.0) ? 10 * log10(255.0 * 255.0 / mse) : 100.0;
+}
+
+#endif  // TEST_UTIL_H_
diff --git a/third_party/aom/test/variance_test.cc b/third_party/aom/test/variance_test.cc
new file mode 100644
index 000000000..5b1003ca7
--- /dev/null
+++ b/third_party/aom/test/variance_test.cc
@@ -0,0 +1,1385 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <cstdlib>
+#include <new>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "./aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "aom/aom_codec.h"
+#include "aom/aom_integer.h"
+#include "aom_mem/aom_mem.h"
+#include "aom_ports/mem.h"
+
+namespace {
+
+typedef unsigned int (*VarianceMxNFunc)(const uint8_t *a, int a_stride,
+                                        const uint8_t *b, int b_stride,
+                                        unsigned int *sse);
+typedef unsigned int (*SubpixVarMxNFunc)(const uint8_t *a, int a_stride,
+                                         int xoffset, int yoffset,
+                                         const uint8_t *b, int b_stride,
+                                         unsigned int *sse);
+typedef unsigned int (*SubpixAvgVarMxNFunc)(const uint8_t *a, int a_stride,
+                                            int xoffset, int yoffset,
+                                            const uint8_t *b, int b_stride,
+                                            uint32_t *sse,
+                                            const uint8_t *second_pred);
+typedef unsigned int (*Get4x4SseFunc)(const uint8_t *a, int a_stride,
+                                      const uint8_t *b, int b_stride);
+typedef unsigned int (*SumOfSquaresFunction)(const int16_t *src);
+
+using libaom_test::ACMRandom;
+
+// Scales high bit depth sums back to the 8-bit range with a rounding right
+// shift: |sse| by 2 * (bit_depth - 8) bits and |se| by (bit_depth - 8) bits.
+// AOM_BITS_8 and any unrecognised depth leave both values untouched.
+static void RoundHighBitDepth(int bit_depth, int64_t *se, uint64_t *sse) {
+  int se_shift = 0;
+  if (bit_depth == AOM_BITS_12) {
+    se_shift = 4;
+  } else if (bit_depth == AOM_BITS_10) {
+    se_shift = 2;
+  }
+  if (se_shift == 0) return;
+
+  // Half of the divisor is added before shifting to perform the rounding.
+  const int sse_shift = 2 * se_shift;
+  *sse = (*sse + (uint64_t(1) << (sse_shift - 1))) >> sse_shift;
+  *se = (*se + (int64_t(1) << (se_shift - 1))) >> se_shift;
+}
+
+static unsigned int mb_ss_ref(const int16_t *src) {
+  // Sum of squares over the 256 values starting at |src|.
+  const int16_t *const end = src + 256;
+  unsigned int total = 0;
+  for (const int16_t *p = src; p != end; ++p) total += (*p) * (*p);
+  return total;
+}
+
+/* Reference variance for a (1 << l2w) x (1 << l2h) block. As in the rest of
+ * the codebase, the per-pixel "diff" is (src - ref). The bit-depth-rounded
+ * SSE is stored in *sse_ptr; the return value is sse - ((se * se) >> log2(N)).
+ */
+static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref, int l2w,
+                             int l2h, int src_stride, int ref_stride,
+                             uint32_t *sse_ptr, bool use_high_bit_depth_,
+                             aom_bit_depth_t bit_depth) {
+  int64_t se = 0;    // sum of differences
+  uint64_t sse = 0;  // sum of squared differences
+  const int w = 1 << l2w;
+  const int h = 1 << l2h;
+  for (int y = 0; y < h; y++) {
+    for (int x = 0; x < w; x++) {
+      int diff;
+      if (!use_high_bit_depth_) {
+        diff = src[y * src_stride + x] - ref[y * ref_stride + x];
+        se += diff;
+        sse += diff * diff;
+#if CONFIG_HIGHBITDEPTH
+      } else {
+        diff = CONVERT_TO_SHORTPTR(src)[y * src_stride + x] -
+               CONVERT_TO_SHORTPTR(ref)[y * ref_stride + x];
+        se += diff;
+        sse += diff * diff;
+#endif  // CONFIG_HIGHBITDEPTH
+      }
+    }
+  }
+  RoundHighBitDepth(bit_depth, &se, &sse);
+  *sse_ptr = static_cast<uint32_t>(sse);
+  return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
+}
+
+/* The subpel reference functions differ from the codec version in one aspect:
+ * they calculate the bilinear factors directly instead of using a lookup table
+ * and therefore upshift xoff and yoff by 1. Only every other calculated value
+ * is used so the codec version shrinks the table to save space and maintain
+ * compatibility with vp8. |ref| is indexed with pitch w + 1, |src| with w.
+ */
+static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
+                                    int l2w, int l2h, int xoff, int yoff,
+                                    uint32_t *sse_ptr, bool use_high_bit_depth_,
+                                    aom_bit_depth_t bit_depth) {
+  int64_t se = 0;    // sum of differences
+  uint64_t sse = 0;  // sum of squared differences
+  const int w = 1 << l2w;
+  const int h = 1 << l2h;
+
+  xoff <<= 1;
+  yoff <<= 1;
+
+  for (int y = 0; y < h; y++) {
+    for (int x = 0; x < w; x++) {
+      // Bilinear interpolation at a 16th pel step.
+      if (!use_high_bit_depth_) {
+        const int a1 = ref[(w + 1) * (y + 0) + x + 0];
+        const int a2 = ref[(w + 1) * (y + 0) + x + 1];
+        const int b1 = ref[(w + 1) * (y + 1) + x + 0];
+        const int b2 = ref[(w + 1) * (y + 1) + x + 1];
+        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+        const int r = a + (((b - a) * yoff + 8) >> 4);
+        const int diff = r - src[w * y + x];
+        se += diff;
+        sse += diff * diff;
+#if CONFIG_HIGHBITDEPTH
+      } else {
+        uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
+        uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
+        const int a1 = ref16[(w + 1) * (y + 0) + x + 0];
+        const int a2 = ref16[(w + 1) * (y + 0) + x + 1];
+        const int b1 = ref16[(w + 1) * (y + 1) + x + 0];
+        const int b2 = ref16[(w + 1) * (y + 1) + x + 1];
+        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+        const int r = a + (((b - a) * yoff + 8) >> 4);
+        const int diff = r - src16[w * y + x];
+        se += diff;
+        sse += diff * diff;
+#endif  // CONFIG_HIGHBITDEPTH
+      }
+    }
+  }
+  RoundHighBitDepth(bit_depth, &se, &sse);
+  *sse_ptr = static_cast<uint32_t>(sse);
+  return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
+}
+
+static uint32_t subpel_avg_variance_ref(const uint8_t *ref, const uint8_t *src,
+                                        const uint8_t *second_pred, int l2w,
+                                        int l2h, int xoff, int yoff,
+                                        uint32_t *sse_ptr,
+                                        bool use_high_bit_depth,
+                                        aom_bit_depth_t bit_depth) {
+  int64_t se = 0;    // sum of differences
+  uint64_t sse = 0;  // sum of squared differences
+  const int w = 1 << l2w;
+  const int h = 1 << l2h;
+
+  xoff <<= 1;
+  yoff <<= 1;
+
+  for (int y = 0; y < h; y++) {
+    for (int x = 0; x < w; x++) {
+      // Bilinear interpolation at a 16th pel step, averaged with second_pred.
+      if (!use_high_bit_depth) {
+        const int a1 = ref[(w + 1) * (y + 0) + x + 0];
+        const int a2 = ref[(w + 1) * (y + 0) + x + 1];
+        const int b1 = ref[(w + 1) * (y + 1) + x + 0];
+        const int b2 = ref[(w + 1) * (y + 1) + x + 1];
+        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+        const int r = a + (((b - a) * yoff + 8) >> 4);
+        const int diff =
+            ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
+        se += diff;
+        sse += diff * diff;
+#if CONFIG_HIGHBITDEPTH
+      } else {
+        const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
+        const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
+        const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred);
+        const int a1 = ref16[(w + 1) * (y + 0) + x + 0];
+        const int a2 = ref16[(w + 1) * (y + 0) + x + 1];
+        const int b1 = ref16[(w + 1) * (y + 1) + x + 0];
+        const int b2 = ref16[(w + 1) * (y + 1) + x + 1];
+        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+        const int r = a + (((b - a) * yoff + 8) >> 4);
+        const int diff = ((r + sec16[w * y + x] + 1) >> 1) - src16[w * y + x];
+        se += diff;
+        sse += diff * diff;
+#endif  // CONFIG_HIGHBITDEPTH
+      }
+    }
+  }
+  RoundHighBitDepth(bit_depth, &se, &sse);
+  *sse_ptr = static_cast<uint32_t>(sse);
+  return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+class SumOfSquaresTest : public ::testing::TestWithParam<SumOfSquaresFunction> {
+ public:
+  SumOfSquaresTest() : func_(GetParam()) {}
+  // Calls ClearSystemState() when the fixture is destroyed.
+  virtual ~SumOfSquaresTest() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void ConstTest();
+  void RefTest();
+
+  SumOfSquaresFunction func_;  // function under test (the test parameter)
+  ACMRandom rnd_;
+};
+
+void SumOfSquaresTest::ConstTest() {
+  // A block filled with one value v must sum to 256 * v * v, for v in 0..255.
+  int16_t block[256];
+  for (int value = 0; value < 256; ++value) {
+    for (int idx = 255; idx >= 0; --idx) block[idx] = value;
+
+    unsigned int actual;
+    ASM_REGISTER_STATE_CHECK(actual = func_(block));
+    EXPECT_EQ(256u * (value * value), actual);
+  }
+}
+
+void SumOfSquaresTest::RefTest() {
+  // 100 rounds; each value is the difference of two Rand8() draws and the
+  // result of the function under test is compared with mb_ss_ref().
+  int16_t residual[256];
+  for (int round = 0; round < 100; ++round) {
+    for (int k = 0; k < 256; ++k) residual[k] = rnd_.Rand8() - rnd_.Rand8();
+
+    const unsigned int expected = mb_ss_ref(residual);
+    unsigned int actual;
+    ASM_REGISTER_STATE_CHECK(actual = func_(residual));
+    EXPECT_EQ(expected, actual);
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Encapsulating struct to store the function to test along with
+// some testing context.
+// Can be used for MSE, SSE, Variance, etc.
+
+template <typename Func>
+struct TestParams {
+  // A |bit_depth_value| of 0 (or less) selects the 8-bit path; a positive
+  // value is used as the bit depth and enables the high bit depth path.
+  TestParams(int log2w = 0, int log2h = 0, Func function = NULL,
+             int bit_depth_value = 0)
+      : log2width(log2w), log2height(log2h), width(1 << log2w),
+        height(1 << log2h), block_size((1 << log2w) * (1 << log2h)),
+        func(function),
+        bit_depth(bit_depth_value > 0
+                      ? static_cast<aom_bit_depth_t>(bit_depth_value)
+                      : AOM_BITS_8),
+        use_high_bit_depth(bit_depth_value > 0),
+        mask((1u << bit_depth) - 1) {}
+
+  // Members are initialised in the order declared here; |mask| is derived
+  // from |bit_depth|, so |bit_depth| has to stay above it.
+  int log2width, log2height;
+  int width, height;
+  int block_size;
+  Func func;
+  aom_bit_depth_t bit_depth;
+  bool use_high_bit_depth;
+  uint32_t mask;
+};
+
+template <typename Func>
+std::ostream &operator<<(std::ostream &os, const TestParams<Func> &p) {
+  os << "log2width/height:" << p.log2width << "/" << p.log2height;
+  return os << " function:" << reinterpret_cast<const void *>(p.func)
+            << " bit-depth:" << p.bit_depth;
+}
+
+// Fixture shared by the variance, MSE and SSE tests for one function type.
+template <typename FunctionType>
+class MainTestClass
+    : public ::testing::TestWithParam<TestParams<FunctionType> > {
+ public:
+  virtual void SetUp() {
+    params_ = this->GetParam();
+    // One block per buffer; samples take 2 bytes in the high bit depth case.
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    const size_t unit =
+        use_high_bit_depth() ? sizeof(uint16_t) : sizeof(uint8_t);
+    src_ = reinterpret_cast<uint8_t *>(aom_memalign(16, block_size() * unit));
+    ref_ = new uint8_t[block_size() * unit];
+    ASSERT_TRUE(src_ != NULL);
+    ASSERT_TRUE(ref_ != NULL);
+#if CONFIG_HIGHBITDEPTH
+    if (use_high_bit_depth()) {
+      // TODO(skal): remove!
+      src_ = CONVERT_TO_BYTEPTR(src_);
+      ref_ = CONVERT_TO_BYTEPTR(ref_);
+    }
+#endif
+  }
+
+  virtual void TearDown() {
+#if CONFIG_HIGHBITDEPTH
+    if (use_high_bit_depth()) {
+      // TODO(skal): remove!
+      src_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(src_));
+      ref_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(ref_));
+    }
+#endif
+    // src_ came from aom_memalign() and ref_ from new[]; free each to match.
+    aom_free(src_);
+    delete[] ref_;
+    src_ = NULL;
+    ref_ = NULL;
+    libaom_test::ClearSystemState();
+  }
+
+ protected:
+  // We could sub-class MainTestClass into dedicated class for Variance
+  // and MSE/SSE, but it involves a lot of 'this->xxx' dereferencing
+  // to access top class fields xxx. That's cumbersome, so for now we'll just
+  // implement the testing methods here:
+
+  // Variance tests
+  void ZeroTest();
+  void RefTest();
+  void RefStrideTest();
+  void OneQuarterTest();
+
+  // MSE/SSE tests
+  void RefTestMse();
+  void RefTestSse();
+  void MaxTestMse();
+  void MaxTestSse();
+
+ protected:
+  ACMRandom rnd_;
+  uint8_t *src_;  // allocated with aom_memalign(16, ...) in SetUp()
+  uint8_t *ref_;  // allocated with new[] in SetUp()
+  TestParams<FunctionType> params_;
+
+  // some relay helpers
+  bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
+  int byte_shift() const { return params_.bit_depth - 8; }
+  int block_size() const { return params_.block_size; }
+  int width() const { return params_.width; }
+  uint32_t mask() const { return params_.mask; }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Tests related to variance.
+
+template <typename VarianceFunctionType>
+void MainTestClass<VarianceFunctionType>::ZeroTest() {
+  for (int src_v = 0; src_v <= 255; ++src_v) {  // constant src level
+    if (use_high_bit_depth()) {
+      uint16_t *const src16 = CONVERT_TO_SHORTPTR(src_);
+      for (int k = 0; k < block_size(); ++k) src16[k] = src_v << byte_shift();
+    } else {
+      memset(src_, src_v, block_size());
+    }
+    for (int ref_v = 0; ref_v <= 255; ++ref_v) {  // constant ref level
+      if (use_high_bit_depth()) {
+        uint16_t *const ref16 = CONVERT_TO_SHORTPTR(ref_);
+        for (int k = 0; k < block_size(); ++k) ref16[k] = ref_v << byte_shift();
+      } else {
+        memset(ref_, ref_v, block_size());
+      }
+      unsigned int sse, var;  // constant blocks must yield zero variance
+      ASM_REGISTER_STATE_CHECK(
+          var = params_.func(src_, width(), ref_, width(), &sse));
+      EXPECT_EQ(0u, var) << "src values: " << src_v << " ref values: " << ref_v;
+    }
+  }
+}
+
+template <typename VarianceFunctionType>
+void MainTestClass<VarianceFunctionType>::RefTest() {
+  for (int i = 0; i < 10; ++i) {  // 10 trials, each with fresh random blocks
+    for (int j = 0; j < block_size(); j++) {
+      if (!use_high_bit_depth()) {
+        src_[j] = rnd_.Rand8();
+        ref_[j] = rnd_.Rand8();
+#if CONFIG_HIGHBITDEPTH
+      } else {
+        CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
+        CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
+#endif  // CONFIG_HIGHBITDEPTH
+      }
+    }
+    unsigned int sse1, sse2, var1, var2;  // 1: under test, 2: variance_ref
+    const int stride = width();           // stride equals the block width
+    ASM_REGISTER_STATE_CHECK(
+        var1 = params_.func(src_, stride, ref_, stride, &sse1));
+    var2 =
+        variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
+                     stride, &sse2, use_high_bit_depth(), params_.bit_depth);
+    EXPECT_EQ(sse1, sse2) << "Error at test index: " << i;
+    EXPECT_EQ(var1, var2) << "Error at test index: " << i;
+  }
+}
+
+template <typename VarianceFunctionType>
+void MainTestClass<VarianceFunctionType>::RefStrideTest() {
+  for (int i = 0; i < 10; ++i) {
+    const int ref_stride = (i & 1) * width();         // 0 or width()
+    const int src_stride = ((i >> 1) & 1) * width();  // 0 or width()
+    for (int j = 0; j < block_size(); j++) {
+      const int ref_ind = (j / width()) * ref_stride + j % width();
+      const int src_ind = (j / width()) * src_stride + j % width();
+      if (!use_high_bit_depth()) {
+        src_[src_ind] = rnd_.Rand8();
+        ref_[ref_ind] = rnd_.Rand8();
+#if CONFIG_HIGHBITDEPTH
+      } else {
+        CONVERT_TO_SHORTPTR(src_)[src_ind] = rnd_.Rand16() & mask();
+        CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() & mask();
+#endif  // CONFIG_HIGHBITDEPTH
+      }
+    }
+    unsigned int sse1, sse2;
+    unsigned int var1, var2;
+    // With a stride of 0, variance_ref() reads row 0 of that buffer for all y.
+    ASM_REGISTER_STATE_CHECK(
+        var1 = params_.func(src_, src_stride, ref_, ref_stride, &sse1));
+    var2 = variance_ref(src_, ref_, params_.log2width, params_.log2height,
+                        src_stride, ref_stride, &sse2, use_high_bit_depth(),
+                        params_.bit_depth);
+    EXPECT_EQ(sse1, sse2) << "Error at test index: " << i;
+    EXPECT_EQ(var1, var2) << "Error at test index: " << i;
+  }
+}
+
+template <typename VarianceFunctionType>
+void MainTestClass<VarianceFunctionType>::OneQuarterTest() {
+  const int half = block_size() / 2;  // src is all 255; ref is 255 then 0
+  if (!use_high_bit_depth()) {
+    memset(src_, 255, block_size());
+    memset(ref_, 255, half);
+    memset(ref_ + half, 0, half);
+#if CONFIG_HIGHBITDEPTH
+  } else {
+    aom_memset16(CONVERT_TO_SHORTPTR(src_), 255 << byte_shift(), block_size());
+    aom_memset16(CONVERT_TO_SHORTPTR(ref_), 255 << byte_shift(), half);
+    aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, 0, half);
+#endif  // CONFIG_HIGHBITDEPTH
+  }
+  unsigned int sse, var, expected;
+  ASM_REGISTER_STATE_CHECK(
+      var = params_.func(src_, width(), ref_, width(), &sse));
+  expected = block_size() * 255 * 255 / 4;  // sse - se * se / block_size
+  EXPECT_EQ(expected, var);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Tests related to MSE / SSE.
+
+template <typename FunctionType>
+void MainTestClass<FunctionType>::RefTestMse() {
+  const int pitch = width();  // only the SSE out-param is checked below
+  for (int trial = 0; trial < 10; ++trial) {
+    for (int px = 0; px < block_size(); ++px) {
+      src_[px] = rnd_.Rand8();
+      ref_[px] = rnd_.Rand8();
+    }
+    unsigned int actual, expected;
+    ASM_REGISTER_STATE_CHECK(params_.func(src_, pitch, ref_, pitch, &actual));
+    variance_ref(src_, ref_, params_.log2width, params_.log2height, pitch,
+                 pitch, &expected, false, AOM_BITS_8);
+    EXPECT_EQ(actual, expected);
+  }
+}
+
+template <typename FunctionType>
+void MainTestClass<FunctionType>::RefTestSse() {
+  // The function under test returns its SSE directly (no sse out-param).
+  const int pitch = width();
+  for (int trial = 0; trial < 10; ++trial) {
+    for (int px = 0; px < block_size(); ++px) {
+      src_[px] = rnd_.Rand8();
+      ref_[px] = rnd_.Rand8();
+    }
+    unsigned int actual, expected;
+    ASM_REGISTER_STATE_CHECK(actual = params_.func(src_, pitch, ref_, pitch));
+    variance_ref(src_, ref_, params_.log2width, params_.log2height, pitch,
+                 pitch, &expected, false, AOM_BITS_8);
+    EXPECT_EQ(actual, expected);
+  }
+}
+
+template <typename FunctionType>
+void MainTestClass<FunctionType>::MaxTestMse() {
+  const unsigned int expected = block_size() * 255 * 255;  // each diff is 255
+  memset(ref_, 0, block_size());
+  memset(src_, 255, block_size());
+  unsigned int sse;
+  ASM_REGISTER_STATE_CHECK(params_.func(src_, width(), ref_, width(), &sse));
+  EXPECT_EQ(expected, sse);
+}
+
+template <typename FunctionType>
+void MainTestClass<FunctionType>::MaxTestSse() {
+  const unsigned int expected = block_size() * 255 * 255;  // each diff is 255
+  memset(ref_, 0, block_size());
+  memset(src_, 255, block_size());
+  unsigned int var;
+  ASM_REGISTER_STATE_CHECK(var = params_.func(src_, width(), ref_, width()));
+  EXPECT_EQ(expected, var);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+using ::std::tr1::get;
+using ::std::tr1::make_tuple;
+using ::std::tr1::tuple;
+
+template <typename SubpelVarianceFunctionType>
+class SubpelVarianceTest
+    : public ::testing::TestWithParam<
+          tuple<int, int, SubpelVarianceFunctionType, int> > {
+ public:
+  virtual void SetUp() {
+    const tuple<int, int, SubpelVarianceFunctionType, int> &params =
+        this->GetParam();
+    log2width_ = get<0>(params);
+    width_ = 1 << log2width_;
+    log2height_ = get<1>(params);
+    height_ = 1 << log2height_;
+    subpel_variance_ = get<2>(params);
+    if (get<3>(params)) {  // non-zero: high bit depth at that depth
+      bit_depth_ = (aom_bit_depth_t)get<3>(params);
+      use_high_bit_depth_ = true;
+    } else {
+      bit_depth_ = AOM_BITS_8;
+      use_high_bit_depth_ = false;
+    }
+    mask_ = (1 << bit_depth_) - 1;
+    // ref_ gets (width_ + 1) * (height_ + 1) samples; src_/sec_ get one block.
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    block_size_ = width_ * height_;
+    if (!use_high_bit_depth_) {
+      src_ = reinterpret_cast<uint8_t *>(aom_memalign(16, block_size_));
+      sec_ = reinterpret_cast<uint8_t *>(aom_memalign(16, block_size_));
+      ref_ = new uint8_t[block_size_ + width_ + height_ + 1];
+#if CONFIG_HIGHBITDEPTH
+    } else {
+      src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
+          aom_memalign(16, block_size_ * sizeof(uint16_t))));
+      sec_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
+          aom_memalign(16, block_size_ * sizeof(uint16_t))));
+      ref_ =
+          CONVERT_TO_BYTEPTR(new uint16_t[block_size_ + width_ + height_ + 1]);
+#endif  // CONFIG_HIGHBITDEPTH
+    }
+    ASSERT_TRUE(src_ != NULL);
+    ASSERT_TRUE(sec_ != NULL);
+    ASSERT_TRUE(ref_ != NULL);
+  }
+  // Frees each buffer with the counterpart of its allocation in SetUp().
+  virtual void TearDown() {
+    if (!use_high_bit_depth_) {
+      aom_free(src_);
+      delete[] ref_;
+      aom_free(sec_);
+#if CONFIG_HIGHBITDEPTH
+    } else {
+      aom_free(CONVERT_TO_SHORTPTR(src_));
+      delete[] CONVERT_TO_SHORTPTR(ref_);
+      aom_free(CONVERT_TO_SHORTPTR(sec_));
+#endif  // CONFIG_HIGHBITDEPTH
+    }
+    libaom_test::ClearSystemState();
+  }
+
+ protected:
+  void RefTest();
+  void ExtremeRefTest();
+
+  ACMRandom rnd_;
+  uint8_t *src_;
+  uint8_t *ref_;  // read with a pitch of width_ + 1 by the tests
+  uint8_t *sec_;  // second predictor for the averaging variance functions
+  bool use_high_bit_depth_;
+  aom_bit_depth_t bit_depth_;
+  int width_, log2width_;
+  int height_, log2height_;
+  int block_size_, mask_;
+  SubpelVarianceFunctionType subpel_variance_;
+};
+
+template <typename SubpelVarianceFunctionType>
+void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
+  for (int x = 0; x < 8; ++x) {    // every xoffset 0..7
+    for (int y = 0; y < 8; ++y) {  // every yoffset 0..7
+      if (!use_high_bit_depth_) {
+        for (int j = 0; j < block_size_; j++) {
+          src_[j] = rnd_.Rand8();
+        }
+        for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) {
+          ref_[j] = rnd_.Rand8();
+        }
+#if CONFIG_HIGHBITDEPTH
+      } else {
+        for (int j = 0; j < block_size_; j++) {
+          CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask_;
+        }
+        for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) {
+          CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask_;
+        }
+#endif  // CONFIG_HIGHBITDEPTH
+      }
+      unsigned int sse1, sse2;  // 1: under test, 2: subpel_variance_ref
+      unsigned int var1;
+      ASM_REGISTER_STATE_CHECK(
+          var1 = subpel_variance_(ref_, width_ + 1, x, y, src_, width_, &sse1));
+      const unsigned int var2 =
+          subpel_variance_ref(ref_, src_, log2width_, log2height_, x, y, &sse2,
+                              use_high_bit_depth_, bit_depth_);
+      EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
+      EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
+    }
+  }
+}
+
+template <typename SubpelVarianceFunctionType>
+void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
+  // Compare against reference.
+  // 8-bit: src is 0 then 255 (by halves), ref is 255 then 0. The high bit
+  // depth branch uses the mirrored layout: src mask_ then 0, ref 0 then mask_.
+  for (int x = 0; x < 8; ++x) {
+    for (int y = 0; y < 8; ++y) {
+      const int half = block_size_ / 2;
+      if (!use_high_bit_depth_) {
+        memset(src_, 0, half);
+        memset(src_ + half, 255, half);
+        memset(ref_, 255, half);
+        memset(ref_ + half, 0, half + width_ + height_ + 1);
+#if CONFIG_HIGHBITDEPTH
+      } else {
+        aom_memset16(CONVERT_TO_SHORTPTR(src_), mask_, half);
+        aom_memset16(CONVERT_TO_SHORTPTR(src_) + half, 0, half);
+        aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, half);
+        aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, mask_,
+                     half + width_ + height_ + 1);
+#endif  // CONFIG_HIGHBITDEPTH
+      }
+      unsigned int sse1, sse2;  // 1: under test, 2: subpel_variance_ref
+      unsigned int var1;
+      ASM_REGISTER_STATE_CHECK(
+          var1 = subpel_variance_(ref_, width_ + 1, x, y, src_, width_, &sse1));
+      const unsigned int var2 =
+          subpel_variance_ref(ref_, src_, log2width_, log2height_, x, y, &sse2,
+                              use_high_bit_depth_, bit_depth_);
+      EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
+      EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
+    }
+  }
+}
+
+template <>
+void SubpelVarianceTest<SubpixAvgVarMxNFunc>::RefTest() {
+  for (int x = 0; x < 8; ++x) {    // every xoffset 0..7
+    for (int y = 0; y < 8; ++y) {  // every yoffset 0..7
+      if (!use_high_bit_depth_) {
+        for (int j = 0; j < block_size_; j++) {
+          src_[j] = rnd_.Rand8();
+          sec_[j] = rnd_.Rand8();
+        }
+        for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) {
+          ref_[j] = rnd_.Rand8();
+        }
+#if CONFIG_HIGHBITDEPTH
+      } else {
+        for (int j = 0; j < block_size_; j++) {
+          CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask_;
+          CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask_;
+        }
+        for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) {
+          CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask_;
+        }
+#endif  // CONFIG_HIGHBITDEPTH
+      }
+      uint32_t sse1, sse2;  // 1: under test, 2: subpel_avg_variance_ref
+      uint32_t var1, var2;
+      ASM_REGISTER_STATE_CHECK(var1 =
+                                   subpel_variance_(ref_, width_ + 1, x, y,
+                                                    src_, width_, &sse1, sec_));
+      var2 = subpel_avg_variance_ref(ref_, src_, sec_, log2width_, log2height_,
+                                     x, y, &sse2, use_high_bit_depth_,
+                                     static_cast<aom_bit_depth_t>(bit_depth_));
+      EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
+      EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
+    }
+  }
+}
+
+typedef MainTestClass<Get4x4SseFunc> AvxSseTest;
+typedef MainTestClass<VarianceMxNFunc> AvxMseTest;
+typedef MainTestClass<VarianceMxNFunc> AvxVarianceTest;  // same type as above
+typedef SubpelVarianceTest<SubpixVarMxNFunc> AvxSubpelVarianceTest;
+typedef SubpelVarianceTest<SubpixAvgVarMxNFunc> AvxSubpelAvgVarianceTest;
+// Each TEST_P body below is a one-line forwarder to a fixture method.
+TEST_P(AvxSseTest, RefSse) { RefTestSse(); }
+TEST_P(AvxSseTest, MaxSse) { MaxTestSse(); }
+TEST_P(AvxMseTest, RefMse) { RefTestMse(); }
+TEST_P(AvxMseTest, MaxMse) { MaxTestMse(); }
+TEST_P(AvxVarianceTest, Zero) { ZeroTest(); }
+TEST_P(AvxVarianceTest, Ref) { RefTest(); }
+TEST_P(AvxVarianceTest, RefStride) { RefStrideTest(); }
+TEST_P(AvxVarianceTest, OneQuarter) { OneQuarterTest(); }
+TEST_P(SumOfSquaresTest, Const) { ConstTest(); }
+TEST_P(SumOfSquaresTest, Ref) { RefTest(); }
+TEST_P(AvxSubpelVarianceTest, Ref) { RefTest(); }
+TEST_P(AvxSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
+TEST_P(AvxSubpelAvgVarianceTest, Ref) { RefTest(); }
+
+INSTANTIATE_TEST_CASE_P(C, SumOfSquaresTest,
+                        ::testing::Values(aom_get_mb_ss_c));
+
+typedef TestParams<Get4x4SseFunc> SseParams;
+INSTANTIATE_TEST_CASE_P(C, AvxSseTest,
+                        ::testing::Values(SseParams(2, 2,
+                                                    &aom_get4x4sse_cs_c)));
+
+typedef TestParams<VarianceMxNFunc> MseParams;  // same type as VarianceParams
+INSTANTIATE_TEST_CASE_P(C, AvxMseTest,  // first two args match log2 of WxH in the name
+                        ::testing::Values(MseParams(4, 4, &aom_mse16x16_c),
+                                          MseParams(4, 3, &aom_mse16x8_c),
+                                          MseParams(3, 4, &aom_mse8x16_c),
+                                          MseParams(3, 3, &aom_mse8x8_c)));
+
+typedef TestParams<VarianceMxNFunc> VarianceParams;  // same type as MseParams
+INSTANTIATE_TEST_CASE_P(  // C variance functions, 64x64 down to 4x4
+    C, AvxVarianceTest,
+    ::testing::Values(VarianceParams(6, 6, &aom_variance64x64_c),
+                      VarianceParams(6, 5, &aom_variance64x32_c),
+                      VarianceParams(5, 6, &aom_variance32x64_c),
+                      VarianceParams(5, 5, &aom_variance32x32_c),
+                      VarianceParams(5, 4, &aom_variance32x16_c),
+                      VarianceParams(4, 5, &aom_variance16x32_c),
+                      VarianceParams(4, 4, &aom_variance16x16_c),
+                      VarianceParams(4, 3, &aom_variance16x8_c),
+                      VarianceParams(3, 4, &aom_variance8x16_c),
+                      VarianceParams(3, 3, &aom_variance8x8_c),
+                      VarianceParams(3, 2, &aom_variance8x4_c),
+                      VarianceParams(2, 3, &aom_variance4x8_c),
+                      VarianceParams(2, 2, &aom_variance4x4_c)));
+
+INSTANTIATE_TEST_CASE_P(  // tuple: log2 w, log2 h, function, 0 (8/10/12 in highbd tables)
+    C, AvxSubpelVarianceTest,
+    ::testing::Values(make_tuple(6, 6, &aom_sub_pixel_variance64x64_c, 0),
+                      make_tuple(6, 5, &aom_sub_pixel_variance64x32_c, 0),
+                      make_tuple(5, 6, &aom_sub_pixel_variance32x64_c, 0),
+                      make_tuple(5, 5, &aom_sub_pixel_variance32x32_c, 0),
+                      make_tuple(5, 4, &aom_sub_pixel_variance32x16_c, 0),
+                      make_tuple(4, 5, &aom_sub_pixel_variance16x32_c, 0),
+                      make_tuple(4, 4, &aom_sub_pixel_variance16x16_c, 0),
+                      make_tuple(4, 3, &aom_sub_pixel_variance16x8_c, 0),
+                      make_tuple(3, 4, &aom_sub_pixel_variance8x16_c, 0),
+                      make_tuple(3, 3, &aom_sub_pixel_variance8x8_c, 0),
+                      make_tuple(3, 2, &aom_sub_pixel_variance8x4_c, 0),
+                      make_tuple(2, 3, &aom_sub_pixel_variance4x8_c, 0),
+                      make_tuple(2, 2, &aom_sub_pixel_variance4x4_c, 0)));
+
+INSTANTIATE_TEST_CASE_P(  // C sub-pixel avg variance functions, 64x64 down to 4x4
+    C, AvxSubpelAvgVarianceTest,
+    ::testing::Values(make_tuple(6, 6, &aom_sub_pixel_avg_variance64x64_c, 0),
+                      make_tuple(6, 5, &aom_sub_pixel_avg_variance64x32_c, 0),
+                      make_tuple(5, 6, &aom_sub_pixel_avg_variance32x64_c, 0),
+                      make_tuple(5, 5, &aom_sub_pixel_avg_variance32x32_c, 0),
+                      make_tuple(5, 4, &aom_sub_pixel_avg_variance32x16_c, 0),
+                      make_tuple(4, 5, &aom_sub_pixel_avg_variance16x32_c, 0),
+                      make_tuple(4, 4, &aom_sub_pixel_avg_variance16x16_c, 0),
+                      make_tuple(4, 3, &aom_sub_pixel_avg_variance16x8_c, 0),
+                      make_tuple(3, 4, &aom_sub_pixel_avg_variance8x16_c, 0),
+                      make_tuple(3, 3, &aom_sub_pixel_avg_variance8x8_c, 0),
+                      make_tuple(3, 2, &aom_sub_pixel_avg_variance8x4_c, 0),
+                      make_tuple(2, 3, &aom_sub_pixel_avg_variance4x8_c, 0),
+                      make_tuple(2, 2, &aom_sub_pixel_avg_variance4x4_c, 0)));
+
+#if CONFIG_HIGHBITDEPTH
+typedef MainTestClass<VarianceMxNFunc> AvxHBDMseTest;  // no live instantiation in this section
+typedef MainTestClass<VarianceMxNFunc> AvxHBDVarianceTest;  // same type as AvxVarianceTest
+typedef SubpelVarianceTest<SubpixVarMxNFunc> AvxHBDSubpelVarianceTest;
+typedef SubpelVarianceTest<SubpixAvgVarMxNFunc> AvxHBDSubpelAvgVarianceTest;
+
+TEST_P(AvxHBDMseTest, RefMse) { RefTestMse(); }  // HBD names reuse the same fixture methods
+TEST_P(AvxHBDMseTest, MaxMse) { MaxTestMse(); }
+TEST_P(AvxHBDVarianceTest, Zero) { ZeroTest(); }
+TEST_P(AvxHBDVarianceTest, Ref) { RefTest(); }
+TEST_P(AvxHBDVarianceTest, RefStride) { RefStrideTest(); }
+TEST_P(AvxHBDVarianceTest, OneQuarter) { OneQuarterTest(); }
+TEST_P(AvxHBDSubpelVarianceTest, Ref) { RefTest(); }
+TEST_P(AvxHBDSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
+TEST_P(AvxHBDSubpelAvgVarianceTest, Ref) { RefTest(); }
+
+/* TODO(debargha): This test does not support the highbd version yet.
+INSTANTIATE_TEST_CASE_P(
+    C, AvxHBDMseTest,
+    ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_c, 12),
+                      MseParams(4, 3, &aom_highbd_12_mse16x8_c, 12),
+                      MseParams(3, 4, &aom_highbd_12_mse8x16_c, 12),
+                      MseParams(3, 3, &aom_highbd_12_mse8x8_c, 12),
+                      MseParams(4, 4, &aom_highbd_10_mse16x16_c, 10),
+                      MseParams(4, 3, &aom_highbd_10_mse16x8_c, 10),
+                      MseParams(3, 4, &aom_highbd_10_mse8x16_c, 10),
+                      MseParams(3, 3, &aom_highbd_10_mse8x8_c, 10),
+                      MseParams(4, 4, &aom_highbd_8_mse16x16_c, 8),
+                      MseParams(4, 3, &aom_highbd_8_mse16x8_c, 8),
+                      MseParams(3, 4, &aom_highbd_8_mse8x16_c, 8),
+                      MseParams(3, 3, &aom_highbd_8_mse8x8_c, 8)));
+*/
+
+const VarianceParams kArrayHBDVariance_c[] = {  // C highbd variance: 12-, 10-, then 8-bit
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  VarianceParams(7, 7, &aom_highbd_12_variance128x128_c, 12),
+  VarianceParams(7, 6, &aom_highbd_12_variance128x64_c, 12),
+  VarianceParams(6, 7, &aom_highbd_12_variance64x128_c, 12),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  VarianceParams(6, 6, &aom_highbd_12_variance64x64_c, 12),
+  VarianceParams(6, 5, &aom_highbd_12_variance64x32_c, 12),
+  VarianceParams(5, 6, &aom_highbd_12_variance32x64_c, 12),
+  VarianceParams(5, 5, &aom_highbd_12_variance32x32_c, 12),
+  VarianceParams(5, 4, &aom_highbd_12_variance32x16_c, 12),
+  VarianceParams(4, 5, &aom_highbd_12_variance16x32_c, 12),
+  VarianceParams(4, 4, &aom_highbd_12_variance16x16_c, 12),
+  VarianceParams(4, 3, &aom_highbd_12_variance16x8_c, 12),
+  VarianceParams(3, 4, &aom_highbd_12_variance8x16_c, 12),
+  VarianceParams(3, 3, &aom_highbd_12_variance8x8_c, 12),
+  VarianceParams(3, 2, &aom_highbd_12_variance8x4_c, 12),
+  VarianceParams(2, 3, &aom_highbd_12_variance4x8_c, 12),
+  VarianceParams(2, 2, &aom_highbd_12_variance4x4_c, 12),
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  VarianceParams(7, 7, &aom_highbd_10_variance128x128_c, 10),
+  VarianceParams(7, 6, &aom_highbd_10_variance128x64_c, 10),
+  VarianceParams(6, 7, &aom_highbd_10_variance64x128_c, 10),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  VarianceParams(6, 6, &aom_highbd_10_variance64x64_c, 10),
+  VarianceParams(6, 5, &aom_highbd_10_variance64x32_c, 10),
+  VarianceParams(5, 6, &aom_highbd_10_variance32x64_c, 10),
+  VarianceParams(5, 5, &aom_highbd_10_variance32x32_c, 10),
+  VarianceParams(5, 4, &aom_highbd_10_variance32x16_c, 10),
+  VarianceParams(4, 5, &aom_highbd_10_variance16x32_c, 10),
+  VarianceParams(4, 4, &aom_highbd_10_variance16x16_c, 10),
+  VarianceParams(4, 3, &aom_highbd_10_variance16x8_c, 10),
+  VarianceParams(3, 4, &aom_highbd_10_variance8x16_c, 10),
+  VarianceParams(3, 3, &aom_highbd_10_variance8x8_c, 10),
+  VarianceParams(3, 2, &aom_highbd_10_variance8x4_c, 10),
+  VarianceParams(2, 3, &aom_highbd_10_variance4x8_c, 10),
+  VarianceParams(2, 2, &aom_highbd_10_variance4x4_c, 10),
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  VarianceParams(7, 7, &aom_highbd_8_variance128x128_c, 8),
+  VarianceParams(7, 6, &aom_highbd_8_variance128x64_c, 8),
+  VarianceParams(6, 7, &aom_highbd_8_variance64x128_c, 8),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  VarianceParams(6, 6, &aom_highbd_8_variance64x64_c, 8),
+  VarianceParams(6, 5, &aom_highbd_8_variance64x32_c, 8),
+  VarianceParams(5, 6, &aom_highbd_8_variance32x64_c, 8),
+  VarianceParams(5, 5, &aom_highbd_8_variance32x32_c, 8),
+  VarianceParams(5, 4, &aom_highbd_8_variance32x16_c, 8),
+  VarianceParams(4, 5, &aom_highbd_8_variance16x32_c, 8),
+  VarianceParams(4, 4, &aom_highbd_8_variance16x16_c, 8),
+  VarianceParams(4, 3, &aom_highbd_8_variance16x8_c, 8),
+  VarianceParams(3, 4, &aom_highbd_8_variance8x16_c, 8),
+  VarianceParams(3, 3, &aom_highbd_8_variance8x8_c, 8),
+  VarianceParams(3, 2, &aom_highbd_8_variance8x4_c, 8),
+  VarianceParams(2, 3, &aom_highbd_8_variance4x8_c, 8),
+  VarianceParams(2, 2, &aom_highbd_8_variance4x4_c, 8)
+};
+INSTANTIATE_TEST_CASE_P(C, AvxHBDVarianceTest,  // ValuesIn: one test param per array entry
+                        ::testing::ValuesIn(kArrayHBDVariance_c));
+
+#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH  // the enclosing #if already requires CONFIG_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, AvxHBDVarianceTest,  // only the 4x4 size, at 8/10/12 bits
+    ::testing::Values(
+        VarianceParams(2, 2, &aom_highbd_8_variance4x4_sse4_1, 8),
+        VarianceParams(2, 2, &aom_highbd_10_variance4x4_sse4_1, 10),
+        VarianceParams(2, 2, &aom_highbd_12_variance4x4_sse4_1, 12)));
+#endif  // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+
+const AvxHBDSubpelVarianceTest::ParamType kArrayHBDSubpelVariance_c[] = {  // 8-, 10-, then 12-bit
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(7, 7, &aom_highbd_8_sub_pixel_variance128x128_c, 8),
+  make_tuple(7, 6, &aom_highbd_8_sub_pixel_variance128x64_c, 8),
+  make_tuple(6, 7, &aom_highbd_8_sub_pixel_variance64x128_c, 8),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(6, 6, &aom_highbd_8_sub_pixel_variance64x64_c, 8),
+  make_tuple(6, 5, &aom_highbd_8_sub_pixel_variance64x32_c, 8),
+  make_tuple(5, 6, &aom_highbd_8_sub_pixel_variance32x64_c, 8),
+  make_tuple(5, 5, &aom_highbd_8_sub_pixel_variance32x32_c, 8),
+  make_tuple(5, 4, &aom_highbd_8_sub_pixel_variance32x16_c, 8),
+  make_tuple(4, 5, &aom_highbd_8_sub_pixel_variance16x32_c, 8),
+  make_tuple(4, 4, &aom_highbd_8_sub_pixel_variance16x16_c, 8),
+  make_tuple(4, 3, &aom_highbd_8_sub_pixel_variance16x8_c, 8),
+  make_tuple(3, 4, &aom_highbd_8_sub_pixel_variance8x16_c, 8),
+  make_tuple(3, 3, &aom_highbd_8_sub_pixel_variance8x8_c, 8),
+  make_tuple(3, 2, &aom_highbd_8_sub_pixel_variance8x4_c, 8),
+  make_tuple(2, 3, &aom_highbd_8_sub_pixel_variance4x8_c, 8),
+  make_tuple(2, 2, &aom_highbd_8_sub_pixel_variance4x4_c, 8),
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(7, 7, &aom_highbd_10_sub_pixel_variance128x128_c, 10),
+  make_tuple(7, 6, &aom_highbd_10_sub_pixel_variance128x64_c, 10),
+  make_tuple(6, 7, &aom_highbd_10_sub_pixel_variance64x128_c, 10),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(6, 6, &aom_highbd_10_sub_pixel_variance64x64_c, 10),
+  make_tuple(6, 5, &aom_highbd_10_sub_pixel_variance64x32_c, 10),
+  make_tuple(5, 6, &aom_highbd_10_sub_pixel_variance32x64_c, 10),
+  make_tuple(5, 5, &aom_highbd_10_sub_pixel_variance32x32_c, 10),
+  make_tuple(5, 4, &aom_highbd_10_sub_pixel_variance32x16_c, 10),
+  make_tuple(4, 5, &aom_highbd_10_sub_pixel_variance16x32_c, 10),
+  make_tuple(4, 4, &aom_highbd_10_sub_pixel_variance16x16_c, 10),
+  make_tuple(4, 3, &aom_highbd_10_sub_pixel_variance16x8_c, 10),
+  make_tuple(3, 4, &aom_highbd_10_sub_pixel_variance8x16_c, 10),
+  make_tuple(3, 3, &aom_highbd_10_sub_pixel_variance8x8_c, 10),
+  make_tuple(3, 2, &aom_highbd_10_sub_pixel_variance8x4_c, 10),
+  make_tuple(2, 3, &aom_highbd_10_sub_pixel_variance4x8_c, 10),
+  make_tuple(2, 2, &aom_highbd_10_sub_pixel_variance4x4_c, 10),
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(7, 7, &aom_highbd_12_sub_pixel_variance128x128_c, 12),
+  make_tuple(7, 6, &aom_highbd_12_sub_pixel_variance128x64_c, 12),
+  make_tuple(6, 7, &aom_highbd_12_sub_pixel_variance64x128_c, 12),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(6, 6, &aom_highbd_12_sub_pixel_variance64x64_c, 12),
+  make_tuple(6, 5, &aom_highbd_12_sub_pixel_variance64x32_c, 12),
+  make_tuple(5, 6, &aom_highbd_12_sub_pixel_variance32x64_c, 12),
+  make_tuple(5, 5, &aom_highbd_12_sub_pixel_variance32x32_c, 12),
+  make_tuple(5, 4, &aom_highbd_12_sub_pixel_variance32x16_c, 12),
+  make_tuple(4, 5, &aom_highbd_12_sub_pixel_variance16x32_c, 12),
+  make_tuple(4, 4, &aom_highbd_12_sub_pixel_variance16x16_c, 12),
+  make_tuple(4, 3, &aom_highbd_12_sub_pixel_variance16x8_c, 12),
+  make_tuple(3, 4, &aom_highbd_12_sub_pixel_variance8x16_c, 12),
+  make_tuple(3, 3, &aom_highbd_12_sub_pixel_variance8x8_c, 12),
+  make_tuple(3, 2, &aom_highbd_12_sub_pixel_variance8x4_c, 12),
+  make_tuple(2, 3, &aom_highbd_12_sub_pixel_variance4x8_c, 12),
+  make_tuple(2, 2, &aom_highbd_12_sub_pixel_variance4x4_c, 12),
+};
+INSTANTIATE_TEST_CASE_P(C, AvxHBDSubpelVarianceTest,  // ValuesIn: one test param per array entry
+                        ::testing::ValuesIn(kArrayHBDSubpelVariance_c));
+
+const AvxHBDSubpelAvgVarianceTest::ParamType kArrayHBDSubpelAvgVariance_c[] = {  // 8-, 10-, 12-bit
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(7, 7, &aom_highbd_8_sub_pixel_avg_variance128x128_c, 8),
+  make_tuple(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_c, 8),
+  make_tuple(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_c, 8),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_c, 8),
+  make_tuple(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_c, 8),
+  make_tuple(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_c, 8),
+  make_tuple(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_c, 8),
+  make_tuple(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_c, 8),
+  make_tuple(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_c, 8),
+  make_tuple(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_c, 8),
+  make_tuple(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_c, 8),
+  make_tuple(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_c, 8),
+  make_tuple(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_c, 8),
+  make_tuple(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_c, 8),
+  make_tuple(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_c, 8),
+  make_tuple(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_c, 8),
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(7, 7, &aom_highbd_10_sub_pixel_avg_variance128x128_c, 10),
+  make_tuple(7, 6, &aom_highbd_10_sub_pixel_avg_variance128x64_c, 10),
+  make_tuple(6, 7, &aom_highbd_10_sub_pixel_avg_variance64x128_c, 10),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_c, 10),
+  make_tuple(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_c, 10),
+  make_tuple(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_c, 10),
+  make_tuple(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_c, 10),
+  make_tuple(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_c, 10),
+  make_tuple(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_c, 10),
+  make_tuple(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_c, 10),
+  make_tuple(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_c, 10),
+  make_tuple(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_c, 10),
+  make_tuple(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_c, 10),
+  make_tuple(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_c, 10),
+  make_tuple(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_c, 10),
+  make_tuple(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_c, 10),
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_c, 12),
+  make_tuple(7, 6, &aom_highbd_12_sub_pixel_avg_variance128x64_c, 12),
+  make_tuple(6, 7, &aom_highbd_12_sub_pixel_avg_variance64x128_c, 12),
+#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
+  make_tuple(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_c, 12),
+  make_tuple(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_c, 12),
+  make_tuple(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_c, 12),
+  make_tuple(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_c, 12),
+  make_tuple(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_c, 12),
+  make_tuple(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_c, 12),
+  make_tuple(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_c, 12),
+  make_tuple(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_c, 12),
+  make_tuple(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_c, 12),
+  make_tuple(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_c, 12),
+  make_tuple(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_c, 12),
+  make_tuple(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_c, 12),
+  make_tuple(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_c, 12)
+};
+INSTANTIATE_TEST_CASE_P(C, AvxHBDSubpelAvgVarianceTest,  // ValuesIn: one test param per array entry
+                        ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_c));
+#endif  // CONFIG_HIGHBITDEPTH
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, SumOfSquaresTest,  // compiled only when HAVE_SSE2
+                        ::testing::Values(aom_get_mb_ss_sse2));
+
+INSTANTIATE_TEST_CASE_P(SSE2, AvxMseTest,  // same four sizes as the C instantiation
+                        ::testing::Values(MseParams(4, 4, &aom_mse16x16_sse2),
+                                          MseParams(4, 3, &aom_mse16x8_sse2),
+                                          MseParams(3, 4, &aom_mse8x16_sse2),
+                                          MseParams(3, 3, &aom_mse8x8_sse2)));
+
+INSTANTIATE_TEST_CASE_P(  // SSE2 variance functions, 64x64 down to 4x4
+    SSE2, AvxVarianceTest,
+    ::testing::Values(VarianceParams(6, 6, &aom_variance64x64_sse2),
+                      VarianceParams(6, 5, &aom_variance64x32_sse2),
+                      VarianceParams(5, 6, &aom_variance32x64_sse2),
+                      VarianceParams(5, 5, &aom_variance32x32_sse2),
+                      VarianceParams(5, 4, &aom_variance32x16_sse2),
+                      VarianceParams(4, 5, &aom_variance16x32_sse2),
+                      VarianceParams(4, 4, &aom_variance16x16_sse2),
+                      VarianceParams(4, 3, &aom_variance16x8_sse2),
+                      VarianceParams(3, 4, &aom_variance8x16_sse2),
+                      VarianceParams(3, 3, &aom_variance8x8_sse2),
+                      VarianceParams(3, 2, &aom_variance8x4_sse2),
+                      VarianceParams(2, 3, &aom_variance4x8_sse2),
+                      VarianceParams(2, 2, &aom_variance4x4_sse2)));
+
+INSTANTIATE_TEST_CASE_P(  // SSE2 sub-pixel variance functions; last tuple field 0 as in the C table
+    SSE2, AvxSubpelVarianceTest,
+    ::testing::Values(make_tuple(6, 6, &aom_sub_pixel_variance64x64_sse2, 0),
+                      make_tuple(6, 5, &aom_sub_pixel_variance64x32_sse2, 0),
+                      make_tuple(5, 6, &aom_sub_pixel_variance32x64_sse2, 0),
+                      make_tuple(5, 5, &aom_sub_pixel_variance32x32_sse2, 0),
+                      make_tuple(5, 4, &aom_sub_pixel_variance32x16_sse2, 0),
+                      make_tuple(4, 5, &aom_sub_pixel_variance16x32_sse2, 0),
+                      make_tuple(4, 4, &aom_sub_pixel_variance16x16_sse2, 0),
+                      make_tuple(4, 3, &aom_sub_pixel_variance16x8_sse2, 0),
+                      make_tuple(3, 4, &aom_sub_pixel_variance8x16_sse2, 0),
+                      make_tuple(3, 3, &aom_sub_pixel_variance8x8_sse2, 0),
+                      make_tuple(3, 2, &aom_sub_pixel_variance8x4_sse2, 0),
+                      make_tuple(2, 3, &aom_sub_pixel_variance4x8_sse2, 0),
+                      make_tuple(2, 2, &aom_sub_pixel_variance4x4_sse2, 0)));
+
+INSTANTIATE_TEST_CASE_P(  // SSE2 sub-pixel avg variance functions, 64x64 down to 4x4
+    SSE2, AvxSubpelAvgVarianceTest,
+    ::testing::Values(
+        make_tuple(6, 6, &aom_sub_pixel_avg_variance64x64_sse2, 0),
+        make_tuple(6, 5, &aom_sub_pixel_avg_variance64x32_sse2, 0),
+        make_tuple(5, 6, &aom_sub_pixel_avg_variance32x64_sse2, 0),
+        make_tuple(5, 5, &aom_sub_pixel_avg_variance32x32_sse2, 0),
+        make_tuple(5, 4, &aom_sub_pixel_avg_variance32x16_sse2, 0),
+        make_tuple(4, 5, &aom_sub_pixel_avg_variance16x32_sse2, 0),
+        make_tuple(4, 4, &aom_sub_pixel_avg_variance16x16_sse2, 0),
+        make_tuple(4, 3, &aom_sub_pixel_avg_variance16x8_sse2, 0),
+        make_tuple(3, 4, &aom_sub_pixel_avg_variance8x16_sse2, 0),
+        make_tuple(3, 3, &aom_sub_pixel_avg_variance8x8_sse2, 0),
+        make_tuple(3, 2, &aom_sub_pixel_avg_variance8x4_sse2, 0),
+        make_tuple(2, 3, &aom_sub_pixel_avg_variance4x8_sse2, 0),
+        make_tuple(2, 2, &aom_sub_pixel_avg_variance4x4_sse2, 0)));
+
+#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH  // nested inside the HAVE_SSE2 section
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, AvxSubpelVarianceTest,  // NOTE(review): highbd funcs on the non-HBD name; confirm intent
+    ::testing::Values(
+        make_tuple(2, 2, &aom_highbd_8_sub_pixel_variance4x4_sse4_1, 8),
+        make_tuple(2, 2, &aom_highbd_10_sub_pixel_variance4x4_sse4_1, 10),
+        make_tuple(2, 2, &aom_highbd_12_sub_pixel_variance4x4_sse4_1, 12)));
+
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, AvxSubpelAvgVarianceTest,  // NOTE(review): same naming question as above
+    ::testing::Values(
+        make_tuple(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_sse4_1, 8),
+        make_tuple(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_sse4_1, 10),
+        make_tuple(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_sse4_1, 12)));
+#endif  // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+
+#if CONFIG_HIGHBITDEPTH
+/* TODO(debargha): This test does not support the highbd version yet.
+INSTANTIATE_TEST_CASE_P(
+    SSE2, AvxHBDMseTest,
+    ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_sse2, 12),
+                      MseParams(4, 3, &aom_highbd_12_mse16x8_sse2, 12),
+                      MseParams(3, 4, &aom_highbd_12_mse8x16_sse2, 12),
+                      MseParams(3, 3, &aom_highbd_12_mse8x8_sse2, 12),
+                      MseParams(4, 4, &aom_highbd_10_mse16x16_sse2, 10),
+                      MseParams(4, 3, &aom_highbd_10_mse16x8_sse2, 10),
+                      MseParams(3, 4, &aom_highbd_10_mse8x16_sse2, 10),
+                      MseParams(3, 3, &aom_highbd_10_mse8x8_sse2, 10),
+                      MseParams(4, 4, &aom_highbd_8_mse16x16_sse2, 8),
+                      MseParams(4, 3, &aom_highbd_8_mse16x8_sse2, 8),
+                      MseParams(3, 4, &aom_highbd_8_mse8x16_sse2, 8),
+                      MseParams(3, 3, &aom_highbd_8_mse8x8_sse2, 8)));
+*/
+
+INSTANTIATE_TEST_CASE_P(  // SSE2 highbd variance: 64x64 down to 8x8 per bit depth
+    SSE2, AvxHBDVarianceTest,
+    ::testing::Values(
+        VarianceParams(6, 6, &aom_highbd_12_variance64x64_sse2, 12),
+        VarianceParams(6, 5, &aom_highbd_12_variance64x32_sse2, 12),
+        VarianceParams(5, 6, &aom_highbd_12_variance32x64_sse2, 12),
+        VarianceParams(5, 5, &aom_highbd_12_variance32x32_sse2, 12),
+        VarianceParams(5, 4, &aom_highbd_12_variance32x16_sse2, 12),
+        VarianceParams(4, 5, &aom_highbd_12_variance16x32_sse2, 12),
+        VarianceParams(4, 4, &aom_highbd_12_variance16x16_sse2, 12),
+        VarianceParams(4, 3, &aom_highbd_12_variance16x8_sse2, 12),
+        VarianceParams(3, 4, &aom_highbd_12_variance8x16_sse2, 12),
+        VarianceParams(3, 3, &aom_highbd_12_variance8x8_sse2, 12),
+        VarianceParams(6, 6, &aom_highbd_10_variance64x64_sse2, 10),
+        VarianceParams(6, 5, &aom_highbd_10_variance64x32_sse2, 10),
+        VarianceParams(5, 6, &aom_highbd_10_variance32x64_sse2, 10),
+        VarianceParams(5, 5, &aom_highbd_10_variance32x32_sse2, 10),
+        VarianceParams(5, 4, &aom_highbd_10_variance32x16_sse2, 10),
+        VarianceParams(4, 5, &aom_highbd_10_variance16x32_sse2, 10),
+        VarianceParams(4, 4, &aom_highbd_10_variance16x16_sse2, 10),
+        VarianceParams(4, 3, &aom_highbd_10_variance16x8_sse2, 10),
+        VarianceParams(3, 4, &aom_highbd_10_variance8x16_sse2, 10),
+        VarianceParams(3, 3, &aom_highbd_10_variance8x8_sse2, 10),
+        VarianceParams(6, 6, &aom_highbd_8_variance64x64_sse2, 8),
+        VarianceParams(6, 5, &aom_highbd_8_variance64x32_sse2, 8),
+        VarianceParams(5, 6, &aom_highbd_8_variance32x64_sse2, 8),
+        VarianceParams(5, 5, &aom_highbd_8_variance32x32_sse2, 8),
+        VarianceParams(5, 4, &aom_highbd_8_variance32x16_sse2, 8),
+        VarianceParams(4, 5, &aom_highbd_8_variance16x32_sse2, 8),
+        VarianceParams(4, 4, &aom_highbd_8_variance16x16_sse2, 8),
+        VarianceParams(4, 3, &aom_highbd_8_variance16x8_sse2, 8),
+        VarianceParams(3, 4, &aom_highbd_8_variance8x16_sse2, 8),
+        VarianceParams(3, 3, &aom_highbd_8_variance8x8_sse2, 8)));
+
+INSTANTIATE_TEST_CASE_P(  // SSE2 highbd sub-pixel variance: 64x64 down to 8x4 per bit depth
+    SSE2, AvxHBDSubpelVarianceTest,
+    ::testing::Values(
+        make_tuple(6, 6, &aom_highbd_12_sub_pixel_variance64x64_sse2, 12),
+        make_tuple(6, 5, &aom_highbd_12_sub_pixel_variance64x32_sse2, 12),
+        make_tuple(5, 6, &aom_highbd_12_sub_pixel_variance32x64_sse2, 12),
+        make_tuple(5, 5, &aom_highbd_12_sub_pixel_variance32x32_sse2, 12),
+        make_tuple(5, 4, &aom_highbd_12_sub_pixel_variance32x16_sse2, 12),
+        make_tuple(4, 5, &aom_highbd_12_sub_pixel_variance16x32_sse2, 12),
+        make_tuple(4, 4, &aom_highbd_12_sub_pixel_variance16x16_sse2, 12),
+        make_tuple(4, 3, &aom_highbd_12_sub_pixel_variance16x8_sse2, 12),
+        make_tuple(3, 4, &aom_highbd_12_sub_pixel_variance8x16_sse2, 12),
+        make_tuple(3, 3, &aom_highbd_12_sub_pixel_variance8x8_sse2, 12),
+        make_tuple(3, 2, &aom_highbd_12_sub_pixel_variance8x4_sse2, 12),
+        make_tuple(6, 6, &aom_highbd_10_sub_pixel_variance64x64_sse2, 10),
+        make_tuple(6, 5, &aom_highbd_10_sub_pixel_variance64x32_sse2, 10),
+        make_tuple(5, 6, &aom_highbd_10_sub_pixel_variance32x64_sse2, 10),
+        make_tuple(5, 5, &aom_highbd_10_sub_pixel_variance32x32_sse2, 10),
+        make_tuple(5, 4, &aom_highbd_10_sub_pixel_variance32x16_sse2, 10),
+        make_tuple(4, 5, &aom_highbd_10_sub_pixel_variance16x32_sse2, 10),
+        make_tuple(4, 4, &aom_highbd_10_sub_pixel_variance16x16_sse2, 10),
+        make_tuple(4, 3, &aom_highbd_10_sub_pixel_variance16x8_sse2, 10),
+        make_tuple(3, 4, &aom_highbd_10_sub_pixel_variance8x16_sse2, 10),
+        make_tuple(3, 3, &aom_highbd_10_sub_pixel_variance8x8_sse2, 10),
+        make_tuple(3, 2, &aom_highbd_10_sub_pixel_variance8x4_sse2, 10),
+        make_tuple(6, 6, &aom_highbd_8_sub_pixel_variance64x64_sse2, 8),
+        make_tuple(6, 5, &aom_highbd_8_sub_pixel_variance64x32_sse2, 8),
+        make_tuple(5, 6, &aom_highbd_8_sub_pixel_variance32x64_sse2, 8),
+        make_tuple(5, 5, &aom_highbd_8_sub_pixel_variance32x32_sse2, 8),
+        make_tuple(5, 4, &aom_highbd_8_sub_pixel_variance32x16_sse2, 8),
+        make_tuple(4, 5, &aom_highbd_8_sub_pixel_variance16x32_sse2, 8),
+        make_tuple(4, 4, &aom_highbd_8_sub_pixel_variance16x16_sse2, 8),
+        make_tuple(4, 3, &aom_highbd_8_sub_pixel_variance16x8_sse2, 8),
+        make_tuple(3, 4, &aom_highbd_8_sub_pixel_variance8x16_sse2, 8),
+        make_tuple(3, 3, &aom_highbd_8_sub_pixel_variance8x8_sse2, 8),
+        make_tuple(3, 2, &aom_highbd_8_sub_pixel_variance8x4_sse2, 8)));
+
+INSTANTIATE_TEST_CASE_P(  // SSE2 highbd sub-pixel avg variance: 64x64 down to 8x4 per bit depth
+    SSE2, AvxHBDSubpelAvgVarianceTest,
+    ::testing::Values(
+        make_tuple(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_sse2, 12),
+        make_tuple(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_sse2, 12),
+        make_tuple(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_sse2, 12),
+        make_tuple(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_sse2, 12),
+        make_tuple(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_sse2, 12),
+        make_tuple(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_sse2, 12),
+        make_tuple(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_sse2, 12),
+        make_tuple(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_sse2, 12),
+        make_tuple(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_sse2, 12),
+        make_tuple(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_sse2, 12),
+        make_tuple(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_sse2, 12),
+        make_tuple(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_sse2, 10),
+        make_tuple(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_sse2, 10),
+        make_tuple(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_sse2, 10),
+        make_tuple(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_sse2, 10),
+        make_tuple(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_sse2, 10),
+        make_tuple(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_sse2, 10),
+        make_tuple(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_sse2, 10),
+        make_tuple(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_sse2, 10),
+        make_tuple(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_sse2, 10),
+        make_tuple(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_sse2, 10),
+        make_tuple(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_sse2, 10),
+        make_tuple(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_sse2, 8),
+        make_tuple(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_sse2, 8),
+        make_tuple(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_sse2, 8),
+        make_tuple(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_sse2, 8),
+        make_tuple(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_sse2, 8),
+        make_tuple(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_sse2, 8),
+        make_tuple(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_sse2, 8),
+        make_tuple(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_sse2, 8),
+        make_tuple(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_sse2, 8),
+        make_tuple(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_sse2, 8),
+        make_tuple(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_sse2, 8)));
+#endif  // CONFIG_HIGHBITDEPTH
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(  // SSSE3 sub-pixel variance functions, 64x64 down to 4x4
+    SSSE3, AvxSubpelVarianceTest,
+    ::testing::Values(make_tuple(6, 6, &aom_sub_pixel_variance64x64_ssse3, 0),
+                      make_tuple(6, 5, &aom_sub_pixel_variance64x32_ssse3, 0),
+                      make_tuple(5, 6, &aom_sub_pixel_variance32x64_ssse3, 0),
+                      make_tuple(5, 5, &aom_sub_pixel_variance32x32_ssse3, 0),
+                      make_tuple(5, 4, &aom_sub_pixel_variance32x16_ssse3, 0),
+                      make_tuple(4, 5, &aom_sub_pixel_variance16x32_ssse3, 0),
+                      make_tuple(4, 4, &aom_sub_pixel_variance16x16_ssse3, 0),
+                      make_tuple(4, 3, &aom_sub_pixel_variance16x8_ssse3, 0),
+                      make_tuple(3, 4, &aom_sub_pixel_variance8x16_ssse3, 0),
+                      make_tuple(3, 3, &aom_sub_pixel_variance8x8_ssse3, 0),
+                      make_tuple(3, 2, &aom_sub_pixel_variance8x4_ssse3, 0),
+                      make_tuple(2, 3, &aom_sub_pixel_variance4x8_ssse3, 0),
+                      make_tuple(2, 2, &aom_sub_pixel_variance4x4_ssse3, 0)));
+
+INSTANTIATE_TEST_CASE_P(  // SSSE3 sub-pixel avg variance functions, 64x64 down to 4x4
+    SSSE3, AvxSubpelAvgVarianceTest,
+    ::testing::Values(
+        make_tuple(6, 6, &aom_sub_pixel_avg_variance64x64_ssse3, 0),
+        make_tuple(6, 5, &aom_sub_pixel_avg_variance64x32_ssse3, 0),
+        make_tuple(5, 6, &aom_sub_pixel_avg_variance32x64_ssse3, 0),
+        make_tuple(5, 5, &aom_sub_pixel_avg_variance32x32_ssse3, 0),
+        make_tuple(5, 4, &aom_sub_pixel_avg_variance32x16_ssse3, 0),
+        make_tuple(4, 5, &aom_sub_pixel_avg_variance16x32_ssse3, 0),
+        make_tuple(4, 4, &aom_sub_pixel_avg_variance16x16_ssse3, 0),
+        make_tuple(4, 3, &aom_sub_pixel_avg_variance16x8_ssse3, 0),
+        make_tuple(3, 4, &aom_sub_pixel_avg_variance8x16_ssse3, 0),
+        make_tuple(3, 3, &aom_sub_pixel_avg_variance8x8_ssse3, 0),
+        make_tuple(3, 2, &aom_sub_pixel_avg_variance8x4_ssse3, 0),
+        make_tuple(2, 3, &aom_sub_pixel_avg_variance4x8_ssse3, 0),
+        make_tuple(2, 2, &aom_sub_pixel_avg_variance4x4_ssse3, 0)));
+#endif  // HAVE_SSSE3
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_CASE_P(AVX2, AvxMseTest,  // only the 16x16 size is listed
+                        ::testing::Values(MseParams(4, 4, &aom_mse16x16_avx2)));
+
+INSTANTIATE_TEST_CASE_P(  // AVX2 variance: five sizes, 16x16 and larger
+    AVX2, AvxVarianceTest,
+    ::testing::Values(VarianceParams(6, 6, &aom_variance64x64_avx2),
+                      VarianceParams(6, 5, &aom_variance64x32_avx2),
+                      VarianceParams(5, 5, &aom_variance32x32_avx2),
+                      VarianceParams(5, 4, &aom_variance32x16_avx2),
+                      VarianceParams(4, 4, &aom_variance16x16_avx2)));
+
+INSTANTIATE_TEST_CASE_P(  // AVX2 sub-pixel variance: 64x64 and 32x32 only
+    AVX2, AvxSubpelVarianceTest,
+    ::testing::Values(make_tuple(6, 6, &aom_sub_pixel_variance64x64_avx2, 0),
+                      make_tuple(5, 5, &aom_sub_pixel_variance32x32_avx2, 0)));
+
+INSTANTIATE_TEST_CASE_P(  // AVX2 sub-pixel avg variance: 64x64 and 32x32 only
+    AVX2, AvxSubpelAvgVarianceTest,
+    ::testing::Values(
+        make_tuple(6, 6, &aom_sub_pixel_avg_variance64x64_avx2, 0),
+        make_tuple(5, 5, &aom_sub_pixel_avg_variance32x32_avx2, 0)));
+#endif  // HAVE_AVX2
+
+#if HAVE_MEDIA
+INSTANTIATE_TEST_CASE_P(MEDIA, AvxMseTest,
+                        ::testing::Values(MseParams(4, 4,
+                                                    &aom_mse16x16_media)));
+
+INSTANTIATE_TEST_CASE_P(
+    MEDIA, AvxVarianceTest,
+    ::testing::Values(VarianceParams(4, 4, &aom_variance16x16_media),
+                      VarianceParams(3, 3, &aom_variance8x8_media)));
+
+INSTANTIATE_TEST_CASE_P(
+    MEDIA, AvxSubpelVarianceTest,
+    ::testing::Values(make_tuple(4, 4, &aom_sub_pixel_variance16x16_media, 0),
+                      make_tuple(3, 3, &aom_sub_pixel_variance8x8_media, 0)));
+#endif  // HAVE_MEDIA
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, AvxSseTest,
+                        ::testing::Values(SseParams(2, 2,
+                                                    &aom_get4x4sse_cs_neon)));
+
+INSTANTIATE_TEST_CASE_P(NEON, AvxMseTest,
+                        ::testing::Values(MseParams(4, 4, &aom_mse16x16_neon)));
+
+INSTANTIATE_TEST_CASE_P(
+    NEON, AvxVarianceTest,
+    ::testing::Values(VarianceParams(6, 6, &aom_variance64x64_neon),
+                      VarianceParams(6, 5, &aom_variance64x32_neon),
+                      VarianceParams(5, 6, &aom_variance32x64_neon),
+                      VarianceParams(5, 5, &aom_variance32x32_neon),
+                      VarianceParams(4, 4, &aom_variance16x16_neon),
+                      VarianceParams(4, 3, &aom_variance16x8_neon),
+                      VarianceParams(3, 4, &aom_variance8x16_neon),
+                      VarianceParams(3, 3, &aom_variance8x8_neon)));
+
+INSTANTIATE_TEST_CASE_P(
+    NEON, AvxSubpelVarianceTest,
+    ::testing::Values(make_tuple(6, 6, &aom_sub_pixel_variance64x64_neon, 0),
+                      make_tuple(5, 5, &aom_sub_pixel_variance32x32_neon, 0),
+                      make_tuple(4, 4, &aom_sub_pixel_variance16x16_neon, 0),
+                      make_tuple(3, 3, &aom_sub_pixel_variance8x8_neon, 0)));
+#endif  // HAVE_NEON
+
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(MSA, SumOfSquaresTest,
+                        ::testing::Values(aom_get_mb_ss_msa));
+
+INSTANTIATE_TEST_CASE_P(MSA, AvxSseTest,
+                        ::testing::Values(SseParams(2, 2,
+                                                    &aom_get4x4sse_cs_msa)));
+
+INSTANTIATE_TEST_CASE_P(MSA, AvxMseTest,
+                        ::testing::Values(MseParams(4, 4, &aom_mse16x16_msa),
+                                          MseParams(4, 3, &aom_mse16x8_msa),
+                                          MseParams(3, 4, &aom_mse8x16_msa),
+                                          MseParams(3, 3, &aom_mse8x8_msa)));
+
+INSTANTIATE_TEST_CASE_P(
+    MSA, AvxVarianceTest,
+    ::testing::Values(VarianceParams(6, 6, &aom_variance64x64_msa),
+                      VarianceParams(6, 5, &aom_variance64x32_msa),
+                      VarianceParams(5, 6, &aom_variance32x64_msa),
+                      VarianceParams(5, 5, &aom_variance32x32_msa),
+                      VarianceParams(5, 4, &aom_variance32x16_msa),
+                      VarianceParams(4, 5, &aom_variance16x32_msa),
+                      VarianceParams(4, 4, &aom_variance16x16_msa),
+                      VarianceParams(4, 3, &aom_variance16x8_msa),
+                      VarianceParams(3, 4, &aom_variance8x16_msa),
+                      VarianceParams(3, 3, &aom_variance8x8_msa),
+                      VarianceParams(3, 2, &aom_variance8x4_msa),
+                      VarianceParams(2, 3, &aom_variance4x8_msa),
+                      VarianceParams(2, 2, &aom_variance4x4_msa)));
+
+INSTANTIATE_TEST_CASE_P(
+    MSA, AvxSubpelVarianceTest,
+    ::testing::Values(make_tuple(2, 2, &aom_sub_pixel_variance4x4_msa, 0),
+                      make_tuple(2, 3, &aom_sub_pixel_variance4x8_msa, 0),
+                      make_tuple(3, 2, &aom_sub_pixel_variance8x4_msa, 0),
+                      make_tuple(3, 3, &aom_sub_pixel_variance8x8_msa, 0),
+                      make_tuple(3, 4, &aom_sub_pixel_variance8x16_msa, 0),
+                      make_tuple(4, 3, &aom_sub_pixel_variance16x8_msa, 0),
+                      make_tuple(4, 4, &aom_sub_pixel_variance16x16_msa, 0),
+                      make_tuple(4, 5, &aom_sub_pixel_variance16x32_msa, 0),
+                      make_tuple(5, 4, &aom_sub_pixel_variance32x16_msa, 0),
+                      make_tuple(5, 5, &aom_sub_pixel_variance32x32_msa, 0),
+                      make_tuple(5, 6, &aom_sub_pixel_variance32x64_msa, 0),
+                      make_tuple(6, 5, &aom_sub_pixel_variance64x32_msa, 0),
+                      make_tuple(6, 6, &aom_sub_pixel_variance64x64_msa, 0)));
+
+INSTANTIATE_TEST_CASE_P(
+    MSA, AvxSubpelAvgVarianceTest,
+    ::testing::Values(make_tuple(6, 6, &aom_sub_pixel_avg_variance64x64_msa, 0),
+                      make_tuple(6, 5, &aom_sub_pixel_avg_variance64x32_msa, 0),
+                      make_tuple(5, 6, &aom_sub_pixel_avg_variance32x64_msa, 0),
+                      make_tuple(5, 5, &aom_sub_pixel_avg_variance32x32_msa, 0),
+                      make_tuple(5, 4, &aom_sub_pixel_avg_variance32x16_msa, 0),
+                      make_tuple(4, 5, &aom_sub_pixel_avg_variance16x32_msa, 0),
+                      make_tuple(4, 4, &aom_sub_pixel_avg_variance16x16_msa, 0),
+                      make_tuple(4, 3, &aom_sub_pixel_avg_variance16x8_msa, 0),
+                      make_tuple(3, 4, &aom_sub_pixel_avg_variance8x16_msa, 0),
+                      make_tuple(3, 3, &aom_sub_pixel_avg_variance8x8_msa, 0),
+                      make_tuple(3, 2, &aom_sub_pixel_avg_variance8x4_msa, 0),
+                      make_tuple(2, 3, &aom_sub_pixel_avg_variance4x8_msa, 0),
+                      make_tuple(2, 2, &aom_sub_pixel_avg_variance4x4_msa, 0)));
+#endif  // HAVE_MSA
+}  // namespace
diff --git a/third_party/aom/test/video_source.h b/third_party/aom/test/video_source.h
new file mode 100644
index 000000000..e986ffb37
--- /dev/null
+++ b/third_party/aom/test/video_source.h
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef TEST_VIDEO_SOURCE_H_
+#define TEST_VIDEO_SOURCE_H_
+
+#if defined(_WIN32)
+#undef NOMINMAX
+#define NOMINMAX
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+#include "test/acm_random.h"
+#include "aom/aom_encoder.h"
+
+namespace libaom_test {
+
+// Helper macros to ensure LIBAOM_TEST_DATA_PATH is a quoted string.
+// These are undefined right below GetDataPath
+// NOTE: LIBAOM_TEST_DATA_PATH MUST NOT be a quoted string before
+// Stringification or the GetDataPath will fail at runtime
+#define TO_STRING(S) #S
+#define STRINGIFY(S) TO_STRING(S)
+
+// A simple function to encapsulate cross platform retrieval of test data path
+static std::string GetDataPath() {
+  // The environment variable, when set, always takes precedence.
+  const char *const env_path = getenv("LIBAOM_TEST_DATA_PATH");
+  if (env_path != NULL) return env_path;
+
+#ifdef LIBAOM_TEST_DATA_PATH
+  // Some environments cannot set environment variables, so the build may
+  // instead bake the path in through a preprocessor symbol (e.g. set from
+  // the make files).
+  return STRINGIFY(LIBAOM_TEST_DATA_PATH);
+#else
+  return ".";  // Last resort: the current working directory.
+#endif
+}
+
+// Undefining stringification macros because they are not used elsewhere
+#undef TO_STRING
+#undef STRINGIFY
+
+inline FILE *OpenTestDataFile(const std::string &file_name) {
+  // Opens <data path>/<file_name> for binary reading.
+  return fopen((GetDataPath() + "/" + file_name).c_str(), "rb");
+}
+
+static FILE *GetTempOutFile(std::string *file_name) {
+  file_name->clear();
+#if defined(_WIN32)
+  char tmppath[MAX_PATH];
+  char fname[MAX_PATH];
+  // Assume for now that the filename generated is unique per process
+  if (!GetTempPathA(MAX_PATH, tmppath) ||
+      !GetTempFileNameA(tmppath, "lvx", 0, fname))
+    return NULL;
+  // Report the name so that the caller can remove the file afterwards.
+  file_name->assign(fname);
+  return fopen(fname, "wb+");
+#else
+  // tmpfile() does not report a name, so *file_name stays empty.
+  return tmpfile();
+#endif
+}
+
+class TempOutFile {
+ public:
+  TempOutFile() { file_ = GetTempOutFile(&file_name_); }
+  ~TempOutFile() {
+    CloseFile();
+    if (!file_name_.empty()) {  // only a named file has to be removed by hand
+      EXPECT_EQ(0, remove(file_name_.c_str()));
+    }
+  }
+  FILE *file() { return file_; }  // NULL if GetTempOutFile() returned NULL
+  const std::string &file_name() { return file_name_; }
+  // NOTE(review): copying is not disabled; a copy would fclose() file_ twice.
+ protected:
+  void CloseFile() {
+    if (file_) {
+      fclose(file_);
+      file_ = NULL;  // makes any later CloseFile() call a no-op
+    }
+  }
+  FILE *file_;
+  std::string file_name_;
+};
+
+// Abstract base class for test video sources, which provide a stream of
+// aom_image_t images with associated timestamps and duration.
+class VideoSource {
+ public:
+  virtual ~VideoSource() {}
+
+  // Prepare the stream for reading, rewind/open as necessary.
+  virtual void Begin() = 0;
+
+  // Advance the cursor to the next frame.
+  virtual void Next() = 0;
+
+  // Get the current video frame, or NULL on End-Of-Stream.
+  virtual aom_image_t *img() const = 0;
+
+  // Get the presentation timestamp of the current frame.
+  virtual aom_codec_pts_t pts() const = 0;
+
+  // Get the current frame's duration.
+  virtual unsigned long duration() const = 0;
+
+  // Get the timebase for the stream.
+  virtual aom_rational_t timebase() const = 0;
+
+  // Get the current frame counter, starting at 0.
+  virtual unsigned int frame() const = 0;
+
+  // Get the frame limit; DummyVideoSource::img() is NULL from this frame on.
+  virtual unsigned int limit() const = 0;
+};
+
+// Source of limit() all-zero frames whose size and format can be changed.
+class DummyVideoSource : public VideoSource {
+ public:
+  DummyVideoSource()
+      : img_(NULL), limit_(100), frame_(0), width_(80), height_(64),
+        format_(AOM_IMG_FMT_I420) {  // frame_ = 0: usable before Begin()
+    ReallocImage();
+  }
+
+  virtual ~DummyVideoSource() { aom_img_free(img_); }
+
+  virtual void Begin() {
+    frame_ = 0;
+    FillFrame();
+  }
+
+  virtual void Next() {
+    ++frame_;
+    FillFrame();
+  }
+
+  virtual aom_image_t *img() const { return (frame_ < limit_) ? img_ : NULL; }
+
+  // Models a stream where Timebase = 1/FPS, so pts == frame.
+  virtual aom_codec_pts_t pts() const { return frame_; }
+
+  virtual unsigned long duration() const { return 1; }
+
+  virtual aom_rational_t timebase() const {
+    const aom_rational_t t = { 1, 30 };
+    return t;
+  }
+
+  virtual unsigned int frame() const { return frame_; }
+
+  virtual unsigned int limit() const { return limit_; }
+
+  void set_limit(unsigned int limit) { limit_ = limit; }
+
+  void SetSize(unsigned int width, unsigned int height) {
+    if (width == width_ && height == height_) return;  // nothing changed
+    width_ = width;
+    height_ = height;
+    ReallocImage();
+  }
+
+  void SetImageFormat(aom_img_fmt_t format) {
+    if (format_ == format) return;  // nothing changed
+    format_ = format;
+    ReallocImage();
+  }
+
+ protected:
+  virtual void FillFrame() {
+    if (img_) memset(img_->img_data, 0, raw_sz_);
+  }
+
+  // (Re)allocates img_; if that fails, img_ is NULL and raw_sz_ is 0.
+  void ReallocImage() {
+    aom_img_free(img_);
+    img_ = aom_img_alloc(NULL, format_, width_, height_, 32);
+    raw_sz_ = img_ ? ((img_->w + 31) & ~31) * img_->h * img_->bps / 8 : 0;
+  }
+
+  aom_image_t *img_;
+  size_t raw_sz_;
+  unsigned int limit_;
+  unsigned int frame_;
+  unsigned int width_;
+  unsigned int height_;
+  aom_img_fmt_t format_;
+};
+
+class RandomVideoSource : public DummyVideoSource {
+ public:
+  RandomVideoSource(int seed = ACMRandom::DeterministicSeed())
+      : rnd_(seed), seed_(seed) {}
+
+ protected:
+  // Re-seed the RNG so that a second pass sees a matching stream, then
+  // rewind and fill the first frame exactly as the base class does.
+  virtual void Begin() {
+    rnd_.Reset(seed_);
+    DummyVideoSource::Begin();
+  }
+
+  // 15 frames of noise, followed by 15 static frames. Reset to 0 rather
+  // than holding previous frames to encourage keyframes to be thrown.
+  virtual void FillFrame() {
+    if (!img_) return;
+    if (frame_ % 30 >= 15) {
+      memset(img_->img_data, 0, raw_sz_);
+      return;
+    }
+    for (size_t k = 0; k < raw_sz_; ++k) img_->img_data[k] = rnd_.Rand8();
+  }
+
+  ACMRandom rnd_;
+  int seed_;
+};
+
+// Abstract base class for test video sources, which provide a stream of
+// compressed frames (cxdata) to the decoder.
+class CompressedVideoSource {
+ public:
+  virtual ~CompressedVideoSource() {}
+
+  virtual void Init() = 0;
+
+  // Prepare the stream for reading, rewind/open as necessary.
+  virtual void Begin() = 0;
+
+  // Advance the cursor to the next frame
+  virtual void Next() = 0;
+
+  virtual const uint8_t *cxdata() const = 0;
+
+  virtual size_t frame_size() const = 0;
+
+  virtual unsigned int frame_number() const = 0;
+};
+
+}  // namespace libaom_test
+
+#endif  // TEST_VIDEO_SOURCE_H_
diff --git a/third_party/aom/test/warp_filter_test.cc b/third_party/aom/test/warp_filter_test.cc
new file mode 100644
index 000000000..fd6608bfc
--- /dev/null
+++ b/third_party/aom/test/warp_filter_test.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/warp_filter_test_util.h"
+
+using std::tr1::tuple;
+using std::tr1::make_tuple;
+using libaom_test::ACMRandom;
+using libaom_test::AV1WarpFilter::AV1WarpFilterTest;
+#if CONFIG_HIGHBITDEPTH
+using libaom_test::AV1HighbdWarpFilter::AV1HighbdWarpFilterTest;
+#endif
+
+namespace {
+// Checks the SSE2/SSSE3 warp filters through the fixtures' RunCheckOutput().
+TEST_P(AV1WarpFilterTest, CheckOutput) { RunCheckOutput(av1_warp_affine_sse2); }
+
+INSTANTIATE_TEST_CASE_P(SSE2, AV1WarpFilterTest,
+                        libaom_test::AV1WarpFilter::GetDefaultParams());
+
+#if CONFIG_HIGHBITDEPTH
+TEST_P(AV1HighbdWarpFilterTest, CheckOutput) {
+  RunCheckOutput(av1_highbd_warp_affine_ssse3);
+}
+
+INSTANTIATE_TEST_CASE_P(SSSE3, AV1HighbdWarpFilterTest,
+                        libaom_test::AV1HighbdWarpFilter::GetDefaultParams());
+#endif  // CONFIG_HIGHBITDEPTH
+
+}  // namespace
diff --git a/third_party/aom/test/warp_filter_test_util.cc b/third_party/aom/test/warp_filter_test_util.cc
new file mode 100644
index 000000000..1ce265b60
--- /dev/null
+++ b/third_party/aom/test/warp_filter_test_util.cc
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "test/warp_filter_test_util.h"
+
+using std::tr1::tuple;
+using std::tr1::make_tuple;
+using std::vector;
+using libaom_test::ACMRandom;
+using libaom_test::AV1WarpFilter::AV1WarpFilterTest;
+using libaom_test::AV1WarpFilter::WarpTestParam;
+#if CONFIG_HIGHBITDEPTH
+using libaom_test::AV1HighbdWarpFilter::AV1HighbdWarpFilterTest;
+using libaom_test::AV1HighbdWarpFilter::HighbdWarpTestParam;
+#endif
+
+::testing::internal::ParamGenerator<WarpTestParam>
+libaom_test::AV1WarpFilter::GetDefaultParams() {
+  const WarpTestParam defaultParams[] = {
+    make_tuple(4, 4, 50000),  make_tuple(8, 8, 50000),
+    make_tuple(64, 64, 1000), make_tuple(4, 16, 20000),
+    make_tuple(32, 8, 10000),
+  };
+  return ::testing::ValuesIn(defaultParams);
+}
+
+AV1WarpFilterTest::~AV1WarpFilterTest() {}
+void AV1WarpFilterTest::SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
+
+void AV1WarpFilterTest::TearDown() { libaom_test::ClearSystemState(); }
+
+int32_t AV1WarpFilterTest::random_param(int bits) {
+  // 1 in 8 chance of generating zero (arbitrarily chosen)
+  if (((rnd_.Rand8()) & 7) == 0) return 0;
+  // Otherwise, generate uniform values in the range
+  // [-(1 << bits), -1] U [1, 1 << bits] (presumably capped by Rand16() width)
+  int32_t v = 1 + (rnd_.Rand16() & ((1 << bits) - 1));
+  if ((rnd_.Rand8()) & 1) return -v;
+  return v;
+}
+
+void AV1WarpFilterTest::generate_model(int32_t *mat, int16_t *alpha,
+                                       int16_t *beta, int16_t *gamma,
+                                       int16_t *delta) {
+  while (1) {  // only mat[0..5] are written; retried until the checks pass
+    mat[0] = random_param(WARPEDMODEL_PREC_BITS + 6);
+    mat[1] = random_param(WARPEDMODEL_PREC_BITS + 6);
+    mat[2] = (random_param(WARPEDMODEL_PREC_BITS - 3)) +
+             (1 << WARPEDMODEL_PREC_BITS);
+    mat[3] = random_param(WARPEDMODEL_PREC_BITS - 3);
+    // 50/50 chance of generating ROTZOOM vs. AFFINE models
+    if (rnd_.Rand8() & 1) {
+      // AFFINE: mat[4] and mat[5] are drawn independently
+      mat[4] = random_param(WARPEDMODEL_PREC_BITS - 3);
+      mat[5] = (random_param(WARPEDMODEL_PREC_BITS - 3)) +
+               (1 << WARPEDMODEL_PREC_BITS);
+    } else {  // ROTZOOM: mat[4] and mat[5] are derived from mat[3] and mat[2]
+      mat[4] = -mat[3];
+      mat[5] = mat[2];
+    }
+
+    // Calculate the derived parameters and check that they are suitable
+    // for the warp filter.
+    assert(mat[2] != 0);
+
+    *alpha = clamp(mat[2] - (1 << WARPEDMODEL_PREC_BITS), INT16_MIN, INT16_MAX);
+    *beta = clamp(mat[3], INT16_MIN, INT16_MAX);
+    *gamma = clamp(((int64_t)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) / mat[2],
+                   INT16_MIN, INT16_MAX);
+    *delta =
+        clamp(mat[5] - (((int64_t)mat[3] * mat[4] + (mat[2] / 2)) / mat[2]) -
+                  (1 << WARPEDMODEL_PREC_BITS),
+              INT16_MIN, INT16_MAX);
+
+    if ((4 * abs(*alpha) + 7 * abs(*beta) >= (1 << WARPEDMODEL_PREC_BITS)) ||
+        (4 * abs(*gamma) + 4 * abs(*delta) >= (1 << WARPEDMODEL_PREC_BITS)))
+      continue;  // unsuitable model: draw a new one
+
+    // We have a valid model, so finish
+    return;
+  }
+}
+
+// Feeds the same random image and random warp models to test_impl and to
+// av1_warp_affine_c and asserts that their outputs match pixel for pixel.
+// Buffers live in vectors so that a failing ASSERT_EQ cannot leak them.
+void AV1WarpFilterTest::RunCheckOutput(warp_affine_func test_impl) {
+  const int w = 128, h = 128;
+  const int border = 16;
+  const int stride = w + 2 * border;
+  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
+  const int num_iters = GET_PARAM(2);
+  int i, j, sub_x, sub_y;
+
+  std::vector<uint8_t> input_(h * stride);
+  uint8_t *input = &input_[0] + border;
+
+  // The warp functions always write rows with widths that are multiples of 8.
+  // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
+  int output_n = ((out_w + 7) & ~7) * out_h;
+  std::vector<uint8_t> output(output_n);
+  std::vector<uint8_t> output2(output_n);
+  int32_t mat[8];
+  int16_t alpha, beta, gamma, delta;
+
+  // Generate an input block and extend its borders horizontally
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) input[i * stride + j] = rnd_.Rand8();
+  for (i = 0; i < h; ++i) {
+    memset(input + i * stride - border, input[i * stride], border);
+    memset(input + i * stride + w, input[i * stride + (w - 1)], border);
+  }
+
+  for (i = 0; i < num_iters; ++i) {
+    for (sub_x = 0; sub_x < 2; ++sub_x)
+      for (sub_y = 0; sub_y < 2; ++sub_y) {
+        generate_model(mat, &alpha, &beta, &gamma, &delta);
+        av1_warp_affine_c(mat, input, w, h, stride, &output[0], 32, 32, out_w,
+                          out_h, out_w, sub_x, sub_y, 0, alpha, beta, gamma,
+                          delta);
+        test_impl(mat, input, w, h, stride, &output2[0], 32, 32, out_w, out_h,
+                  out_w, sub_x, sub_y, 0, alpha, beta, gamma, delta);
+
+        for (j = 0; j < out_w * out_h; ++j)
+          ASSERT_EQ(output[j], output2[j])
+              << "Pixel mismatch at index " << j << " = (" << (j % out_w)
+              << ", " << (j / out_w) << ") on iteration " << i;
+      }
+  }
+}
+
+#if CONFIG_HIGHBITDEPTH
+::testing::internal::ParamGenerator<HighbdWarpTestParam>
+libaom_test::AV1HighbdWarpFilter::GetDefaultParams() {
+  const HighbdWarpTestParam defaultParams[] = {
+    make_tuple(4, 4, 50000, 8),   make_tuple(8, 8, 50000, 8),
+    make_tuple(64, 64, 1000, 8),  make_tuple(4, 16, 20000, 8),
+    make_tuple(32, 8, 10000, 8),  make_tuple(4, 4, 50000, 10),
+    make_tuple(8, 8, 50000, 10),  make_tuple(64, 64, 1000, 10),
+    make_tuple(4, 16, 20000, 10), make_tuple(32, 8, 10000, 10),
+    make_tuple(4, 4, 50000, 12),  make_tuple(8, 8, 50000, 12),
+    make_tuple(64, 64, 1000, 12), make_tuple(4, 16, 20000, 12),
+    make_tuple(32, 8, 10000, 12),
+  };
+  return ::testing::ValuesIn(defaultParams);
+}
+
+AV1HighbdWarpFilterTest::~AV1HighbdWarpFilterTest() {}
+void AV1HighbdWarpFilterTest::SetUp() {
+  rnd_.Reset(ACMRandom::DeterministicSeed());
+}
+
+void AV1HighbdWarpFilterTest::TearDown() { libaom_test::ClearSystemState(); }
+
+int32_t AV1HighbdWarpFilterTest::random_param(int bits) {
+  // 1 in 8 chance of generating zero (arbitrarily chosen)
+  if (((rnd_.Rand8()) & 7) == 0) return 0;
+  // Otherwise, generate uniform values in the range
+  // [-(1 << bits), -1] U [1, 1 << bits] (presumably capped by Rand16() width)
+  int32_t v = 1 + (rnd_.Rand16() & ((1 << bits) - 1));
+  if ((rnd_.Rand8()) & 1) return -v;
+  return v;
+}
+
+void AV1HighbdWarpFilterTest::generate_model(int32_t *mat, int16_t *alpha,
+                                             int16_t *beta, int16_t *gamma,
+                                             int16_t *delta) {
+  while (1) {  // only mat[0..5] are written; retried until the checks pass
+    mat[0] = random_param(WARPEDMODEL_PREC_BITS + 6);
+    mat[1] = random_param(WARPEDMODEL_PREC_BITS + 6);
+    mat[2] = (random_param(WARPEDMODEL_PREC_BITS - 3)) +
+             (1 << WARPEDMODEL_PREC_BITS);
+    mat[3] = random_param(WARPEDMODEL_PREC_BITS - 3);
+    // 50/50 chance of generating ROTZOOM vs. AFFINE models
+    if (rnd_.Rand8() & 1) {
+      // AFFINE: mat[4] and mat[5] are drawn independently
+      mat[4] = random_param(WARPEDMODEL_PREC_BITS - 3);
+      mat[5] = (random_param(WARPEDMODEL_PREC_BITS - 3)) +
+               (1 << WARPEDMODEL_PREC_BITS);
+    } else {  // ROTZOOM: mat[4] and mat[5] are derived from mat[3] and mat[2]
+      mat[4] = -mat[3];
+      mat[5] = mat[2];
+    }
+
+    // Calculate the derived parameters and check that they are suitable
+    // for the warp filter.
+    assert(mat[2] != 0);
+
+    *alpha = clamp(mat[2] - (1 << WARPEDMODEL_PREC_BITS), INT16_MIN, INT16_MAX);
+    *beta = clamp(mat[3], INT16_MIN, INT16_MAX);
+    *gamma = clamp(((int64_t)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) / mat[2],
+                   INT16_MIN, INT16_MAX);
+    *delta =
+        clamp(mat[5] - (((int64_t)mat[3] * mat[4] + (mat[2] / 2)) / mat[2]) -
+                  (1 << WARPEDMODEL_PREC_BITS),
+              INT16_MIN, INT16_MAX);
+
+    if ((4 * abs(*alpha) + 7 * abs(*beta) >= (1 << WARPEDMODEL_PREC_BITS)) ||
+        (4 * abs(*gamma) + 4 * abs(*delta) >= (1 << WARPEDMODEL_PREC_BITS)))
+      continue;  // unsuitable model: draw a new one
+
+    // We have a valid model, so finish
+    return;
+  }
+}
+
+// Feeds the same random image and random warp models to test_impl and to
+// av1_highbd_warp_affine_c at bit depth GET_PARAM(3) and asserts that their
+// outputs match pixel for pixel. Buffers live in vectors so that a failing
+// ASSERT_EQ cannot leak them.
+void AV1HighbdWarpFilterTest::RunCheckOutput(
+    highbd_warp_affine_func test_impl) {
+  const int w = 128, h = 128;
+  const int border = 16;
+  const int stride = w + 2 * border;
+  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
+  const int num_iters = GET_PARAM(2);
+  const int bd = GET_PARAM(3);
+  const int mask = (1 << bd) - 1;
+  int i, j, sub_x, sub_y;
+
+  // The warp functions always write rows with widths that are multiples of 8.
+  // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
+  int output_n = ((out_w + 7) & ~7) * out_h;
+  std::vector<uint16_t> input_(h * stride);
+  uint16_t *input = &input_[0] + border;
+  std::vector<uint16_t> output(output_n);
+  std::vector<uint16_t> output2(output_n);
+  int32_t mat[8];
+  int16_t alpha, beta, gamma, delta;
+
+  // Generate an input block and extend its borders horizontally
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) input[i * stride + j] = rnd_.Rand16() & mask;
+  for (i = 0; i < h; ++i) {
+    for (j = 0; j < border; ++j) {
+      input[i * stride - border + j] = input[i * stride];
+      input[i * stride + w + j] = input[i * stride + (w - 1)];
+    }
+  }
+
+  for (i = 0; i < num_iters; ++i) {
+    for (sub_x = 0; sub_x < 2; ++sub_x)
+      for (sub_y = 0; sub_y < 2; ++sub_y) {
+        generate_model(mat, &alpha, &beta, &gamma, &delta);
+
+        av1_highbd_warp_affine_c(mat, input, w, h, stride, &output[0], 32, 32,
+                                 out_w, out_h, out_w, sub_x, sub_y, bd, 0,
+                                 alpha, beta, gamma, delta);
+        test_impl(mat, input, w, h, stride, &output2[0], 32, 32, out_w, out_h,
+                  out_w, sub_x, sub_y, bd, 0, alpha, beta, gamma, delta);
+
+        for (j = 0; j < out_w * out_h; ++j)
+          ASSERT_EQ(output[j], output2[j])
+              << "Pixel mismatch at index " << j << " = (" << (j % out_w)
+              << ", " << (j / out_w) << ") on iteration " << i;
+      }
+  }
+}
+#endif  // CONFIG_HIGHBITDEPTH
diff --git a/third_party/aom/test/warp_filter_test_util.h b/third_party/aom/test/warp_filter_test_util.h
new file mode 100644
index 000000000..6a87e46d0
--- /dev/null
+++ b/third_party/aom/test/warp_filter_test_util.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef TEST_WARP_FILTER_TEST_UTIL_H_
+#define TEST_WARP_FILTER_TEST_UTIL_H_
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+#include "./av1_rtcd.h"
+#include "./aom_dsp_rtcd.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+
+#include "av1/common/mv.h"
+
+namespace libaom_test {
+
+namespace AV1WarpFilter {
+
+typedef void (*warp_affine_func)(int32_t *mat, uint8_t *ref, int width,
+                                 int height, int stride, uint8_t *pred,
+                                 int p_col, int p_row, int p_width,
+                                 int p_height, int p_stride, int subsampling_x,
+                                 int subsampling_y, int ref_frm, int16_t alpha,
+                                 int16_t beta, int16_t gamma, int16_t delta);
+// Test parameter: (output width, output height, number of iterations).
+typedef std::tr1::tuple<int, int, int> WarpTestParam;
+
+::testing::internal::ParamGenerator<WarpTestParam> GetDefaultParams();
+
+// Fixture that compares a warp_affine_func against av1_warp_affine_c.
+class AV1WarpFilterTest : public ::testing::TestWithParam<WarpTestParam> {
+ public:
+  virtual ~AV1WarpFilterTest();
+  virtual void SetUp();
+  virtual void TearDown();
+
+ protected:
+  int32_t random_param(int bits);
+  void generate_model(int32_t *mat, int16_t *alpha, int16_t *beta,
+                      int16_t *gamma, int16_t *delta);
+
+  void RunCheckOutput(warp_affine_func test_impl);
+
+  libaom_test::ACMRandom rnd_;
+};
+
+}  // namespace AV1WarpFilter
+
+#if CONFIG_HIGHBITDEPTH
+namespace AV1HighbdWarpFilter {
+typedef void (*highbd_warp_affine_func)(
+    int32_t *mat, uint16_t *ref, int width, int height, int stride,
+    uint16_t *pred, int p_col, int p_row, int p_width, int p_height,
+    int p_stride, int subsampling_x, int subsampling_y, int bd, int ref_frm,
+    int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
+// Test parameter: (output width, output height, iterations, bit depth).
+typedef std::tr1::tuple<int, int, int, int> HighbdWarpTestParam;
+
+::testing::internal::ParamGenerator<HighbdWarpTestParam> GetDefaultParams();
+
+// Fixture comparing a highbd_warp_affine_func with av1_highbd_warp_affine_c.
+class AV1HighbdWarpFilterTest
+    : public ::testing::TestWithParam<HighbdWarpTestParam> {
+ public:
+  virtual ~AV1HighbdWarpFilterTest();
+  virtual void SetUp();
+  virtual void TearDown();
+
+ protected:
+  int32_t random_param(int bits);
+  void generate_model(int32_t *mat, int16_t *alpha, int16_t *beta,
+                      int16_t *gamma, int16_t *delta);
+
+  void RunCheckOutput(highbd_warp_affine_func test_impl);
+
+  libaom_test::ACMRandom rnd_;
+};
+
+}  // namespace AV1HighbdWarpFilter
+#endif  // CONFIG_HIGHBITDEPTH
+
+}  // namespace libaom_test
+
+#endif  // TEST_WARP_FILTER_TEST_UTIL_H_
diff --git a/third_party/aom/test/webm_video_source.h b/third_party/aom/test/webm_video_source.h
new file mode 100644
index 000000000..286f69cbf
--- /dev/null
+++ b/third_party/aom/test/webm_video_source.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef TEST_WEBM_VIDEO_SOURCE_H_
+#define TEST_WEBM_VIDEO_SOURCE_H_
+#include <cstdarg>
+#include <cstdio>
+#include <cstdlib>
+#include <new>
+#include <string>
+#include "../tools_common.h"
+#include "../webmdec.h"
+#include "test/video_source.h"
+
+namespace libaom_test {
+
+// This class extends CompressedVideoSource to allow parsing of WebM files,
+// so that we can do actual file decodes.
+class WebMVideoSource : public CompressedVideoSource {
+ public:
+  explicit WebMVideoSource(const std::string &file_name)
+      : file_name_(file_name), aom_ctx_(new AvxInputContext()),
+        webm_ctx_(new WebmInputContext()), buf_(NULL), buf_sz_(0), frame_(0),
+        end_of_file_(false) {}
+
+  virtual ~WebMVideoSource() {
+    if (aom_ctx_->file != NULL) fclose(aom_ctx_->file);
+    webm_free(webm_ctx_);
+    delete aom_ctx_;
+    delete webm_ctx_;
+  }
+
+  virtual void Init() {}  // nothing to set up: the file is opened by Begin()
+
+  virtual void Begin() {
+    aom_ctx_->file = OpenTestDataFile(file_name_);
+    ASSERT_TRUE(aom_ctx_->file != NULL) << "Input file open failed. Filename: "
+                                        << file_name_;
+
+    ASSERT_EQ(file_is_webm(webm_ctx_, aom_ctx_), 1) << "file is not WebM";
+
+    FillFrame();
+  }
+
+  virtual void Next() {
+    ++frame_;
+    FillFrame();
+  }
+
+  void FillFrame() {
+    ASSERT_TRUE(aom_ctx_->file != NULL);
+    const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_);
+    ASSERT_GE(status, 0) << "webm_read_frame failed";
+    if (status == 1) {  // a status of 1 is taken to mean end of file
+      end_of_file_ = true;
+    }
+  }
+
+  void SeekToNextKeyFrame() {
+    ASSERT_TRUE(aom_ctx_->file != NULL);
+    do {
+      const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_);
+      ASSERT_GE(status, 0) << "webm_read_frame failed";
+      ++frame_;
+      if (status == 1) {
+        end_of_file_ = true;
+      }
+    } while (!webm_ctx_->is_key_frame && !end_of_file_);
+  }
+
+  virtual const uint8_t *cxdata() const { return end_of_file_ ? NULL : buf_; }
+  virtual size_t frame_size() const { return buf_sz_; }
+  virtual unsigned int frame_number() const { return frame_; }
+
+ protected:
+  std::string file_name_;
+  AvxInputContext *aom_ctx_;
+  WebmInputContext *webm_ctx_;
+  uint8_t *buf_;
+  size_t buf_sz_;
+  unsigned int frame_;
+  bool end_of_file_;
+};
+
+}  // namespace libaom_test
+
+#endif  // TEST_WEBM_VIDEO_SOURCE_H_
diff --git a/third_party/aom/test/y4m_test.cc b/third_party/aom/test/y4m_test.cc
new file mode 100644
index 000000000..fc9fff514
--- /dev/null
+++ b/third_party/aom/test/y4m_test.cc
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <string>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "./y4menc.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+
+namespace {
+
+using std::string;
+
+static const unsigned int kWidth = 160;  // picture width HeaderChecks() expects
+static const unsigned int kHeight = 90;  // picture height HeaderChecks() expects
+static const unsigned int kFrames = 10;  // frame limit the tests pass to Init()
+
+struct Y4mTestParam {
+  const char *filename;    // y4m clip handed to Init()
+  unsigned int bit_depth;  // value y4m_.bit_depth is compared against
+  aom_img_fmt format;      // value y4m_.aom_fmt is compared against
+  const char *md5raw;      // digest Md5Check() must produce for the clip
+};
+
+const Y4mTestParam kY4mTestVectors[] = {  // { file, depth, format, md5 }
+  { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420,
+    "e5406275b9fc6bb3436c31d4a05c1cab" },
+  { "park_joy_90p_8_422.y4m", 8, AOM_IMG_FMT_I422,
+    "284a47a47133b12884ec3a14e959a0b6" },
+  { "park_joy_90p_8_444.y4m", 8, AOM_IMG_FMT_I444,
+    "90517ff33843d85de712fd4fe60dbed0" },
+  { "park_joy_90p_10_420.y4m", 10, AOM_IMG_FMT_I42016,
+    "63f21f9f717d8b8631bd2288ee87137b" },
+  { "park_joy_90p_10_422.y4m", 10, AOM_IMG_FMT_I42216,
+    "48ab51fb540aed07f7ff5af130c9b605" },
+  { "park_joy_90p_10_444.y4m", 10, AOM_IMG_FMT_I44416,
+    "067bfd75aa85ff9bae91fa3e0edd1e3e" },
+  { "park_joy_90p_12_420.y4m", 12, AOM_IMG_FMT_I42016,
+    "9e6d8f6508c6e55625f6b697bc461cef" },
+  { "park_joy_90p_12_422.y4m", 12, AOM_IMG_FMT_I42216,
+    "b239c6b301c0b835485be349ca83a7e3" },
+  { "park_joy_90p_12_444.y4m", 12, AOM_IMG_FMT_I44416,
+    "5a6481a550821dab6d0192f5c63845e9" },
+};
+
+// Writes the three planes of |img| to |file| one row at a time. Only the
+// visible samples of each row are written; the pointer then moves by stride.
+static void write_image_file(const aom_image_t *img, FILE *file) {
+  // Two bytes per sample when the high-bitdepth flag is set in fmt, else one.
+  const int sample_bytes = (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1;
+  for (int p = 0; p < 3; ++p) {
+    // Planes after the first are sized (dim + shift) >> shift.
+    const int xs = p ? img->x_chroma_shift : 0;
+    const int ys = p ? img->y_chroma_shift : 0;
+    const int cols = (img->d_w + xs) >> xs;
+    const int rows = (img->d_h + ys) >> ys;
+    const unsigned char *row = img->planes[p];
+    for (int r = 0; r < rows; ++r) {
+      fwrite(row, sample_bytes, cols, file);
+      row += img->stride[p];
+    }
+  }
+}
+
+class Y4mVideoSourceTest : public ::testing::TestWithParam<Y4mTestParam>,
+                           public ::libaom_test::Y4mVideoSource {
+ protected:
+  Y4mVideoSourceTest() : Y4mVideoSource("", 0, 0) {}  // filled in by Init()
+
+  virtual ~Y4mVideoSourceTest() { CloseSource(); }
+
+  virtual void Init(const std::string &file_name, int limit) {
+    file_name_ = file_name;
+    start_ = 0;
+    limit_ = limit;
+    frame_ = 0;
+    Begin();  // opens file_name_ and reads the first frame
+  }
+
+  // Verifies size, bit depth, format, bps and chroma shifts of the open source.
+  void HeaderChecks(unsigned int bit_depth, aom_img_fmt_t fmt) {
+    ASSERT_TRUE(input_file_ != NULL);
+    ASSERT_EQ(y4m_.pic_w, (int)kWidth);
+    ASSERT_EQ(y4m_.pic_h, (int)kHeight);
+    ASSERT_EQ(img()->d_w, kWidth);
+    ASSERT_EQ(img()->d_h, kHeight);
+    ASSERT_EQ(y4m_.bit_depth, bit_depth);
+    ASSERT_EQ(y4m_.aom_fmt, fmt);
+    if (fmt == AOM_IMG_FMT_I420 || fmt == AOM_IMG_FMT_I42016) {  // 4:2:0
+      ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 3 / 2);
+      ASSERT_EQ(img()->x_chroma_shift, 1U);
+      ASSERT_EQ(img()->y_chroma_shift, 1U);
+    }
+    if (fmt == AOM_IMG_FMT_I422 || fmt == AOM_IMG_FMT_I42216) {  // 4:2:2
+      ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 2);
+      ASSERT_EQ(img()->x_chroma_shift, 1U);
+      ASSERT_EQ(img()->y_chroma_shift, 0U);
+    }
+    if (fmt == AOM_IMG_FMT_I444 || fmt == AOM_IMG_FMT_I44416) {  // 4:4:4
+      ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 3);
+      ASSERT_EQ(img()->x_chroma_shift, 0U);
+      ASSERT_EQ(img()->y_chroma_shift, 0U);
+    }
+  }
+
+  // Feeds each frame up to limit_ into an MD5 and compares the final digest.
+  void Md5Check(const string &expected_md5) {
+    ASSERT_TRUE(input_file_ != NULL);
+    libaom_test::MD5 md5;
+    for (unsigned int i = start_; i < limit_; i++) {
+      md5.Add(img());
+      Next();  // advance to the next frame after hashing the current one
+    }
+    ASSERT_EQ(string(md5.Get()), expected_md5);
+  }
+};
+
+TEST_P(Y4mVideoSourceTest, SourceTest) {
+  const Y4mTestParam &vec = GetParam();  // open, check header, hash frames
+  Init(vec.filename, kFrames);
+  HeaderChecks(vec.bit_depth, vec.format);
+  Md5Check(vec.md5raw);
+}
+
+INSTANTIATE_TEST_CASE_P(C, Y4mVideoSourceTest,
+                        ::testing::ValuesIn(kY4mTestVectors));
+
+class Y4mVideoWriteTest : public Y4mVideoSourceTest {
+ protected:
+  Y4mVideoWriteTest() : tmpfile_(NULL) {}
+
+  virtual ~Y4mVideoWriteTest() {  // hide only the temp FILE from the base
+    if (tmpfile_ != NULL && input_file_ == tmpfile_->file()) input_file_ = NULL;
+    delete tmpfile_;  // tmpfile_ is relied on to close its own FILE
+  }
+
+  void ReplaceInputFile(FILE *input_file) {  // restart parsing on |input_file|
+    CloseSource();
+    frame_ = 0;
+    input_file_ = input_file;
+    rewind(input_file_);
+    ReadSourceToStart();
+  }
+
+  // Copies the open source into a temp y4m file, then reads from that file.
+  void WriteY4mAndReadBack() {
+    ASSERT_TRUE(input_file_ != NULL);
+    char buf[Y4M_BUFFER_SIZE] = { 0 };
+    const struct AvxRational framerate = { y4m_.fps_n, y4m_.fps_d };
+    tmpfile_ = new libaom_test::TempOutFile;
+    ASSERT_TRUE(tmpfile_->file() != NULL);
+    y4m_write_file_header(buf, sizeof(buf), kWidth, kHeight, &framerate,
+                          y4m_.aom_fmt, y4m_.bit_depth);
+    fputs(buf, tmpfile_->file());
+    for (unsigned int i = start_; i < limit_; i++) {
+      y4m_write_frame_header(buf, sizeof(buf));
+      fputs(buf, tmpfile_->file());
+      write_image_file(img(), tmpfile_->file());
+      Next();
+    }
+    ReplaceInputFile(tmpfile_->file());  // input_file_ now aliases the temp FILE
+  }
+
+  virtual void Init(const std::string &file_name, int limit) {
+    Y4mVideoSourceTest::Init(file_name, limit);
+    WriteY4mAndReadBack();  // the temp copy becomes the input
+  }
+  libaom_test::TempOutFile *tmpfile_;  // created in WriteY4mAndReadBack()
+};
+
+TEST_P(Y4mVideoWriteTest, WriteTest) {
+  const Y4mTestParam &vec = GetParam();  // round-trip, then the same checks
+  Init(vec.filename, kFrames);
+  HeaderChecks(vec.bit_depth, vec.format);
+  Md5Check(vec.md5raw);
+}
+
+INSTANTIATE_TEST_CASE_P(C, Y4mVideoWriteTest,
+                        ::testing::ValuesIn(kY4mTestVectors));
+}  // namespace
diff --git a/third_party/aom/test/y4m_video_source.h b/third_party/aom/test/y4m_video_source.h
new file mode 100644
index 000000000..2279d7970
--- /dev/null
+++ b/third_party/aom/test/y4m_video_source.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef TEST_Y4M_VIDEO_SOURCE_H_
+#define TEST_Y4M_VIDEO_SOURCE_H_
+#include <algorithm>
+#include <string>
+
+#include "test/video_source.h"
+#include "./y4minput.h"
+
+namespace libaom_test {
+
+// This class extends VideoSource to read frames from y4m files
+// so that we can do actual file encodes.
+class Y4mVideoSource : public VideoSource {
+ public:
+  Y4mVideoSource(const std::string &file_name, unsigned int start, int limit)
+      : file_name_(file_name), input_file_(NULL), img_(new aom_image_t()),
+        start_(start), limit_(limit), frame_(0), framerate_numerator_(0),
+        framerate_denominator_(0), y4m_() {}
+
+  virtual ~Y4mVideoSource() {
+    aom_img_free(img_.get());
+    CloseSource();
+  }
+
+  virtual void OpenSource() {  // (re)opens file_name_, closing any open source
+    CloseSource();
+    input_file_ = OpenTestDataFile(file_name_);
+    ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
+                                     << file_name_;
+  }
+
+  virtual void ReadSourceToStart() {
+    ASSERT_TRUE(input_file_ != NULL);
+    ASSERT_FALSE(y4m_input_open(&y4m_, input_file_, NULL, 0, 0));
+    framerate_numerator_ = y4m_.fps_n;
+    framerate_denominator_ = y4m_.fps_d;
+    frame_ = 0;
+    for (unsigned int i = 0; i < start_; i++) {  // skip start_ frames
+      Next();
+    }
+    FillFrame();
+  }
+
+  virtual void Begin() {
+    OpenSource();
+    ReadSourceToStart();
+  }
+
+  virtual void Next() {
+    ++frame_;
+    FillFrame();
+  }
+
+  virtual aom_image_t *img() const {  // NULL once frame_ reaches limit_
+    return (frame_ < limit_) ? img_.get() : NULL;
+  }
+
+  // Models a stream where Timebase = 1/FPS, so pts == frame.
+  virtual aom_codec_pts_t pts() const { return frame_; }
+
+  virtual unsigned long duration() const { return 1; }
+
+  virtual aom_rational_t timebase() const {  // built from y4m fps_d and fps_n
+    const aom_rational_t t = { framerate_denominator_, framerate_numerator_ };
+    return t;
+  }
+
+  virtual unsigned int frame() const { return frame_; }
+
+  virtual unsigned int limit() const { return limit_; }
+
+  virtual void FillFrame() {
+    ASSERT_TRUE(input_file_ != NULL);
+    // Read a frame from input_file_; the return value is not checked.
+    y4m_input_fetch_frame(&y4m_, input_file_, img_.get());
+  }
+
+  // Swap buffers with another y4m source. This allows reading a new frame
+  // while keeping the old frame around. A whole Y4mSource is required and
+  // not just an aom_image_t because of how the y4m reader manipulates
+  // aom_image_t internals.
+  void SwapBuffers(Y4mVideoSource *other) {
+    std::swap(other->y4m_.dst_buf, y4m_.dst_buf);
+    aom_image_t *tmp;
+    tmp = other->img_.release();
+    other->img_.reset(img_.release());
+    img_.reset(tmp);
+  }
+
+ protected:
+  void CloseSource() {
+    y4m_input_close(&y4m_);
+    y4m_ = y4m_input();  // back to a value-initialized state
+    if (input_file_ != NULL) {
+      fclose(input_file_);
+      input_file_ = NULL;
+    }
+  }
+
+  std::string file_name_;
+  FILE *input_file_;
+  testing::internal::scoped_ptr<aom_image_t> img_;
+  unsigned int start_;
+  unsigned int limit_;
+  unsigned int frame_;
+  int framerate_numerator_;
+  int framerate_denominator_;
+  y4m_input y4m_;
+};
+
+}  // namespace libaom_test
+
+#endif  // TEST_Y4M_VIDEO_SOURCE_H_
diff --git a/third_party/aom/test/yuv_video_source.h b/third_party/aom/test/yuv_video_source.h
new file mode 100644
index 000000000..9ff76a8d8
--- /dev/null
+++ b/third_party/aom/test/yuv_video_source.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef TEST_YUV_VIDEO_SOURCE_H_
+#define TEST_YUV_VIDEO_SOURCE_H_
+
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+
+#include "test/video_source.h"
+#include "aom/aom_image.h"
+
+namespace libaom_test {
+
+// This class extends VideoSource to allow parsing of raw YUV
+// formats of various color sampling and bit-depths so that we can
+// do actual file encodes.
+class YUVVideoSource : public VideoSource {
+ public:
+  YUVVideoSource(const std::string &file_name, aom_img_fmt format,
+                 unsigned int width, unsigned int height, int rate_numerator,
+                 int rate_denominator, unsigned int start, int limit)
+      : file_name_(file_name), input_file_(NULL), img_(NULL), raw_size_(0),
+        start_(start), limit_(limit), frame_(0), width_(0), height_(0),
+        format_(AOM_IMG_FMT_NONE), framerate_numerator_(rate_numerator),
+        framerate_denominator_(rate_denominator) {
+    // Sets up img_ and the sizes; raw_size_ stays 0 if SetSize() bails out.
+    SetSize(width, height, format);
+  }
+
+  virtual ~YUVVideoSource() {
+    aom_img_free(img_);
+    if (input_file_) fclose(input_file_);
+  }
+
+  virtual void Begin() {  // (re)opens the file and loads frame number start_
+    if (input_file_) fclose(input_file_);
+    input_file_ = OpenTestDataFile(file_name_);
+    ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
+                                     << file_name_;
+    if (start_)
+      fseek(input_file_, static_cast<unsigned>(raw_size_) * start_, SEEK_SET);
+
+    frame_ = start_;
+    FillFrame();
+  }
+
+  virtual void Next() {
+    ++frame_;
+    FillFrame();
+  }
+
+  virtual aom_image_t *img() const { return (frame_ < limit_) ? img_ : NULL; }
+
+  // Models a stream where Timebase = 1/FPS, so pts == frame.
+  virtual aom_codec_pts_t pts() const { return frame_; }
+
+  virtual unsigned long duration() const { return 1; }
+
+  virtual aom_rational_t timebase() const {
+    const aom_rational_t t = { framerate_denominator_, framerate_numerator_ };
+    return t;
+  }
+
+  virtual unsigned int frame() const { return frame_; }
+
+  virtual unsigned int limit() const { return limit_; }
+
+  virtual void SetSize(unsigned int width, unsigned int height,
+                       aom_img_fmt format) {
+    if (width != width_ || height != height_ || format != format_) {
+      aom_img_free(img_);
+      img_ = aom_img_alloc(NULL, format, width, height, 1);
+      ASSERT_TRUE(img_ != NULL);
+      width_ = width;
+      height_ = height;
+      format_ = format;
+      switch (format) {  // bytes per frame for each supported layout
+        case AOM_IMG_FMT_I420: raw_size_ = width * height * 3 / 2; break;
+        case AOM_IMG_FMT_I422: raw_size_ = width * height * 2; break;
+        case AOM_IMG_FMT_I440: raw_size_ = width * height * 2; break;
+        case AOM_IMG_FMT_I444: raw_size_ = width * height * 3; break;
+        case AOM_IMG_FMT_I42016: raw_size_ = width * height * 3; break;
+        case AOM_IMG_FMT_I42216: raw_size_ = width * height * 4; break;
+        case AOM_IMG_FMT_I44016: raw_size_ = width * height * 4; break;
+        case AOM_IMG_FMT_I44416: raw_size_ = width * height * 6; break;
+        default: ASSERT_TRUE(0);  // any other format fails the test
+      }
+    }
+  }
+
+  virtual void FillFrame() {
+    ASSERT_TRUE(input_file_ != NULL);
+    ASSERT_TRUE(img_ != NULL);  // SetSize() may not have allocated an image
+    if (fread(img_->img_data, raw_size_, 1, input_file_) == 0) {
+      limit_ = frame_;  // nothing read: end the stream at the current frame
+    }
+  }
+
+ protected:
+  std::string file_name_;
+  FILE *input_file_;
+  aom_image_t *img_;
+  size_t raw_size_;  // bytes FillFrame() reads per frame
+  unsigned int start_;
+  unsigned int limit_;
+  unsigned int frame_;
+  unsigned int width_;
+  unsigned int height_;
+  aom_img_fmt format_;
+  int framerate_numerator_;
+  int framerate_denominator_;
+};
+
+}  // namespace libaom_test
+
+#endif  // TEST_YUV_VIDEO_SOURCE_H_