49 files changed, 2419 insertions, 968 deletions
diff --git a/third_party/aom/test/altref_test.cc b/third_party/aom/test/altref_test.cc
index 6dd8b5186..28b400121 100644
--- a/third_party/aom/test/altref_test.cc
+++ b/third_party/aom/test/altref_test.cc
@@ -92,6 +92,6 @@ TEST_P(AltRefForcedKeyTestLarge, ForcedFrameIsKey) {
 
 AV1_INSTANTIATE_TEST_CASE(AltRefForcedKeyTestLarge,
                           ::testing::Values(::libaom_test::kOnePassGood),
-                          ::testing::Range(0, 9));
+                          ::testing::Values(2, 5));
 
 }  // namespace
diff --git a/third_party/aom/test/aq_segment_test.cc b/third_party/aom/test/aq_segment_test.cc
index 5dc93ec79..b01f140a0 100644
--- a/third_party/aom/test/aq_segment_test.cc
+++ b/third_party/aom/test/aq_segment_test.cc
@@ -98,7 +98,7 @@ TEST_P(AqSegmentTest, TestNoMisMatchAQ4) {
   aq_mode_ = 4;
 
   ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 100);
+                                       30, 1, 0, 15);
 
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 }
@@ -112,7 +112,7 @@ TEST_P(AqSegmentTest, TestNoMisMatchExtDeltaQ) {
   aq_mode_ = 0;
   deltaq_mode_ = 2;
   ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 100);
+                                       30, 1, 0, 15);
 
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 }
diff --git a/third_party/aom/test/av1_convolve_optimz_test.cc b/third_party/aom/test/av1_convolve_optimz_test.cc
index fd0f6dbce..c32f4cb95 100644
--- a/third_party/aom/test/av1_convolve_optimz_test.cc
+++ b/third_party/aom/test/av1_convolve_optimz_test.cc
@@ -218,7 +218,7 @@ const BlockDimension kBlockDim[] = {
 };
 
 // 10/12-tap filters
-const InterpFilter kFilter[] = { FILTER_REGULAR_UV, BILINEAR, MULTITAP_SHARP };
+const InterpFilter kFilter[] = { EIGHTTAP_REGULAR, BILINEAR, MULTITAP_SHARP };
 
 const int kSubpelQ4[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
 
diff --git a/third_party/aom/test/av1_convolve_test.cc b/third_party/aom/test/av1_convolve_test.cc
index 02ac8e7bb..9ea662381 100644
--- a/third_party/aom/test/av1_convolve_test.cc
+++ b/third_party/aom/test/av1_convolve_test.cc
@@ -262,6 +262,7 @@ INSTANTIATE_TEST_CASE_P(
                        ::testing::ValuesIn(filter_ls)));
 
 #if CONFIG_HIGHBITDEPTH
+#ifndef __clang_analyzer__
 TEST(AV1ConvolveTest, av1_highbd_convolve) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
 #if CONFIG_DUAL_FILTER
@@ -322,6 +323,7 @@ TEST(AV1ConvolveTest, av1_highbd_convolve) {
     }
   }
 }
+#endif
 
 TEST(AV1ConvolveTest, av1_highbd_convolve_avg) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
diff --git a/third_party/aom/test/fht32x32_test.cc b/third_party/aom/test/av1_fht32x32_test.cc
index 56ac597c0..cdd915337 100644
--- a/third_party/aom/test/fht32x32_test.cc
+++ b/third_party/aom/test/av1_fht32x32_test.cc
@@ -49,7 +49,7 @@ void highbd_fht32x32_ref(const int16_t *in, int32_t *out, int stride,
 }
 #endif  // CONFIG_HIGHBITDEPTH
 
-#if HAVE_AVX2
+#if HAVE_SSE2 || HAVE_AVX2
 void dummy_inv_txfm(const tran_low_t *in, uint8_t *out, int stride,
                     int tx_type) {
   (void)in;
diff --git a/third_party/aom/test/av1_fht64x64_test.cc b/third_party/aom/test/av1_fht64x64_test.cc
new file mode 100644
index 000000000..cde1d0ca3
--- /dev/null
+++ b/third_party/aom/test/av1_fht64x64_test.cc
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./av1_rtcd.h"
+
+#include "aom_ports/mem.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/transform_test_base.h"
+#include "test/util.h"
+
+#if CONFIG_TX64X64
+
+using libaom_test::ACMRandom;
+
+namespace {
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                        int tx_type);
+using std::tr1::tuple;
+using libaom_test::FhtFunc;
+typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht64x64Param;
+
+void fht64x64_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
+  av1_fht64x64_c(in, out, stride, tx_type);  // forwards to the C forward txfm
+}
+
+void iht64x64_ref(const tran_low_t *in, uint8_t *dest, int stride,
+                  int tx_type) {
+  av1_iht64x64_4096_add_c(in, dest, stride, tx_type);  // C inverse txfm
+}
+
+class AV1Trans64x64HT : public libaom_test::TransformTestBase,
+                        public ::testing::TestWithParam<Ht64x64Param> {
+ public:
+  virtual ~AV1Trans64x64HT() {}
+
+  virtual void SetUp() {  // unpacks the Ht64x64Param tuple into the fixture
+    fwd_txfm_ = GET_PARAM(0);  // forward transform under test
+    inv_txfm_ = GET_PARAM(1);  // inverse transform under test
+    tx_type_ = GET_PARAM(2);   // transform type passed to both functions
+    pitch_ = 64;
+    height_ = 64;
+    fwd_txfm_ref = fht64x64_ref;  // reference: wrapper around av1_fht64x64_c
+    inv_txfm_ref = iht64x64_ref;  // reference: av1_iht64x64_4096_add_c wrapper
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;  // all-ones value of bit_depth_ bits
+    num_coeffs_ = GET_PARAM(4);
+  }
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride, tx_type_);  // always uses the fixture tx_type_
+  }
+
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride, tx_type_);  // always uses the fixture tx_type_
+  }
+
+  FhtFunc fwd_txfm_;  // set from tuple element 0 in SetUp()
+  IhtFunc inv_txfm_;  // set from tuple element 1 in SetUp()
+};
+
+TEST_P(AV1Trans64x64HT, AccuracyCheck) { RunAccuracyCheck(4, 0.2); }
+TEST_P(AV1Trans64x64HT, CoeffCheck) { RunCoeffCheck(); }
+TEST_P(AV1Trans64x64HT, MemCheck) { RunMemCheck(); }
+TEST_P(AV1Trans64x64HT, InvCoeffCheck) { RunInvCoeffCheck(); }
+TEST_P(AV1Trans64x64HT, InvAccuracyCheck) { RunInvAccuracyCheck(4); }
+
+using std::tr1::make_tuple;
+
+const Ht64x64Param kArrayHt64x64Param_c[] = {  // fwd, inv, tx_type, bd, coeffs
+  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 0, AOM_BITS_8, 4096),
+  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 1, AOM_BITS_8, 4096),
+  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 2, AOM_BITS_8, 4096),
+  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 3, AOM_BITS_8, 4096),
+#if CONFIG_EXT_TX  // tx types 4..15 are only listed when EXT_TX is enabled
+  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 4, AOM_BITS_8, 4096),
+  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 5, AOM_BITS_8, 4096),
+  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 6, AOM_BITS_8, 4096),
+  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 7, AOM_BITS_8, 4096),
+  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 8, AOM_BITS_8, 4096),
+  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 9, AOM_BITS_8, 4096),
+  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 10, AOM_BITS_8, 4096),
+  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 11, AOM_BITS_8, 4096),
+  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 12, AOM_BITS_8, 4096),
+  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 13, AOM_BITS_8, 4096),
+  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 14, AOM_BITS_8, 4096),
+  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 15, AOM_BITS_8, 4096)
+#endif  // CONFIG_EXT_TX
+};
+INSTANTIATE_TEST_CASE_P(C, AV1Trans64x64HT,  // one test set per table entry
+                        ::testing::ValuesIn(kArrayHt64x64Param_c));
+
+}  // namespace
+
+#endif  // CONFIG_TX64X64
diff --git a/third_party/aom/test/av1_fwd_txfm1d_test.cc b/third_party/aom/test/av1_fwd_txfm1d_test.cc
index a9b3f8e40..511e057fa 100644
--- a/third_party/aom/test/av1_fwd_txfm1d_test.cc
+++ b/third_party/aom/test/av1_fwd_txfm1d_test.cc
@@ -56,10 +56,10 @@ TEST(av1_fwd_txfm1d, get_max_bit) {
   EXPECT_EQ(max_bit, 3);
 }
 
-TEST(av1_fwd_txfm1d, cospi_arr) {
+TEST(av1_fwd_txfm1d, cospi_arr_data) {
   for (int i = 0; i < 7; i++) {
     for (int j = 0; j < 64; j++) {
-      EXPECT_EQ(cospi_arr[i][j],
+      EXPECT_EQ(cospi_arr_data[i][j],
                 (int32_t)round(cos(M_PI * j / 128) * (1 << (cos_bit_min + i))));
     }
   }
diff --git a/third_party/aom/test/av1_fwd_txfm2d_test.cc b/third_party/aom/test/av1_fwd_txfm2d_test.cc
index 25cf5ad53..af3c8ff44 100644
--- a/third_party/aom/test/av1_fwd_txfm2d_test.cc
+++ b/third_party/aom/test/av1_fwd_txfm2d_test.cc
@@ -41,9 +41,11 @@ class AV1FwdTxfm2d : public ::testing::TestWithParam<AV1FwdTxfm2dParam> {
     count_ = 500;
     TXFM_2D_FLIP_CFG fwd_txfm_flip_cfg =
         av1_get_fwd_txfm_cfg(tx_type_, tx_size_);
-    const TXFM_2D_CFG *fwd_txfm_cfg = fwd_txfm_flip_cfg.cfg;
-    int amplify_bit = fwd_txfm_cfg->shift[0] + fwd_txfm_cfg->shift[1] +
-                      fwd_txfm_cfg->shift[2];
+    // TODO(sarahparker): update this test once these functions are extended
+    // to support rectangular transforms; until then only row_cfg is used.
+    int amplify_bit = fwd_txfm_flip_cfg.row_cfg->shift[0] +
+                      fwd_txfm_flip_cfg.row_cfg->shift[1] +
+                      fwd_txfm_flip_cfg.row_cfg->shift[2];
     ud_flip_ = fwd_txfm_flip_cfg.ud_flip;
     lr_flip_ = fwd_txfm_flip_cfg.lr_flip;
     amplify_factor_ =
diff --git a/third_party/aom/test/av1_inv_txfm2d_test.cc b/third_party/aom/test/av1_inv_txfm2d_test.cc
index bb2743af1..89ae34b01 100644
--- a/third_party/aom/test/av1_inv_txfm2d_test.cc
+++ b/third_party/aom/test/av1_inv_txfm2d_test.cc
@@ -17,7 +17,7 @@
 #include "test/acm_random.h"
 #include "test/util.h"
 #include "test/av1_txfm_test.h"
-#include "av1/common/av1_inv_txfm2d_cfg.h"
+#include "av1/common/av1_inv_txfm1d_cfg.h"
 
 using libaom_test::ACMRandom;
 using libaom_test::input_base;
diff --git a/third_party/aom/test/av1_quantize_test.cc b/third_party/aom/test/av1_quantize_test.cc
index b5d1531f5..239b041b2 100644
--- a/third_party/aom/test/av1_quantize_test.cc
+++ b/third_party/aom/test/av1_quantize_test.cc
@@ -196,16 +196,18 @@ TEST_P(AV1QuantizeTest, EobVerify) { RunEobTest(); }
 
 #if HAVE_SSE4_1
 #if !CONFIG_AOM_QM
-INSTANTIATE_TEST_CASE_P(
-    SSE4_1, AV1QuantizeTest,
-    ::testing::Values(QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1,
-                                         &av1_highbd_quantize_fp_c, 16),
-                      QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1,
-                                         &av1_highbd_quantize_fp_c, 64),
-                      QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1,
-                                         &av1_highbd_quantize_fp_c, 256),
-                      QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1,
-                                         &av1_highbd_quantize_fp_c, 1024)));
+const QuantizeFuncParams qfps[4] = {  // SSE4.1 function vs. C, four sizes
+  QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c,
+                     16),
+  QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c,
+                     64),
+  QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c,
+                     256),
+  QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c,
+                     1024),
+};
+
+INSTANTIATE_TEST_CASE_P(SSE4_1, AV1QuantizeTest, ::testing::ValuesIn(qfps));
 #endif  // !CONFIG_AOM_QM
 #endif  // HAVE_SSE4_1
 }  // namespace
diff --git a/third_party/aom/test/av1_txfm_test.h b/third_party/aom/test/av1_txfm_test.h
index 70f971d09..d46f0bba7 100644
--- a/third_party/aom/test/av1_txfm_test.h
+++ b/third_party/aom/test/av1_txfm_test.h
@@ -79,7 +79,7 @@ static const int input_base = (1 << bd);
 #if CONFIG_HIGHBITDEPTH
 #if CONFIG_AV1_ENCODER
 static const Fwd_Txfm2d_Func fwd_txfm_func_ls[TX_SIZES] = {
-#if CONFIG_CB4X4
+#if CONFIG_CHROMA_2X2
   NULL,
 #endif
   av1_fwd_txfm2d_4x4_c, av1_fwd_txfm2d_8x8_c, av1_fwd_txfm2d_16x16_c,
@@ -88,7 +88,7 @@ static const Fwd_Txfm2d_Func fwd_txfm_func_ls[TX_SIZES] = {
 #endif
 
 static const Inv_Txfm2d_Func inv_txfm_func_ls[TX_SIZES] = {
-#if CONFIG_CB4X4
+#if CONFIG_CHROMA_2X2
   NULL,
 #endif
   av1_inv_txfm2d_add_4x4_c, av1_inv_txfm2d_add_8x8_c,
diff --git a/third_party/aom/test/avg_test.cc b/third_party/aom/test/avg_test.cc
index b040f6a34..e83a75c1c 100644
--- a/third_party/aom/test/avg_test.cc
+++ b/third_party/aom/test/avg_test.cc
@@ -53,21 +53,6 @@ class AverageTestBase : public ::testing::Test {
     rnd_.Reset(ACMRandom::DeterministicSeed());
   }
 
-  // Sum Pixels
-  static unsigned int ReferenceAverage8x8(const uint8_t *source, int pitch) {
-    unsigned int average = 0;
-    for (int h = 0; h < 8; ++h)
-      for (int w = 0; w < 8; ++w) average += source[h * pitch + w];
-    return ((average + 32) >> 6);
-  }
-
-  static unsigned int ReferenceAverage4x4(const uint8_t *source, int pitch) {
-    unsigned int average = 0;
-    for (int h = 0; h < 4; ++h)
-      for (int w = 0; w < 4; ++w) average += source[h * pitch + w];
-    return ((average + 8) >> 4);
-  }
-
   void FillConstant(uint8_t fill_constant) {
     for (int i = 0; i < width_ * height_; ++i) {
       source_data_[i] = fill_constant;
@@ -86,35 +71,6 @@ class AverageTestBase : public ::testing::Test {
 
   ACMRandom rnd_;
 };
-typedef unsigned int (*AverageFunction)(const uint8_t *s, int pitch);
-
-typedef std::tr1::tuple<int, int, int, int, AverageFunction> AvgFunc;
-
-class AverageTest : public AverageTestBase,
-                    public ::testing::WithParamInterface<AvgFunc> {
- public:
-  AverageTest() : AverageTestBase(GET_PARAM(0), GET_PARAM(1)) {}
-
- protected:
-  void CheckAverages() {
-    const int block_size = GET_PARAM(3);
-    unsigned int expected = 0;
-    if (block_size == 8) {
-      expected =
-          ReferenceAverage8x8(source_data_ + GET_PARAM(2), source_stride_);
-    } else if (block_size == 4) {
-      expected =
-          ReferenceAverage4x4(source_data_ + GET_PARAM(2), source_stride_);
-    }
-
-    ASM_REGISTER_STATE_CHECK(
-        GET_PARAM(4)(source_data_ + GET_PARAM(2), source_stride_));
-    unsigned int actual =
-        GET_PARAM(4)(source_data_ + GET_PARAM(2), source_stride_);
-
-    EXPECT_EQ(expected, actual);
-  }
-};
 
 typedef void (*IntProRowFunc)(int16_t hbuf[16], uint8_t const *ref,
                               const int ref_stride, const int height);
@@ -229,25 +185,6 @@ class SatdTest : public ::testing::Test,
 
 uint8_t *AverageTestBase::source_data_ = NULL;
 
-TEST_P(AverageTest, MinValue) {
-  FillConstant(0);
-  CheckAverages();
-}
-
-TEST_P(AverageTest, MaxValue) {
-  FillConstant(255);
-  CheckAverages();
-}
-
-TEST_P(AverageTest, Random) {
-  // The reference frame, but not the source frame, may be unaligned for
-  // certain types of searches.
-  for (int i = 0; i < 1000; i++) {
-    FillRandom();
-    CheckAverages();
-  }
-}
-
 TEST_P(IntProRowTest, MinValue) {
   FillConstant(0);
   RunComparison();
@@ -309,11 +246,6 @@ TEST_P(SatdTest, Random) {
 
 using std::tr1::make_tuple;
 
-INSTANTIATE_TEST_CASE_P(
-    C, AverageTest,
-    ::testing::Values(make_tuple(16, 16, 1, 8, &aom_avg_8x8_c),
-                      make_tuple(16, 16, 1, 4, &aom_avg_4x4_c)));
-
 INSTANTIATE_TEST_CASE_P(C, SatdTest,
                         ::testing::Values(make_tuple(16, &aom_satd_c),
                                           make_tuple(64, &aom_satd_c),
@@ -322,15 +254,6 @@ INSTANTIATE_TEST_CASE_P(C, SatdTest,
 
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
-    SSE2, AverageTest,
-    ::testing::Values(make_tuple(16, 16, 0, 8, &aom_avg_8x8_sse2),
-                      make_tuple(16, 16, 5, 8, &aom_avg_8x8_sse2),
-                      make_tuple(32, 32, 15, 8, &aom_avg_8x8_sse2),
-                      make_tuple(16, 16, 0, 4, &aom_avg_4x4_sse2),
-                      make_tuple(16, 16, 5, 4, &aom_avg_4x4_sse2),
-                      make_tuple(32, 32, 15, 4, &aom_avg_4x4_sse2)));
-
-INSTANTIATE_TEST_CASE_P(
     SSE2, IntProRowTest,
     ::testing::Values(make_tuple(16, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
                       make_tuple(32, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
@@ -353,15 +276,6 @@ INSTANTIATE_TEST_CASE_P(SSE2, SatdTest,
 
 #if HAVE_NEON
 INSTANTIATE_TEST_CASE_P(
-    NEON, AverageTest,
-    ::testing::Values(make_tuple(16, 16, 0, 8, &aom_avg_8x8_neon),
-                      make_tuple(16, 16, 5, 8, &aom_avg_8x8_neon),
-                      make_tuple(32, 32, 15, 8, &aom_avg_8x8_neon),
-                      make_tuple(16, 16, 0, 4, &aom_avg_4x4_neon),
-                      make_tuple(16, 16, 5, 4, &aom_avg_4x4_neon),
-                      make_tuple(32, 32, 15, 4, &aom_avg_4x4_neon)));
-
-INSTANTIATE_TEST_CASE_P(
     NEON, IntProRowTest,
     ::testing::Values(make_tuple(16, &aom_int_pro_row_neon, &aom_int_pro_row_c),
                       make_tuple(32, &aom_int_pro_row_neon, &aom_int_pro_row_c),
@@ -382,15 +296,4 @@ INSTANTIATE_TEST_CASE_P(NEON, SatdTest,
                                           make_tuple(1024, &aom_satd_neon)));
 #endif
 
-#if HAVE_MSA
-INSTANTIATE_TEST_CASE_P(
-    MSA, AverageTest,
-    ::testing::Values(make_tuple(16, 16, 0, 8, &aom_avg_8x8_msa),
-                      make_tuple(16, 16, 5, 8, &aom_avg_8x8_msa),
-                      make_tuple(32, 32, 15, 8, &aom_avg_8x8_msa),
-                      make_tuple(16, 16, 0, 4, &aom_avg_4x4_msa),
-                      make_tuple(16, 16, 5, 4, &aom_avg_4x4_msa),
-                      make_tuple(32, 32, 15, 4, &aom_avg_4x4_msa)));
-#endif
-
 }  // namespace
diff --git a/third_party/aom/test/binary_codes_test.cc b/third_party/aom/test/binary_codes_test.cc
index 385ec7687..41efec781 100644
--- a/third_party/aom/test/binary_codes_test.cc
+++ b/third_party/aom/test/binary_codes_test.cc
@@ -15,6 +15,7 @@
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
+#include "./aom_config.h"
 #include "test/acm_random.h"
 #include "aom/aom_integer.h"
 #include "aom_dsp/bitreader.h"
@@ -22,6 +23,8 @@
 #include "aom_dsp/binary_codes_reader.h"
 #include "aom_dsp/binary_codes_writer.h"
 
+#define ACCT_STR __func__
+
 using libaom_test::ACMRandom;
 
 namespace {
@@ -68,8 +71,8 @@ TEST(AV1, TestPrimitiveRefbilivel) {
           const uint16_t range = enc_values[n][p][r][v][0];
           const uint16_t near_range = enc_values[n][p][r][v][1];
           const uint16_t ref = enc_values[n][p][r][v][2];
-          const uint16_t value =
-              aom_read_primitive_refbilevel(&br, range, near_range, ref);
+          const uint16_t value = aom_read_primitive_refbilevel(
+              &br, range, near_range, ref, ACCT_STR);
           GTEST_ASSERT_EQ(value, enc_values[n][p][r][v][3]);
         }
       }
@@ -119,7 +122,7 @@ TEST(AV1, TestPrimitiveRefsubexpfin) {
           assert(k == enc_values[n][k][r][v][1]);
           const uint16_t ref = enc_values[n][k][r][v][2];
           const uint16_t value =
-              aom_read_primitive_refsubexpfin(&br, range, k, ref);
+              aom_read_primitive_refsubexpfin(&br, range, k, ref, ACCT_STR);
           GTEST_ASSERT_EQ(value, enc_values[n][k][r][v][3]);
         }
       }
diff --git a/third_party/aom/test/boolcoder_test.cc b/third_party/aom/test/boolcoder_test.cc
index 4d9d7aaf4..7abe1b1b6 100644
--- a/third_party/aom/test/boolcoder_test.cc
+++ b/third_party/aom/test/boolcoder_test.cc
@@ -68,11 +68,6 @@ TEST(AV1, TestBitIO) {
 
         aom_stop_encode(&bw);
 
-#if !CONFIG_DAALA_EC
-        // First bit should be zero
-        GTEST_ASSERT_EQ(bw_buffer[0] & 0x80, 0);
-#endif
-
         aom_reader br;
         aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);
         bit_rnd.Reset(random_seed);
@@ -91,10 +86,10 @@ TEST(AV1, TestBitIO) {
   }
 }
 
-#if CONFIG_DAALA_EC
-#define FRAC_DIFF_TOTAL_ERROR 0.07
+#if CONFIG_EC_SMALLMUL
+#define FRAC_DIFF_TOTAL_ERROR 0.16
 #else
-#define FRAC_DIFF_TOTAL_ERROR 0.2
+#define FRAC_DIFF_TOTAL_ERROR 0.07
 #endif
 
 TEST(AV1, TestTell) {
diff --git a/third_party/aom/test/borders_test.cc b/third_party/aom/test/borders_test.cc
index 076f91404..d4e8dea23 100644
--- a/third_party/aom/test/borders_test.cc
+++ b/third_party/aom/test/borders_test.cc
@@ -59,7 +59,7 @@ TEST_P(BordersTest, TestEncodeHighBitrate) {
   cfg_.rc_max_quantizer = 10;
 
   ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
-                                       40);
+                                       10);
 
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 }
@@ -75,7 +75,7 @@ TEST_P(BordersTest, TestLowBitrate) {
   cfg_.rc_min_quantizer = 40;
 
   ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
-                                       40);
+                                       10);
 
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 }
diff --git a/third_party/aom/test/convolve_test.cc b/third_party/aom/test/convolve_test.cc
index a84ef4ec8..a1fb2087d 100644
--- a/third_party/aom/test/convolve_test.cc
+++ b/third_party/aom/test/convolve_test.cc
@@ -212,7 +212,7 @@ void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
    *                               = 23
    * and filter_max_width = 16
    */
-  uint16_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension];
+  uint16_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension] = { 0 };
   const int intermediate_next_stride =
       1 - static_cast<int>(intermediate_height * output_width);
 
@@ -368,10 +368,17 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
 #endif
     /* Set up guard blocks for an inner block centered in the outer block */
     for (int i = 0; i < kOutputBufferSize; ++i) {
-      if (IsIndexInBorder(i))
+      if (IsIndexInBorder(i)) {
         output_[i] = 255;
-      else
+#if CONFIG_HIGHBITDEPTH
+        output16_[i] = mask_;
+#endif
+      } else {
         output_[i] = 0;
+#if CONFIG_HIGHBITDEPTH
+        output16_[i] = 0;
+#endif
+      }
     }
 
     ::libaom_test::ACMRandom prng;
diff --git a/third_party/aom/test/corner_match_test.cc b/third_party/aom/test/corner_match_test.cc
new file mode 100644
index 000000000..2197fffee
--- /dev/null
+++ b/third_party/aom/test/corner_match_test.cc
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+#include "./av1_rtcd.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+
+#include "av1/encoder/corner_match.h"
+
+namespace test_libaom {
+
+namespace AV1CornerMatch {
+
+using libaom_test::ACMRandom;
+
+using std::tr1::tuple;
+using std::tr1::make_tuple;
+typedef tuple<int> CornerMatchParam;
+
+class AV1CornerMatchTest : public ::testing::TestWithParam<CornerMatchParam> {
+ public:
+  virtual ~AV1CornerMatchTest();
+  virtual void SetUp();  // resets rnd_ to ACMRandom::DeterministicSeed()
+
+  virtual void TearDown();  // calls libaom_test::ClearSystemState()
+
+ protected:
+  void RunCheckOutput();  // compares the C and SSE4.1 cross-correlation
+
+  libaom_test::ACMRandom rnd_;  // generates pixel data and sample positions
+};
+
+AV1CornerMatchTest::~AV1CornerMatchTest() {}
+void AV1CornerMatchTest::SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
+void AV1CornerMatchTest::TearDown() { libaom_test::ClearSystemState(); }
+
+void AV1CornerMatchTest::RunCheckOutput() {
+  const int w = 128, h = 128;
+  const int num_iters = 10000;
+  int i, j;
+
+  // Fixed-size local buffers: ASSERT_EQ below returns on failure, which would
+  // leak heap allocations. Zero-filled so an unexpected mode stays defined.
+  uint8_t input1[w * h] = { 0 };
+  uint8_t input2[w * h] = { 0 };
+
+  // Test the two extreme cases:
+  // i) Random data, should have correlation close to 0
+  // ii) Linearly related data + noise, should have correlation close to 1
+  int mode = GET_PARAM(0);
+  if (mode == 0) {
+    for (i = 0; i < h; ++i)
+      for (j = 0; j < w; ++j) {
+        input1[i * w + j] = rnd_.Rand8();
+        input2[i * w + j] = rnd_.Rand8();
+      }
+  } else if (mode == 1) {
+    for (i = 0; i < h; ++i)
+      for (j = 0; j < w; ++j) {
+        int v = rnd_.Rand8();
+        input1[i * w + j] = v;
+        input2[i * w + j] = (v / 2) + (rnd_.Rand8() & 15);
+      }
+  }
+
+  for (i = 0; i < num_iters; ++i) {
+    int x1 = MATCH_SZ_BY2 + rnd_.PseudoUniform(w - 2 * MATCH_SZ_BY2);
+    int y1 = MATCH_SZ_BY2 + rnd_.PseudoUniform(h - 2 * MATCH_SZ_BY2);
+    int x2 = MATCH_SZ_BY2 + rnd_.PseudoUniform(w - 2 * MATCH_SZ_BY2);
+    int y2 = MATCH_SZ_BY2 + rnd_.PseudoUniform(h - 2 * MATCH_SZ_BY2);
+
+    double res_c =
+        compute_cross_correlation_c(input1, w, x1, y1, input2, w, x2, y2);
+    double res_sse4 =
+        compute_cross_correlation_sse4_1(input1, w, x1, y1, input2, w, x2, y2);
+
+    // Exact comparison: any difference between C and SSE4.1 results fails.
+    ASSERT_EQ(res_sse4, res_c);
+  }
+}
+
+TEST_P(AV1CornerMatchTest, CheckOutput) { RunCheckOutput(); }
+
+INSTANTIATE_TEST_CASE_P(SSE4_1, AV1CornerMatchTest,
+                        ::testing::Values(make_tuple(0), make_tuple(1)));
+
+}  // namespace AV1CornerMatch
+
+}  // namespace test_libaom
diff --git a/third_party/aom/test/cpu_speed_test.cc b/third_party/aom/test/cpu_speed_test.cc
index 9b7966462..ad0f2a874 100644
--- a/third_party/aom/test/cpu_speed_test.cc
+++ b/third_party/aom/test/cpu_speed_test.cc
@@ -94,7 +94,7 @@ void CpuSpeedTest::TestQ0() {
 }
 
 void CpuSpeedTest::TestScreencastQ0() {
-  ::libaom_test::Y4mVideoSource video("screendata.y4m", 0, 10);
+  ::libaom_test::Y4mVideoSource video("screendata.y4m", 0, 3);
   cfg_.g_timebase = video.timebase();
   cfg_.rc_2pass_vbr_minsection_pct = 5;
   cfg_.rc_2pass_vbr_maxsection_pct = 2000;
@@ -109,7 +109,7 @@ void CpuSpeedTest::TestScreencastQ0() {
 }
 
 void CpuSpeedTest::TestTuneScreen() {
-  ::libaom_test::Y4mVideoSource video("screendata.y4m", 0, 10);
+  ::libaom_test::Y4mVideoSource video("screendata.y4m", 0, 3);
   cfg_.g_timebase = video.timebase();
   cfg_.rc_2pass_vbr_minsection_pct = 5;
   cfg_.rc_2pass_vbr_minsection_pct = 2000;
diff --git a/third_party/aom/test/datarate_test.cc b/third_party/aom/test/datarate_test.cc
index 48be4a46d..a4a682681 100644
--- a/third_party/aom/test/datarate_test.cc
+++ b/third_party/aom/test/datarate_test.cc
@@ -249,5 +249,5 @@ TEST_P(DatarateTestLarge, ChangingDropFrameThresh) {
 AV1_INSTANTIATE_TEST_CASE(DatarateTestLarge,
                           ::testing::Values(::libaom_test::kOnePassGood,
                                             ::libaom_test::kRealTime),
-                          ::testing::Range(2, 9, 2));
+                          ::testing::Values(2, 5));
 }  // namespace
diff --git a/third_party/aom/test/decode_test_driver.cc b/third_party/aom/test/decode_test_driver.cc
index 35c28eafd..5f109e092 100644
--- a/third_party/aom/test/decode_test_driver.cc
+++ b/third_party/aom/test/decode_test_driver.cc
@@ -82,8 +82,6 @@ void DecoderTest::RunLoop(CompressedVideoSource *video,
     PreDecodeFrameHook(*video, decoder);
 
     aom_codec_stream_info_t stream_info;
-    stream_info.sz = sizeof(stream_info);
-
     if (video->cxdata() != NULL) {
       const aom_codec_err_t res_peek = decoder->PeekStream(
           video->cxdata(), video->frame_size(), &stream_info);
diff --git a/third_party/aom/test/error_resilience_test.cc b/third_party/aom/test/error_resilience_test.cc
index 63f10012f..b1d93a01f 100644
--- a/third_party/aom/test/error_resilience_test.cc
+++ b/third_party/aom/test/error_resilience_test.cc
@@ -150,7 +150,7 @@ TEST_P(ErrorResilienceTestLarge, OnVersusOff) {
   init_flags_ = AOM_CODEC_USE_PSNR;
 
   libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                     timebase.den, timebase.num, 0, 30);
+                                     timebase.den, timebase.num, 0, 12);
 
   // Error resilient mode OFF.
   cfg_.g_error_resilient = 0;
@@ -187,7 +187,7 @@ TEST_P(ErrorResilienceTestLarge, DropFramesWithoutRecovery) {
   init_flags_ = AOM_CODEC_USE_PSNR;
 
   libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                     timebase.den, timebase.num, 0, 40);
+                                     timebase.den, timebase.num, 0, 20);
 
   // Error resilient mode ON.
   cfg_.g_error_resilient = 1;
@@ -196,9 +196,8 @@ TEST_P(ErrorResilienceTestLarge, DropFramesWithoutRecovery) {
   // Set an arbitrary set of error frames same as droppable frames.
   // In addition to isolated loss/drop, add a long consecutive series
   // (of size 9) of dropped frames.
-  unsigned int num_droppable_frames = 11;
-  unsigned int droppable_frame_list[] = { 5,  16, 22, 23, 24, 25,
-                                          26, 27, 28, 29, 30 };
+  unsigned int num_droppable_frames = 5;
+  unsigned int droppable_frame_list[] = { 5, 10, 13, 16, 19 };
   SetDroppableFrames(num_droppable_frames, droppable_frame_list);
   SetErrorFrames(num_droppable_frames, droppable_frame_list);
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
diff --git a/third_party/aom/test/frame_size_tests.cc b/third_party/aom/test/frame_size_tests.cc
index 73cc9c075..d2e762ff9 100644
--- a/third_party/aom/test/frame_size_tests.cc
+++ b/third_party/aom/test/frame_size_tests.cc
@@ -46,43 +46,25 @@ class AV1FrameSizeTests : public ::libaom_test::EncoderTest,
   int expected_res_;
 };
 
+#if CONFIG_SIZE_LIMIT
 TEST_F(AV1FrameSizeTests, TestInvalidSizes) {
   ::libaom_test::RandomVideoSource video;
 
-#if CONFIG_SIZE_LIMIT
   video.SetSize(DECODE_WIDTH_LIMIT + 16, DECODE_HEIGHT_LIMIT + 16);
   video.set_limit(2);
   expected_res_ = AOM_CODEC_CORRUPT_FRAME;
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-#endif
 }
 
 TEST_F(AV1FrameSizeTests, LargeValidSizes) {
   ::libaom_test::RandomVideoSource video;
 
-#if CONFIG_SIZE_LIMIT
   video.SetSize(DECODE_WIDTH_LIMIT, DECODE_HEIGHT_LIMIT);
   video.set_limit(2);
   expected_res_ = AOM_CODEC_OK;
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-#else
-  // This test produces a pretty large single frame allocation,  (roughly
-  // 25 megabits). The encoder allocates a good number of these frames
-  // one for each lag in frames (for 2 pass), and then one for each possible
-  // reference buffer (8) - we can end up with up to 30 buffers of roughly this
-  // size or almost 1 gig of memory.
-  // In total the allocations will exceed 2GiB which may cause a failure with
-  // non-64 bit platforms, use a smaller size in that case.
-  if (sizeof(void *) < 8)
-    video.SetSize(2560, 1440);
-  else
-    video.SetSize(4096, 4096);
-
-  video.set_limit(2);
-  expected_res_ = AOM_CODEC_OK;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-#endif
 }
+#endif
 
 TEST_F(AV1FrameSizeTests, OneByOneVideo) {
   ::libaom_test::RandomVideoSource video;
diff --git a/third_party/aom/test/hiprec_convolve_test.cc b/third_party/aom/test/hiprec_convolve_test.cc
new file mode 100644
index 000000000..0b34c99c9
--- /dev/null
+++ b/third_party/aom/test/hiprec_convolve_test.cc
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/hiprec_convolve_test_util.h"
+
+using std::tr1::tuple;
+using std::tr1::make_tuple;
+using libaom_test::ACMRandom;
+using libaom_test::AV1HiprecConvolve::AV1HiprecConvolveTest;
+#if CONFIG_HIGHBITDEPTH
+using libaom_test::AV1HighbdHiprecConvolve::AV1HighbdHiprecConvolveTest;
+#endif
+
+namespace {
+
+TEST_P(AV1HiprecConvolveTest, CheckOutput) { RunCheckOutput(GET_PARAM(3)); }
+#if HAVE_SSE2  // Mirror the HAVE_SSSE3 guard used for the high-bitdepth case.
+INSTANTIATE_TEST_CASE_P(SSE2, AV1HiprecConvolveTest,
+                        libaom_test::AV1HiprecConvolve::BuildParams(
+                            aom_convolve8_add_src_hip_sse2));
+#endif  // HAVE_SSE2
+#if CONFIG_HIGHBITDEPTH && HAVE_SSSE3
+TEST_P(AV1HighbdHiprecConvolveTest, CheckOutput) {
+  RunCheckOutput(GET_PARAM(4));
+}
+// Parameter 4 of each tuple is the function under test (see BuildParams).
+INSTANTIATE_TEST_CASE_P(SSSE3, AV1HighbdHiprecConvolveTest,
+                        libaom_test::AV1HighbdHiprecConvolve::BuildParams(
+                            aom_highbd_convolve8_add_src_hip_ssse3));
+// Closes CONFIG_HIGHBITDEPTH && HAVE_SSSE3.
+#endif
+
+}  // namespace
diff --git a/third_party/aom/test/hiprec_convolve_test_util.cc b/third_party/aom/test/hiprec_convolve_test_util.cc
new file mode 100644
index 000000000..d53384c5b
--- /dev/null
+++ b/third_party/aom/test/hiprec_convolve_test_util.cc
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "test/hiprec_convolve_test_util.h"
+
+#include "av1/common/restoration.h"
+
+using std::tr1::tuple;
+using std::tr1::make_tuple;
+
+namespace libaom_test {
+
+// Fill |kernel| with a random symmetric filter: taps 0..2 are drawn (in that
+// order) from the value ranges of the loop-restoration experiment and
+// mirrored into taps 6..4, tap 3 is minus their sum, and tap 7 is zero.
+static void generate_one_kernel(ACMRandom *rnd, InterpKernel kernel) {
+  const int min_tap[3] = { WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP1_MINV,
+                           WIENER_FILT_TAP2_MINV };
+  const int max_tap[3] = { WIENER_FILT_TAP0_MAXV, WIENER_FILT_TAP1_MAXV,
+                           WIENER_FILT_TAP2_MAXV };
+  int outer_sum = 0;
+
+  for (int t = 0; t < 3; ++t) {
+    kernel[t] = kernel[6 - t] =
+        min_tap[t] + rnd->PseudoUniform(max_tap[t] + 1 - min_tap[t]);
+    outer_sum += kernel[t];
+  }
+  kernel[3] = -outer_sum;
+  kernel[7] = 0;
+}
+
+// Generate a random pair of filter kernels, using the ranges
+// of possible values from the loop-restoration experiment.
+// The horizontal kernel consumes random values first, then the vertical
+// one, so the order of draws from |rnd| is h-taps 0..2 then v-taps 0..2.
+static void generate_kernels(ACMRandom *rnd, InterpKernel hkernel,
+                             InterpKernel vkernel) {
+  generate_one_kernel(rnd, hkernel);
+  generate_one_kernel(rnd, vkernel);
+}
+
+namespace AV1HiprecConvolve {
+
+// Each entry is (output width, output height, iteration count, function).
+::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams(
+    hiprec_convolve_func filter) {
+  const HiprecConvolveParam kCases[] = { make_tuple(8, 8, 50000, filter),
+                                         make_tuple(64, 64, 1000, filter),
+                                         make_tuple(32, 8, 10000, filter) };
+  return ::testing::ValuesIn(kCases);
+}
+
+AV1HiprecConvolveTest::~AV1HiprecConvolveTest() {}
+void AV1HiprecConvolveTest::SetUp() {
+  rnd_.Reset(ACMRandom::DeterministicSeed());
+}
+// SetUp() reseeds rnd_ with ACMRandom::DeterministicSeed() before each test.
+void AV1HiprecConvolveTest::TearDown() { libaom_test::ClearSystemState(); }
+
+// Runs |test_impl| and the C reference on random blocks of a random 128x128
+// image and asserts that both write identical output pixels.
+void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
+  const int w = 128, h = 128;
+  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
+  const int num_iters = GET_PARAM(2);
+  int i, j;
+
+  uint8_t *input = new uint8_t[h * w];
+
+  // The convolve functions always write rows with widths that are multiples of
+  // 8. So to avoid a buffer overflow, we may need to pad rows to a multiple
+  // of 8.
+  int output_n = ((out_w + 7) & ~7) * out_h;
+  uint8_t *output = new uint8_t[output_n];
+  uint8_t *output2 = new uint8_t[output_n];
+
+  // Generate random filter kernels
+  DECLARE_ALIGNED(16, InterpKernel, hkernel);
+  DECLARE_ALIGNED(16, InterpKernel, vkernel);
+  generate_kernels(&rnd_, hkernel, vkernel);
+
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
+
+  for (i = 0; i < num_iters; ++i) {
+    // Choose random locations within the source block: the row offset is
+    // bounded by the heights (h, out_h), the column offset by the widths.
+    int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
+    int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
+    aom_convolve8_add_src_hip_c(input + offset_r * w + offset_c, w, output,
+                                out_w, hkernel, 16, vkernel, 16, out_w, out_h);
+    test_impl(input + offset_r * w + offset_c, w, output2, out_w, hkernel, 16,
+              vkernel, 16, out_w, out_h);
+    for (j = 0; j < out_w * out_h; ++j)
+      ASSERT_EQ(output[j], output2[j]) << "Pixel mismatch at index " << j
+                                       << " = (" << (j % out_w) << ", "
+                                       << (j / out_w) << ") on iteration " << i;
+  }
+  delete[] input;
+  delete[] output;
+  delete[] output2;
+}
+}  // namespace AV1HiprecConvolve
+
+#if CONFIG_HIGHBITDEPTH
+namespace AV1HighbdHiprecConvolve {
+
+::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams(
+    highbd_hiprec_convolve_func filter) {
+  static const int kBitDepths[3] = { 8, 10, 12 };
+  HighbdHiprecConvolveParam cases[9];  // (w, h, iterations, bit depth, func)
+  for (int b = 0; b < 3; ++b) {
+    cases[3 * b + 0] = make_tuple(8, 8, 50000, kBitDepths[b], filter);
+    cases[3 * b + 1] = make_tuple(64, 64, 1000, kBitDepths[b], filter);
+    cases[3 * b + 2] = make_tuple(32, 8, 10000, kBitDepths[b], filter);
+  }
+  return ::testing::ValuesIn(cases);
+}
+
+AV1HighbdHiprecConvolveTest::~AV1HighbdHiprecConvolveTest() {}
+void AV1HighbdHiprecConvolveTest::SetUp() {
+  rnd_.Reset(ACMRandom::DeterministicSeed());
+}
+// SetUp() reseeds rnd_ with ACMRandom::DeterministicSeed() before each test.
+void AV1HighbdHiprecConvolveTest::TearDown() {
+  libaom_test::ClearSystemState();
+}
+
+// Checks |test_impl| against the C reference on random |bd|-bit input.
+void AV1HighbdHiprecConvolveTest::RunCheckOutput(
+    highbd_hiprec_convolve_func test_impl) {
+  const int w = 128, h = 128;
+  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
+  const int num_iters = GET_PARAM(2);
+  const int bd = GET_PARAM(3);
+  int i, j;
+
+  uint16_t *input = new uint16_t[h * w];
+
+  // The convolve functions always write rows with widths that are multiples of
+  // 8. So to avoid a buffer overflow, we may need to pad rows to a multiple
+  // of 8.
+  int output_n = ((out_w + 7) & ~7) * out_h;
+  uint16_t *output = new uint16_t[output_n];
+  uint16_t *output2 = new uint16_t[output_n];
+
+  // Generate random filter kernels
+  DECLARE_ALIGNED(16, InterpKernel, hkernel);
+  DECLARE_ALIGNED(16, InterpKernel, vkernel);
+  generate_kernels(&rnd_, hkernel, vkernel);
+
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+
+  uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input);
+  uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output);
+  uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2);
+
+  for (i = 0; i < num_iters; ++i) {
+    // Choose random locations within the source block: the row offset is
+    // bounded by the heights (h, out_h), the column offset by the widths.
+    int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
+    int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
+    aom_highbd_convolve8_add_src_hip_c(input_ptr + offset_r * w + offset_c, w,
+                                       output_ptr, out_w, hkernel, 16, vkernel,
+                                       16, out_w, out_h, bd);
+    test_impl(input_ptr + offset_r * w + offset_c, w, output2_ptr, out_w,
+              hkernel, 16, vkernel, 16, out_w, out_h, bd);
+    for (j = 0; j < out_w * out_h; ++j)
+      ASSERT_EQ(output[j], output2[j]) << "Pixel mismatch at index " << j
+                                       << " = (" << (j % out_w) << ", "
+                                       << (j / out_w) << ") on iteration " << i;
+  }
+  delete[] input;
+  delete[] output;
+  delete[] output2;
+}
+}  // namespace AV1HighbdHiprecConvolve
+#endif  // CONFIG_HIGHBITDEPTH
+}  // namespace libaom_test
diff --git a/third_party/aom/test/hiprec_convolve_test_util.h b/third_party/aom/test/hiprec_convolve_test_util.h
new file mode 100644
index 000000000..fe31570f5
--- /dev/null
+++ b/third_party/aom/test/hiprec_convolve_test_util.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef TEST_HIPREC_CONVOLVE_TEST_UTIL_H_
+#define TEST_HIPREC_CONVOLVE_TEST_UTIL_H_
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+#include "./av1_rtcd.h"
+#include "./aom_dsp_rtcd.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+
+#include "av1/common/mv.h"
+
+namespace libaom_test {
+
+namespace AV1HiprecConvolve {
+
+typedef void (*hiprec_convolve_func)(const uint8_t *src, ptrdiff_t src_stride,
+                                     uint8_t *dst, ptrdiff_t dst_stride,
+                                     const int16_t *filter_x, int x_step_q4,
+                                     const int16_t *filter_y, int y_step_q4,
+                                     int w, int h);
+// (output width, output height, iterations, function under test)
+typedef std::tr1::tuple<int, int, int, hiprec_convolve_func>
+    HiprecConvolveParam;
+
+::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams(
+    hiprec_convolve_func filter);
+
+class AV1HiprecConvolveTest
+    : public ::testing::TestWithParam<HiprecConvolveParam> {
+ public:
+  virtual ~AV1HiprecConvolveTest();
+  virtual void SetUp();
+  // SetUp() reseeds rnd_; TearDown() calls libaom_test::ClearSystemState().
+  virtual void TearDown();
+
+ protected:
+  void RunCheckOutput(hiprec_convolve_func test_impl);
+  // RunCheckOutput() compares test_impl with the C version; rnd_ feeds it.
+  libaom_test::ACMRandom rnd_;
+};
+
+}  // namespace AV1HiprecConvolve
+
+#if CONFIG_HIGHBITDEPTH
+namespace AV1HighbdHiprecConvolve {
+typedef void (*highbd_hiprec_convolve_func)(
+    const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
+    ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4,
+    const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+// (output width, output height, iterations, bit depth, function under test)
+typedef std::tr1::tuple<int, int, int, int, highbd_hiprec_convolve_func>
+    HighbdHiprecConvolveParam;
+
+::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams(
+    highbd_hiprec_convolve_func filter);
+
+class AV1HighbdHiprecConvolveTest
+    : public ::testing::TestWithParam<HighbdHiprecConvolveParam> {
+ public:
+  virtual ~AV1HighbdHiprecConvolveTest();
+  virtual void SetUp();
+  // SetUp() reseeds rnd_; TearDown() calls libaom_test::ClearSystemState().
+  virtual void TearDown();
+
+ protected:
+  void RunCheckOutput(highbd_hiprec_convolve_func test_impl);
+  // RunCheckOutput() compares test_impl with the C version; rnd_ feeds it.
+  libaom_test::ACMRandom rnd_;
+};
+
+}  // namespace AV1HighbdHiprecConvolve
+#endif  // CONFIG_HIGHBITDEPTH
+
+}  // namespace libaom_test
+
+#endif  // TEST_HIPREC_CONVOLVE_TEST_UTIL_H_
diff --git a/third_party/aom/test/intrapred_test.cc b/third_party/aom/test/intrapred_test.cc
index 4efed57b6..5dd8c00be 100644
--- a/third_party/aom/test/intrapred_test.cc
+++ b/third_party/aom/test/intrapred_test.cc
@@ -126,105 +126,111 @@ TEST_P(AV1IntraPredTest, IntraPredTests) {
 
 #if HAVE_SSE2
 #if CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    SSE2_TO_C_8, AV1IntraPredTest,
-    ::testing::Values(IntraPredFunc(&aom_highbd_dc_predictor_32x32_sse2,
-                                    &aom_highbd_dc_predictor_32x32_c, 32, 8),
+const IntraPredFunc IntraPredTestVector8[] = {
+  IntraPredFunc(&aom_highbd_dc_predictor_32x32_sse2,
+                &aom_highbd_dc_predictor_32x32_c, 32, 8),
 #if !CONFIG_ALT_INTRA
-                      IntraPredFunc(&aom_highbd_tm_predictor_16x16_sse2,
-                                    &aom_highbd_tm_predictor_16x16_c, 16, 8),
-                      IntraPredFunc(&aom_highbd_tm_predictor_32x32_sse2,
-                                    &aom_highbd_tm_predictor_32x32_c, 32, 8),
+  IntraPredFunc(&aom_highbd_tm_predictor_16x16_sse2,
+                &aom_highbd_tm_predictor_16x16_c, 16, 8),
+  IntraPredFunc(&aom_highbd_tm_predictor_32x32_sse2,
+                &aom_highbd_tm_predictor_32x32_c, 32, 8),
 #endif  // !CONFIG_ALT_INTRA
 
-                      IntraPredFunc(&aom_highbd_dc_predictor_4x4_sse2,
-                                    &aom_highbd_dc_predictor_4x4_c, 4, 8),
-                      IntraPredFunc(&aom_highbd_dc_predictor_8x8_sse2,
-                                    &aom_highbd_dc_predictor_8x8_c, 8, 8),
-                      IntraPredFunc(&aom_highbd_dc_predictor_16x16_sse2,
-                                    &aom_highbd_dc_predictor_16x16_c, 16, 8),
-                      IntraPredFunc(&aom_highbd_v_predictor_4x4_sse2,
-                                    &aom_highbd_v_predictor_4x4_c, 4, 8),
-                      IntraPredFunc(&aom_highbd_v_predictor_8x8_sse2,
-                                    &aom_highbd_v_predictor_8x8_c, 8, 8),
-                      IntraPredFunc(&aom_highbd_v_predictor_16x16_sse2,
-                                    &aom_highbd_v_predictor_16x16_c, 16, 8),
-                      IntraPredFunc(&aom_highbd_v_predictor_32x32_sse2,
-                                    &aom_highbd_v_predictor_32x32_c, 32, 8)
+  IntraPredFunc(&aom_highbd_dc_predictor_4x4_sse2,
+                &aom_highbd_dc_predictor_4x4_c, 4, 8),
+  IntraPredFunc(&aom_highbd_dc_predictor_8x8_sse2,
+                &aom_highbd_dc_predictor_8x8_c, 8, 8),
+  IntraPredFunc(&aom_highbd_dc_predictor_16x16_sse2,
+                &aom_highbd_dc_predictor_16x16_c, 16, 8),
+  IntraPredFunc(&aom_highbd_v_predictor_4x4_sse2, &aom_highbd_v_predictor_4x4_c,
+                4, 8),
+  IntraPredFunc(&aom_highbd_v_predictor_8x8_sse2, &aom_highbd_v_predictor_8x8_c,
+                8, 8),
+  IntraPredFunc(&aom_highbd_v_predictor_16x16_sse2,
+                &aom_highbd_v_predictor_16x16_c, 16, 8),
+  IntraPredFunc(&aom_highbd_v_predictor_32x32_sse2,
+                &aom_highbd_v_predictor_32x32_c, 32, 8)
 #if !CONFIG_ALT_INTRA
-                          ,
-                      IntraPredFunc(&aom_highbd_tm_predictor_4x4_sse2,
-                                    &aom_highbd_tm_predictor_4x4_c, 4, 8),
-                      IntraPredFunc(&aom_highbd_tm_predictor_8x8_sse2,
-                                    &aom_highbd_tm_predictor_8x8_c, 8, 8)
+      ,
+  IntraPredFunc(&aom_highbd_tm_predictor_4x4_sse2,
+                &aom_highbd_tm_predictor_4x4_c, 4, 8),
+  IntraPredFunc(&aom_highbd_tm_predictor_8x8_sse2,
+                &aom_highbd_tm_predictor_8x8_c, 8, 8)
 #endif  // !CONFIG_ALT_INTRA
-                          ));
+};
+
+INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, AV1IntraPredTest,
+                        ::testing::ValuesIn(IntraPredTestVector8));
 
-INSTANTIATE_TEST_CASE_P(
-    SSE2_TO_C_10, AV1IntraPredTest,
-    ::testing::Values(IntraPredFunc(&aom_highbd_dc_predictor_32x32_sse2,
-                                    &aom_highbd_dc_predictor_32x32_c, 32, 10),
+const IntraPredFunc IntraPredTestVector10[] = {
+  IntraPredFunc(&aom_highbd_dc_predictor_32x32_sse2,
+                &aom_highbd_dc_predictor_32x32_c, 32, 10),
 #if !CONFIG_ALT_INTRA
-                      IntraPredFunc(&aom_highbd_tm_predictor_16x16_sse2,
-                                    &aom_highbd_tm_predictor_16x16_c, 16, 10),
-                      IntraPredFunc(&aom_highbd_tm_predictor_32x32_sse2,
-                                    &aom_highbd_tm_predictor_32x32_c, 32, 10),
+  IntraPredFunc(&aom_highbd_tm_predictor_16x16_sse2,
+                &aom_highbd_tm_predictor_16x16_c, 16, 10),
+  IntraPredFunc(&aom_highbd_tm_predictor_32x32_sse2,
+                &aom_highbd_tm_predictor_32x32_c, 32, 10),
 #endif  // !CONFIG_ALT_INTRA
-                      IntraPredFunc(&aom_highbd_dc_predictor_4x4_sse2,
-                                    &aom_highbd_dc_predictor_4x4_c, 4, 10),
-                      IntraPredFunc(&aom_highbd_dc_predictor_8x8_sse2,
-                                    &aom_highbd_dc_predictor_8x8_c, 8, 10),
-                      IntraPredFunc(&aom_highbd_dc_predictor_16x16_sse2,
-                                    &aom_highbd_dc_predictor_16x16_c, 16, 10),
-                      IntraPredFunc(&aom_highbd_v_predictor_4x4_sse2,
-                                    &aom_highbd_v_predictor_4x4_c, 4, 10),
-                      IntraPredFunc(&aom_highbd_v_predictor_8x8_sse2,
-                                    &aom_highbd_v_predictor_8x8_c, 8, 10),
-                      IntraPredFunc(&aom_highbd_v_predictor_16x16_sse2,
-                                    &aom_highbd_v_predictor_16x16_c, 16, 10),
-                      IntraPredFunc(&aom_highbd_v_predictor_32x32_sse2,
-                                    &aom_highbd_v_predictor_32x32_c, 32, 10)
+  IntraPredFunc(&aom_highbd_dc_predictor_4x4_sse2,
+                &aom_highbd_dc_predictor_4x4_c, 4, 10),
+  IntraPredFunc(&aom_highbd_dc_predictor_8x8_sse2,
+                &aom_highbd_dc_predictor_8x8_c, 8, 10),
+  IntraPredFunc(&aom_highbd_dc_predictor_16x16_sse2,
+                &aom_highbd_dc_predictor_16x16_c, 16, 10),
+  IntraPredFunc(&aom_highbd_v_predictor_4x4_sse2, &aom_highbd_v_predictor_4x4_c,
+                4, 10),
+  IntraPredFunc(&aom_highbd_v_predictor_8x8_sse2, &aom_highbd_v_predictor_8x8_c,
+                8, 10),
+  IntraPredFunc(&aom_highbd_v_predictor_16x16_sse2,
+                &aom_highbd_v_predictor_16x16_c, 16, 10),
+  IntraPredFunc(&aom_highbd_v_predictor_32x32_sse2,
+                &aom_highbd_v_predictor_32x32_c, 32, 10)
 #if !CONFIG_ALT_INTRA
-                          ,
-                      IntraPredFunc(&aom_highbd_tm_predictor_4x4_sse2,
-                                    &aom_highbd_tm_predictor_4x4_c, 4, 10),
-                      IntraPredFunc(&aom_highbd_tm_predictor_8x8_sse2,
-                                    &aom_highbd_tm_predictor_8x8_c, 8, 10)
+      ,
+  IntraPredFunc(&aom_highbd_tm_predictor_4x4_sse2,
+                &aom_highbd_tm_predictor_4x4_c, 4, 10),
+  IntraPredFunc(&aom_highbd_tm_predictor_8x8_sse2,
+                &aom_highbd_tm_predictor_8x8_c, 8, 10)
 #endif  // !CONFIG_ALT_INTRA
-                          ));
+};
 
-INSTANTIATE_TEST_CASE_P(
-    SSE2_TO_C_12, AV1IntraPredTest,
-    ::testing::Values(IntraPredFunc(&aom_highbd_dc_predictor_32x32_sse2,
-                                    &aom_highbd_dc_predictor_32x32_c, 32, 12),
+INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, AV1IntraPredTest,
+                        ::testing::ValuesIn(IntraPredTestVector10));
+
+const IntraPredFunc IntraPredTestVector12[] = {
+  IntraPredFunc(&aom_highbd_dc_predictor_32x32_sse2,
+                &aom_highbd_dc_predictor_32x32_c, 32, 12),
 #if !CONFIG_ALT_INTRA
-                      IntraPredFunc(&aom_highbd_tm_predictor_16x16_sse2,
-                                    &aom_highbd_tm_predictor_16x16_c, 16, 12),
-                      IntraPredFunc(&aom_highbd_tm_predictor_32x32_sse2,
-                                    &aom_highbd_tm_predictor_32x32_c, 32, 12),
+  IntraPredFunc(&aom_highbd_tm_predictor_16x16_sse2,
+                &aom_highbd_tm_predictor_16x16_c, 16, 12),
+  IntraPredFunc(&aom_highbd_tm_predictor_32x32_sse2,
+                &aom_highbd_tm_predictor_32x32_c, 32, 12),
 #endif  // !CONFIG_ALT_INTRA
-                      IntraPredFunc(&aom_highbd_dc_predictor_4x4_sse2,
-                                    &aom_highbd_dc_predictor_4x4_c, 4, 12),
-                      IntraPredFunc(&aom_highbd_dc_predictor_8x8_sse2,
-                                    &aom_highbd_dc_predictor_8x8_c, 8, 12),
-                      IntraPredFunc(&aom_highbd_dc_predictor_16x16_sse2,
-                                    &aom_highbd_dc_predictor_16x16_c, 16, 12),
-                      IntraPredFunc(&aom_highbd_v_predictor_4x4_sse2,
-                                    &aom_highbd_v_predictor_4x4_c, 4, 12),
-                      IntraPredFunc(&aom_highbd_v_predictor_8x8_sse2,
-                                    &aom_highbd_v_predictor_8x8_c, 8, 12),
-                      IntraPredFunc(&aom_highbd_v_predictor_16x16_sse2,
-                                    &aom_highbd_v_predictor_16x16_c, 16, 12),
-                      IntraPredFunc(&aom_highbd_v_predictor_32x32_sse2,
-                                    &aom_highbd_v_predictor_32x32_c, 32, 12)
+  IntraPredFunc(&aom_highbd_dc_predictor_4x4_sse2,
+                &aom_highbd_dc_predictor_4x4_c, 4, 12),
+  IntraPredFunc(&aom_highbd_dc_predictor_8x8_sse2,
+                &aom_highbd_dc_predictor_8x8_c, 8, 12),
+  IntraPredFunc(&aom_highbd_dc_predictor_16x16_sse2,
+                &aom_highbd_dc_predictor_16x16_c, 16, 12),
+  IntraPredFunc(&aom_highbd_v_predictor_4x4_sse2, &aom_highbd_v_predictor_4x4_c,
+                4, 12),
+  IntraPredFunc(&aom_highbd_v_predictor_8x8_sse2, &aom_highbd_v_predictor_8x8_c,
+                8, 12),
+  IntraPredFunc(&aom_highbd_v_predictor_16x16_sse2,
+                &aom_highbd_v_predictor_16x16_c, 16, 12),
+  IntraPredFunc(&aom_highbd_v_predictor_32x32_sse2,
+                &aom_highbd_v_predictor_32x32_c, 32, 12)
 #if !CONFIG_ALT_INTRA
-                          ,
-                      IntraPredFunc(&aom_highbd_tm_predictor_4x4_sse2,
-                                    &aom_highbd_tm_predictor_4x4_c, 4, 12),
-                      IntraPredFunc(&aom_highbd_tm_predictor_8x8_sse2,
-                                    &aom_highbd_tm_predictor_8x8_c, 8, 12)
+      ,
+  IntraPredFunc(&aom_highbd_tm_predictor_4x4_sse2,
+                &aom_highbd_tm_predictor_4x4_c, 4, 12),
+  IntraPredFunc(&aom_highbd_tm_predictor_8x8_sse2,
+                &aom_highbd_tm_predictor_8x8_c, 8, 12)
 #endif  // !CONFIG_ALT_INTRA
-                          ));
+};
+
+INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, AV1IntraPredTest,
+                        ::testing::ValuesIn(IntraPredTestVector12));
 
 #endif  // CONFIG_HIGHBITDEPTH
 #endif  // HAVE_SSE2
diff --git a/third_party/aom/test/masked_sad_test.cc b/third_party/aom/test/masked_sad_test.cc
index 53f85eef7..2dde3c537 100644
--- a/third_party/aom/test/masked_sad_test.cc
+++ b/third_party/aom/test/masked_sad_test.cc
@@ -25,11 +25,13 @@
 using libaom_test::ACMRandom;
 
 namespace {
-const int number_of_iterations = 500;
+const int number_of_iterations = 200;
 
-typedef unsigned int (*MaskedSADFunc)(const uint8_t *a, int a_stride,
-                                      const uint8_t *b, int b_stride,
-                                      const uint8_t *m, int m_stride);
+typedef unsigned int (*MaskedSADFunc)(const uint8_t *src, int src_stride,
+                                      const uint8_t *ref, int ref_stride,
+                                      const uint8_t *second_pred,
+                                      const uint8_t *msk, int msk_stride,
+                                      int invert_mask);
 typedef std::tr1::tuple<MaskedSADFunc, MaskedSADFunc> MaskedSADParam;
 
 class MaskedSADTest : public ::testing::TestWithParam<MaskedSADParam> {
@@ -52,6 +54,7 @@ TEST_P(MaskedSADTest, OperationCheck) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
   DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
   DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
+  DECLARE_ALIGNED(16, uint8_t, second_pred_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
   DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
   int err_count = 0;
   int first_failure = -1;
@@ -62,18 +65,23 @@ TEST_P(MaskedSADTest, OperationCheck) {
     for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) {
       src_ptr[j] = rnd.Rand8();
       ref_ptr[j] = rnd.Rand8();
+      second_pred_ptr[j] = rnd.Rand8();
       msk_ptr[j] = ((rnd.Rand8() & 0x7f) > 64) ? rnd.Rand8() & 0x3f : 64;
       assert(msk_ptr[j] <= 64);
     }
 
-    ref_ret = ref_maskedSAD_op_(src_ptr, src_stride, ref_ptr, ref_stride,
-                                msk_ptr, msk_stride);
-    ASM_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src_ptr, src_stride, ref_ptr,
-                                                 ref_stride, msk_ptr,
-                                                 msk_stride));
-    if (ret != ref_ret) {
-      err_count++;
-      if (first_failure == -1) first_failure = i;
+    for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
+      ref_ret =
+          ref_maskedSAD_op_(src_ptr, src_stride, ref_ptr, ref_stride,
+                            second_pred_ptr, msk_ptr, msk_stride, invert_mask);
+      ASM_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src_ptr, src_stride, ref_ptr,
+                                                   ref_stride, second_pred_ptr,
+                                                   msk_ptr, msk_stride,
+                                                   invert_mask));
+      if (ret != ref_ret) {
+        err_count++;
+        if (first_failure == -1) first_failure = i;
+      }
     }
   }
   EXPECT_EQ(0, err_count)
@@ -82,9 +90,11 @@ TEST_P(MaskedSADTest, OperationCheck) {
 }
 
 #if CONFIG_HIGHBITDEPTH
-typedef unsigned int (*HighbdMaskedSADFunc)(const uint8_t *a, int a_stride,
-                                            const uint8_t *b, int b_stride,
-                                            const uint8_t *m, int m_stride);
+typedef unsigned int (*HighbdMaskedSADFunc)(const uint8_t *src, int src_stride,
+                                            const uint8_t *ref, int ref_stride,
+                                            const uint8_t *second_pred,
+                                            const uint8_t *msk, int msk_stride,
+                                            int invert_mask);
 typedef std::tr1::tuple<HighbdMaskedSADFunc, HighbdMaskedSADFunc>
     HighbdMaskedSADParam;
 
@@ -109,9 +119,11 @@ TEST_P(HighbdMaskedSADTest, OperationCheck) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
   DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
   DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
+  DECLARE_ALIGNED(16, uint16_t, second_pred_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
   DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
   uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
   uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
+  uint8_t *second_pred8_ptr = CONVERT_TO_BYTEPTR(second_pred_ptr);
   int err_count = 0;
   int first_failure = -1;
   int src_stride = MAX_SB_SIZE;
@@ -121,17 +133,22 @@ TEST_P(HighbdMaskedSADTest, OperationCheck) {
     for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) {
       src_ptr[j] = rnd.Rand16() & 0xfff;
       ref_ptr[j] = rnd.Rand16() & 0xfff;
+      second_pred_ptr[j] = rnd.Rand16() & 0xfff;
       msk_ptr[j] = ((rnd.Rand8() & 0x7f) > 64) ? rnd.Rand8() & 0x3f : 64;
     }
 
-    ref_ret = ref_maskedSAD_op_(src8_ptr, src_stride, ref8_ptr, ref_stride,
-                                msk_ptr, msk_stride);
-    ASM_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src8_ptr, src_stride, ref8_ptr,
-                                                 ref_stride, msk_ptr,
-                                                 msk_stride));
-    if (ret != ref_ret) {
-      err_count++;
-      if (first_failure == -1) first_failure = i;
+    for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
+      ref_ret =
+          ref_maskedSAD_op_(src8_ptr, src_stride, ref8_ptr, ref_stride,
+                            second_pred8_ptr, msk_ptr, msk_stride, invert_mask);
+      ASM_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src8_ptr, src_stride,
+                                                   ref8_ptr, ref_stride,
+                                                   second_pred8_ptr, msk_ptr,
+                                                   msk_stride, invert_mask));
+      if (ret != ref_ret) {
+        err_count++;
+        if (first_failure == -1) first_failure = i;
+      }
     }
   }
   EXPECT_EQ(0, err_count)
diff --git a/third_party/aom/test/masked_variance_test.cc b/third_party/aom/test/masked_variance_test.cc
index 65e852aea..bf113c69e 100644
--- a/third_party/aom/test/masked_variance_test.cc
+++ b/third_party/aom/test/masked_variance_test.cc
@@ -29,107 +29,12 @@
 using libaom_test::ACMRandom;
 
 namespace {
-const int number_of_iterations = 500;
-
-typedef unsigned int (*MaskedVarianceFunc)(const uint8_t *a, int a_stride,
-                                           const uint8_t *b, int b_stride,
-                                           const uint8_t *m, int m_stride,
-                                           unsigned int *sse);
-
-typedef std::tr1::tuple<MaskedVarianceFunc, MaskedVarianceFunc>
-    MaskedVarianceParam;
-
-class MaskedVarianceTest
-    : public ::testing::TestWithParam<MaskedVarianceParam> {
- public:
-  virtual ~MaskedVarianceTest() {}
-  virtual void SetUp() {
-    opt_func_ = GET_PARAM(0);
-    ref_func_ = GET_PARAM(1);
-  }
-
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  MaskedVarianceFunc opt_func_;
-  MaskedVarianceFunc ref_func_;
-};
-
-TEST_P(MaskedVarianceTest, OperationCheck) {
-  unsigned int ref_ret, opt_ret;
-  unsigned int ref_sse, opt_sse;
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
-  DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
-  DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
-  int err_count = 0;
-  int first_failure = -1;
-  int src_stride = MAX_SB_SIZE;
-  int ref_stride = MAX_SB_SIZE;
-  int msk_stride = MAX_SB_SIZE;
-
-  for (int i = 0; i < number_of_iterations; ++i) {
-    for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) {
-      src_ptr[j] = rnd.Rand8();
-      ref_ptr[j] = rnd.Rand8();
-      msk_ptr[j] = rnd(65);
-    }
-
-    ref_ret = ref_func_(src_ptr, src_stride, ref_ptr, ref_stride, msk_ptr,
-                        msk_stride, &ref_sse);
-    ASM_REGISTER_STATE_CHECK(opt_ret = opt_func_(src_ptr, src_stride, ref_ptr,
-                                                 ref_stride, msk_ptr,
-                                                 msk_stride, &opt_sse));
-
-    if (opt_ret != ref_ret || opt_sse != ref_sse) {
-      err_count++;
-      if (first_failure == -1) first_failure = i;
-    }
-  }
-
-  EXPECT_EQ(0, err_count) << "Error: Masked Variance Test OperationCheck,"
-                          << "C output doesn't match SSSE3 output. "
-                          << "First failed at test case " << first_failure;
-}
-
-TEST_P(MaskedVarianceTest, ExtremeValues) {
-  unsigned int ref_ret, opt_ret;
-  unsigned int ref_sse, opt_sse;
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
-  DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
-  DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
-  int err_count = 0;
-  int first_failure = -1;
-  int src_stride = MAX_SB_SIZE;
-  int ref_stride = MAX_SB_SIZE;
-  int msk_stride = MAX_SB_SIZE;
-
-  for (int i = 0; i < 8; ++i) {
-    memset(src_ptr, (i & 0x1) ? 255 : 0, MAX_SB_SIZE * MAX_SB_SIZE);
-    memset(ref_ptr, (i & 0x2) ? 255 : 0, MAX_SB_SIZE * MAX_SB_SIZE);
-    memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_SB_SIZE * MAX_SB_SIZE);
-
-    ref_ret = ref_func_(src_ptr, src_stride, ref_ptr, ref_stride, msk_ptr,
-                        msk_stride, &ref_sse);
-    ASM_REGISTER_STATE_CHECK(opt_ret = opt_func_(src_ptr, src_stride, ref_ptr,
-                                                 ref_stride, msk_ptr,
-                                                 msk_stride, &opt_sse));
-
-    if (opt_ret != ref_ret || opt_sse != ref_sse) {
-      err_count++;
-      if (first_failure == -1) first_failure = i;
-    }
-  }
-
-  EXPECT_EQ(0, err_count) << "Error: Masked Variance Test ExtremeValues,"
-                          << "C output doesn't match SSSE3 output. "
-                          << "First failed at test case " << first_failure;
-}
+const int number_of_iterations = 200;
 
 typedef unsigned int (*MaskedSubPixelVarianceFunc)(
-    const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b,
-    int b_stride, const uint8_t *m, int m_stride, unsigned int *sse);
+    const uint8_t *src, int src_stride, int xoffset, int yoffset,
+    const uint8_t *ref, int ref_stride, const uint8_t *second_pred,
+    const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse);
 
 typedef std::tr1::tuple<MaskedSubPixelVarianceFunc, MaskedSubPixelVarianceFunc>
     MaskedSubPixelVarianceParam;
@@ -154,9 +59,18 @@ TEST_P(MaskedSubPixelVarianceTest, OperationCheck) {
   unsigned int ref_ret, opt_ret;
   unsigned int ref_sse, opt_sse;
   ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
-  DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
-  DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
+  // Note: We pad the input arrays out with 15 extra elements, since the SSE
+  // implementations can read up to 15 elements off the end of the main data.
+  // The extra data is never actually used, but it simplifies the code
+  // if we can do this.
+  DECLARE_ALIGNED(16, uint8_t,
+                  src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 15]);
+  DECLARE_ALIGNED(16, uint8_t,
+                  ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 15]);
+  DECLARE_ALIGNED(16, uint8_t,
+                  second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 15]);
+  DECLARE_ALIGNED(16, uint8_t,
+                  msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 15]);
   int err_count = 0;
   int first_failure = -1;
   int src_stride = (MAX_SB_SIZE + 1);
@@ -171,23 +85,26 @@ TEST_P(MaskedSubPixelVarianceTest, OperationCheck) {
     for (int j = 0; j < (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1); j++) {
       src_ptr[j] = rnd.Rand8();
       ref_ptr[j] = rnd.Rand8();
+      second_pred_ptr[j] = rnd.Rand8();
       msk_ptr[j] = rnd(65);
     }
     for (int k = 0; k < 3; k++) {
-      xoffset = xoffsets[k];
       for (int l = 0; l < 3; l++) {
         xoffset = xoffsets[k];
         yoffset = yoffsets[l];
-
-        ref_ret = ref_func_(src_ptr, src_stride, xoffset, yoffset, ref_ptr,
-                            ref_stride, msk_ptr, msk_stride, &ref_sse);
-        ASM_REGISTER_STATE_CHECK(
-            opt_ret = opt_func_(src_ptr, src_stride, xoffset, yoffset, ref_ptr,
-                                ref_stride, msk_ptr, msk_stride, &opt_sse));
-
-        if (opt_ret != ref_ret || opt_sse != ref_sse) {
-          err_count++;
-          if (first_failure == -1) first_failure = i;
+        for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
+          ref_ret = ref_func_(src_ptr, src_stride, xoffset, yoffset, ref_ptr,
+                              ref_stride, second_pred_ptr, msk_ptr, msk_stride,
+                              invert_mask, &ref_sse);
+          ASM_REGISTER_STATE_CHECK(
+              opt_ret = opt_func_(src_ptr, src_stride, xoffset, yoffset,
+                                  ref_ptr, ref_stride, second_pred_ptr, msk_ptr,
+                                  msk_stride, invert_mask, &opt_sse));
+
+          if (opt_ret != ref_ret || opt_sse != ref_sse) {
+            err_count++;
+            if (first_failure == -1) first_failure = i;
+          }
         }
       }
     }
@@ -203,9 +120,14 @@ TEST_P(MaskedSubPixelVarianceTest, ExtremeValues) {
   unsigned int ref_ret, opt_ret;
   unsigned int ref_sse, opt_sse;
   ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
-  DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
-  DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
+  DECLARE_ALIGNED(16, uint8_t,
+                  src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 15]);
+  DECLARE_ALIGNED(16, uint8_t,
+                  ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 15]);
+  DECLARE_ALIGNED(16, uint8_t,
+                  second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 15]);
+  DECLARE_ALIGNED(16, uint8_t,
+                  msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 15]);
   int first_failure_x = -1;
   int first_failure_y = -1;
   int err_count = 0;
@@ -216,26 +138,32 @@ TEST_P(MaskedSubPixelVarianceTest, ExtremeValues) {
 
   for (int xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) {
     for (int yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) {
-      for (int i = 0; i < 8; ++i) {
+      for (int i = 0; i < 16; ++i) {
         memset(src_ptr, (i & 0x1) ? 255 : 0,
                (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
         memset(ref_ptr, (i & 0x2) ? 255 : 0,
                (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
-        memset(msk_ptr, (i & 0x4) ? 64 : 0,
+        memset(second_pred_ptr, (i & 0x4) ? 255 : 0,
+               (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
+        memset(msk_ptr, (i & 0x8) ? 64 : 0,
                (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
 
-        ref_ret = ref_func_(src_ptr, src_stride, xoffset, yoffset, ref_ptr,
-                            ref_stride, msk_ptr, msk_stride, &ref_sse);
-        ASM_REGISTER_STATE_CHECK(
-            opt_ret = opt_func_(src_ptr, src_stride, xoffset, yoffset, ref_ptr,
-                                ref_stride, msk_ptr, msk_stride, &opt_sse));
-
-        if (opt_ret != ref_ret || opt_sse != ref_sse) {
-          err_count++;
-          if (first_failure == -1) {
-            first_failure = i;
-            first_failure_x = xoffset;
-            first_failure_y = yoffset;
+        for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
+          ref_ret = ref_func_(src_ptr, src_stride, xoffset, yoffset, ref_ptr,
+                              ref_stride, second_pred_ptr, msk_ptr, msk_stride,
+                              invert_mask, &ref_sse);
+          ASM_REGISTER_STATE_CHECK(
+              opt_ret = opt_func_(src_ptr, src_stride, xoffset, yoffset,
+                                  ref_ptr, ref_stride, second_pred_ptr, msk_ptr,
+                                  msk_stride, invert_mask, &opt_sse));
+
+          if (opt_ret != ref_ret || opt_sse != ref_sse) {
+            err_count++;
+            if (first_failure == -1) {
+              first_failure = i;
+              first_failure_x = xoffset;
+              first_failure_y = yoffset;
+            }
           }
         }
       }
@@ -250,105 +178,6 @@ TEST_P(MaskedSubPixelVarianceTest, ExtremeValues) {
 }
 
 #if CONFIG_HIGHBITDEPTH
-typedef std::tr1::tuple<MaskedVarianceFunc, MaskedVarianceFunc, aom_bit_depth_t>
-    HighbdMaskedVarianceParam;
-
-class HighbdMaskedVarianceTest
-    : public ::testing::TestWithParam<HighbdMaskedVarianceParam> {
- public:
-  virtual ~HighbdMaskedVarianceTest() {}
-  virtual void SetUp() {
-    opt_func_ = GET_PARAM(0);
-    ref_func_ = GET_PARAM(1);
-    bit_depth_ = GET_PARAM(2);
-  }
-
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  MaskedVarianceFunc opt_func_;
-  MaskedVarianceFunc ref_func_;
-  aom_bit_depth_t bit_depth_;
-};
-
-TEST_P(HighbdMaskedVarianceTest, OperationCheck) {
-  unsigned int ref_ret, opt_ret;
-  unsigned int ref_sse, opt_sse;
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
-  DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
-  DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
-  uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
-  uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
-  int err_count = 0;
-  int first_failure = -1;
-  int src_stride = MAX_SB_SIZE;
-  int ref_stride = MAX_SB_SIZE;
-  int msk_stride = MAX_SB_SIZE;
-
-  for (int i = 0; i < number_of_iterations; ++i) {
-    for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) {
-      src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
-      ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
-      msk_ptr[j] = rnd(65);
-    }
-
-    ref_ret = ref_func_(src8_ptr, src_stride, ref8_ptr, ref_stride, msk_ptr,
-                        msk_stride, &ref_sse);
-    ASM_REGISTER_STATE_CHECK(opt_ret = opt_func_(src8_ptr, src_stride, ref8_ptr,
-                                                 ref_stride, msk_ptr,
-                                                 msk_stride, &opt_sse));
-
-    if (opt_ret != ref_ret || opt_sse != ref_sse) {
-      err_count++;
-      if (first_failure == -1) first_failure = i;
-    }
-  }
-
-  EXPECT_EQ(0, err_count) << "Error: Masked Variance Test OperationCheck,"
-                          << "C output doesn't match SSSE3 output. "
-                          << "First failed at test case " << first_failure;
-}
-
-TEST_P(HighbdMaskedVarianceTest, ExtremeValues) {
-  unsigned int ref_ret, opt_ret;
-  unsigned int ref_sse, opt_sse;
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
-  DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
-  DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
-  uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
-  uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
-  int err_count = 0;
-  int first_failure = -1;
-  int src_stride = MAX_SB_SIZE;
-  int ref_stride = MAX_SB_SIZE;
-  int msk_stride = MAX_SB_SIZE;
-
-  for (int i = 0; i < 8; ++i) {
-    aom_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0,
-                 MAX_SB_SIZE * MAX_SB_SIZE);
-    aom_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0,
-                 MAX_SB_SIZE * MAX_SB_SIZE);
-    memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_SB_SIZE * MAX_SB_SIZE);
-
-    ref_ret = ref_func_(src8_ptr, src_stride, ref8_ptr, ref_stride, msk_ptr,
-                        msk_stride, &ref_sse);
-    ASM_REGISTER_STATE_CHECK(opt_ret = opt_func_(src8_ptr, src_stride, ref8_ptr,
-                                                 ref_stride, msk_ptr,
-                                                 msk_stride, &opt_sse));
-
-    if (opt_ret != ref_ret || opt_sse != ref_sse) {
-      err_count++;
-      if (first_failure == -1) first_failure = i;
-    }
-  }
-
-  EXPECT_EQ(0, err_count) << "Error: Masked Variance Test ExtremeValues,"
-                          << "C output doesn't match SSSE3 output. "
-                          << "First failed at test case " << first_failure;
-}
-
 typedef std::tr1::tuple<MaskedSubPixelVarianceFunc, MaskedSubPixelVarianceFunc,
                         aom_bit_depth_t>
     HighbdMaskedSubPixelVarianceParam;
@@ -375,11 +204,21 @@ TEST_P(HighbdMaskedSubPixelVarianceTest, OperationCheck) {
   unsigned int ref_ret, opt_ret;
   unsigned int ref_sse, opt_sse;
   ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
-  DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
-  DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
+  // Note: We pad the input arrays out with 7 extra elements, since the SSE
+  // implementations can read up to 7 elements off the end of the main data.
+  // The extra data is never actually used, but it simplifies the code
+  // if we can do this.
+  DECLARE_ALIGNED(16, uint16_t,
+                  src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 7]);
+  DECLARE_ALIGNED(16, uint16_t,
+                  ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 7]);
+  DECLARE_ALIGNED(16, uint16_t,
+                  second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 7]);
+  DECLARE_ALIGNED(16, uint8_t,
+                  msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 7]);
   uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
   uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
+  uint8_t *second_pred8_ptr = CONVERT_TO_BYTEPTR(second_pred_ptr);
   int err_count = 0;
   int first_failure = -1;
   int first_failure_x = -1;
@@ -390,27 +229,30 @@ TEST_P(HighbdMaskedSubPixelVarianceTest, OperationCheck) {
   int xoffset, yoffset;
 
   for (int i = 0; i < number_of_iterations; ++i) {
+    for (int j = 0; j < (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1); j++) {
+      src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
+      ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
+      second_pred_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
+      msk_ptr[j] = rnd(65);
+    }
     for (xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) {
       for (yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) {
-        for (int j = 0; j < (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1); j++) {
-          src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
-          ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
-          msk_ptr[j] = rnd(65);
-        }
-
-        ref_ret = ref_func_(src8_ptr, src_stride, xoffset, yoffset, ref8_ptr,
-                            ref_stride, msk_ptr, msk_stride, &ref_sse);
-        ASM_REGISTER_STATE_CHECK(opt_ret =
-                                     opt_func_(src8_ptr, src_stride, xoffset,
-                                               yoffset, ref8_ptr, ref_stride,
-                                               msk_ptr, msk_stride, &opt_sse));
-
-        if (opt_ret != ref_ret || opt_sse != ref_sse) {
-          err_count++;
-          if (first_failure == -1) {
-            first_failure = i;
-            first_failure_x = xoffset;
-            first_failure_y = yoffset;
+        for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
+          ref_ret = ref_func_(src8_ptr, src_stride, xoffset, yoffset, ref8_ptr,
+                              ref_stride, second_pred8_ptr, msk_ptr, msk_stride,
+                              invert_mask, &ref_sse);
+          ASM_REGISTER_STATE_CHECK(
+              opt_ret = opt_func_(src8_ptr, src_stride, xoffset, yoffset,
+                                  ref8_ptr, ref_stride, second_pred8_ptr,
+                                  msk_ptr, msk_stride, invert_mask, &opt_sse));
+
+          if (opt_ret != ref_ret || opt_sse != ref_sse) {
+            err_count++;
+            if (first_failure == -1) {
+              first_failure = i;
+              first_failure_x = xoffset;
+              first_failure_y = yoffset;
+            }
           }
         }
       }
@@ -428,11 +270,17 @@ TEST_P(HighbdMaskedSubPixelVarianceTest, ExtremeValues) {
   unsigned int ref_ret, opt_ret;
   unsigned int ref_sse, opt_sse;
   ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
-  DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
-  DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1)]);
+  DECLARE_ALIGNED(16, uint16_t,
+                  src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 7]);
+  DECLARE_ALIGNED(16, uint16_t,
+                  ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 7]);
+  DECLARE_ALIGNED(16, uint8_t,
+                  msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 7]);
+  DECLARE_ALIGNED(16, uint16_t,
+                  second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 7]);
   uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
   uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
+  uint8_t *second_pred8_ptr = CONVERT_TO_BYTEPTR(second_pred_ptr);
   int first_failure_x = -1;
   int first_failure_y = -1;
   int err_count = 0;
@@ -443,27 +291,32 @@ TEST_P(HighbdMaskedSubPixelVarianceTest, ExtremeValues) {
 
   for (int xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) {
     for (int yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) {
-      for (int i = 0; i < 8; ++i) {
+      for (int i = 0; i < 16; ++i) {
         aom_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0,
                      (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
         aom_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0,
                      (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
-        memset(msk_ptr, (i & 0x4) ? 64 : 0,
+        aom_memset16(second_pred_ptr, (i & 0x4) ? ((1 << bit_depth_) - 1) : 0,
+                     (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
+        memset(msk_ptr, (i & 0x8) ? 64 : 0,
                (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
 
-        ref_ret = ref_func_(src8_ptr, src_stride, xoffset, yoffset, ref8_ptr,
-                            ref_stride, msk_ptr, msk_stride, &ref_sse);
-        ASM_REGISTER_STATE_CHECK(opt_ret =
-                                     opt_func_(src8_ptr, src_stride, xoffset,
-                                               yoffset, ref8_ptr, ref_stride,
-                                               msk_ptr, msk_stride, &opt_sse));
-
-        if (opt_ret != ref_ret || opt_sse != ref_sse) {
-          err_count++;
-          if (first_failure == -1) {
-            first_failure = i;
-            first_failure_x = xoffset;
-            first_failure_y = yoffset;
+        for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
+          ref_ret = ref_func_(src8_ptr, src_stride, xoffset, yoffset, ref8_ptr,
+                              ref_stride, second_pred8_ptr, msk_ptr, msk_stride,
+                              invert_mask, &ref_sse);
+          ASM_REGISTER_STATE_CHECK(
+              opt_ret = opt_func_(src8_ptr, src_stride, xoffset, yoffset,
+                                  ref8_ptr, ref_stride, second_pred8_ptr,
+                                  msk_ptr, msk_stride, invert_mask, &opt_sse));
+
+          if (opt_ret != ref_ret || opt_sse != ref_sse) {
+            err_count++;
+            if (first_failure == -1) {
+              first_failure = i;
+              first_failure_x = xoffset;
+              first_failure_y = yoffset;
+            }
           }
         }
       }
@@ -482,38 +335,6 @@ using std::tr1::make_tuple;
 
 #if HAVE_SSSE3
 INSTANTIATE_TEST_CASE_P(
-    SSSE3_C_COMPARE, MaskedVarianceTest,
-    ::testing::Values(
-#if CONFIG_EXT_PARTITION
-        make_tuple(&aom_masked_variance128x128_ssse3,
-                   &aom_masked_variance128x128_c),
-        make_tuple(&aom_masked_variance128x64_ssse3,
-                   &aom_masked_variance128x64_c),
-        make_tuple(&aom_masked_variance64x128_ssse3,
-                   &aom_masked_variance64x128_c),
-#endif  // CONFIG_EXT_PARTITION
-        make_tuple(&aom_masked_variance64x64_ssse3,
-                   &aom_masked_variance64x64_c),
-        make_tuple(&aom_masked_variance64x32_ssse3,
-                   &aom_masked_variance64x32_c),
-        make_tuple(&aom_masked_variance32x64_ssse3,
-                   &aom_masked_variance32x64_c),
-        make_tuple(&aom_masked_variance32x32_ssse3,
-                   &aom_masked_variance32x32_c),
-        make_tuple(&aom_masked_variance32x16_ssse3,
-                   &aom_masked_variance32x16_c),
-        make_tuple(&aom_masked_variance16x32_ssse3,
-                   &aom_masked_variance16x32_c),
-        make_tuple(&aom_masked_variance16x16_ssse3,
-                   &aom_masked_variance16x16_c),
-        make_tuple(&aom_masked_variance16x8_ssse3, &aom_masked_variance16x8_c),
-        make_tuple(&aom_masked_variance8x16_ssse3, &aom_masked_variance8x16_c),
-        make_tuple(&aom_masked_variance8x8_ssse3, &aom_masked_variance8x8_c),
-        make_tuple(&aom_masked_variance8x4_ssse3, &aom_masked_variance8x4_c),
-        make_tuple(&aom_masked_variance4x8_ssse3, &aom_masked_variance4x8_c),
-        make_tuple(&aom_masked_variance4x4_ssse3, &aom_masked_variance4x4_c)));
-
-INSTANTIATE_TEST_CASE_P(
     SSSE3_C_COMPARE, MaskedSubPixelVarianceTest,
     ::testing::Values(
 #if CONFIG_EXT_PARTITION
@@ -553,148 +374,43 @@ INSTANTIATE_TEST_CASE_P(
 
 #if CONFIG_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
-    SSSE3_C_COMPARE, HighbdMaskedVarianceTest,
-    ::testing::Values(
-#if CONFIG_EXT_PARTITION
-        make_tuple(&aom_highbd_masked_variance128x128_ssse3,
-                   &aom_highbd_masked_variance128x128_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_variance128x64_ssse3,
-                   &aom_highbd_masked_variance128x64_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_variance64x128_ssse3,
-                   &aom_highbd_masked_variance64x128_c, AOM_BITS_8),
-#endif  // CONFIG_EXT_PARTITION
-        make_tuple(&aom_highbd_masked_variance64x64_ssse3,
-                   &aom_highbd_masked_variance64x64_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_variance64x32_ssse3,
-                   &aom_highbd_masked_variance64x32_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_variance32x64_ssse3,
-                   &aom_highbd_masked_variance32x64_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_variance32x32_ssse3,
-                   &aom_highbd_masked_variance32x32_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_variance32x16_ssse3,
-                   &aom_highbd_masked_variance32x16_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_variance16x32_ssse3,
-                   &aom_highbd_masked_variance16x32_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_variance16x16_ssse3,
-                   &aom_highbd_masked_variance16x16_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_variance16x8_ssse3,
-                   &aom_highbd_masked_variance16x8_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_variance8x16_ssse3,
-                   &aom_highbd_masked_variance8x16_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_variance8x8_ssse3,
-                   &aom_highbd_masked_variance8x8_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_variance8x4_ssse3,
-                   &aom_highbd_masked_variance8x4_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_variance4x8_ssse3,
-                   &aom_highbd_masked_variance4x8_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_variance4x4_ssse3,
-                   &aom_highbd_masked_variance4x4_c, AOM_BITS_8),
-#if CONFIG_EXT_PARTITION
-        make_tuple(&aom_highbd_10_masked_variance128x128_ssse3,
-                   &aom_highbd_10_masked_variance128x128_c, AOM_BITS_10),
-        make_tuple(&aom_highbd_10_masked_variance128x64_ssse3,
-                   &aom_highbd_10_masked_variance128x64_c, AOM_BITS_10),
-        make_tuple(&aom_highbd_10_masked_variance64x128_ssse3,
-                   &aom_highbd_10_masked_variance64x128_c, AOM_BITS_10),
-#endif  // CONFIG_EXT_PARTITION
-        make_tuple(&aom_highbd_10_masked_variance64x64_ssse3,
-                   &aom_highbd_10_masked_variance64x64_c, AOM_BITS_10),
-        make_tuple(&aom_highbd_10_masked_variance64x32_ssse3,
-                   &aom_highbd_10_masked_variance64x32_c, AOM_BITS_10),
-        make_tuple(&aom_highbd_10_masked_variance32x64_ssse3,
-                   &aom_highbd_10_masked_variance32x64_c, AOM_BITS_10),
-        make_tuple(&aom_highbd_10_masked_variance32x32_ssse3,
-                   &aom_highbd_10_masked_variance32x32_c, AOM_BITS_10),
-        make_tuple(&aom_highbd_10_masked_variance32x16_ssse3,
-                   &aom_highbd_10_masked_variance32x16_c, AOM_BITS_10),
-        make_tuple(&aom_highbd_10_masked_variance16x32_ssse3,
-                   &aom_highbd_10_masked_variance16x32_c, AOM_BITS_10),
-        make_tuple(&aom_highbd_10_masked_variance16x16_ssse3,
-                   &aom_highbd_10_masked_variance16x16_c, AOM_BITS_10),
-        make_tuple(&aom_highbd_10_masked_variance16x8_ssse3,
-                   &aom_highbd_10_masked_variance16x8_c, AOM_BITS_10),
-        make_tuple(&aom_highbd_10_masked_variance8x16_ssse3,
-                   &aom_highbd_10_masked_variance8x16_c, AOM_BITS_10),
-        make_tuple(&aom_highbd_10_masked_variance8x8_ssse3,
-                   &aom_highbd_10_masked_variance8x8_c, AOM_BITS_10),
-        make_tuple(&aom_highbd_10_masked_variance8x4_ssse3,
-                   &aom_highbd_10_masked_variance8x4_c, AOM_BITS_10),
-        make_tuple(&aom_highbd_10_masked_variance4x8_ssse3,
-                   &aom_highbd_10_masked_variance4x8_c, AOM_BITS_10),
-        make_tuple(&aom_highbd_10_masked_variance4x4_ssse3,
-                   &aom_highbd_10_masked_variance4x4_c, AOM_BITS_10),
-#if CONFIG_EXT_PARTITION
-        make_tuple(&aom_highbd_12_masked_variance128x128_ssse3,
-                   &aom_highbd_12_masked_variance128x128_c, AOM_BITS_12),
-        make_tuple(&aom_highbd_12_masked_variance128x64_ssse3,
-                   &aom_highbd_12_masked_variance128x64_c, AOM_BITS_12),
-        make_tuple(&aom_highbd_12_masked_variance64x128_ssse3,
-                   &aom_highbd_12_masked_variance64x128_c, AOM_BITS_12),
-#endif  // CONFIG_EXT_PARTITION
-        make_tuple(&aom_highbd_12_masked_variance64x64_ssse3,
-                   &aom_highbd_12_masked_variance64x64_c, AOM_BITS_12),
-        make_tuple(&aom_highbd_12_masked_variance64x32_ssse3,
-                   &aom_highbd_12_masked_variance64x32_c, AOM_BITS_12),
-        make_tuple(&aom_highbd_12_masked_variance32x64_ssse3,
-                   &aom_highbd_12_masked_variance32x64_c, AOM_BITS_12),
-        make_tuple(&aom_highbd_12_masked_variance32x32_ssse3,
-                   &aom_highbd_12_masked_variance32x32_c, AOM_BITS_12),
-        make_tuple(&aom_highbd_12_masked_variance32x16_ssse3,
-                   &aom_highbd_12_masked_variance32x16_c, AOM_BITS_12),
-        make_tuple(&aom_highbd_12_masked_variance16x32_ssse3,
-                   &aom_highbd_12_masked_variance16x32_c, AOM_BITS_12),
-        make_tuple(&aom_highbd_12_masked_variance16x16_ssse3,
-                   &aom_highbd_12_masked_variance16x16_c, AOM_BITS_12),
-        make_tuple(&aom_highbd_12_masked_variance16x8_ssse3,
-                   &aom_highbd_12_masked_variance16x8_c, AOM_BITS_12),
-        make_tuple(&aom_highbd_12_masked_variance8x16_ssse3,
-                   &aom_highbd_12_masked_variance8x16_c, AOM_BITS_12),
-        make_tuple(&aom_highbd_12_masked_variance8x8_ssse3,
-                   &aom_highbd_12_masked_variance8x8_c, AOM_BITS_12),
-        make_tuple(&aom_highbd_12_masked_variance8x4_ssse3,
-                   &aom_highbd_12_masked_variance8x4_c, AOM_BITS_12),
-        make_tuple(&aom_highbd_12_masked_variance4x8_ssse3,
-                   &aom_highbd_12_masked_variance4x8_c, AOM_BITS_12),
-        make_tuple(&aom_highbd_12_masked_variance4x4_ssse3,
-                   &aom_highbd_12_masked_variance4x4_c, AOM_BITS_12)));
-
-INSTANTIATE_TEST_CASE_P(
     SSSE3_C_COMPARE, HighbdMaskedSubPixelVarianceTest,
     ::testing::Values(
 #if CONFIG_EXT_PARTITION
-        make_tuple(&aom_highbd_masked_sub_pixel_variance128x128_ssse3,
-                   &aom_highbd_masked_sub_pixel_variance128x128_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_sub_pixel_variance128x64_ssse3,
-                   &aom_highbd_masked_sub_pixel_variance128x64_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_sub_pixel_variance64x128_ssse3,
-                   &aom_highbd_masked_sub_pixel_variance64x128_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_8_masked_sub_pixel_variance128x128_ssse3,
+                   &aom_highbd_8_masked_sub_pixel_variance128x128_c,
+                   AOM_BITS_8),
+        make_tuple(&aom_highbd_8_masked_sub_pixel_variance128x64_ssse3,
+                   &aom_highbd_8_masked_sub_pixel_variance128x64_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x128_ssse3,
+                   &aom_highbd_8_masked_sub_pixel_variance64x128_c, AOM_BITS_8),
 #endif  // CONFIG_EXT_PARTITION
-        make_tuple(&aom_highbd_masked_sub_pixel_variance64x64_ssse3,
-                   &aom_highbd_masked_sub_pixel_variance64x64_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_sub_pixel_variance64x32_ssse3,
-                   &aom_highbd_masked_sub_pixel_variance64x32_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_sub_pixel_variance32x64_ssse3,
-                   &aom_highbd_masked_sub_pixel_variance32x64_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_sub_pixel_variance32x32_ssse3,
-                   &aom_highbd_masked_sub_pixel_variance32x32_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_sub_pixel_variance32x16_ssse3,
-                   &aom_highbd_masked_sub_pixel_variance32x16_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_sub_pixel_variance16x32_ssse3,
-                   &aom_highbd_masked_sub_pixel_variance16x32_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_sub_pixel_variance16x16_ssse3,
-                   &aom_highbd_masked_sub_pixel_variance16x16_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_sub_pixel_variance16x8_ssse3,
-                   &aom_highbd_masked_sub_pixel_variance16x8_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_sub_pixel_variance8x16_ssse3,
-                   &aom_highbd_masked_sub_pixel_variance8x16_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_sub_pixel_variance8x8_ssse3,
-                   &aom_highbd_masked_sub_pixel_variance8x8_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_sub_pixel_variance8x4_ssse3,
-                   &aom_highbd_masked_sub_pixel_variance8x4_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_sub_pixel_variance4x8_ssse3,
-                   &aom_highbd_masked_sub_pixel_variance4x8_c, AOM_BITS_8),
-        make_tuple(&aom_highbd_masked_sub_pixel_variance4x4_ssse3,
-                   &aom_highbd_masked_sub_pixel_variance4x4_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x64_ssse3,
+                   &aom_highbd_8_masked_sub_pixel_variance64x64_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x32_ssse3,
+                   &aom_highbd_8_masked_sub_pixel_variance64x32_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x64_ssse3,
+                   &aom_highbd_8_masked_sub_pixel_variance32x64_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x32_ssse3,
+                   &aom_highbd_8_masked_sub_pixel_variance32x32_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x16_ssse3,
+                   &aom_highbd_8_masked_sub_pixel_variance32x16_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x32_ssse3,
+                   &aom_highbd_8_masked_sub_pixel_variance16x32_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x16_ssse3,
+                   &aom_highbd_8_masked_sub_pixel_variance16x16_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x8_ssse3,
+                   &aom_highbd_8_masked_sub_pixel_variance16x8_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x16_ssse3,
+                   &aom_highbd_8_masked_sub_pixel_variance8x16_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x8_ssse3,
+                   &aom_highbd_8_masked_sub_pixel_variance8x8_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x4_ssse3,
+                   &aom_highbd_8_masked_sub_pixel_variance8x4_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_8_masked_sub_pixel_variance4x8_ssse3,
+                   &aom_highbd_8_masked_sub_pixel_variance4x8_c, AOM_BITS_8),
+        make_tuple(&aom_highbd_8_masked_sub_pixel_variance4x4_ssse3,
+                   &aom_highbd_8_masked_sub_pixel_variance4x4_c, AOM_BITS_8),
 #if CONFIG_EXT_PARTITION
         make_tuple(&aom_highbd_10_masked_sub_pixel_variance128x128_ssse3,
                    &aom_highbd_10_masked_sub_pixel_variance128x128_c,
diff --git a/third_party/aom/test/motion_vector_test.cc b/third_party/aom/test/motion_vector_test.cc
index 403a8f1a7..fa47494e8 100644
--- a/third_party/aom/test/motion_vector_test.cc
+++ b/third_party/aom/test/motion_vector_test.cc
@@ -17,12 +17,6 @@
 #include "test/yuv_video_source.h"
 
 namespace {
-#if defined(__has_feature)
-#if __has_feature(address_sanitizer)
-#define BUILDING_WITH_ASAN
-#endif
-#endif
-
 #define MAX_EXTREME_MV 1
 #define MIN_EXTREME_MV 2
 
@@ -32,7 +26,7 @@ const libaom_test::TestMode kEncodingModeVectors[] = {
 };
 
 // Encoding speeds
-const int kCpuUsedVectors[] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+const int kCpuUsedVectors[] = { 1, 5 };
 
 // MV test modes: 1 - always use maximum MV; 2 - always use minimum MV.
 const int kMVTestModes[] = { MAX_EXTREME_MV, MIN_EXTREME_MV };
@@ -85,16 +79,11 @@ TEST_P(MotionVectorTestLarge, OverallTest) {
   int width = 3840;
   int height = 2160;
 
-#ifdef BUILDING_WITH_ASAN
-  // On the 32-bit system, if using 4k test clip, an "out of memory" error
-  // occurs because of the AddressSanitizer instrumentation memory overhead.
-  // Here, reduce the test clip's resolution while testing on 32-bit system
-  // and AddressSanitizer is enabled.
+  // Reduce the test clip's resolution while testing on a 32-bit system.
   if (sizeof(void *) == 4) {
     width = 2048;
     height = 1080;
   }
-#endif
 
   cfg_.rc_target_bitrate = 24000;
   cfg_.g_profile = 0;
@@ -102,7 +91,7 @@ TEST_P(MotionVectorTestLarge, OverallTest) {
 
   testing::internal::scoped_ptr<libaom_test::VideoSource> video;
   video.reset(new libaom_test::YUVVideoSource(
-      "niklas_640_480_30.yuv", AOM_IMG_FMT_I420, width, height, 30, 1, 0, 5));
+      "niklas_640_480_30.yuv", AOM_IMG_FMT_I420, width, height, 30, 1, 0, 3));
 
   ASSERT_TRUE(video.get() != NULL);
   ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
diff --git a/third_party/aom/test/partial_idct_test.cc b/third_party/aom/test/partial_idct_test.cc
index 0899b60c3..033f18294 100644
--- a/third_party/aom/test/partial_idct_test.cc
+++ b/third_party/aom/test/partial_idct_test.cc
@@ -52,7 +52,7 @@ typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmWithBdFunc, InvTxfmWithBdFunc,
                         TX_SIZE, int, int, int>
     PartialInvTxfmParam;
 const int kMaxNumCoeffs = 1024;
-const int kCountTestBlock = 1000;
+const int kCountTestBlock = 10000;
 
 class PartialIDctTest : public ::testing::TestWithParam<PartialInvTxfmParam> {
  public:
@@ -231,8 +231,8 @@ TEST_P(PartialIDctTest, AddOutputBlock) {
 }
 
 TEST_P(PartialIDctTest, SingleExtremeCoeff) {
-  const int16_t max_coeff = std::numeric_limits<int16_t>::max();
-  const int16_t min_coeff = std::numeric_limits<int16_t>::min();
+  const int16_t max_coeff = INT16_MAX;
+  const int16_t min_coeff = INT16_MIN;
   for (int i = 0; i < last_nonzero_; ++i) {
     memset(input_block_, 0, sizeof(*input_block_) * input_block_size_);
     // Run once for min and once for max.
@@ -418,6 +418,30 @@ INSTANTIATE_TEST_CASE_P(SSSE3, PartialIDctTest,
                         ::testing::ValuesIn(ssse3_partial_idct_tests));
 #endif  // HAVE_SSSE3
 
+#if HAVE_AVX2
+const PartialInvTxfmParam avx2_partial_idct_tests[] = {
+  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
+             &wrapper<aom_idct16x16_256_add_avx2>, TX_16X16, 256, 8, 1),
+  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
+             &wrapper<aom_idct16x16_38_add_avx2>, TX_16X16, 38, 8, 1),
+  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
+             &wrapper<aom_idct16x16_10_add_avx2>, TX_16X16, 10, 8, 1),
+  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
+             &wrapper<aom_idct16x16_1_add_avx2>, TX_16X16, 1, 8, 1),
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_1024_add_avx2>, TX_32X32, 1024, 8, 1),
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_135_add_avx2>, TX_32X32, 135, 8, 1),
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_34_add_avx2>, TX_32X32, 34, 8, 1),
+  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
+             &wrapper<aom_idct32x32_1_add_avx2>, TX_32X32, 1, 8, 1),
+};
+
+INSTANTIATE_TEST_CASE_P(AVX2, PartialIDctTest,
+                        ::testing::ValuesIn(avx2_partial_idct_tests));
+#endif  // HAVE_AVX2
+
 #if HAVE_DSPR2 && !CONFIG_HIGHBITDEPTH
 const PartialInvTxfmParam dspr2_partial_idct_tests[] = {
   make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
diff --git a/third_party/aom/test/quantize_func_test.cc b/third_party/aom/test/quantize_func_test.cc
new file mode 100644
index 000000000..32b1d5139
--- /dev/null
+++ b/third_party/aom/test/quantize_func_test.cc
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "./av1_rtcd.h"
+#include "aom/aom_codec.h"
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/av1_quantize.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+namespace {
+using libaom_test::ACMRandom;
+
+#if !CONFIG_AOM_QM
+typedef void (*QuantizeFunc)(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                             int skip_block, const int16_t *zbin_ptr,
+                             const int16_t *round_ptr, const int16_t *quant_ptr,
+                             const int16_t *quant_shift_ptr,
+                             tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                             const int16_t *dequant_ptr, uint16_t *eob_ptr,
+                             const int16_t *scan, const int16_t *iscan);
+#else
+typedef void (*QuantizeFunc)(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                             int skip_block, const int16_t *zbin_ptr,
+                             const int16_t *round_ptr, const int16_t *quant_ptr,
+                             const int16_t *quant_shift_ptr,
+                             tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                             const int16_t *dequant_ptr, uint16_t *eob_ptr,
+                             const int16_t *scan, const int16_t *iscan,
+                             const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr);
+#endif
+
+typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, TX_SIZE, aom_bit_depth_t>
+    QuantizeParam;
+
+typedef struct {
+  QUANTS quant;
+  Dequants dequant;
+} QuanTable;
+
+const int kTestNum = 1000;
+
+class QuantizeTest : public ::testing::TestWithParam<QuantizeParam> {
+ protected:
+  QuantizeTest()
+      : quant_ref_(GET_PARAM(0)), quant_(GET_PARAM(1)), tx_size_(GET_PARAM(2)),
+        bd_(GET_PARAM(3)) {}
+
+  virtual ~QuantizeTest() {}
+
+  virtual void SetUp() {  // Allocate tables and the shared scratch buffer.
+    qtab_ = reinterpret_cast<QuanTable *>(aom_memalign(16, sizeof(*qtab_)));
+    coeff_ = reinterpret_cast<tran_low_t *>(
+        aom_memalign(16, 6 * getCoeffNum() * sizeof(tran_low_t)));
+    ASSERT_TRUE(qtab_ != NULL && coeff_ != NULL) << "aom_memalign failed";
+    InitQuantizer();
+  }
+
+  virtual void TearDown() {
+    aom_free(qtab_);
+    qtab_ = NULL;
+    aom_free(coeff_);
+    coeff_ = NULL;
+    libaom_test::ClearSystemState();
+  }
+
+  void InitQuantizer() {
+    av1_build_quantizer(bd_, 0, 0, 0, &qtab_->quant, &qtab_->dequant);
+  }
+
+  // Runs quant_ref_ and quant_ with the luma tables for qindex |q| over
+  // |testNum| iterations (re-randomizing coeff_ when |isLoop|) and compares
+  // qcoeff, dqcoeff and eob. Returns at the first mismatch.
+  void QuantizeRun(bool isLoop, int q = 0, int testNum = 1) {
+    tran_low_t *coeff_ptr = coeff_;
+    const intptr_t n_coeffs = getCoeffNum();
+    const int skip_block = 0;
+
+    // coeff_ holds six n_coeffs-sized regions: input, reference qcoeff and
+    // dqcoeff, tested qcoeff and dqcoeff, then the two eob values.
+    tran_low_t *qcoeff_ref = coeff_ptr + n_coeffs;
+    tran_low_t *dqcoeff_ref = qcoeff_ref + n_coeffs;
+    tran_low_t *qcoeff = dqcoeff_ref + n_coeffs;
+    tran_low_t *dqcoeff = qcoeff + n_coeffs;
+    uint16_t *eob = (uint16_t *)(dqcoeff + n_coeffs);
+
+    // Testing uses 2-D DCT scan order table
+    const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT, 0);
+
+    // Testing uses luminance quantization table
+    const int16_t *zbin = qtab_->quant.y_zbin[q];
+    const int16_t *round_fp = qtab_->quant.y_round_fp[q];
+    const int16_t *quant_fp = qtab_->quant.y_quant_fp[q];
+    const int16_t *quant_shift = qtab_->quant.y_quant_shift[q];
+    const int16_t *dequant = qtab_->dequant.y_dequant[q];
+    const size_t bufferSize = n_coeffs;
+
+    for (int i = 0; i < testNum; ++i) {
+      if (isLoop) FillCoeffRandom();
+      memset(qcoeff_ref, 0, 5 * n_coeffs * sizeof(*qcoeff_ref));
+      quant_ref_(coeff_ptr, n_coeffs, skip_block, zbin, round_fp, quant_fp,
+                 quant_shift, qcoeff_ref, dqcoeff_ref, dequant, &eob[0],
+                 sc->scan, sc->iscan);
+      ASM_REGISTER_STATE_CHECK(quant_(
+          coeff_ptr, n_coeffs, skip_block, zbin, round_fp, quant_fp,
+          quant_shift, qcoeff, dqcoeff, dequant, &eob[1], sc->scan, sc->iscan));
+
+      ASSERT_NO_FATAL_FAILURE(
+          CompareResults(qcoeff_ref, qcoeff, bufferSize, "Qcoeff", q, i));
+      ASSERT_NO_FATAL_FAILURE(
+          CompareResults(dqcoeff_ref, dqcoeff, bufferSize, "Dqcoeff", q, i));
+      ASSERT_EQ(eob[0], eob[1]) << "eobs mismatch on test: " << i;
+    }
+  }
+
+  // Asserts buf[k] == buf_ref[k] for k in [0, size); returns at first mismatch.
+  void CompareResults(const tran_low_t *buf_ref, const tran_low_t *buf,
+                      int size, const char *text, int q, int number) {
+    for (int k = 0; k < size; ++k) {
+      ASSERT_EQ(buf_ref[k], buf[k]) << text << " mismatch on test: " << number
+                                    << " at position: " << k << " Q: " << q;
+    }
+  }
+
+  int getCoeffNum() { return tx_size_2d[tx_size_]; }
+
+  // Fills coeff_ with |c| everywhere when |isConstant|; otherwise zeroes it
+  // and overwrites a random-length prefix with random coefficients.
+  void FillCoeffGeneric(bool isConstant, tran_low_t c = 0) {
+    const int total = getCoeffNum();
+    if (isConstant) {
+      for (int k = 0; k < total; ++k) coeff_[k] = c;
+      return;
+    }
+
+    FillCoeffZero();
+    const int prefix_len = rnd_.Rand16() % total;
+    for (int k = 0; k < prefix_len; ++k) {
+      coeff_[k] = GetRandomCoeff();
+    }
+  }
+
+  void FillCoeffZero() { FillCoeffGeneric(true); }
+
+  void FillCoeffConstant() {
+    tran_low_t c = GetRandomCoeff();
+    FillCoeffGeneric(true, c);
+  }
+
+  void FillDcOnly() {
+    FillCoeffZero();
+    coeff_[0] = GetRandomCoeff();
+  }
+
+  void FillDcLargeNegative() {
+    FillCoeffZero();
+    // Generate a qcoeff which contains 512/-512 (0x0200/0xFE00) to catch issues
+    // like BUG=883 where the constant being compared was incorrectly
+    // initialized.
+    coeff_[0] = -8191;
+  }
+
+  void FillCoeffRandom() { FillCoeffGeneric(false); }
+
+  tran_low_t GetRandomCoeff() {
+    return clamp((int16_t)rnd_.Rand16(), INT16_MIN + 1, INT16_MAX);
+  }
+
+  ACMRandom rnd_;
+  QuanTable *qtab_;
+  tran_low_t *coeff_;
+  QuantizeFunc quant_ref_;
+  QuantizeFunc quant_;
+  TX_SIZE tx_size_;
+  aom_bit_depth_t bd_;
+};
+
+TEST_P(QuantizeTest, ZeroInput) {
+  FillCoeffZero();
+  QuantizeRun(false);
+}
+
+TEST_P(QuantizeTest, LargeNegativeInput) {
+  FillDcLargeNegative();
+  QuantizeRun(false);
+}
+
+TEST_P(QuantizeTest, DcOnlyInput) {
+  FillDcOnly();
+  QuantizeRun(false);
+}
+
+TEST_P(QuantizeTest, RandomInput) { QuantizeRun(true, 0, kTestNum); }
+
+TEST_P(QuantizeTest, MultipleQ) {  // Random inputs at every qindex.
+  for (int q = 0; q < QINDEX_RANGE; ++q) {
+    ASSERT_NO_FATAL_FAILURE(QuantizeRun(true, q, kTestNum));
+  }
+}
+
+using std::tr1::make_tuple;
+
+#if HAVE_SSE2
+const QuantizeParam kQParamArraySSE2[] = { make_tuple(
+    &av1_quantize_fp_c, &av1_quantize_fp_sse2, TX_16X16, AOM_BITS_8) };
+
+INSTANTIATE_TEST_CASE_P(SSE2, QuantizeTest,
+                        ::testing::ValuesIn(kQParamArraySSE2));
+#endif
+
+#if !CONFIG_HIGHBITDEPTH && HAVE_SSSE3 && ARCH_X86_64
+const QuantizeParam kQParamArraySSSE3[] = {
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_ssse3, TX_16X16, AOM_BITS_8),
+  // TODO(any):
+  //  The following test does not pass yet:
+  // make_tuple(av1_quantize_fp_c, av1_quantize_fp_32x32_ssse3, TX_32X32,
+  // AOM_BITS_8)
+};
+INSTANTIATE_TEST_CASE_P(SSSE3, QuantizeTest,
+                        ::testing::ValuesIn(kQParamArraySSSE3));
+#endif
+
+}  // namespace
diff --git a/third_party/aom/test/sad_test.cc b/third_party/aom/test/sad_test.cc
index c3b5dac42..7564a6760 100644
--- a/third_party/aom/test/sad_test.cc
+++ b/third_party/aom/test/sad_test.cc
@@ -740,13 +740,6 @@ INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::ValuesIn(x4d_c_tests));
 
 //------------------------------------------------------------------------------
 // ARM functions
-#if HAVE_MEDIA
-const SadMxNParam media_tests[] = {
-  make_tuple(16, 16, &aom_sad16x16_media, -1),
-};
-INSTANTIATE_TEST_CASE_P(MEDIA, SADTest, ::testing::ValuesIn(media_tests));
-#endif  // HAVE_MEDIA
-
 #if HAVE_NEON
 const SadMxNParam neon_tests[] = {
   make_tuple(64, 64, &aom_sad64x64_neon, -1),
diff --git a/third_party/aom/test/simd_avx2_test.cc b/third_party/aom/test/simd_avx2_test.cc
new file mode 100644
index 000000000..d54d201b9
--- /dev/null
+++ b/third_party/aom/test/simd_avx2_test.cc
@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#define ARCH AVX2
+#define ARCH_POSTFIX(name) name##_avx2
+#define SIMD_NAMESPACE simd_test_avx2
+#include "./simd_impl.h"
diff --git a/third_party/aom/test/simd_cmp_avx2.cc b/third_party/aom/test/simd_cmp_avx2.cc
new file mode 100644
index 000000000..47ae11c62
--- /dev/null
+++ b/third_party/aom/test/simd_cmp_avx2.cc
@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#define ARCH AVX2
+#define ARCH_POSTFIX(name) name##_avx2
+#define SIMD_NAMESPACE simd_test_avx2
+#include "./simd_cmp_impl.h"
diff --git a/third_party/aom/test/simd_cmp_impl.h b/third_party/aom/test/simd_cmp_impl.h
index 28bd64a5b..4a9c1f7be 100644
--- a/third_party/aom/test/simd_cmp_impl.h
+++ b/third_party/aom/test/simd_cmp_impl.h
@@ -16,7 +16,7 @@
 #include "aom_dsp/aom_simd.h"
 #undef SIMD_INLINE
 #define SIMD_INLINE static  // Don't enforce inlining
-#include "aom_dsp/simd/v128_intrinsics_c.h"
+#include "aom_dsp/simd/v256_intrinsics_c.h"
 
 // Machine tuned code goes into this file. This file is included from
 // simd_cmp_sse2.cc, simd_cmp_ssse3.cc etc which define the macros
@@ -224,6 +224,104 @@ c_v128 c_imm_v128_align(c_v128 a, c_v128 b) {
   return c_v128_align(a, b, shift);
 }
 
+template <int shift>
+v256 imm_v256_shl_n_byte(v256 a) {
+  return v256_shl_n_byte(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_byte(v256 a) {
+  return v256_shr_n_byte(a, shift);
+}
+template <int shift>
+v256 imm_v256_shl_n_8(v256 a) {
+  return v256_shl_n_8(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_u8(v256 a) {
+  return v256_shr_n_u8(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_s8(v256 a) {
+  return v256_shr_n_s8(a, shift);
+}
+template <int shift>
+v256 imm_v256_shl_n_16(v256 a) {
+  return v256_shl_n_16(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_u16(v256 a) {
+  return v256_shr_n_u16(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_s16(v256 a) {
+  return v256_shr_n_s16(a, shift);
+}
+template <int shift>
+v256 imm_v256_shl_n_32(v256 a) {
+  return v256_shl_n_32(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_u32(v256 a) {
+  return v256_shr_n_u32(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_s32(v256 a) {
+  return v256_shr_n_s32(a, shift);
+}
+template <int shift>
+v256 imm_v256_align(v256 a, v256 b) {
+  return v256_align(a, b, shift);
+}
+
+template <int shift>
+c_v256 c_imm_v256_shl_n_byte(c_v256 a) {
+  return c_v256_shl_n_byte(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_byte(c_v256 a) {
+  return c_v256_shr_n_byte(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shl_n_8(c_v256 a) {
+  return c_v256_shl_n_8(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_u8(c_v256 a) {
+  return c_v256_shr_n_u8(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_s8(c_v256 a) {
+  return c_v256_shr_n_s8(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shl_n_16(c_v256 a) {
+  return c_v256_shl_n_16(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_u16(c_v256 a) {
+  return c_v256_shr_n_u16(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_s16(c_v256 a) {
+  return c_v256_shr_n_s16(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shl_n_32(c_v256 a) {
+  return c_v256_shl_n_32(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_u32(c_v256 a) {
+  return c_v256_shr_n_u32(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_s32(c_v256 a) {
+  return c_v256_shr_n_s32(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_align(c_v256 a, c_v256 b) {
+  return c_v256_align(a, b, shift);
+}
+
 // Wrappers around the the SAD and SSD functions
 uint32_t v64_sad_u8(v64 a, v64 b) {
   return v64_sad_u8_sum(::v64_sad_u8(v64_sad_u8_init(), a, b));
@@ -250,6 +348,18 @@ uint32_t c_v128_sad_u8(c_v128 a, c_v128 b) {
 uint32_t c_v128_ssd_u8(c_v128 a, c_v128 b) {
   return c_v128_ssd_u8_sum(::c_v128_ssd_u8(c_v128_ssd_u8_init(), a, b));
 }
+uint32_t v256_sad_u8(v256 a, v256 b) {
+  return v256_sad_u8_sum(::v256_sad_u8(v256_sad_u8_init(), a, b));
+}
+uint32_t v256_ssd_u8(v256 a, v256 b) {
+  return v256_ssd_u8_sum(::v256_ssd_u8(v256_ssd_u8_init(), a, b));
+}
+uint32_t c_v256_sad_u8(c_v256 a, c_v256 b) {
+  return c_v256_sad_u8_sum(::c_v256_sad_u8(c_v256_sad_u8_init(), a, b));
+}
+uint32_t c_v256_ssd_u8(c_v256 a, c_v256 b) {
+  return c_v256_ssd_u8_sum(::c_v256_ssd_u8(c_v256_ssd_u8_init(), a, b));
+}
 
 namespace {
 
@@ -657,6 +767,265 @@ const mapping m[] = { MAP(v64_sad_u8),
                       MAP(v64_store_unaligned),
                       MAP(v128_load_unaligned),
                       MAP(v128_store_unaligned),
+                      MAP(v256_sad_u8),
+                      MAP(v256_ssd_u8),
+                      MAP(v256_hadd_u8),
+                      MAP(v256_dotp_s16),
+                      MAP(v256_add_8),
+                      MAP(v256_add_16),
+                      MAP(v256_sadd_s16),
+                      MAP(v256_add_32),
+                      MAP(v256_sub_8),
+                      MAP(v256_ssub_u8),
+                      MAP(v256_ssub_s8),
+                      MAP(v256_sub_16),
+                      MAP(v256_ssub_u16),
+                      MAP(v256_ssub_s16),
+                      MAP(v256_sub_32),
+                      MAP(v256_ziplo_8),
+                      MAP(v256_ziphi_8),
+                      MAP(v256_ziplo_16),
+                      MAP(v256_ziphi_16),
+                      MAP(v256_ziplo_32),
+                      MAP(v256_ziphi_32),
+                      MAP(v256_ziplo_64),
+                      MAP(v256_ziphi_64),
+                      MAP(v256_unziphi_8),
+                      MAP(v256_unziplo_8),
+                      MAP(v256_unziphi_16),
+                      MAP(v256_unziplo_16),
+                      MAP(v256_unziphi_32),
+                      MAP(v256_unziplo_32),
+                      MAP(v256_pack_s32_s16),
+                      MAP(v256_pack_s16_u8),
+                      MAP(v256_pack_s16_s8),
+                      MAP(v256_or),
+                      MAP(v256_xor),
+                      MAP(v256_and),
+                      MAP(v256_andn),
+                      MAP(v256_mullo_s16),
+                      MAP(v256_mulhi_s16),
+                      MAP(v256_mullo_s32),
+                      MAP(v256_madd_s16),
+                      MAP(v256_madd_us8),
+                      MAP(v256_avg_u8),
+                      MAP(v256_rdavg_u8),
+                      MAP(v256_avg_u16),
+                      MAP(v256_min_u8),
+                      MAP(v256_max_u8),
+                      MAP(v256_min_s8),
+                      MAP(v256_max_s8),
+                      MAP(v256_min_s16),
+                      MAP(v256_max_s16),
+                      MAP(v256_cmpgt_s8),
+                      MAP(v256_cmplt_s8),
+                      MAP(v256_cmpeq_8),
+                      MAP(v256_cmpgt_s16),
+                      MAP(v256_cmplt_s16),
+                      MAP(v256_cmpeq_16),
+                      MAP(v256_shuffle_8),
+                      MAP(v256_pshuffle_8),
+                      MAP(imm_v256_align<1>),
+                      MAP(imm_v256_align<2>),
+                      MAP(imm_v256_align<3>),
+                      MAP(imm_v256_align<4>),
+                      MAP(imm_v256_align<5>),
+                      MAP(imm_v256_align<6>),
+                      MAP(imm_v256_align<7>),
+                      MAP(imm_v256_align<8>),
+                      MAP(imm_v256_align<9>),
+                      MAP(imm_v256_align<10>),
+                      MAP(imm_v256_align<11>),
+                      MAP(imm_v256_align<12>),
+                      MAP(imm_v256_align<13>),
+                      MAP(imm_v256_align<14>),
+                      MAP(imm_v256_align<15>),
+                      MAP(imm_v256_align<16>),
+                      MAP(imm_v256_align<17>),
+                      MAP(imm_v256_align<18>),
+                      MAP(imm_v256_align<19>),
+                      MAP(imm_v256_align<20>),
+                      MAP(imm_v256_align<21>),
+                      MAP(imm_v256_align<22>),
+                      MAP(imm_v256_align<23>),
+                      MAP(imm_v256_align<24>),
+                      MAP(imm_v256_align<25>),
+                      MAP(imm_v256_align<26>),
+                      MAP(imm_v256_align<27>),
+                      MAP(imm_v256_align<28>),
+                      MAP(imm_v256_align<29>),
+                      MAP(imm_v256_align<30>),
+                      MAP(imm_v256_align<31>),
+                      MAP(v256_from_v128),
+                      MAP(v256_zip_8),
+                      MAP(v256_zip_16),
+                      MAP(v256_zip_32),
+                      MAP(v256_mul_s16),
+                      MAP(v256_unpack_u8_s16),
+                      MAP(v256_unpack_s8_s16),
+                      MAP(v256_unpack_u16_s32),
+                      MAP(v256_unpack_s16_s32),
+                      MAP(v256_shl_8),
+                      MAP(v256_shr_u8),
+                      MAP(v256_shr_s8),
+                      MAP(v256_shl_16),
+                      MAP(v256_shr_u16),
+                      MAP(v256_shr_s16),
+                      MAP(v256_shl_32),
+                      MAP(v256_shr_u32),
+                      MAP(v256_shr_s32),
+                      MAP(v256_abs_s8),
+                      MAP(v256_abs_s16),
+                      MAP(v256_padd_s16),
+                      MAP(v256_unpacklo_u16_s32),
+                      MAP(v256_unpacklo_s16_s32),
+                      MAP(v256_unpackhi_u16_s32),
+                      MAP(v256_unpackhi_s16_s32),
+                      MAP(imm_v256_shr_n_byte<1>),
+                      MAP(imm_v256_shr_n_byte<2>),
+                      MAP(imm_v256_shr_n_byte<3>),
+                      MAP(imm_v256_shr_n_byte<4>),
+                      MAP(imm_v256_shr_n_byte<5>),
+                      MAP(imm_v256_shr_n_byte<6>),
+                      MAP(imm_v256_shr_n_byte<7>),
+                      MAP(imm_v256_shr_n_byte<8>),
+                      MAP(imm_v256_shr_n_byte<9>),
+                      MAP(imm_v256_shr_n_byte<10>),
+                      MAP(imm_v256_shr_n_byte<11>),
+                      MAP(imm_v256_shr_n_byte<12>),
+                      MAP(imm_v256_shr_n_byte<13>),
+                      MAP(imm_v256_shr_n_byte<14>),
+                      MAP(imm_v256_shr_n_byte<15>),
+                      MAP(imm_v256_shr_n_byte<16>),
+                      MAP(imm_v256_shr_n_byte<17>),
+                      MAP(imm_v256_shr_n_byte<18>),
+                      MAP(imm_v256_shr_n_byte<19>),
+                      MAP(imm_v256_shr_n_byte<20>),
+                      MAP(imm_v256_shr_n_byte<21>),
+                      MAP(imm_v256_shr_n_byte<22>),
+                      MAP(imm_v256_shr_n_byte<23>),
+                      MAP(imm_v256_shr_n_byte<24>),
+                      MAP(imm_v256_shr_n_byte<25>),
+                      MAP(imm_v256_shr_n_byte<26>),
+                      MAP(imm_v256_shr_n_byte<27>),
+                      MAP(imm_v256_shr_n_byte<28>),
+                      MAP(imm_v256_shr_n_byte<29>),
+                      MAP(imm_v256_shr_n_byte<30>),
+                      MAP(imm_v256_shr_n_byte<31>),
+                      MAP(imm_v256_shl_n_byte<1>),
+                      MAP(imm_v256_shl_n_byte<2>),
+                      MAP(imm_v256_shl_n_byte<3>),
+                      MAP(imm_v256_shl_n_byte<4>),
+                      MAP(imm_v256_shl_n_byte<5>),
+                      MAP(imm_v256_shl_n_byte<6>),
+                      MAP(imm_v256_shl_n_byte<7>),
+                      MAP(imm_v256_shl_n_byte<8>),
+                      MAP(imm_v256_shl_n_byte<9>),
+                      MAP(imm_v256_shl_n_byte<10>),
+                      MAP(imm_v256_shl_n_byte<11>),
+                      MAP(imm_v256_shl_n_byte<12>),
+                      MAP(imm_v256_shl_n_byte<13>),
+                      MAP(imm_v256_shl_n_byte<14>),
+                      MAP(imm_v256_shl_n_byte<15>),
+                      MAP(imm_v256_shl_n_byte<16>),
+                      MAP(imm_v256_shl_n_byte<17>),
+                      MAP(imm_v256_shl_n_byte<18>),
+                      MAP(imm_v256_shl_n_byte<19>),
+                      MAP(imm_v256_shl_n_byte<20>),
+                      MAP(imm_v256_shl_n_byte<21>),
+                      MAP(imm_v256_shl_n_byte<22>),
+                      MAP(imm_v256_shl_n_byte<23>),
+                      MAP(imm_v256_shl_n_byte<24>),
+                      MAP(imm_v256_shl_n_byte<25>),
+                      MAP(imm_v256_shl_n_byte<26>),
+                      MAP(imm_v256_shl_n_byte<27>),
+                      MAP(imm_v256_shl_n_byte<28>),
+                      MAP(imm_v256_shl_n_byte<29>),
+                      MAP(imm_v256_shl_n_byte<30>),
+                      MAP(imm_v256_shl_n_byte<31>),
+                      MAP(imm_v256_shl_n_8<1>),
+                      MAP(imm_v256_shl_n_8<2>),
+                      MAP(imm_v256_shl_n_8<3>),
+                      MAP(imm_v256_shl_n_8<4>),
+                      MAP(imm_v256_shl_n_8<5>),
+                      MAP(imm_v256_shl_n_8<6>),
+                      MAP(imm_v256_shl_n_8<7>),
+                      MAP(imm_v256_shr_n_u8<1>),
+                      MAP(imm_v256_shr_n_u8<2>),
+                      MAP(imm_v256_shr_n_u8<3>),
+                      MAP(imm_v256_shr_n_u8<4>),
+                      MAP(imm_v256_shr_n_u8<5>),
+                      MAP(imm_v256_shr_n_u8<6>),
+                      MAP(imm_v256_shr_n_u8<7>),
+                      MAP(imm_v256_shr_n_s8<1>),
+                      MAP(imm_v256_shr_n_s8<2>),
+                      MAP(imm_v256_shr_n_s8<3>),
+                      MAP(imm_v256_shr_n_s8<4>),
+                      MAP(imm_v256_shr_n_s8<5>),
+                      MAP(imm_v256_shr_n_s8<6>),
+                      MAP(imm_v256_shr_n_s8<7>),
+                      MAP(imm_v256_shl_n_16<1>),
+                      MAP(imm_v256_shl_n_16<2>),
+                      MAP(imm_v256_shl_n_16<4>),
+                      MAP(imm_v256_shl_n_16<6>),
+                      MAP(imm_v256_shl_n_16<8>),
+                      MAP(imm_v256_shl_n_16<10>),
+                      MAP(imm_v256_shl_n_16<12>),
+                      MAP(imm_v256_shl_n_16<14>),
+                      MAP(imm_v256_shr_n_u16<1>),
+                      MAP(imm_v256_shr_n_u16<2>),
+                      MAP(imm_v256_shr_n_u16<4>),
+                      MAP(imm_v256_shr_n_u16<6>),
+                      MAP(imm_v256_shr_n_u16<8>),
+                      MAP(imm_v256_shr_n_u16<10>),
+                      MAP(imm_v256_shr_n_u16<12>),
+                      MAP(imm_v256_shr_n_u16<14>),
+                      MAP(imm_v256_shr_n_s16<1>),
+                      MAP(imm_v256_shr_n_s16<2>),
+                      MAP(imm_v256_shr_n_s16<4>),
+                      MAP(imm_v256_shr_n_s16<6>),
+                      MAP(imm_v256_shr_n_s16<8>),
+                      MAP(imm_v256_shr_n_s16<10>),
+                      MAP(imm_v256_shr_n_s16<12>),
+                      MAP(imm_v256_shr_n_s16<14>),
+                      MAP(imm_v256_shl_n_32<1>),
+                      MAP(imm_v256_shl_n_32<4>),
+                      MAP(imm_v256_shl_n_32<8>),
+                      MAP(imm_v256_shl_n_32<12>),
+                      MAP(imm_v256_shl_n_32<16>),
+                      MAP(imm_v256_shl_n_32<20>),
+                      MAP(imm_v256_shl_n_32<24>),
+                      MAP(imm_v256_shl_n_32<28>),
+                      MAP(imm_v256_shr_n_u32<1>),
+                      MAP(imm_v256_shr_n_u32<4>),
+                      MAP(imm_v256_shr_n_u32<8>),
+                      MAP(imm_v256_shr_n_u32<12>),
+                      MAP(imm_v256_shr_n_u32<16>),
+                      MAP(imm_v256_shr_n_u32<20>),
+                      MAP(imm_v256_shr_n_u32<24>),
+                      MAP(imm_v256_shr_n_u32<28>),
+                      MAP(imm_v256_shr_n_s32<1>),
+                      MAP(imm_v256_shr_n_s32<4>),
+                      MAP(imm_v256_shr_n_s32<8>),
+                      MAP(imm_v256_shr_n_s32<12>),
+                      MAP(imm_v256_shr_n_s32<16>),
+                      MAP(imm_v256_shr_n_s32<20>),
+                      MAP(imm_v256_shr_n_s32<24>),
+                      MAP(imm_v256_shr_n_s32<28>),
+                      MAP(v256_zero),
+                      MAP(v256_dup_8),
+                      MAP(v256_dup_16),
+                      MAP(v256_dup_32),
+                      MAP(v256_low_u32),
+                      MAP(v256_low_v64),
+                      MAP(v256_from_64),
+                      MAP(v256_from_v64),
+                      MAP(v256_ziplo_128),
+                      MAP(v256_ziphi_128),
+                      MAP(v256_unpacklo_u8_s16),
+                      MAP(v256_unpackhi_u8_s16),
+                      MAP(v256_unpacklo_s8_s16),
+                      MAP(v256_unpackhi_s8_s16),
                       { NULL, NULL, NULL } };
 #undef MAP
 
@@ -922,6 +1291,14 @@ void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
           reinterpret_cast<fptr>(v128_load_aligned), simd, d,
           reinterpret_cast<fptr>(c_u64_store_aligned),
           reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(uint64_t) &&
+               typeid(CArg) == typeid(c_v256)) {
+      // U64_V256
+      error = CompareSimd1Arg<uint64_t, v256, CRet, CArg>(
+          reinterpret_cast<fptr>(u64_store_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u64_store_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
     } else if (typeid(CRet) == typeid(c_v64) &&
                typeid(CArg) == typeid(c_v128)) {
       // V64_V128
@@ -970,6 +1347,62 @@ void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
           reinterpret_cast<fptr>(u32_load_aligned), simd, d,
           reinterpret_cast<fptr>(c_v128_store_aligned),
           reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v256) &&
+               typeid(CArg) == typeid(c_v256)) {
+      // V256_V256: v256 result from a v256 argument
+      error = CompareSimd1Arg<v256, v256, CRet, CArg>(
+          reinterpret_cast<fptr>(v256_store_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v256_store_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v256) &&
+               typeid(CArg) == typeid(c_v128)) {
+      // V256_V128: v256 result from a v128 argument
+      error = CompareSimd1Arg<v256, v128, CRet, CArg>(
+          reinterpret_cast<fptr>(v256_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v256_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v256) &&
+               typeid(CArg) == typeid(uint8_t)) {
+      // V256_U8: v256 result from a uint8_t argument
+      error = CompareSimd1Arg<v256, uint8_t, CRet, CArg>(
+          reinterpret_cast<fptr>(v256_store_aligned),
+          reinterpret_cast<fptr>(u8_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v256_store_aligned),
+          reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v256) &&
+               typeid(CArg) == typeid(uint16_t)) {
+      // V256_U16: v256 result from a uint16_t argument
+      error = CompareSimd1Arg<v256, uint16_t, CRet, CArg>(
+          reinterpret_cast<fptr>(v256_store_aligned),
+          reinterpret_cast<fptr>(u16_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v256_store_aligned),
+          reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v256) &&
+               typeid(CArg) == typeid(uint32_t)) {
+      // V256_U32: v256 result from a uint32_t argument
+      error = CompareSimd1Arg<v256, uint32_t, CRet, CArg>(
+          reinterpret_cast<fptr>(v256_store_aligned),
+          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v256_store_aligned),
+          reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(uint32_t) &&
+               typeid(CArg) == typeid(c_v256)) {
+      // U32_V256: uint32_t result from a v256 argument
+      error = CompareSimd1Arg<uint32_t, v256, CRet, CArg>(
+          reinterpret_cast<fptr>(u32_store_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u32_store_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v64) &&
+               typeid(CArg) == typeid(c_v256)) {
+      // V64_V256: v64 result from a v256 argument
+      error = CompareSimd1Arg<v64, v256, CRet, CArg>(
+          reinterpret_cast<fptr>(v64_store_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v64_store_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
     } else {
       FAIL() << "Internal error: Unknown intrinsic function "
              << typeid(CRet).name() << " " << name << "(" << typeid(CArg).name()
@@ -1140,6 +1573,67 @@ void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
           reinterpret_cast<fptr>(c_v128_load_aligned),
           reinterpret_cast<fptr>(c_u32_load_aligned),
           reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(c_v256) &&
+               typeid(CArg1) == typeid(c_v256) &&
+               typeid(CArg2) == typeid(c_v256)) {
+      // V256_V256V256: v256 result from two v256 arguments
+      error = CompareSimd2Args<v256, v256, v256, CRet, CArg1, CArg2>(
+          reinterpret_cast<fptr>(v256_store_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v256_store_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(int64_t) &&
+               typeid(CArg1) == typeid(c_v256) &&
+               typeid(CArg2) == typeid(c_v256)) {
+      // S64_V256V256: int64_t result, stored through the u64 store helpers
+      error = CompareSimd2Args<int64_t, v256, v256, CRet, CArg1, CArg2>(
+          reinterpret_cast<fptr>(u64_store_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u64_store_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(uint32_t) &&
+               typeid(CArg1) == typeid(c_v256) &&
+               typeid(CArg2) == typeid(c_v256)) {
+      // U32_V256V256: uint32_t result from two v256 arguments
+      error = CompareSimd2Args<uint32_t, v256, v256, CRet, CArg1, CArg2>(
+          reinterpret_cast<fptr>(u32_store_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u32_store_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(c_v256) &&
+               typeid(CArg1) == typeid(c_v128) &&
+               typeid(CArg2) == typeid(c_v128)) {
+      // V256_V128V128: v256 result from two v128 arguments
+      error = CompareSimd2Args<v256, v128, v128, CRet, CArg1, CArg2>(
+          reinterpret_cast<fptr>(v256_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v256_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(c_v256) &&
+               typeid(CArg1) == typeid(c_v256) &&
+               typeid(CArg2) == typeid(uint32_t)) {
+      // V256_V256U32: v256 result from a v256 and a uint32_t argument
+      error = CompareSimd2Args<v256, v256, uint32_t, CRet, CArg1, CArg2>(
+          reinterpret_cast<fptr>(v256_store_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned),
+          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v256_store_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned),
+          reinterpret_cast<fptr>(c_u32_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+
     } else {
       FAIL() << "Internal error: Unknown intrinsic function "
              << typeid(CRet).name() << " " << name << "("
@@ -1208,5 +1702,31 @@ template void TestSimd2Args<int64_t, c_v128, c_v128>(uint32_t, uint32_t,
                                                      uint32_t, const char *);
 template void TestSimd2Args<uint32_t, c_v128, c_v128>(uint32_t, uint32_t,
                                                       uint32_t, const char *);
+template void TestSimd1Arg<c_v256, c_v128>(uint32_t, uint32_t, uint32_t,
+                                           const char *);
+template void TestSimd1Arg<c_v256, c_v256>(uint32_t, uint32_t, uint32_t,
+                                           const char *);
+template void TestSimd1Arg<uint64_t, c_v256>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<c_v256, uint8_t>(uint32_t, uint32_t, uint32_t,
+                                            const char *);
+template void TestSimd1Arg<c_v256, uint16_t>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<c_v256, uint32_t>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<uint32_t, c_v256>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<c_v64, c_v256>(uint32_t, uint32_t, uint32_t,
+                                          const char *);
+template void TestSimd2Args<c_v256, c_v128, c_v128>(uint32_t, uint32_t,
+                                                    uint32_t, const char *);
+template void TestSimd2Args<c_v256, c_v256, c_v256>(uint32_t, uint32_t,
+                                                    uint32_t, const char *);
+template void TestSimd2Args<c_v256, c_v256, uint32_t>(uint32_t, uint32_t,
+                                                      uint32_t, const char *);
+template void TestSimd2Args<int64_t, c_v256, c_v256>(uint32_t, uint32_t,
+                                                     uint32_t, const char *);
+template void TestSimd2Args<uint32_t, c_v256, c_v256>(uint32_t, uint32_t,
+                                                      uint32_t, const char *);
 
 }  // namespace SIMD_NAMESPACE
diff --git a/third_party/aom/test/simd_impl.h b/third_party/aom/test/simd_impl.h
index 5cfda675d..c3dfbc400 100644
--- a/third_party/aom/test/simd_impl.h
+++ b/third_party/aom/test/simd_impl.h
@@ -14,7 +14,7 @@
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "aom_dsp/aom_simd_inline.h"
-#include "aom_dsp/simd/v128_intrinsics_c.h"
+#include "aom_dsp/simd/v256_intrinsics_c.h"
 
 namespace SIMD_NAMESPACE {
 
@@ -67,6 +67,19 @@ TYPEDEF_SIMD(V128_V128V128);
 TYPEDEF_SIMD(S64_V128V128);
 TYPEDEF_SIMD(V128_V128U32);
 TYPEDEF_SIMD(U32_V128V128);
+TYPEDEF_SIMD(V256_V128);
+TYPEDEF_SIMD(V256_V256);
+TYPEDEF_SIMD(U64_V256);
+TYPEDEF_SIMD(V256_V128V128);
+TYPEDEF_SIMD(V256_V256V256);
+TYPEDEF_SIMD(S64_V256V256);
+TYPEDEF_SIMD(V256_V256U32);
+TYPEDEF_SIMD(U32_V256V256);
+TYPEDEF_SIMD(V256_U8);
+TYPEDEF_SIMD(V256_U16);
+TYPEDEF_SIMD(V256_U32);
+TYPEDEF_SIMD(U32_V256);
+TYPEDEF_SIMD(V64_V256);
 
 // Google Test allows up to 50 tests per case, so split the largest
 typedef ARCH_POSTFIX(V64_V64) ARCH_POSTFIX(V64_V64_Part2);
@@ -74,6 +87,9 @@ typedef ARCH_POSTFIX(V64_V64V64) ARCH_POSTFIX(V64_V64V64_Part2);
 typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part2);
 typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part3);
 typedef ARCH_POSTFIX(V128_V128V128) ARCH_POSTFIX(V128_V128V128_Part2);
+typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part2);
+typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part3);
+typedef ARCH_POSTFIX(V256_V256V256) ARCH_POSTFIX(V256_V256V256_Part2);
 
 // These functions are machine tuned located elsewhere
 template <typename c_ret, typename c_arg>
@@ -219,6 +235,70 @@ MY_TEST_P(ARCH_POSTFIX(V128_V128_Part3), TestIntrinsics) {
   TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
 }
 
+MY_TEST_P(ARCH_POSTFIX(U64_V256), TestIntrinsics) {
+  TestSimd1Arg<uint64_t, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_V256), TestIntrinsics) {
+  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_V128), TestIntrinsics) {
+  TestSimd1Arg<c_v256, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_V256V256), TestIntrinsics) {
+  TestSimd2Args<c_v256, c_v256, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_V128V128), TestIntrinsics) {
+  TestSimd2Args<c_v256, c_v128, c_v128>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(U32_V256V256), TestIntrinsics) {
+  TestSimd2Args<uint32_t, c_v256, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(S64_V256V256), TestIntrinsics) {
+  TestSimd2Args<int64_t, c_v256, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_V256V256_Part2), TestIntrinsics) {
+  TestSimd2Args<c_v256, c_v256, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_V256U32), TestIntrinsics) {
+  TestSimd2Args<c_v256, c_v256, uint32_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_V256_Part2), TestIntrinsics) {
+  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_V256_Part3), TestIntrinsics) {
+  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_U8), TestIntrinsics) {
+  TestSimd1Arg<c_v256, uint8_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_U16), TestIntrinsics) {
+  TestSimd1Arg<c_v256, uint16_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_U32), TestIntrinsics) {
+  TestSimd1Arg<c_v256, uint32_t>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(U32_V256), TestIntrinsics) {
+  TestSimd1Arg<uint32_t, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V64_V256), TestIntrinsics) {
+  TestSimd1Arg<c_v64, c_v256>(kIterations, mask, maskwidth, name);
+}
+
 // Add a macro layer since INSTANTIATE_TEST_CASE_P will quote the name
 // so we need to expand it first with the prefix
 #define INSTANTIATE(name, type, ...) \
@@ -591,4 +671,252 @@ INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U32), SIMD_TUPLE(v128_dup_32, 0U, 0U));
 INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V128V128),
             SIMD_TUPLE(v128_dotp_s16, 0U, 0U));
 
+INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V256V256), SIMD_TUPLE(v256_sad_u8, 0U, 0U),
+            SIMD_TUPLE(v256_ssd_u8, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V256), SIMD_TUPLE(v256_hadd_u8, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V256V256),
+            SIMD_TUPLE(v256_dotp_s16, 0U, 0U));
+
+INSTANTIATE(
+    ARCH, ARCH_POSTFIX(V256_V256V256), SIMD_TUPLE(v256_add_8, 0U, 0U),
+    SIMD_TUPLE(v256_add_16, 0U, 0U), SIMD_TUPLE(v256_sadd_s16, 0U, 0U),
+    SIMD_TUPLE(v256_add_32, 0U, 0U), SIMD_TUPLE(v256_sub_8, 0U, 0U),
+    SIMD_TUPLE(v256_ssub_u8, 0U, 0U), SIMD_TUPLE(v256_ssub_s8, 0U, 0U),
+    SIMD_TUPLE(v256_sub_16, 0U, 0U), SIMD_TUPLE(v256_ssub_s16, 0U, 0U),
+    SIMD_TUPLE(v256_ssub_u16, 0U, 0U), SIMD_TUPLE(v256_sub_32, 0U, 0U),
+    SIMD_TUPLE(v256_ziplo_8, 0U, 0U), SIMD_TUPLE(v256_ziphi_8, 0U, 0U),
+    SIMD_TUPLE(v256_ziplo_16, 0U, 0U), SIMD_TUPLE(v256_ziphi_16, 0U, 0U),
+    SIMD_TUPLE(v256_ziplo_32, 0U, 0U), SIMD_TUPLE(v256_ziphi_32, 0U, 0U),
+    SIMD_TUPLE(v256_ziplo_64, 0U, 0U), SIMD_TUPLE(v256_ziphi_64, 0U, 0U),
+    SIMD_TUPLE(v256_ziplo_128, 0U, 0U), SIMD_TUPLE(v256_ziphi_128, 0U, 0U),
+    SIMD_TUPLE(v256_unziphi_8, 0U, 0U), SIMD_TUPLE(v256_unziplo_8, 0U, 0U),
+    SIMD_TUPLE(v256_unziphi_16, 0U, 0U), SIMD_TUPLE(v256_unziplo_16, 0U, 0U),
+    SIMD_TUPLE(v256_unziphi_32, 0U, 0U), SIMD_TUPLE(v256_unziplo_32, 0U, 0U),
+    SIMD_TUPLE(v256_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v256_pack_s16_u8, 0U, 0U),
+    SIMD_TUPLE(v256_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v256_or, 0U, 0U),
+    SIMD_TUPLE(v256_xor, 0U, 0U), SIMD_TUPLE(v256_and, 0U, 0U),
+    SIMD_TUPLE(v256_andn, 0U, 0U), SIMD_TUPLE(v256_mullo_s16, 0U, 0U),
+    SIMD_TUPLE(v256_mulhi_s16, 0U, 0U), SIMD_TUPLE(v256_mullo_s32, 0U, 0U),
+    SIMD_TUPLE(v256_madd_s16, 0U, 0U), SIMD_TUPLE(v256_madd_us8, 0U, 0U),
+    SIMD_TUPLE(v256_avg_u8, 0U, 0U), SIMD_TUPLE(v256_rdavg_u8, 0U, 0U),
+    SIMD_TUPLE(v256_avg_u16, 0U, 0U), SIMD_TUPLE(v256_min_u8, 0U, 0U),
+    SIMD_TUPLE(v256_max_u8, 0U, 0U), SIMD_TUPLE(v256_min_s8, 0U, 0U),
+    SIMD_TUPLE(v256_max_s8, 0U, 0U), SIMD_TUPLE(v256_min_s16, 0U, 0U),
+    SIMD_TUPLE(v256_max_s16, 0U, 0U), SIMD_TUPLE(v256_cmpgt_s8, 0U, 0U),
+    SIMD_TUPLE(v256_cmplt_s8, 0U, 0U));
+
+INSTANTIATE(
+    ARCH, ARCH_POSTFIX(V256_V256V256_Part2), SIMD_TUPLE(v256_cmpeq_8, 0U, 0U),
+    SIMD_TUPLE(v256_cmpgt_s16, 0U, 0U), SIMD_TUPLE(v256_cmplt_s16, 0U, 0U),
+    SIMD_TUPLE(v256_cmpeq_16, 0U, 0U), SIMD_TUPLE(v256_shuffle_8, 15U, 8U),
+    SIMD_TUPLE(v256_pshuffle_8, 15U, 8U), SIMD_TUPLE(imm_v256_align<1>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<2>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<3>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<4>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<5>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<6>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<7>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<8>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<9>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<10>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<11>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<12>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<13>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<14>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<15>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<16>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<17>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<18>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<19>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<20>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<21>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<22>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<23>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<24>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<25>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<26>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<27>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<28>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<29>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<30>, 0U, 0U),
+    SIMD_TUPLE(imm_v256_align<31>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V128V128),
+            SIMD_TUPLE(v256_from_v128, 0U, 0U), SIMD_TUPLE(v256_zip_8, 0U, 0U),
+            SIMD_TUPLE(v256_zip_16, 0U, 0U), SIMD_TUPLE(v256_zip_32, 0U, 0U),
+            SIMD_TUPLE(v256_mul_s16, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V128),
+            SIMD_TUPLE(v256_unpack_u8_s16, 0U, 0U),
+            SIMD_TUPLE(v256_unpack_s8_s16, 0U, 0U),
+            SIMD_TUPLE(v256_unpack_u16_s32, 0U, 0U),
+            SIMD_TUPLE(v256_unpack_s16_s32, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256U32), SIMD_TUPLE(v256_shl_8, 7U, 32U),
+            SIMD_TUPLE(v256_shr_u8, 7U, 32U), SIMD_TUPLE(v256_shr_s8, 7U, 32U),
+            SIMD_TUPLE(v256_shl_16, 15U, 32U),
+            SIMD_TUPLE(v256_shr_u16, 15U, 32U),
+            SIMD_TUPLE(v256_shr_s16, 15U, 32U),
+            SIMD_TUPLE(v256_shl_32, 31U, 32U),
+            SIMD_TUPLE(v256_shr_u32, 31U, 32U),
+            SIMD_TUPLE(v256_shr_s32, 31U, 32U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256), SIMD_TUPLE(v256_abs_s8, 0U, 0U),
+            SIMD_TUPLE(v256_abs_s16, 0U, 0U), SIMD_TUPLE(v256_padd_s16, 0U, 0U),
+            SIMD_TUPLE(v256_unpacklo_u8_s16, 0U, 0U),
+            SIMD_TUPLE(v256_unpacklo_s8_s16, 0U, 0U),
+            SIMD_TUPLE(v256_unpacklo_u16_s32, 0U, 0U),
+            SIMD_TUPLE(v256_unpacklo_s16_s32, 0U, 0U),
+            SIMD_TUPLE(v256_unpackhi_u8_s16, 0U, 0U),
+            SIMD_TUPLE(v256_unpackhi_s8_s16, 0U, 0U),
+            SIMD_TUPLE(v256_unpackhi_u16_s32, 0U, 0U),
+            SIMD_TUPLE(v256_unpackhi_s16_s32, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<9>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<11>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<13>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<15>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<17>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<18>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<19>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<21>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<22>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<23>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<25>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<26>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<27>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<29>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<30>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_byte<31>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<8>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part2),
+            SIMD_TUPLE(imm_v256_shl_n_byte<9>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<11>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<13>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<15>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<17>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<18>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<19>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<21>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<22>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<23>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<25>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<26>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<27>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<29>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<30>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_byte<31>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_8<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_8<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_8<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_8<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_8<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_8<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_8<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u8<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u8<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u8<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u8<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u8<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u8<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u8<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s8<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s8<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s8<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s8<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s8<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s8<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s8<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_16<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_16<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_16<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_16<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_16<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_16<10>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part3),
+            SIMD_TUPLE(imm_v256_shl_n_16<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_16<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u16<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u16<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u16<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u16<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u16<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u16<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u16<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u16<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s16<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s16<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s16<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s16<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s16<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s16<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s16<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s16<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_32<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_32<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_32<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_32<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_32<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_32<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_32<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_32<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u32<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u32<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u32<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u32<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u32<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u32<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u32<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u32<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s32<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s32<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s32<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s32<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s32<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s32<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s32<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s32<28>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U8), SIMD_TUPLE(v256_dup_8, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U16), SIMD_TUPLE(v256_dup_16, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U32), SIMD_TUPLE(v256_dup_32, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V256), SIMD_TUPLE(v256_low_u32, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V256), SIMD_TUPLE(v256_low_v64, 0U, 0U));
+
 }  // namespace SIMD_NAMESPACE
diff --git a/third_party/aom/test/superframe_test.cc b/third_party/aom/test/superframe_test.cc
index 0f54baeaf..a28d35d5f 100644
--- a/third_party/aom/test/superframe_test.cc
+++ b/third_party/aom/test/superframe_test.cc
@@ -119,23 +119,12 @@ TEST_P(SuperframeTest, TestSuperframeIndexIsOptional) {
 // to the decoder starting at the end of the buffer.
 #if CONFIG_EXT_TILE
 // Single tile does not work with ANS (see comment above).
-#if CONFIG_ANS || CONFIG_DAALA_EC
 const int tile_col_values[] = { 1, 2 };
-#else
-const int tile_col_values[] = { 1, 2, 32 };
-#endif
 const int tile_row_values[] = { 1, 2, 32 };
 AV1_INSTANTIATE_TEST_CASE(
     SuperframeTest,
     ::testing::Combine(::testing::Values(::libaom_test::kTwoPassGood),
                        ::testing::ValuesIn(tile_col_values),
                        ::testing::ValuesIn(tile_row_values)));
-#else
-#if !CONFIG_ANS && !CONFIG_DAALA_EC
-AV1_INSTANTIATE_TEST_CASE(
-    SuperframeTest,
-    ::testing::Combine(::testing::Values(::libaom_test::kTwoPassGood),
-                       ::testing::Values(0), ::testing::Values(0)));
-#endif  // !CONFIG_ANS
 #endif  // CONFIG_EXT_TILE
 }  // namespace
diff --git a/third_party/aom/test/test-data.mk b/third_party/aom/test/test-data.mk
index 168144a00..083b34953 100644
--- a/third_party/aom/test/test-data.mk
+++ b/third_party/aom/test/test-data.mk
@@ -1,22 +1,22 @@
 LIBAOM_TEST_SRCS-yes += test-data.mk
 
 # Encoder test source
-LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.yuv
-LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += hantro_collage_w352h288.yuv
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += hantro_odd.yuv
 
-LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_420.y4m
-LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_422.y4m
-LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_444.y4m
-LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_440.yuv
-LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_420.y4m
-LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_422.y4m
-LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_444.y4m
-LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_440.yuv
-LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420_a10-1.y4m
-LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420.y4m
-LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_422.y4m
-LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_444.y4m
-LIBAOM_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_440.yuv
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_10_420.y4m
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_10_422.y4m
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_10_444.y4m
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_10_440.yuv
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_12_420.y4m
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_12_422.y4m
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_12_444.y4m
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_12_440.yuv
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_8_420_a10-1.y4m
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_8_420.y4m
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_8_422.y4m
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_8_444.y4m
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_8_440.yuv
 
 LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += desktop_credits.y4m
 LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += niklas_1280_720_30.y4m
diff --git a/third_party/aom/test/test.cmake b/third_party/aom/test/test.cmake
index 8d3ab7059..d72a784ad 100644
--- a/third_party/aom/test/test.cmake
+++ b/third_party/aom/test/test.cmake
@@ -8,8 +8,15 @@
 ## Media Patent License 1.0 was not distributed with this source code in the
 ## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 ##
+if (NOT AOM_TEST_TEST_CMAKE_)
+set(AOM_TEST_TEST_CMAKE_ 1)
+
+include(ProcessorCount)
+
 include("${AOM_ROOT}/test/test_data_util.cmake")
 
+set(AOM_UNIT_TEST_DATA_LIST_FILE "${AOM_ROOT}/test/test-data.sha1")
+
 set(AOM_UNIT_TEST_WRAPPER_SOURCES
     "${AOM_CONFIG_DIR}/usage_exit.c"
     "${AOM_ROOT}/test/test_libaom.cc")
@@ -19,6 +26,8 @@ set(AOM_UNIT_TEST_COMMON_SOURCES
     "${AOM_ROOT}/test/clear_system_state.h"
     "${AOM_ROOT}/test/codec_factory.h"
     "${AOM_ROOT}/test/convolve_test.cc"
+    "${AOM_ROOT}/test/decode_test_driver.cc"
+    "${AOM_ROOT}/test/decode_test_driver.h"
     "${AOM_ROOT}/test/function_equivalence_test.h"
     "${AOM_ROOT}/test/md5_helper.h"
     "${AOM_ROOT}/test/register_state_check.h"
@@ -50,8 +59,6 @@ endif ()
 
 set(AOM_UNIT_TEST_DECODER_SOURCES
     "${AOM_ROOT}/test/decode_api_test.cc"
-    "${AOM_ROOT}/test/decode_test_driver.cc"
-    "${AOM_ROOT}/test/decode_test_driver.h"
     "${AOM_ROOT}/test/ivf_video_source.h")
 
 set(AOM_UNIT_TEST_ENCODER_SOURCES
@@ -83,10 +90,6 @@ if (CONFIG_AV1)
       ${AOM_UNIT_TEST_COMMON_SOURCES}
       "${AOM_ROOT}/test/av1_convolve_optimz_test.cc"
       "${AOM_ROOT}/test/av1_convolve_test.cc"
-      "${AOM_ROOT}/test/av1_fwd_txfm1d_test.cc"
-      "${AOM_ROOT}/test/av1_fwd_txfm2d_test.cc"
-      "${AOM_ROOT}/test/av1_inv_txfm1d_test.cc"
-      "${AOM_ROOT}/test/av1_inv_txfm2d_test.cc"
       "${AOM_ROOT}/test/av1_txfm_test.cc"
       "${AOM_ROOT}/test/av1_txfm_test.h"
       "${AOM_ROOT}/test/intrapred_test.cc"
@@ -103,9 +106,7 @@ if (CONFIG_AV1)
     if (HAVE_SSE4_1)
       set(AOM_UNIT_TEST_COMMON_SOURCES
           ${AOM_UNIT_TEST_COMMON_SOURCES}
-          # TODO: not sure if this intrinsics or a wrapper calling intrin/asm.
-          #"${AOM_ROOT}/test/filterintra_predictors_test.cc")
-          )
+          "${AOM_ROOT}/test/filterintra_predictors_test.cc")
     endif ()
   endif ()
 
@@ -131,8 +132,13 @@ if (CONFIG_AV1_ENCODER)
       "${AOM_ROOT}/test/arf_freq_test.cc"
       "${AOM_ROOT}/test/av1_dct_test.cc"
       "${AOM_ROOT}/test/av1_fht16x16_test.cc"
+      "${AOM_ROOT}/test/av1_fht32x32_test.cc"
       "${AOM_ROOT}/test/av1_fht8x8_test.cc"
       "${AOM_ROOT}/test/av1_inv_txfm_test.cc"
+      "${AOM_ROOT}/test/av1_fwd_txfm1d_test.cc"
+      "${AOM_ROOT}/test/av1_fwd_txfm2d_test.cc"
+      "${AOM_ROOT}/test/av1_inv_txfm1d_test.cc"
+      "${AOM_ROOT}/test/av1_inv_txfm2d_test.cc"
       "${AOM_ROOT}/test/avg_test.cc"
       "${AOM_ROOT}/test/blend_a64_mask_1d_test.cc"
       "${AOM_ROOT}/test/blend_a64_mask_test.cc"
@@ -167,8 +173,14 @@ if (CONFIG_AV1_ENCODER)
         "${AOM_ROOT}/test/av1_fht4x4_test.cc"
         "${AOM_ROOT}/test/av1_fht4x8_test.cc"
         "${AOM_ROOT}/test/av1_fht8x16_test.cc"
-        "${AOM_ROOT}/test/av1_fht8x4_test.cc"
-        "${AOM_ROOT}/test/fht32x32_test.cc")
+        "${AOM_ROOT}/test/av1_fht8x4_test.cc")
+
+  endif ()
+
+  if (CONFIG_GLOBAL_MOTION)
+    set(AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1
+        ${AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1}
+        "${AOM_ROOT}/test/corner_match_test.cc")
   endif ()
 
   if (CONFIG_MOTION_VAR)
@@ -177,6 +189,12 @@ if (CONFIG_AV1_ENCODER)
         "${AOM_ROOT}/test/obmc_sad_test.cc"
         "${AOM_ROOT}/test/obmc_variance_test.cc")
   endif ()
+
+  if (CONFIG_TX64X64)
+    set(AOM_UNIT_TEST_ENCODER_SOURCES
+        ${AOM_UNIT_TEST_ENCODER_SOURCES}
+        "${AOM_ROOT}/test/av1_fht64x64_test.cc")
+  endif ()
 endif ()
 
 if (CONFIG_AV1_DECODER AND CONFIG_AV1_ENCODER)
@@ -209,7 +227,7 @@ if (CONFIG_AV1_DECODER AND CONFIG_AV1_ENCODER)
 endif ()
 
 if (CONFIG_HIGHBITDEPTH)
-  if (CONFIG_AV1)
+  if (CONFIG_AV1_ENCODER)
     set(AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1
         ${AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1}
         "${AOM_ROOT}/test/av1_highbd_iht_test.cc"
@@ -245,8 +263,14 @@ endif ()
 # exist before this function is called.
 function (setup_aom_test_targets)
   add_library(test_aom_common OBJECT ${AOM_UNIT_TEST_COMMON_SOURCES})
-  add_library(test_aom_decoder OBJECT ${AOM_UNIT_TEST_DECODER_SOURCES})
-  add_library(test_aom_encoder OBJECT ${AOM_UNIT_TEST_ENCODER_SOURCES})
+
+  if (CONFIG_AV1_DECODER)
+    add_library(test_aom_decoder OBJECT ${AOM_UNIT_TEST_DECODER_SOURCES})
+  endif ()
+
+  if (CONFIG_AV1_ENCODER)
+    add_library(test_aom_encoder OBJECT ${AOM_UNIT_TEST_ENCODER_SOURCES})
+  endif ()
 
   set(AOM_LIB_TARGETS ${AOM_LIB_TARGETS} test_aom_common test_aom_decoder
       test_aom_encoder PARENT_SCOPE)
@@ -255,7 +279,7 @@ function (setup_aom_test_targets)
                  $<TARGET_OBJECTS:aom_common_app_util>
                  $<TARGET_OBJECTS:test_aom_common>)
 
-  if (CONFIG_DECODERS)
+  if (CONFIG_AV1_DECODER)
     target_sources(test_libaom PUBLIC
                    $<TARGET_OBJECTS:aom_decoder_app_util>
                    $<TARGET_OBJECTS:test_aom_decoder>)
@@ -265,7 +289,7 @@ function (setup_aom_test_targets)
     endif ()
   endif ()
 
-  if (CONFIG_ENCODERS)
+  if (CONFIG_AV1_ENCODER)
     target_sources(test_libaom PUBLIC
                    $<TARGET_OBJECTS:test_aom_encoder>
                    $<TARGET_OBJECTS:aom_encoder_app_util>)
@@ -273,14 +297,14 @@ function (setup_aom_test_targets)
     if (CONFIG_ENCODE_PERF_TESTS)
       target_sources(test_libaom PUBLIC ${AOM_ENCODE_PERF_TEST_SOURCES})
     endif ()
-  endif ()
 
-  target_link_libraries(test_libaom PUBLIC aom gtest)
+    add_executable(test_intra_pred_speed
+                   ${AOM_TEST_INTRA_PRED_SPEED_SOURCES}
+                   $<TARGET_OBJECTS:aom_common_app_util>)
+    target_link_libraries(test_intra_pred_speed ${AOM_LIB_LINK_TYPE} aom gtest)
+  endif ()
 
-  add_executable(test_intra_pred_speed
-                 ${AOM_TEST_INTRA_PRED_SPEED_SOURCES}
-                 $<TARGET_OBJECTS:aom_common_app_util>)
-  target_link_libraries(test_intra_pred_speed PUBLIC aom gtest)
+  target_link_libraries(test_libaom ${AOM_LIB_LINK_TYPE} aom gtest)
 
   if (CONFIG_LIBYUV)
     target_sources(test_libaom PUBLIC $<TARGET_OBJECTS:yuv>)
@@ -300,16 +324,73 @@ function (setup_aom_test_targets)
   if (HAVE_SSE4_1)
     add_intrinsics_source_to_target("-msse4.1" "test_libaom"
                                     "AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1")
+    if (CONFIG_AV1_ENCODER)
+      if (AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1)
+        add_intrinsics_source_to_target("-msse4.1" "test_libaom"
+                                        "AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1")
+      endif ()
+    endif ()
   endif ()
   if (HAVE_NEON)
     add_intrinsics_source_to_target("${AOM_NEON_INTRIN_FLAG}" "test_libaom"
                                     "AOM_UNIT_TEST_COMMON_INTRIN_NEON")
   endif ()
 
-  add_custom_target(testdata
-                    COMMAND ${CMAKE_COMMAND}
-                      -DAOM_CONFIG_DIR="${AOM_CONFIG_DIR}"
-                      -DAOM_ROOT="${AOM_ROOT}"
-                      -P "${AOM_ROOT}/test/test_worker.cmake"
-                    SOURCES ${AOM_TEST_DATA_LIST})
+  make_test_data_lists("${AOM_UNIT_TEST_DATA_LIST_FILE}"
+                       test_files test_file_checksums)
+  list(LENGTH test_files num_test_files)
+  list(LENGTH test_file_checksums num_test_file_checksums)
+
+  math(EXPR max_file_index "${num_test_files} - 1")
+  foreach (test_index RANGE ${max_file_index})
+    list(GET test_files ${test_index} test_file)
+    list(GET test_file_checksums ${test_index} test_file_checksum)
+    add_custom_target(testdata_${test_index}
+                      COMMAND ${CMAKE_COMMAND}
+                        -DAOM_CONFIG_DIR="${AOM_CONFIG_DIR}"
+                        -DAOM_ROOT="${AOM_ROOT}"
+                        -DAOM_TEST_FILE="${test_file}"
+                        -DAOM_TEST_CHECKSUM=${test_file_checksum}
+                        -P "${AOM_ROOT}/test/test_data_download_worker.cmake")
+    set(testdata_targets ${testdata_targets} testdata_${test_index})
+  endforeach ()
+
+  # Create a custom build target for running each test data download target.
+  add_custom_target(testdata)
+  add_dependencies(testdata ${testdata_targets})
+
+  # Pick a reasonable number of targets (this controls parallelization).
+  ProcessorCount(num_test_targets)
+  if (num_test_targets EQUAL 0)
+    # Just default to 10 targets when there's no processor count available.
+    set(num_test_targets 10)
+  endif ()
+
+  # TODO(tomfinegan): This needs some work for MSVC and Xcode. Executable suffix
+  # and config based executable output paths are the obvious issues.
+  math(EXPR max_shard_index "${num_test_targets} - 1")
+  foreach (shard_index RANGE ${max_shard_index})
+    set(test_name "test_${shard_index}")
+    add_custom_target(${test_name}
+                      COMMAND ${CMAKE_COMMAND}
+                      -DGTEST_SHARD_INDEX=${shard_index}
+                      -DGTEST_TOTAL_SHARDS=${num_test_targets}
+                      -DTEST_LIBAOM=$<TARGET_FILE:test_libaom>
+                      -P "${AOM_ROOT}/test/test_runner.cmake"
+                      DEPENDS testdata test_libaom)
+    set(test_targets ${test_targets} ${test_name})
+  endforeach ()
+  add_custom_target(runtests)
+  add_dependencies(runtests ${test_targets})
+
+  if (MSVC)
+    set_target_properties(${testdata_targets} PROPERTIES
+                          EXCLUDE_FROM_DEFAULT_BUILD TRUE)
+    set_target_properties(${test_targets} PROPERTIES
+                          EXCLUDE_FROM_DEFAULT_BUILD TRUE)
+    set_target_properties(testdata runtests PROPERTIES
+                          EXCLUDE_FROM_DEFAULT_BUILD TRUE)
+  endif ()
 endfunction ()
+
+endif ()  # AOM_TEST_TEST_CMAKE_
diff --git a/third_party/aom/test/test.mk b/third_party/aom/test/test.mk
index fb0ab371e..45bb21286 100644
--- a/third_party/aom/test/test.mk
+++ b/third_party/aom/test/test.mk
@@ -20,7 +20,6 @@ LIBAOM_TEST_SRCS-yes += util.h
 LIBAOM_TEST_SRCS-yes += video_source.h
 LIBAOM_TEST_SRCS-yes += transform_test_base.h
 LIBAOM_TEST_SRCS-yes += function_equivalence_test.h
-LIBAOM_TEST_SRCS-yes += warp_filter_test_util.h
 
 ##
 ## BLACK BOX TESTS
@@ -28,18 +27,18 @@ LIBAOM_TEST_SRCS-yes += warp_filter_test_util.h
 ## Black box tests only use the public API.
 ##
 LIBAOM_TEST_SRCS-yes                   += ../md5_utils.h ../md5_utils.c
-LIBAOM_TEST_SRCS-$(CONFIG_DECODERS)    += ivf_video_source.h
-LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += ../y4minput.h ../y4minput.c
-LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += altref_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += aq_segment_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += datarate_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += encode_api_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += error_resilience_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += i420_video_source.h
-#LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += realtime_test.cc
-#LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += resize_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += y4m_video_source.h
-LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += yuv_video_source.h
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_DECODER)    += ivf_video_source.h
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += ../y4minput.h ../y4minput.c
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += altref_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += aq_segment_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += datarate_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += encode_api_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += error_resilience_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += i420_video_source.h
+#LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += realtime_test.cc
+#LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += resize_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += y4m_video_source.h
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += yuv_video_source.h
 
 #LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += level_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += active_map_refresh_test.cc
@@ -51,14 +50,14 @@ LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += lossless_test.cc
 
 LIBAOM_TEST_SRCS-yes                   += decode_test_driver.cc
 LIBAOM_TEST_SRCS-yes                   += decode_test_driver.h
-LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += encode_test_driver.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += encode_test_driver.cc
 LIBAOM_TEST_SRCS-yes                   += encode_test_driver.h
 
 ## IVF writing.
-LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += ../ivfenc.c ../ivfenc.h
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += ../ivfenc.c ../ivfenc.h
 
 ## Y4m parsing.
-LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS)    += y4m_test.cc ../y4menc.c ../y4menc.h
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += y4m_test.cc ../y4menc.c ../y4menc.h
 
 ## WebM Parsing
 ifeq ($(CONFIG_WEBM_IO), yes)
@@ -66,14 +65,14 @@ LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvparser.cc
 LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvreader.cc
 LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvparser.h
 LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvreader.h
-LIBAOM_TEST_SRCS-$(CONFIG_DECODERS)    += $(LIBWEBM_PARSER_SRCS)
-LIBAOM_TEST_SRCS-$(CONFIG_DECODERS)    += ../tools_common.h
-LIBAOM_TEST_SRCS-$(CONFIG_DECODERS)    += ../webmdec.cc
-LIBAOM_TEST_SRCS-$(CONFIG_DECODERS)    += ../webmdec.h
-LIBAOM_TEST_SRCS-$(CONFIG_DECODERS)    += webm_video_source.h
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_DECODER) += $(LIBWEBM_PARSER_SRCS)
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_DECODER) += ../tools_common.h
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_DECODER) += ../webmdec.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_DECODER) += ../webmdec.h
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_DECODER) += webm_video_source.h
 endif
 
-LIBAOM_TEST_SRCS-$(CONFIG_DECODERS)    += decode_api_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_DECODER) += decode_api_test.cc
 
 # Currently we only support decoder perf tests for av1. Also they read from WebM
 # files, so WebM IO is required.
@@ -82,7 +81,6 @@ ifeq ($(CONFIG_DECODE_PERF_TESTS)$(CONFIG_AV1_DECODER)$(CONFIG_WEBM_IO), \
 LIBAOM_TEST_SRCS-yes                   += decode_perf_test.cc
 endif
 
-# encode perf tests are av1 only
 ifeq ($(CONFIG_ENCODE_PERF_TESTS)$(CONFIG_AV1_ENCODER), yesyes)
 LIBAOM_TEST_SRCS-yes += encode_perf_test.cc
 endif
@@ -142,11 +140,13 @@ LIBAOM_TEST_SRCS-yes                   += simd_cmp_impl.h
 LIBAOM_TEST_SRCS-$(HAVE_SSE2)          += simd_cmp_sse2.cc
 LIBAOM_TEST_SRCS-$(HAVE_SSSE3)         += simd_cmp_ssse3.cc
 LIBAOM_TEST_SRCS-$(HAVE_SSE4_1)        += simd_cmp_sse4.cc
+LIBAOM_TEST_SRCS-$(HAVE_AVX2)          += simd_cmp_avx2.cc
 LIBAOM_TEST_SRCS-$(HAVE_NEON)          += simd_cmp_neon.cc
 LIBAOM_TEST_SRCS-yes                   += simd_impl.h
 LIBAOM_TEST_SRCS-$(HAVE_SSE2)          += simd_sse2_test.cc
 LIBAOM_TEST_SRCS-$(HAVE_SSSE3)         += simd_ssse3_test.cc
 LIBAOM_TEST_SRCS-$(HAVE_SSE4_1)        += simd_sse4_test.cc
+LIBAOM_TEST_SRCS-$(HAVE_AVX2)          += simd_avx2_test.cc
 LIBAOM_TEST_SRCS-$(HAVE_NEON)          += simd_neon_test.cc
 LIBAOM_TEST_SRCS-yes                   += intrapred_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_INTRABC)     += intrabc_test.cc
@@ -162,13 +162,21 @@ LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += error_block_test.cc
 #LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_quantize_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += subtract_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += arf_freq_test.cc
-
+ifneq ($(CONFIG_AOM_QM), yes)
+ifneq ($(CONFIG_NEW_QUANT), yes)
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += quantize_func_test.cc
+endif
+endif
 
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_inv_txfm_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_dct_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht4x4_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht8x8_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht16x16_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht32x32_test.cc
+ifeq ($(CONFIG_TX64X64),yes)
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht64x64_test.cc
+endif
 ifeq ($(CONFIG_EXT_TX),yes)
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht4x8_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht8x4_test.cc
@@ -176,7 +184,6 @@ LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht8x16_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht16x8_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht16x32_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht32x16_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += fht32x32_test.cc
 endif
 
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += sum_squares_test.cc
@@ -185,8 +192,8 @@ LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += blend_a64_mask_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += blend_a64_mask_1d_test.cc
 
 ifeq ($(CONFIG_EXT_INTER),yes)
-LIBAOM_TEST_SRCS-$(HAVE_SSSE3) += masked_variance_test.cc
-LIBAOM_TEST_SRCS-$(HAVE_SSSE3) += masked_sad_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += masked_variance_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += masked_sad_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_wedge_utils_test.cc
 endif
 
@@ -217,7 +224,7 @@ endif
 ifeq ($(CONFIG_INTERNAL_STATS),yes)
 LIBAOM_TEST_SRCS-$(CONFIG_HIGHBITDEPTH) += hbd_metrics_test.cc
 endif
-LIBAOM_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += sad_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1) += av1_txfm_test.h
 LIBAOM_TEST_SRCS-$(CONFIG_AV1) += av1_txfm_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fwd_txfm1d_test.cc
@@ -226,13 +233,21 @@ LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fwd_txfm2d_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_inv_txfm2d_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1) += av1_convolve_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_AV1) += av1_convolve_optimz_test.cc
-ifneq ($(findstring yes,$(CONFIG_GLOBAL_MOTION) $(CONFIG_WARPED_MOTION)),)
+ifneq ($(findstring yes,$(CONFIG_GLOBAL_MOTION)$(CONFIG_WARPED_MOTION)),)
+LIBAOM_TEST_SRCS-$(HAVE_SSE2) += warp_filter_test_util.h
 LIBAOM_TEST_SRCS-$(HAVE_SSE2) += warp_filter_test.cc warp_filter_test_util.cc
 endif
 ifeq ($(CONFIG_LOOP_RESTORATION),yes)
+LIBAOM_TEST_SRCS-$(HAVE_SSE2) += hiprec_convolve_test_util.h
+LIBAOM_TEST_SRCS-$(HAVE_SSE2) += hiprec_convolve_test.cc
+LIBAOM_TEST_SRCS-$(HAVE_SSE2) += hiprec_convolve_test_util.cc
 LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += selfguided_filter_test.cc
 endif
 
+ifeq ($(CONFIG_GLOBAL_MOTION)$(CONFIG_AV1_ENCODER),yesyes)
+LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += corner_match_test.cc
+endif
+
 TEST_INTRA_PRED_SPEED_SRCS-yes := test_intra_pred_speed.cc
 TEST_INTRA_PRED_SPEED_SRCS-yes += ../md5_utils.h ../md5_utils.c
 
diff --git a/third_party/aom/test/test_data_download_worker.cmake b/third_party/aom/test/test_data_download_worker.cmake
new file mode 100644
index 000000000..d7bf99edd
--- /dev/null
+++ b/third_party/aom/test/test_data_download_worker.cmake
@@ -0,0 +1,51 @@
+##
+## Copyright (c) 2017, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+# Script-mode worker (cmake -P) that ensures one test data file is present and
+# matches its checksum, downloading it when necessary. Required -D arguments:
+#   AOM_ROOT          - root of the aom source tree.
+#   AOM_CONFIG_DIR    - fallback download directory.
+#   AOM_TEST_FILE     - name of the test file to check/download.
+#   AOM_TEST_CHECKSUM - expected checksum of the test file.
+# The environment variable LIBAOM_TEST_DATA_PATH overrides the download dir.
+
+# Validate arguments first: the include() below depends on AOM_ROOT.
+if (NOT AOM_ROOT OR NOT AOM_CONFIG_DIR OR NOT AOM_TEST_FILE
+    OR NOT AOM_TEST_CHECKSUM)
+  message(FATAL_ERROR "AOM_ROOT, AOM_CONFIG_DIR, AOM_TEST_FILE and "
+          "AOM_TEST_CHECKSUM must be defined.")
+endif ()
+
+include("${AOM_ROOT}/test/test_data_util.cmake")
+
+set(AOM_TEST_DATA_URL
+    "https://storage.googleapis.com/downloads.webmproject.org/test_data/libvpx")
+set(AOM_TEST_DATA_PATH "$ENV{LIBAOM_TEST_DATA_PATH}")
+
+# Fall back to the build configuration directory when no data path is set.
+if ("${AOM_TEST_DATA_PATH}" STREQUAL "")
+  message(WARNING "Writing test data to ${AOM_CONFIG_DIR}, set "
+          "$LIBAOM_TEST_DATA_PATH in your environment to avoid this warning.")
+  set(AOM_TEST_DATA_PATH "${AOM_CONFIG_DIR}")
+endif ()
+
+if (NOT EXISTS "${AOM_TEST_DATA_PATH}")
+  file(MAKE_DIRECTORY "${AOM_TEST_DATA_PATH}")
+endif ()
+
+# Build the local path and remote URL for the requested file.
+expand_test_file_paths("AOM_TEST_FILE" "${AOM_TEST_DATA_PATH}" "filepath")
+expand_test_file_paths("AOM_TEST_FILE" "${AOM_TEST_DATA_URL}" "url")
+
+# Only download when check_file reports the local copy needs it.
+check_file("${filepath}" "${AOM_TEST_CHECKSUM}" "needs_download")
+if (needs_download)
+  download_test_file("${url}" "${AOM_TEST_CHECKSUM}" "${filepath}")
+endif ()
diff --git a/third_party/aom/test/test_data_util.cmake b/third_party/aom/test/test_data_util.cmake
index f096e4e12..e4641049d 100644
--- a/third_party/aom/test/test_data_util.cmake
+++ b/third_party/aom/test/test_data_util.cmake
@@ -11,15 +11,14 @@
 
 # Parses test/test-data.sha1 and writes captured file names and checksums to
 # $out_files and $out_checksums as lists.
-function (make_test_data_lists out_files out_checksums)
-  if (NOT AOM_TEST_DATA_LIST OR NOT EXISTS "${AOM_TEST_DATA_LIST}")
-    message(FATAL_ERROR "AOM_TEST_DATA_LIST (${AOM_TEST_DATA_LIST}) missing or "
-            "variable empty.")
+function (make_test_data_lists test_data_file out_files out_checksums)
+  if (NOT test_data_file OR NOT EXISTS "${test_data_file}")
+    message(FATAL_ERROR "Test info file missing or empty (${test_data_file})")
   endif ()
 
-  # Read test-data.sha1 into $files_and_checksums. $files_and_checksums becomes
-  # a list with an entry for each line from $AOM_TEST_DATA_LIST.
-  file(STRINGS "${AOM_TEST_DATA_LIST}" files_and_checksums)
+  # Read $test_data_file into $files_and_checksums. $files_and_checksums becomes
+  # a list with an entry for each line from $test_data_file.
+  file(STRINGS "${test_data_file}" files_and_checksums)
 
   # Iterate over the list of lines and split it into $checksums and $filenames.
   foreach (line ${files_and_checksums})
@@ -33,8 +32,10 @@ function (make_test_data_lists out_files out_checksums)
     set(filenames ${filenames} ${filename})
   endforeach ()
 
-  if (NOT checksums OR NOT filenames)
-    message(FATAL_ERROR "Parsing of ${AOM_TEST_DATA_LIST} failed.")
+  list(LENGTH filenames num_files)
+  list(LENGTH checksums num_checksums)
+  if (NOT checksums OR NOT filenames OR NOT num_files EQUAL num_checksums)
+    message(FATAL_ERROR "Parsing of ${test_data_file} failed.")
   endif ()
 
   set(${out_checksums} ${checksums} PARENT_SCOPE)
@@ -62,7 +63,9 @@ function (check_file local_path expected_checksum out_needs_update)
     unset(${out_needs_update} PARENT_SCOPE)
   else ()
     set(${out_needs_update} 1 PARENT_SCOPE)
+    return ()
   endif ()
+  message("${local_path} up to date.")
 endfunction ()
 
 # Downloads data from $file_url, confirms that $file_checksum matches, and
diff --git a/third_party/aom/test/test_intra_pred_speed.cc b/third_party/aom/test/test_intra_pred_speed.cc
index c4253628e..70d82484c 100644
--- a/third_party/aom/test/test_intra_pred_speed.cc
+++ b/third_party/aom/test/test_intra_pred_speed.cc
@@ -31,17 +31,16 @@ namespace {
 typedef void (*AvxPredFunc)(uint8_t *dst, ptrdiff_t y_stride,
                             const uint8_t *above, const uint8_t *left);
 
-#if CONFIG_ALT_INTRA
-const int kNumAv1IntraFuncs = 14;
-#else
-const int kNumAv1IntraFuncs = 13;
-#endif  // CONFIG_ALT_INTRA
+const int kNumAv1IntraFuncs = INTRA_MODES + 3;  // 4 DC predictor variants.
 const char *kAv1IntraPredNames[kNumAv1IntraFuncs] = {
-  "DC_PRED",    "DC_LEFT_PRED", "DC_TOP_PRED", "DC_128_PRED", "V_PRED",
-  "H_PRED",     "D45_PRED",     "D135_PRED",   "D117_PRED",   "D153_PRED",
-  "D207_PRED",  "D63_PRED",     "TM_PRED",
+  "DC_PRED",       "DC_LEFT_PRED",  "DC_TOP_PRED", "DC_128_PRED", "V_PRED",
+  "H_PRED",        "D45_PRED",      "D135_PRED",   "D117_PRED",   "D153_PRED",
+  "D207_PRED",     "D63_PRED",      "TM_PRED",
 #if CONFIG_ALT_INTRA
-  "SMOOTH_PRED"
+  "SMOOTH_PRED",
+#if CONFIG_SMOOTH_HV
+  "SMOOTH_V_PRED", "SMOOTH_H_PRED",
+#endif  // CONFIG_SMOOTH_HV
 #endif  // CONFIG_ALT_INTRA
 };
 
@@ -104,7 +103,11 @@ void TestIntraPred4(AvxPredFunc const *pred_funcs) {
     "b852f42e6c4991d415400332d567872f",
 #if CONFIG_ALT_INTRA
     "828c49a4248993cce4876fa26eab697f",
-    "718c8cee9011f92ef31f77a9a7560010"
+    "718c8cee9011f92ef31f77a9a7560010",
+#if CONFIG_SMOOTH_HV
+    "b37eeadbbd9e3bdff023a5097b59213a",
+    "d6fb9c659d82c78f0d0c891da6cba87f",
+#endif  // CONFIG_SMOOTH_HV
 #else
     "309a618577b27c648f9c5ee45252bc8f",
 #endif  // CONFIG_ALT_INTRA
@@ -129,7 +132,11 @@ void TestIntraPred8(AvxPredFunc const *pred_funcs) {
     "7a09adb0fa6c2bf889a99dd816622feb",
 #if CONFIG_ALT_INTRA
     "f6ade499c626d38eb70661184b79bc57",
-    "1ad5b106c79b792e514ba25e87139b5e"
+    "1ad5b106c79b792e514ba25e87139b5e",
+#if CONFIG_SMOOTH_HV
+    "fe0d359b91a1d8141483d2e032f1b75f",
+    "0cfd7603ced02829d1ce18b6795d73d0",
+#endif  // CONFIG_SMOOTH_HV
 #else
     "815b75c8e0d91cc1ae766dc5d3e445a3",
 #endif  // CONFIG_ALT_INTRA
@@ -154,7 +161,11 @@ void TestIntraPred16(AvxPredFunc const *pred_funcs) {
     "f7063ccbc29f87303d5c3d0555b08944",
 #if CONFIG_ALT_INTRA
     "7adcaaa3554eb71a81fc48cb9043984b",
-    "c0acea4397c1b4d54a21bbcec5731dff"
+    "c0acea4397c1b4d54a21bbcec5731dff",
+#if CONFIG_SMOOTH_HV
+    "f15b8712f0f064e98a7d804d3074afa7",
+    "01a09cdb8edd06d840c84643032fc02f",
+#endif  // CONFIG_SMOOTH_HV
 #else
     "b8a41aa968ec108af447af4217cba91b",
 #endif  // CONFIG_ALT_INTRA
@@ -179,7 +190,11 @@ void TestIntraPred32(AvxPredFunc const *pred_funcs) {
     "d520125ebd512c63c301bf67fea8e059",
 #if CONFIG_ALT_INTRA
     "297e8fbb5d33c29b12b228fa9d7c40a4",
-    "31b9296d70dd82238c87173e6d5e65fd"
+    "31b9296d70dd82238c87173e6d5e65fd",
+#if CONFIG_SMOOTH_HV
+    "f1041f77a34e86aaf30ea779ba84a2e8",
+    "83e2b744a6a3d82321744442b1db945c",
+#endif  // CONFIG_SMOOTH_HV
 #else
     "9e1370c6d42e08d357d9612c93a71cfc",
 #endif  // CONFIG_ALT_INTRA
@@ -193,11 +208,12 @@ void TestIntraPred32(AvxPredFunc const *pred_funcs) {
 // Defines a test case for |arch| (e.g., C, SSE2, ...) passing the predictors
 // to |test_func|. The test name is 'arch.test_func', e.g., C.TestIntraPred4.
 #define INTRA_PRED_TEST(arch, test_func, dc, dc_left, dc_top, dc_128, v, h, \
-                        d45e, d135, d117, d153, d207e, d63e, tm, smooth)    \
+                        d45e, d135, d117, d153, d207e, d63e, tm, smooth,    \
+                        smooth_v, smooth_h)                                 \
   TEST(arch, test_func) {                                                   \
     static const AvxPredFunc aom_intra_pred[] = {                           \
-      dc,   dc_left, dc_top, dc_128, v,    h,  d45e,                        \
-      d135, d117,    d153,   d207e,  d63e, tm, smooth                       \
+      dc,   dc_left, dc_top, dc_128, v,  h,      d45e,     d135,            \
+      d117, d153,    d207e,  d63e,   tm, smooth, smooth_v, smooth_h         \
     };                                                                      \
     test_func(aom_intra_pred);                                              \
   }
@@ -208,9 +224,18 @@ void TestIntraPred32(AvxPredFunc const *pred_funcs) {
 #if CONFIG_ALT_INTRA
 #define tm_pred_func aom_paeth_predictor_4x4_c
 #define smooth_pred_func aom_smooth_predictor_4x4_c
+#if CONFIG_SMOOTH_HV
+#define smooth_v_pred_func aom_smooth_v_predictor_4x4_c
+#define smooth_h_pred_func aom_smooth_h_predictor_4x4_c
+#else
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
+#endif  // CONFIG_SMOOTH_HV
 #else
 #define tm_pred_func aom_tm_predictor_4x4_c
 #define smooth_pred_func NULL
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
 #endif  // CONFIG_ALT_INTRA
 
 INTRA_PRED_TEST(C, TestIntraPred4, aom_dc_predictor_4x4_c,
@@ -219,10 +244,13 @@ INTRA_PRED_TEST(C, TestIntraPred4, aom_dc_predictor_4x4_c,
                 aom_h_predictor_4x4_c, aom_d45e_predictor_4x4_c,
                 aom_d135_predictor_4x4_c, aom_d117_predictor_4x4_c,
                 aom_d153_predictor_4x4_c, aom_d207e_predictor_4x4_c,
-                aom_d63e_predictor_4x4_c, tm_pred_func, smooth_pred_func)
+                aom_d63e_predictor_4x4_c, tm_pred_func, smooth_pred_func,
+                smooth_v_pred_func, smooth_h_pred_func)
 
 #undef tm_pred_func
 #undef smooth_pred_func
+#undef smooth_v_pred_func
+#undef smooth_h_pred_func
 
 #if HAVE_SSE2
 #if CONFIG_ALT_INTRA
@@ -235,7 +263,7 @@ INTRA_PRED_TEST(SSE2, TestIntraPred4, aom_dc_predictor_4x4_sse2,
                 aom_dc_left_predictor_4x4_sse2, aom_dc_top_predictor_4x4_sse2,
                 aom_dc_128_predictor_4x4_sse2, aom_v_predictor_4x4_sse2,
                 aom_h_predictor_4x4_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
-                tm_pred_func, NULL)
+                tm_pred_func, NULL, NULL, NULL)
 
 #undef tm_pred_func
 #endif  // HAVE_SSE2
@@ -243,7 +271,7 @@ INTRA_PRED_TEST(SSE2, TestIntraPred4, aom_dc_predictor_4x4_sse2,
 #if HAVE_SSSE3
 INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                 NULL, NULL, aom_d153_predictor_4x4_ssse3, NULL,
-                aom_d63e_predictor_4x4_ssse3, NULL, NULL)
+                aom_d63e_predictor_4x4_ssse3, NULL, NULL, NULL, NULL)
 #endif  // HAVE_SSSE3
 
 #if HAVE_DSPR2
@@ -254,7 +282,7 @@ INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 #endif  // CONFIG_ALT_INTRA
 INTRA_PRED_TEST(DSPR2, TestIntraPred4, aom_dc_predictor_4x4_dspr2, NULL, NULL,
                 NULL, NULL, aom_h_predictor_4x4_dspr2, NULL, NULL, NULL, NULL,
-                NULL, NULL, tm_pred_func, NULL)
+                NULL, NULL, tm_pred_func, NULL, NULL, NULL)
 #undef tm_pred_func
 #endif  // HAVE_DSPR2
 
@@ -268,7 +296,7 @@ INTRA_PRED_TEST(NEON, TestIntraPred4, aom_dc_predictor_4x4_neon,
                 aom_dc_left_predictor_4x4_neon, aom_dc_top_predictor_4x4_neon,
                 aom_dc_128_predictor_4x4_neon, aom_v_predictor_4x4_neon,
                 aom_h_predictor_4x4_neon, NULL, aom_d135_predictor_4x4_neon,
-                NULL, NULL, NULL, NULL, tm_pred_func, NULL)
+                NULL, NULL, NULL, NULL, tm_pred_func, NULL, NULL, NULL)
 #undef tm_pred_func
 #endif  // HAVE_NEON
 
@@ -282,7 +310,7 @@ INTRA_PRED_TEST(MSA, TestIntraPred4, aom_dc_predictor_4x4_msa,
                 aom_dc_left_predictor_4x4_msa, aom_dc_top_predictor_4x4_msa,
                 aom_dc_128_predictor_4x4_msa, aom_v_predictor_4x4_msa,
                 aom_h_predictor_4x4_msa, NULL, NULL, NULL, NULL, NULL, NULL,
-                tm_pred_func, NULL)
+                tm_pred_func, NULL, NULL, NULL)
 #undef tm_pred_func
 #endif  // HAVE_MSA
 
@@ -292,9 +320,18 @@ INTRA_PRED_TEST(MSA, TestIntraPred4, aom_dc_predictor_4x4_msa,
 #if CONFIG_ALT_INTRA
 #define tm_pred_func aom_paeth_predictor_8x8_c
 #define smooth_pred_func aom_smooth_predictor_8x8_c
+#if CONFIG_SMOOTH_HV
+#define smooth_v_pred_func aom_smooth_v_predictor_8x8_c
+#define smooth_h_pred_func aom_smooth_h_predictor_8x8_c
+#else
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
+#endif  // CONFIG_SMOOTH_HV
 #else
 #define tm_pred_func aom_tm_predictor_8x8_c
 #define smooth_pred_func NULL
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
 #endif  // CONFIG_ALT_INTRA
 INTRA_PRED_TEST(C, TestIntraPred8, aom_dc_predictor_8x8_c,
                 aom_dc_left_predictor_8x8_c, aom_dc_top_predictor_8x8_c,
@@ -302,9 +339,12 @@ INTRA_PRED_TEST(C, TestIntraPred8, aom_dc_predictor_8x8_c,
                 aom_h_predictor_8x8_c, aom_d45e_predictor_8x8_c,
                 aom_d135_predictor_8x8_c, aom_d117_predictor_8x8_c,
                 aom_d153_predictor_8x8_c, aom_d207e_predictor_8x8_c,
-                aom_d63e_predictor_8x8_c, tm_pred_func, smooth_pred_func)
+                aom_d63e_predictor_8x8_c, tm_pred_func, smooth_pred_func,
+                smooth_v_pred_func, smooth_h_pred_func)
 #undef tm_pred_func
 #undef smooth_pred_func
+#undef smooth_v_pred_func
+#undef smooth_h_pred_func
 
 #if HAVE_SSE2
 #if CONFIG_ALT_INTRA
@@ -316,14 +356,14 @@ INTRA_PRED_TEST(SSE2, TestIntraPred8, aom_dc_predictor_8x8_sse2,
                 aom_dc_left_predictor_8x8_sse2, aom_dc_top_predictor_8x8_sse2,
                 aom_dc_128_predictor_8x8_sse2, aom_v_predictor_8x8_sse2,
                 aom_h_predictor_8x8_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
-                tm_pred_func, NULL)
+                tm_pred_func, NULL, NULL, NULL)
 #undef tm_pred_func
 #endif  // HAVE_SSE2
 
 #if HAVE_SSSE3
 INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                 NULL, NULL, aom_d153_predictor_8x8_ssse3, NULL, NULL, NULL,
-                NULL)
+                NULL, NULL, NULL)
 #endif  // HAVE_SSSE3
 
 #if HAVE_DSPR2
@@ -334,7 +374,7 @@ INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 #endif  // CONFIG_ALT_INTRA
 INTRA_PRED_TEST(DSPR2, TestIntraPred8, aom_dc_predictor_8x8_dspr2, NULL, NULL,
                 NULL, NULL, aom_h_predictor_8x8_dspr2, NULL, NULL, NULL, NULL,
-                NULL, NULL, tm_pred_func, NULL)
+                NULL, NULL, tm_pred_func, NULL, NULL, NULL)
 #undef tm_pred_func
 #endif  // HAVE_DSPR2
 
@@ -348,7 +388,7 @@ INTRA_PRED_TEST(NEON, TestIntraPred8, aom_dc_predictor_8x8_neon,
                 aom_dc_left_predictor_8x8_neon, aom_dc_top_predictor_8x8_neon,
                 aom_dc_128_predictor_8x8_neon, aom_v_predictor_8x8_neon,
                 aom_h_predictor_8x8_neon, NULL, NULL, NULL, NULL, NULL, NULL,
-                tm_pred_func, NULL)
+                tm_pred_func, NULL, NULL, NULL)
 #undef tm_pred_func
 #endif  // HAVE_NEON
 
@@ -362,7 +402,7 @@ INTRA_PRED_TEST(MSA, TestIntraPred8, aom_dc_predictor_8x8_msa,
                 aom_dc_left_predictor_8x8_msa, aom_dc_top_predictor_8x8_msa,
                 aom_dc_128_predictor_8x8_msa, aom_v_predictor_8x8_msa,
                 aom_h_predictor_8x8_msa, NULL, NULL, NULL, NULL, NULL, NULL,
-                tm_pred_func, NULL)
+                tm_pred_func, NULL, NULL, NULL)
 #undef tm_pred_func
 #endif  // HAVE_MSA
 
@@ -372,9 +412,18 @@ INTRA_PRED_TEST(MSA, TestIntraPred8, aom_dc_predictor_8x8_msa,
 #if CONFIG_ALT_INTRA
 #define tm_pred_func aom_paeth_predictor_16x16_c
 #define smooth_pred_func aom_smooth_predictor_16x16_c
+#if CONFIG_SMOOTH_HV
+#define smooth_v_pred_func aom_smooth_v_predictor_16x16_c
+#define smooth_h_pred_func aom_smooth_h_predictor_16x16_c
+#else
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
+#endif  // CONFIG_SMOOTH_HV
 #else
 #define tm_pred_func aom_tm_predictor_16x16_c
 #define smooth_pred_func NULL
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
 #endif  // CONFIG_ALT_INTRA
 INTRA_PRED_TEST(C, TestIntraPred16, aom_dc_predictor_16x16_c,
                 aom_dc_left_predictor_16x16_c, aom_dc_top_predictor_16x16_c,
@@ -382,9 +431,12 @@ INTRA_PRED_TEST(C, TestIntraPred16, aom_dc_predictor_16x16_c,
                 aom_h_predictor_16x16_c, aom_d45e_predictor_16x16_c,
                 aom_d135_predictor_16x16_c, aom_d117_predictor_16x16_c,
                 aom_d153_predictor_16x16_c, aom_d207e_predictor_16x16_c,
-                aom_d63e_predictor_16x16_c, tm_pred_func, smooth_pred_func)
+                aom_d63e_predictor_16x16_c, tm_pred_func, smooth_pred_func,
+                smooth_v_pred_func, smooth_h_pred_func)
 #undef tm_pred_func
 #undef smooth_pred_func
+#undef smooth_v_pred_func
+#undef smooth_h_pred_func
 
 #if HAVE_SSE2
 #if CONFIG_ALT_INTRA
@@ -397,20 +449,20 @@ INTRA_PRED_TEST(SSE2, TestIntraPred16, aom_dc_predictor_16x16_sse2,
                 aom_dc_top_predictor_16x16_sse2,
                 aom_dc_128_predictor_16x16_sse2, aom_v_predictor_16x16_sse2,
                 aom_h_predictor_16x16_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
-                tm_pred_func, NULL)
+                tm_pred_func, NULL, NULL, NULL)
 #undef tm_pred_func
 #endif  // HAVE_SSE2
 
 #if HAVE_SSSE3
 INTRA_PRED_TEST(SSSE3, TestIntraPred16, NULL, NULL, NULL, NULL, NULL, NULL,
                 NULL, NULL, NULL, aom_d153_predictor_16x16_ssse3, NULL, NULL,
-                NULL, NULL)
+                NULL, NULL, NULL, NULL)
 #endif  // HAVE_SSSE3
 
 #if HAVE_DSPR2
 INTRA_PRED_TEST(DSPR2, TestIntraPred16, aom_dc_predictor_16x16_dspr2, NULL,
                 NULL, NULL, NULL, aom_h_predictor_16x16_dspr2, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL, NULL)
+                NULL, NULL, NULL, NULL, NULL, NULL, NULL)
 #endif  // HAVE_DSPR2
 
 #if HAVE_NEON
@@ -424,7 +476,7 @@ INTRA_PRED_TEST(NEON, TestIntraPred16, aom_dc_predictor_16x16_neon,
                 aom_dc_top_predictor_16x16_neon,
                 aom_dc_128_predictor_16x16_neon, aom_v_predictor_16x16_neon,
                 aom_h_predictor_16x16_neon, NULL, NULL, NULL, NULL, NULL, NULL,
-                tm_pred_func, NULL)
+                tm_pred_func, NULL, NULL, NULL)
 #undef tm_pred_func
 #endif  // HAVE_NEON
 
@@ -438,7 +490,7 @@ INTRA_PRED_TEST(MSA, TestIntraPred16, aom_dc_predictor_16x16_msa,
                 aom_dc_left_predictor_16x16_msa, aom_dc_top_predictor_16x16_msa,
                 aom_dc_128_predictor_16x16_msa, aom_v_predictor_16x16_msa,
                 aom_h_predictor_16x16_msa, NULL, NULL, NULL, NULL, NULL, NULL,
-                tm_pred_func, NULL)
+                tm_pred_func, NULL, NULL, NULL)
 #undef tm_pred_func
 #endif  // HAVE_MSA
 
@@ -448,9 +500,18 @@ INTRA_PRED_TEST(MSA, TestIntraPred16, aom_dc_predictor_16x16_msa,
 #if CONFIG_ALT_INTRA
 #define tm_pred_func aom_paeth_predictor_32x32_c
 #define smooth_pred_func aom_smooth_predictor_32x32_c
+#if CONFIG_SMOOTH_HV
+#define smooth_v_pred_func aom_smooth_v_predictor_32x32_c
+#define smooth_h_pred_func aom_smooth_h_predictor_32x32_c
+#else
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
+#endif  // CONFIG_SMOOTH_HV
 #else
 #define tm_pred_func aom_tm_predictor_32x32_c
 #define smooth_pred_func NULL
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
 #endif  // CONFIG_ALT_INTRA
 INTRA_PRED_TEST(C, TestIntraPred32, aom_dc_predictor_32x32_c,
                 aom_dc_left_predictor_32x32_c, aom_dc_top_predictor_32x32_c,
@@ -458,9 +519,12 @@ INTRA_PRED_TEST(C, TestIntraPred32, aom_dc_predictor_32x32_c,
                 aom_h_predictor_32x32_c, aom_d45e_predictor_32x32_c,
                 aom_d135_predictor_32x32_c, aom_d117_predictor_32x32_c,
                 aom_d153_predictor_32x32_c, aom_d207e_predictor_32x32_c,
-                aom_d63e_predictor_32x32_c, tm_pred_func, smooth_pred_func)
+                aom_d63e_predictor_32x32_c, tm_pred_func, smooth_pred_func,
+                smooth_v_pred_func, smooth_h_pred_func)
 #undef tm_pred_func
 #undef smooth_pred_func
+#undef smooth_v_pred_func
+#undef smooth_h_pred_func
 
 #if HAVE_SSE2
 #if CONFIG_ALT_INTRA
@@ -473,14 +537,14 @@ INTRA_PRED_TEST(SSE2, TestIntraPred32, aom_dc_predictor_32x32_sse2,
                 aom_dc_top_predictor_32x32_sse2,
                 aom_dc_128_predictor_32x32_sse2, aom_v_predictor_32x32_sse2,
                 aom_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
-                tm_pred_func, NULL)
+                tm_pred_func, NULL, NULL, NULL)
 #undef tm_pred_func
 #endif  // HAVE_SSE2
 
 #if HAVE_SSSE3
 INTRA_PRED_TEST(SSSE3, TestIntraPred32, NULL, NULL, NULL, NULL, NULL, NULL,
                 NULL, NULL, NULL, aom_d153_predictor_32x32_ssse3, NULL, NULL,
-                NULL, NULL)
+                NULL, NULL, NULL, NULL)
 #endif  // HAVE_SSSE3
 
 #if HAVE_NEON
@@ -494,7 +558,7 @@ INTRA_PRED_TEST(NEON, TestIntraPred32, aom_dc_predictor_32x32_neon,
                 aom_dc_top_predictor_32x32_neon,
                 aom_dc_128_predictor_32x32_neon, aom_v_predictor_32x32_neon,
                 aom_h_predictor_32x32_neon, NULL, NULL, NULL, NULL, NULL, NULL,
-                tm_pred_func, NULL)
+                tm_pred_func, NULL, NULL, NULL)
 #undef tm_pred_func
 #endif  // HAVE_NEON
 
@@ -508,7 +572,7 @@ INTRA_PRED_TEST(MSA, TestIntraPred32, aom_dc_predictor_32x32_msa,
                 aom_dc_left_predictor_32x32_msa, aom_dc_top_predictor_32x32_msa,
                 aom_dc_128_predictor_32x32_msa, aom_v_predictor_32x32_msa,
                 aom_h_predictor_32x32_msa, NULL, NULL, NULL, NULL, NULL, NULL,
-                tm_pred_func, NULL)
+                tm_pred_func, NULL, NULL, NULL)
 #undef tm_pred_func
 #endif  // HAVE_MSA
 
diff --git a/third_party/aom/test/test_runner.cmake b/third_party/aom/test/test_runner.cmake
new file mode 100644
index 000000000..48ebaf570
--- /dev/null
+++ b/third_party/aom/test/test_runner.cmake
@@ -0,0 +1,20 @@
+##
+## Copyright (c) 2017, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+if (NOT GTEST_TOTAL_SHARDS OR "${GTEST_SHARD_INDEX}" STREQUAL ""
+    OR NOT TEST_LIBAOM)
+  message(FATAL_ERROR
+          "The variables GTEST_SHARD_INDEX, GTEST_TOTAL_SHARDS and TEST_LIBAOM
+          must be defined.")
+endif ()
+# Export the shard settings into the environment inherited by the test binary.
+set(ENV{GTEST_SHARD_INDEX} ${GTEST_SHARD_INDEX})
+set(ENV{GTEST_TOTAL_SHARDS} ${GTEST_TOTAL_SHARDS})
+execute_process(COMMAND ${TEST_LIBAOM})
diff --git a/third_party/aom/test/test_worker.cmake b/third_party/aom/test/test_worker.cmake
deleted file mode 100644
index fa1d58130..000000000
--- a/third_party/aom/test/test_worker.cmake
+++ /dev/null
@@ -1,49 +0,0 @@
-##
-## Copyright (c) 2017, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-if (NOT AOM_ROOT OR NOT AOM_CONFIG_DIR)
-  message(FATAL_ERROR "AOM_ROOT AND AOM_CONFIG_DIR must be defined.")
-endif ()
-
-set(AOM_TEST_DATA_LIST "${AOM_ROOT}/test/test-data.sha1")
-set(AOM_TEST_DATA_URL "http://downloads.webmproject.org/test_data/libvpx")
-set(AOM_TEST_DATA_PATH "$ENV{LIBAOM_TEST_DATA_PATH}")
-
-include("${AOM_ROOT}/test/test_data_util.cmake")
-
-if (${AOM_TEST_DATA_PATH} STREQUAL "")
-  message(WARNING "Writing test data to ${AOM_CONFIG_DIR}, set "
-          "$LIBAOM_TEST_DATA_PATH in your environment to avoid this warning.")
-  set(AOM_TEST_DATA_PATH "${AOM_CONFIG_DIR}")
-endif ()
-
-if (NOT EXISTS "${AOM_TEST_DATA_PATH}")
-  file(MAKE_DIRECTORY "${AOM_TEST_DATA_PATH}")
-endif ()
-
-make_test_data_lists("AOM_TEST_DATA_FILES" "AOM_TEST_DATA_CHECKSUMS")
-expand_test_file_paths("AOM_TEST_DATA_FILES" "${AOM_TEST_DATA_PATH}"
-                       "AOM_TEST_DATA_FILE_PATHS")
-expand_test_file_paths("AOM_TEST_DATA_FILES" "${AOM_TEST_DATA_URL}"
-                       "AOM_TEST_DATA_URLS")
-list(LENGTH AOM_TEST_DATA_FILES num_files)
-math(EXPR num_files "${num_files} - 1")
-
-foreach (file_num RANGE ${num_files})
-  list(GET AOM_TEST_DATA_FILES ${file_num} filename)
-  list(GET AOM_TEST_DATA_CHECKSUMS ${file_num} checksum)
-  list(GET AOM_TEST_DATA_FILE_PATHS ${file_num} filepath)
-  list(GET AOM_TEST_DATA_URLS ${file_num} url)
-
-  check_file("${filepath}" "${checksum}" "needs_download")
-  if (needs_download)
-    download_test_file("${url}" "${checksum}" "${filepath}")
-  endif ()
-endforeach ()
diff --git a/third_party/aom/test/variance_test.cc b/third_party/aom/test/variance_test.cc
index 5b1003ca7..c712f96e2 100644
--- a/third_party/aom/test/variance_test.cc
+++ b/third_party/aom/test/variance_test.cc
@@ -565,8 +565,8 @@ class SubpelVarianceTest
           aom_memalign(16, block_size_ * sizeof(uint16_t))));
       sec_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
           aom_memalign(16, block_size_ * sizeof(uint16_t))));
-      ref_ =
-          CONVERT_TO_BYTEPTR(new uint16_t[block_size_ + width_ + height_ + 1]);
+      ref_ = CONVERT_TO_BYTEPTR(aom_memalign(
+          16, (block_size_ + width_ + height_ + 1) * sizeof(uint16_t)));
 #endif  // CONFIG_HIGHBITDEPTH
     }
     ASSERT_TRUE(src_ != NULL);
@@ -582,7 +582,7 @@ class SubpelVarianceTest
 #if CONFIG_HIGHBITDEPTH
     } else {
       aom_free(CONVERT_TO_SHORTPTR(src_));
-      delete[] CONVERT_TO_SHORTPTR(ref_);
+      aom_free(CONVERT_TO_SHORTPTR(ref_));
       aom_free(CONVERT_TO_SHORTPTR(sec_));
 #endif  // CONFIG_HIGHBITDEPTH
     }
@@ -1277,22 +1277,6 @@ INSTANTIATE_TEST_CASE_P(
         make_tuple(5, 5, &aom_sub_pixel_avg_variance32x32_avx2, 0)));
 #endif  // HAVE_AVX2
 
-#if HAVE_MEDIA
-INSTANTIATE_TEST_CASE_P(MEDIA, AvxMseTest,
-                        ::testing::Values(MseParams(4, 4,
-                                                    &aom_mse16x16_media)));
-
-INSTANTIATE_TEST_CASE_P(
-    MEDIA, AvxVarianceTest,
-    ::testing::Values(VarianceParams(4, 4, &aom_variance16x16_media),
-                      VarianceParams(3, 3, &aom_variance8x8_media)));
-
-INSTANTIATE_TEST_CASE_P(
-    MEDIA, AvxSubpelVarianceTest,
-    ::testing::Values(make_tuple(4, 4, &aom_sub_pixel_variance16x16_media, 0),
-                      make_tuple(3, 3, &aom_sub_pixel_variance8x8_media, 0)));
-#endif  // HAVE_MEDIA
-
 #if HAVE_NEON
 INSTANTIATE_TEST_CASE_P(NEON, AvxSseTest,
                         ::testing::Values(SseParams(2, 2,
diff --git a/third_party/aom/test/warp_filter_test.cc b/third_party/aom/test/warp_filter_test.cc
index fd6608bfc..2e4e6c32b 100644
--- a/third_party/aom/test/warp_filter_test.cc
+++ b/third_party/aom/test/warp_filter_test.cc
@@ -22,12 +22,20 @@ using libaom_test::AV1HighbdWarpFilter::AV1HighbdWarpFilterTest;
 
 namespace {
 
-TEST_P(AV1WarpFilterTest, CheckOutput) { RunCheckOutput(av1_warp_affine_sse2); }
+TEST_P(AV1WarpFilterTest, CheckOutput) { RunCheckOutput(GET_PARAM(3)); }
 
-INSTANTIATE_TEST_CASE_P(SSE2, AV1WarpFilterTest,
-                        libaom_test::AV1WarpFilter::GetDefaultParams());
+INSTANTIATE_TEST_CASE_P(
+    SSE2, AV1WarpFilterTest,
+    libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_sse2));
 
-#if CONFIG_HIGHBITDEPTH
+// Guard the SSSE3 variant like the high-bitdepth SSSE3 test below.
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, AV1WarpFilterTest,
+    libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_ssse3));
+#endif  // HAVE_SSSE3
+
+#if CONFIG_HIGHBITDEPTH && HAVE_SSSE3
 TEST_P(AV1HighbdWarpFilterTest, CheckOutput) {
   RunCheckOutput(av1_highbd_warp_affine_ssse3);
 }
diff --git a/third_party/aom/test/warp_filter_test_util.cc b/third_party/aom/test/warp_filter_test_util.cc
index 1ce265b60..ac7518f47 100644
--- a/third_party/aom/test/warp_filter_test_util.cc
+++ b/third_party/aom/test/warp_filter_test_util.cc
@@ -13,23 +13,19 @@
 
 using std::tr1::tuple;
 using std::tr1::make_tuple;
-using std::vector;
-using libaom_test::ACMRandom;
-using libaom_test::AV1WarpFilter::AV1WarpFilterTest;
-using libaom_test::AV1WarpFilter::WarpTestParam;
-#if CONFIG_HIGHBITDEPTH
-using libaom_test::AV1HighbdWarpFilter::AV1HighbdWarpFilterTest;
-using libaom_test::AV1HighbdWarpFilter::HighbdWarpTestParam;
-#endif
-
-::testing::internal::ParamGenerator<WarpTestParam>
-libaom_test::AV1WarpFilter::GetDefaultParams() {
-  const WarpTestParam defaultParams[] = {
-    make_tuple(4, 4, 50000),  make_tuple(8, 8, 50000),
-    make_tuple(64, 64, 1000), make_tuple(4, 16, 20000),
-    make_tuple(32, 8, 10000),
+
+namespace libaom_test {
+
+namespace AV1WarpFilter {
+
+::testing::internal::ParamGenerator<WarpTestParam> BuildParams(
+    warp_affine_func filter) {
+  const WarpTestParam params[] = {
+    make_tuple(4, 4, 50000, filter),  make_tuple(8, 8, 50000, filter),
+    make_tuple(64, 64, 1000, filter), make_tuple(4, 16, 20000, filter),
+    make_tuple(32, 8, 10000, filter),
   };
-  return ::testing::ValuesIn(defaultParams);
+  return ::testing::ValuesIn(params);
 }
 
 AV1WarpFilterTest::~AV1WarpFilterTest() {}
@@ -84,6 +80,15 @@ void AV1WarpFilterTest::generate_model(int32_t *mat, int16_t *alpha,
         (4 * abs(*gamma) + 4 * abs(*delta) >= (1 << WARPEDMODEL_PREC_BITS)))
       continue;
 
+    *alpha = ROUND_POWER_OF_TWO_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS) *
+             (1 << WARP_PARAM_REDUCE_BITS);
+    *beta = ROUND_POWER_OF_TWO_SIGNED(*beta, WARP_PARAM_REDUCE_BITS) *
+            (1 << WARP_PARAM_REDUCE_BITS);
+    *gamma = ROUND_POWER_OF_TWO_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS) *
+             (1 << WARP_PARAM_REDUCE_BITS);
+    *delta = ROUND_POWER_OF_TWO_SIGNED(*delta, WARP_PARAM_REDUCE_BITS) *
+             (1 << WARP_PARAM_REDUCE_BITS);
+
     // We have a valid model, so finish
     return;
   }
@@ -136,10 +141,12 @@ void AV1WarpFilterTest::RunCheckOutput(warp_affine_func test_impl) {
   delete[] output;
   delete[] output2;
 }
+}  // namespace AV1WarpFilter
 
 #if CONFIG_HIGHBITDEPTH
-::testing::internal::ParamGenerator<HighbdWarpTestParam>
-libaom_test::AV1HighbdWarpFilter::GetDefaultParams() {
+namespace AV1HighbdWarpFilter {
+
+::testing::internal::ParamGenerator<HighbdWarpTestParam> GetDefaultParams() {
   const HighbdWarpTestParam defaultParams[] = {
     make_tuple(4, 4, 50000, 8),   make_tuple(8, 8, 50000, 8),
     make_tuple(64, 64, 1000, 8),  make_tuple(4, 16, 20000, 8),
@@ -207,6 +214,15 @@ void AV1HighbdWarpFilterTest::generate_model(int32_t *mat, int16_t *alpha,
         (4 * abs(*gamma) + 4 * abs(*delta) >= (1 << WARPEDMODEL_PREC_BITS)))
       continue;
 
+    *alpha = ROUND_POWER_OF_TWO_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS) *
+             (1 << WARP_PARAM_REDUCE_BITS);
+    *beta = ROUND_POWER_OF_TWO_SIGNED(*beta, WARP_PARAM_REDUCE_BITS) *
+            (1 << WARP_PARAM_REDUCE_BITS);
+    *gamma = ROUND_POWER_OF_TWO_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS) *
+             (1 << WARP_PARAM_REDUCE_BITS);
+    *delta = ROUND_POWER_OF_TWO_SIGNED(*delta, WARP_PARAM_REDUCE_BITS) *
+             (1 << WARP_PARAM_REDUCE_BITS);
+
     // We have a valid model, so finish
     return;
   }
@@ -265,4 +281,6 @@ void AV1HighbdWarpFilterTest::RunCheckOutput(
   delete[] output;
   delete[] output2;
 }
+}  // namespace AV1HighbdWarpFilter
 #endif  // CONFIG_HIGHBITDEPTH
+}  // namespace libaom_test
diff --git a/third_party/aom/test/warp_filter_test_util.h b/third_party/aom/test/warp_filter_test_util.h
index 6a87e46d0..651a9f830 100644
--- a/third_party/aom/test/warp_filter_test_util.h
+++ b/third_party/aom/test/warp_filter_test_util.h
@@ -26,16 +26,18 @@ namespace libaom_test {
 
 namespace AV1WarpFilter {
 
-typedef void (*warp_affine_func)(int32_t *mat, uint8_t *ref, int width,
-                                 int height, int stride, uint8_t *pred,
-                                 int p_col, int p_row, int p_width,
-                                 int p_height, int p_stride, int subsampling_x,
-                                 int subsampling_y, int ref_frm, int16_t alpha,
-                                 int16_t beta, int16_t gamma, int16_t delta);
+typedef void (*warp_affine_func)(const int32_t *mat, const uint8_t *ref,
+                                 int width, int height, int stride,
+                                 uint8_t *pred, int p_col, int p_row,
+                                 int p_width, int p_height, int p_stride,
+                                 int subsampling_x, int subsampling_y,
+                                 int ref_frm, int16_t alpha, int16_t beta,
+                                 int16_t gamma, int16_t delta);
 
-typedef std::tr1::tuple<int, int, int> WarpTestParam;
+typedef std::tr1::tuple<int, int, int, warp_affine_func> WarpTestParam;
 
-::testing::internal::ParamGenerator<WarpTestParam> GetDefaultParams();
+::testing::internal::ParamGenerator<WarpTestParam> BuildParams(
+    warp_affine_func filter);
 
 class AV1WarpFilterTest : public ::testing::TestWithParam<WarpTestParam> {
  public:
@@ -59,7 +61,7 @@ class AV1WarpFilterTest : public ::testing::TestWithParam<WarpTestParam> {
 #if CONFIG_HIGHBITDEPTH
 namespace AV1HighbdWarpFilter {
 typedef void (*highbd_warp_affine_func)(
-    int32_t *mat, uint16_t *ref, int width, int height, int stride,
+    const int32_t *mat, const uint16_t *ref, int width, int height, int stride,
     uint16_t *pred, int p_col, int p_row, int p_width, int p_height,
     int p_stride, int subsampling_x, int subsampling_y, int bd, int ref_frm,
     int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);