7 files changed, 51 insertions, 29 deletions
diff --git a/third_party/aom/aom_dsp/x86/highbd_intrapred_avx2.c b/third_party/aom/aom_dsp/x86/highbd_intrapred_avx2.c
index 41b55c985..e001a1d70 100644
--- a/third_party/aom/aom_dsp/x86/highbd_intrapred_avx2.c
+++ b/third_party/aom/aom_dsp/x86/highbd_intrapred_avx2.c
@@ -11,6 +11,7 @@
 
 #include <immintrin.h>
 
+#include "aom_ports/msvc.h"
 #include "./aom_dsp_rtcd.h"
 
 // -----------------------------------------------------------------------------
diff --git a/third_party/aom/aom_ports/msvc.h b/third_party/aom/aom_ports/msvc.h
index 2d3ab9b65..5a41d29d2 100644
--- a/third_party/aom/aom_ports/msvc.h
+++ b/third_party/aom/aom_ports/msvc.h
@@ -43,5 +43,25 @@ static INLINE long lroundf(float x) {
 }
 #endif  // _MSC_VER < 1800
 
+#if HAVE_AVX
+#include <immintrin.h>
+// Note:
+// The _mm256_insert_epi16 intrinsic is only available from VS2017 onward.
+// We define this macro for VS2015 and earlier. The intrinsics used here
+// are listed in the VS2015 documentation:
+// https://msdn.microsoft.com/en-us/library/hh977022.aspx
+// Input parameters:
+// a: __m256i,
+// d: int16_t,
+// indx: imm8 (0 - 15)
+#if _MSC_VER <= 1900
+#define _mm256_insert_epi16(a, d, indx)                                    \
+  _mm256_insertf128_si256(                                                 \
+      (a),                                                                 \
+      _mm_insert_epi16(_mm256_extractf128_si256((a), (indx) >> 3), (d),    \
+                       (indx) % 8), (indx) >> 3)
+#endif  // _MSC_VER <= 1900
+#endif  // HAVE_AVX
+
 #endif  // _MSC_VER
 #endif  // AOM_PORTS_MSVC_H_
diff --git a/third_party/aom/av1/common/reconinter.c b/third_party/aom/av1/common/reconinter.c
index d7e39b45c..a1a22a0af 100644
--- a/third_party/aom/av1/common/reconinter.c
+++ b/third_party/aom/av1/common/reconinter.c
@@ -1728,9 +1728,9 @@ void av1_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
   av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, ctx, bsize);
 }
 
-void av1_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
-                          BLOCK_SIZE bsize, const YV12_BUFFER_CONFIG *src,
-                          int mi_row, int mi_col) {
+void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
+                          const YV12_BUFFER_CONFIG *src, int mi_row,
+                          int mi_col) {
   const int widths[MAX_MB_PLANE] = { src->y_crop_width, src->uv_crop_width,
                                      src->uv_crop_width };
   const int heights[MAX_MB_PLANE] = { src->y_crop_height, src->uv_crop_height,
diff --git a/third_party/aom/av1/common/reconinter.h b/third_party/aom/av1/common/reconinter.h
index fd69f9db3..0c3333339 100644
--- a/third_party/aom/av1/common/reconinter.h
+++ b/third_party/aom/av1/common/reconinter.h
@@ -446,9 +446,9 @@ static INLINE void setup_pred_plane(struct buf_2d *dst, BLOCK_SIZE bsize,
   dst->stride = stride;
 }
 
-void av1_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
-                          BLOCK_SIZE bsize, const YV12_BUFFER_CONFIG *src,
-                          int mi_row, int mi_col);
+void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
+                          const YV12_BUFFER_CONFIG *src, int mi_row,
+                          int mi_col);
 
 void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
                           const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
diff --git a/third_party/aom/av1/common/thread_common.c b/third_party/aom/av1/common/thread_common.c
index eec8629ff..4c9fa6962 100644
--- a/third_party/aom/av1/common/thread_common.c
+++ b/third_party/aom/av1/common/thread_common.c
@@ -86,7 +86,7 @@ static INLINE void sync_write(AV1LfSync *const lf_sync, int r, int c,
 
 #if !CONFIG_EXT_PARTITION_TYPES
 static INLINE enum lf_path get_loop_filter_path(
-    int y_only, struct macroblockd_plane planes[MAX_MB_PLANE]) {
+    int y_only, struct macroblockd_plane *planes) {
   if (y_only)
     return LF_PATH_444;
   else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
@@ -98,7 +98,7 @@ static INLINE enum lf_path get_loop_filter_path(
 }
 
 static INLINE void loop_filter_block_plane_ver(
-    AV1_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int plane,
+    AV1_COMMON *cm, struct macroblockd_plane *planes, int plane,
     MODE_INFO **mi, int mi_row, int mi_col, enum lf_path path,
     LOOP_FILTER_MASK *lfm) {
   if (plane == 0) {
@@ -120,7 +120,7 @@ static INLINE void loop_filter_block_plane_ver(
 }
 
 static INLINE void loop_filter_block_plane_hor(
-    AV1_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int plane,
+    AV1_COMMON *cm, struct macroblockd_plane *planes, int plane,
     MODE_INFO **mi, int mi_row, int mi_col, enum lf_path path,
     LOOP_FILTER_MASK *lfm) {
   if (plane == 0) {
@@ -286,10 +286,9 @@ static int loop_filter_row_worker(AV1LfSync *const lf_sync,
 #endif  //  CONFIG_PARALLEL_DEBLOCKING
 
 static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
-                                struct macroblockd_plane planes[MAX_MB_PLANE],
-                                int start, int stop, int y_only,
-                                AVxWorker *workers, int nworkers,
-                                AV1LfSync *lf_sync) {
+                                struct macroblockd_plane *planes, int start,
+                                int stop, int y_only, AVxWorker *workers,
+                                int nworkers, AV1LfSync *lf_sync) {
 #if CONFIG_EXT_PARTITION
   printf(
       "STOPPING: This code has not been modified to work with the "
@@ -415,7 +414,7 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
 }
 
 void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
-                              struct macroblockd_plane planes[MAX_MB_PLANE],
+                              struct macroblockd_plane *planes,
                               int frame_filter_level,
 #if CONFIG_LOOPFILTER_LEVEL
                               int frame_filter_level_r,
diff --git a/third_party/aom/av1/common/thread_common.h b/third_party/aom/av1/common/thread_common.h
index 6d118e60b..7eddc662c 100644
--- a/third_party/aom/av1/common/thread_common.h
+++ b/third_party/aom/av1/common/thread_common.h
@@ -49,7 +49,7 @@ void av1_loop_filter_dealloc(AV1LfSync *lf_sync);
 
 // Multi-threaded loopfilter that uses the tile threads.
 void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
-                              struct macroblockd_plane planes[MAX_MB_PLANE],
+                              struct macroblockd_plane *planes,
                               int frame_filter_level,
 #if CONFIG_LOOPFILTER_LEVEL
                               int frame_filter_level_r,
diff --git a/third_party/aom/av1/common/x86/selfguided_sse4.c b/third_party/aom/av1/common/x86/selfguided_sse4.c
index 4006b8518..9de9177c1 100644
--- a/third_party/aom/av1/common/x86/selfguided_sse4.c
+++ b/third_party/aom/av1/common/x86/selfguided_sse4.c
@@ -10,9 +10,11 @@
    av1_selfguided_restoration)
 */
 static void calc_block(__m128i sum, __m128i sum_sq, __m128i n,
-                       __m128i one_over_n, __m128i s, int bit_depth, int idx,
-                       int32_t *A, int32_t *B) {
+                       __m128i *one_over_n_, __m128i *s_, int bit_depth,
+                       int idx, int32_t *A, int32_t *B) {
   __m128i a, b, p;
+  __m128i one_over_n = *one_over_n_;
+  __m128i s = *s_;
 #if CONFIG_HIGHBITDEPTH
   if (bit_depth > 8) {
     __m128i rounding_a = _mm_set1_epi32((1 << (2 * (bit_depth - 8))) >> 1);
@@ -147,7 +149,7 @@ static void selfguided_restoration_1_h(int32_t *A, int32_t *B, int width,
     __m128i s = _mm_set_epi32(
         sgrproj_mtable[eps - 1][3 * h - 1], sgrproj_mtable[eps - 1][3 * h - 1],
         sgrproj_mtable[eps - 1][3 * h - 1], sgrproj_mtable[eps - 1][2 * h - 1]);
-    calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride, A,
+    calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride, A,
                B);
 
     n = _mm_set1_epi32(3 * h);
@@ -178,8 +180,8 @@ static void selfguided_restoration_1_h(int32_t *A, int32_t *B, int width,
                                              _mm_alignr_epi8(b2, b1, 8)));
       sum_sq_ = _mm_add_epi32(a1, _mm_add_epi32(_mm_alignr_epi8(a2, a1, 4),
                                                 _mm_alignr_epi8(a2, a1, 8)));
-      calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride + j,
-                 A, B);
+      calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth,
+                 i * buf_stride + j, A, B);
     }
     __m128i a3 = _mm_loadu_si128((__m128i *)&A[i * buf_stride + j + 3]);
     __m128i b3 = _mm_loadu_si128((__m128i *)&B[i * buf_stride + j + 3]);
@@ -227,7 +229,7 @@ static void selfguided_restoration_1_h(int32_t *A, int32_t *B, int width,
     s = _mm_set_epi32(
         sgrproj_mtable[eps - 1][2 * h - 1], sgrproj_mtable[eps - 1][3 * h - 1],
         sgrproj_mtable[eps - 1][3 * h - 1], sgrproj_mtable[eps - 1][3 * h - 1]);
-    calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride + j,
+    calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride + j,
                A, B);
   }
 }
@@ -342,7 +344,7 @@ static void selfguided_restoration_2_h(int32_t *A, int32_t *B, int width,
     __m128i s = _mm_set_epi32(
         sgrproj_mtable[eps - 1][5 * h - 1], sgrproj_mtable[eps - 1][5 * h - 1],
         sgrproj_mtable[eps - 1][4 * h - 1], sgrproj_mtable[eps - 1][3 * h - 1]);
-    calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride, A,
+    calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride, A,
                B);
 
     // Re-align a1 and b1 so that they start at index i * buf_stride + 2
@@ -372,8 +374,8 @@ static void selfguided_restoration_2_h(int32_t *A, int32_t *B, int width,
                                           _mm_alignr_epi8(a2, a1, 8))),
           _mm_add_epi32(_mm_alignr_epi8(a2, a1, 12), a2));
 
-      calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride + j,
-                 A, B);
+      calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth,
+                 i * buf_stride + j, A, B);
     }
     // If the width is not a multiple of 4, we need to reset j to width - 4
     // and adjust a1, a2, b1, b2 so that the loop invariant above is maintained
@@ -428,7 +430,7 @@ static void selfguided_restoration_2_h(int32_t *A, int32_t *B, int width,
     s = _mm_set_epi32(
         sgrproj_mtable[eps - 1][3 * h - 1], sgrproj_mtable[eps - 1][4 * h - 1],
         sgrproj_mtable[eps - 1][5 * h - 1], sgrproj_mtable[eps - 1][5 * h - 1]);
-    calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride + j,
+    calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride + j,
                A, B);
   }
 }
@@ -562,7 +564,7 @@ static void selfguided_restoration_3_h(int32_t *A, int32_t *B, int width,
     __m128i s = _mm_set_epi32(
         sgrproj_mtable[eps - 1][7 * h - 1], sgrproj_mtable[eps - 1][6 * h - 1],
         sgrproj_mtable[eps - 1][5 * h - 1], sgrproj_mtable[eps - 1][4 * h - 1]);
-    calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride, A,
+    calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride, A,
                B);
 
     // Re-align a1 and b1 so that they start at index i * buf_stride + 1
@@ -599,8 +601,8 @@ static void selfguided_restoration_3_h(int32_t *A, int32_t *B, int width,
           _mm_add_epi32(_mm_add_epi32(a2, _mm_alignr_epi8(a3, a2, 4)),
                         _mm_alignr_epi8(a3, a2, 8)));
 
-      calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride + j,
-                 A, B);
+      calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth,
+                 i * buf_stride + j, A, B);
     }
     __m128i a3 = _mm_loadu_si128((__m128i *)&A[i * buf_stride + j + 1]);
     __m128i b3 = _mm_loadu_si128((__m128i *)&B[i * buf_stride + j + 1]);
@@ -657,7 +659,7 @@ static void selfguided_restoration_3_h(int32_t *A, int32_t *B, int width,
     s = _mm_set_epi32(
         sgrproj_mtable[eps - 1][4 * h - 1], sgrproj_mtable[eps - 1][5 * h - 1],
         sgrproj_mtable[eps - 1][6 * h - 1], sgrproj_mtable[eps - 1][7 * h - 1]);
-    calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride + j,
+    calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride + j,
                A, B);
   }
 }