1 files changed, 159 insertions, 286 deletions
diff --git a/third_party/aom/av1/common/mvref_common.h b/third_party/aom/av1/common/mvref_common.h
index 348887e43..716b4a247 100644
--- a/third_party/aom/av1/common/mvref_common.h
+++ b/third_party/aom/av1/common/mvref_common.h
@@ -18,103 +18,36 @@
 extern "C" {
 #endif
 
-#define MVREF_NEIGHBOURS 9
-#define MVREF_ROWS 3
-#define MVREF_COLS 4
+#define MVREF_ROW_COLS 3
+
+// Set the upper limit of the motion vector component magnitude.
+// This would make a motion vector fit in 26 bits. Plus 3 bits for the
+// reference frame index. A tuple of motion vector can hence be stored within
+// 32 bit range for efficient load/store operations.
+#define REFMVS_LIMIT ((1 << 12) - 1)
 
 typedef struct position {
   int row;
   int col;
 } POSITION;
 
-typedef enum {
-  BOTH_ZERO = 0,
-  ZERO_PLUS_PREDICTED = 1,
-  BOTH_PREDICTED = 2,
-  NEW_PLUS_NON_INTRA = 3,
-  BOTH_NEW = 4,
-  INTRA_PLUS_NON_INTRA = 5,
-  BOTH_INTRA = 6,
-  INVALID_CASE = 9
-} motion_vector_context;
-
-// This is used to figure out a context for the ref blocks. The code flattens
-// an array that would have 3 possible counts (0, 1 & 2) for 3 choices by
-// adding 9 for each intra block, 3 for each zero mv and 1 for each new
-// motion vector. This single number is then converted into a context
-// with a single lookup ( counter_to_context ).
-static const int mode_2_counter[] = {
-  9,  // DC_PRED
-  9,  // V_PRED
-  9,  // H_PRED
-  9,  // D45_PRED
-  9,  // D135_PRED
-  9,  // D117_PRED
-  9,  // D153_PRED
-  9,  // D207_PRED
-  9,  // D63_PRED
-  9,  // SMOOTH_PRED
-#if CONFIG_SMOOTH_HV
-  9,    // SMOOTH_V_PRED
-  9,    // SMOOTH_H_PRED
-#endif  // CONFIG_SMOOTH_HV
-  9,    // TM_PRED
-  0,    // NEARESTMV
-  0,    // NEARMV
-  3,    // ZEROMV
-  1,    // NEWMV
-#if CONFIG_COMPOUND_SINGLEREF
-  0,    // SR_NEAREST_NEARMV
-        //  1,    // SR_NEAREST_NEWMV
-  1,    // SR_NEAR_NEWMV
-  3,    // SR_ZERO_NEWMV
-  1,    // SR_NEW_NEWMV
-#endif  // CONFIG_COMPOUND_SINGLEREF
-  0,    // NEAREST_NEARESTMV
-  0,    // NEAR_NEARMV
-  1,    // NEAREST_NEWMV
-  1,    // NEW_NEARESTMV
-  1,    // NEAR_NEWMV
-  1,    // NEW_NEARMV
-  3,    // ZERO_ZEROMV
-  1,    // NEW_NEWMV
-};
+// clamp_mv_ref
+#define MV_BORDER (16 << 3)  // Allow 16 pels in 1/8th pel units
 
-// There are 3^3 different combinations of 3 counts that can be either 0,1 or
-// 2. However the actual count can never be greater than 2 so the highest
-// counter we need is 18. 9 is an invalid counter that's never used.
-static const int counter_to_context[19] = {
-  BOTH_PREDICTED,        // 0
-  NEW_PLUS_NON_INTRA,    // 1
-  BOTH_NEW,              // 2
-  ZERO_PLUS_PREDICTED,   // 3
-  NEW_PLUS_NON_INTRA,    // 4
-  INVALID_CASE,          // 5
-  BOTH_ZERO,             // 6
-  INVALID_CASE,          // 7
-  INVALID_CASE,          // 8
-  INTRA_PLUS_NON_INTRA,  // 9
-  INTRA_PLUS_NON_INTRA,  // 10
-  INVALID_CASE,          // 11
-  INTRA_PLUS_NON_INTRA,  // 12
-  INVALID_CASE,          // 13
-  INVALID_CASE,          // 14
-  INVALID_CASE,          // 15
-  INVALID_CASE,          // 16
-  INVALID_CASE,          // 17
-  BOTH_INTRA             // 18
-};
+static INLINE int get_relative_dist(const AV1_COMMON *cm, int a, int b) {
+  if (!cm->seq_params.enable_order_hint) return 0;
 
-static const int idx_n_column_to_subblock[4][2] = {
-  { 1, 2 }, { 1, 3 }, { 3, 2 }, { 3, 3 }
-};
+  const int bits = cm->seq_params.order_hint_bits_minus_1 + 1;
 
-// clamp_mv_ref
-#if CONFIG_EXT_PARTITION
-#define MV_BORDER (16 << 3)  // Allow 16 pels in 1/8th pel units
-#else
-#define MV_BORDER (8 << 3)  // Allow 8 pels in 1/8th pel units
-#endif                      // CONFIG_EXT_PARTITION
+  assert(bits >= 1);
+  assert(a >= 0 && a < (1 << bits));
+  assert(b >= 0 && b < (1 << bits));
+
+  int diff = a - b;
+  int m = 1 << (bits - 1);
+  diff = (diff & (m - 1)) - (diff & m);
+  return diff;
+}
 
 static INLINE void clamp_mv_ref(MV *mv, int bw, int bh, const MACROBLOCKD *xd) {
   clamp_mv(mv, xd->mb_to_left_edge - bw * 8 - MV_BORDER,
@@ -125,19 +58,16 @@ static INLINE void clamp_mv_ref(MV *mv, int bw, int bh, const MACROBLOCKD *xd) {
 
 // This function returns either the appropriate sub block or block's mv
 // on whether the block_size < 8x8 and we have check_sub_blocks set.
-static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv,
-                                      int search_col, int block_idx) {
+static INLINE int_mv get_sub_block_mv(const MB_MODE_INFO *candidate,
+                                      int which_mv, int search_col) {
   (void)search_col;
-  (void)block_idx;
-  return candidate->mbmi.mv[which_mv];
+  return candidate->mv[which_mv];
 }
 
-static INLINE int_mv get_sub_block_pred_mv(const MODE_INFO *candidate,
-                                           int which_mv, int search_col,
-                                           int block_idx) {
+static INLINE int_mv get_sub_block_pred_mv(const MB_MODE_INFO *candidate,
+                                           int which_mv, int search_col) {
   (void)search_col;
-  (void)block_idx;
-  return candidate->mbmi.mv[which_mv];
+  return candidate->mv[which_mv];
 }
 
 // Performs mv sign inversion if indicated by the reference frame combination.
@@ -152,48 +82,11 @@ static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
   return mv;
 }
 
-#define CLIP_IN_ADD(mv, bw, bh, xd) clamp_mv_ref(mv, bw, bh, xd)
-
-// This macro is used to add a motion vector mv_ref list if it isn't
-// already in the list.  If it's the second motion vector it will also
-// skip all additional processing and jump to done!
-#define ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, bw, bh, xd, Done)      \
-  do {                                                                       \
-    (mv_ref_list)[(refmv_count)] = (mv);                                     \
-    CLIP_IN_ADD(&(mv_ref_list)[(refmv_count)].as_mv, (bw), (bh), (xd));      \
-    if (refmv_count && (mv_ref_list)[1].as_int != (mv_ref_list)[0].as_int) { \
-      (refmv_count) = 2;                                                     \
-      goto Done;                                                             \
-    }                                                                        \
-    (refmv_count) = 1;                                                       \
-  } while (0)
-
-// If either reference frame is different, not INTRA, and they
-// are different from each other scale and add the mv to our list.
-#define IF_DIFF_REF_FRAME_ADD_MV(mbmi, ref_frame, ref_sign_bias, refmv_count, \
-                                 mv_ref_list, bw, bh, xd, Done)               \
-  do {                                                                        \
-    if (is_inter_block(mbmi)) {                                               \
-      if ((mbmi)->ref_frame[0] != ref_frame)                                  \
-        ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias),        \
-                        refmv_count, mv_ref_list, bw, bh, xd, Done);          \
-      if (has_second_ref(mbmi) && (mbmi)->ref_frame[1] != ref_frame)          \
-        ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias),        \
-                        refmv_count, mv_ref_list, bw, bh, xd, Done);          \
-    }                                                                         \
-  } while (0)
-
 // Checks that the given mi_row, mi_col and search point
 // are inside the borders of the tile.
 static INLINE int is_inside(const TileInfo *const tile, int mi_col, int mi_row,
-                            int mi_rows, const AV1_COMMON *cm,
-                            const POSITION *mi_pos) {
-#if CONFIG_DEPENDENT_HORZTILES
-  const int dependent_horz_tile_flag = cm->dependent_horz_tiles;
-#else
+                            int mi_rows, const POSITION *mi_pos) {
   const int dependent_horz_tile_flag = 0;
-  (void)cm;
-#endif
   if (dependent_horz_tile_flag && !tile->tg_horz_boundary) {
     return !(mi_row + mi_pos->row < 0 ||
              mi_col + mi_pos->col < tile->mi_col_start ||
@@ -208,14 +101,8 @@ static INLINE int is_inside(const TileInfo *const tile, int mi_col, int mi_row,
 }
 
 static INLINE int find_valid_row_offset(const TileInfo *const tile, int mi_row,
-                                        int mi_rows, const AV1_COMMON *cm,
-                                        int row_offset) {
-#if CONFIG_DEPENDENT_HORZTILES
-  const int dependent_horz_tile_flag = cm->dependent_horz_tiles;
-#else
+                                        int mi_rows, int row_offset) {
   const int dependent_horz_tile_flag = 0;
-  (void)cm;
-#endif
   if (dependent_horz_tile_flag && !tile->tg_horz_boundary)
     return clamp(row_offset, -mi_row, mi_rows - mi_row - 1);
   else
@@ -229,87 +116,49 @@ static INLINE int find_valid_col_offset(const TileInfo *const tile, int mi_col,
                tile->mi_col_end - mi_col - 1);
 }
 
-static INLINE void lower_mv_precision(MV *mv, int allow_hp
-#if CONFIG_AMVR
-                                      ,
-                                      int is_integer
-#endif
-                                      ) {
-#if CONFIG_AMVR
+static INLINE void lower_mv_precision(MV *mv, int allow_hp, int is_integer) {
   if (is_integer) {
     integer_mv_precision(mv);
   } else {
-#endif
     if (!allow_hp) {
       if (mv->row & 1) mv->row += (mv->row > 0 ? -1 : 1);
       if (mv->col & 1) mv->col += (mv->col > 0 ? -1 : 1);
     }
-#if CONFIG_AMVR
   }
-#endif
-}
-
-static INLINE uint8_t av1_get_pred_diff_ctx(const int_mv pred_mv,
-                                            const int_mv this_mv) {
-  if (abs(this_mv.as_mv.row - pred_mv.as_mv.row) <= 4 &&
-      abs(this_mv.as_mv.col - pred_mv.as_mv.col) <= 4)
-    return 2;
-  else
-    return 1;
-}
-
-static INLINE int av1_nmv_ctx(const uint8_t ref_mv_count,
-                              const CANDIDATE_MV *ref_mv_stack, int ref,
-                              int ref_mv_idx) {
-  if (ref_mv_stack[ref_mv_idx].weight >= REF_CAT_LEVEL && ref_mv_count > 0)
-    return ref_mv_stack[ref_mv_idx].pred_diff[ref];
-
-  return 0;
 }
 
-#if CONFIG_EXT_COMP_REFS
-static INLINE int8_t av1_uni_comp_ref_idx(const MV_REFERENCE_FRAME *const rf) {
+static INLINE int8_t get_uni_comp_ref_idx(const MV_REFERENCE_FRAME *const rf) {
   // Single ref pred
   if (rf[1] <= INTRA_FRAME) return -1;
 
   // Bi-directional comp ref pred
   if ((rf[0] < BWDREF_FRAME) && (rf[1] >= BWDREF_FRAME)) return -1;
 
-  for (int8_t ref_idx = 0; ref_idx < UNIDIR_COMP_REFS; ++ref_idx) {
+  for (int8_t ref_idx = 0; ref_idx < TOTAL_UNIDIR_COMP_REFS; ++ref_idx) {
     if (rf[0] == comp_ref0(ref_idx) && rf[1] == comp_ref1(ref_idx))
       return ref_idx;
   }
   return -1;
 }
-#endif  // CONFIG_EXT_COMP_REFS
 
 static INLINE int8_t av1_ref_frame_type(const MV_REFERENCE_FRAME *const rf) {
   if (rf[1] > INTRA_FRAME) {
-#if CONFIG_EXT_COMP_REFS
-    int8_t uni_comp_ref_idx = av1_uni_comp_ref_idx(rf);
-#if !USE_UNI_COMP_REFS
-    // NOTE: uni-directional comp refs disabled
-    assert(uni_comp_ref_idx < 0);
-#endif  // !USE_UNI_COMP_REFS
+    const int8_t uni_comp_ref_idx = get_uni_comp_ref_idx(rf);
     if (uni_comp_ref_idx >= 0) {
-      assert((TOTAL_REFS_PER_FRAME + FWD_REFS * BWD_REFS + uni_comp_ref_idx) <
+      assert((REF_FRAMES + FWD_REFS * BWD_REFS + uni_comp_ref_idx) <
              MODE_CTX_REF_FRAMES);
-      return TOTAL_REFS_PER_FRAME + FWD_REFS * BWD_REFS + uni_comp_ref_idx;
+      return REF_FRAMES + FWD_REFS * BWD_REFS + uni_comp_ref_idx;
     } else {
-#endif  // CONFIG_EXT_COMP_REFS
-      return TOTAL_REFS_PER_FRAME + FWD_RF_OFFSET(rf[0]) +
+      return REF_FRAMES + FWD_RF_OFFSET(rf[0]) +
              BWD_RF_OFFSET(rf[1]) * FWD_REFS;
-#if CONFIG_EXT_COMP_REFS
     }
-#endif  // CONFIG_EXT_COMP_REFS
   }
 
   return rf[0];
 }
 
 // clang-format off
-static MV_REFERENCE_FRAME ref_frame_map[COMP_REFS][2] = {
-#if CONFIG_EXT_REFS
+static MV_REFERENCE_FRAME ref_frame_map[TOTAL_COMP_REFS][2] = {
   { LAST_FRAME, BWDREF_FRAME },  { LAST2_FRAME, BWDREF_FRAME },
   { LAST3_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, BWDREF_FRAME },
 
@@ -317,58 +166,51 @@ static MV_REFERENCE_FRAME ref_frame_map[COMP_REFS][2] = {
   { LAST3_FRAME, ALTREF2_FRAME }, { GOLDEN_FRAME, ALTREF2_FRAME },
 
   { LAST_FRAME, ALTREF_FRAME },  { LAST2_FRAME, ALTREF_FRAME },
-  { LAST3_FRAME, ALTREF_FRAME }, { GOLDEN_FRAME, ALTREF_FRAME }
-
-  // TODO(zoeliu): Temporarily disable uni-directional comp refs
-#if CONFIG_EXT_COMP_REFS
-  , { LAST_FRAME, LAST2_FRAME }, { LAST_FRAME, LAST3_FRAME },
-  { LAST_FRAME, GOLDEN_FRAME }, { BWDREF_FRAME, ALTREF_FRAME }
-  // TODO(zoeliu): When ALTREF2 is enabled, we may add:
-  //               {BWDREF_FRAME, ALTREF2_FRAME}
-#endif  // CONFIG_EXT_COMP_REFS
-#else  // !CONFIG_EXT_REFS
-  { LAST_FRAME, ALTREF_FRAME }, { GOLDEN_FRAME, ALTREF_FRAME }
-#endif  // CONFIG_EXT_REFS
+  { LAST3_FRAME, ALTREF_FRAME }, { GOLDEN_FRAME, ALTREF_FRAME },
+
+  { LAST_FRAME, LAST2_FRAME }, { LAST_FRAME, LAST3_FRAME },
+  { LAST_FRAME, GOLDEN_FRAME }, { BWDREF_FRAME, ALTREF_FRAME },
+
+  // NOTE: Following reference frame pairs are not supported to be explicitly
+  //       signalled, but they are possibly chosen by the use of skip_mode,
+  //       which may use the most recent one-sided reference frame pair.
+  { LAST2_FRAME, LAST3_FRAME }, { LAST2_FRAME, GOLDEN_FRAME },
+  { LAST3_FRAME, GOLDEN_FRAME }, {BWDREF_FRAME, ALTREF2_FRAME},
+  { ALTREF2_FRAME, ALTREF_FRAME }
 };
 // clang-format on
 
 static INLINE void av1_set_ref_frame(MV_REFERENCE_FRAME *rf,
                                      int8_t ref_frame_type) {
-  if (ref_frame_type >= TOTAL_REFS_PER_FRAME) {
-    rf[0] = ref_frame_map[ref_frame_type - TOTAL_REFS_PER_FRAME][0];
-    rf[1] = ref_frame_map[ref_frame_type - TOTAL_REFS_PER_FRAME][1];
+  if (ref_frame_type >= REF_FRAMES) {
+    rf[0] = ref_frame_map[ref_frame_type - REF_FRAMES][0];
+    rf[1] = ref_frame_map[ref_frame_type - REF_FRAMES][1];
   } else {
     rf[0] = ref_frame_type;
     rf[1] = NONE_FRAME;
-#if CONFIG_INTRABC
     assert(ref_frame_type > NONE_FRAME);
-#else
-    assert(ref_frame_type > INTRA_FRAME);
-#endif
-    assert(ref_frame_type < TOTAL_REFS_PER_FRAME);
   }
 }
 
+static uint16_t compound_mode_ctx_map[3][COMP_NEWMV_CTXS] = {
+  { 0, 1, 1, 1, 1 },
+  { 1, 2, 3, 4, 4 },
+  { 4, 4, 5, 6, 7 },
+};
+
 static INLINE int16_t av1_mode_context_analyzer(
-    const int16_t *const mode_context, const MV_REFERENCE_FRAME *const rf,
-    BLOCK_SIZE bsize, int block) {
-  int16_t mode_ctx = 0;
-  int8_t ref_frame_type = av1_ref_frame_type(rf);
-
-  if (block >= 0) {
-    mode_ctx = mode_context[rf[0]] & 0x00ff;
-#if !CONFIG_CB4X4
-    if (block > 0 && bsize < BLOCK_8X8 && bsize > BLOCK_4X4)
-      mode_ctx |= (1 << SKIP_NEARESTMV_SUB8X8_OFFSET);
-#else
-    (void)block;
-    (void)bsize;
-#endif
+    const int16_t *const mode_context, const MV_REFERENCE_FRAME *const rf) {
+  const int8_t ref_frame = av1_ref_frame_type(rf);
 
-    return mode_ctx;
-  }
+  if (rf[1] <= INTRA_FRAME) return mode_context[ref_frame];
+
+  const int16_t newmv_ctx = mode_context[ref_frame] & NEWMV_CTX_MASK;
+  const int16_t refmv_ctx =
+      (mode_context[ref_frame] >> REFMV_OFFSET) & REFMV_CTX_MASK;
 
-  return mode_context[ref_frame_type];
+  const int16_t comp_ctx = compound_mode_ctx_map[refmv_ctx >> 1][AOMMIN(
+      newmv_ctx, COMP_NEWMV_CTXS - 1)];
+  return comp_ctx;
 }
 
 static INLINE uint8_t av1_drl_ctx(const CANDIDATE_MV *ref_mv_stack,
@@ -379,92 +221,99 @@ static INLINE uint8_t av1_drl_ctx(const CANDIDATE_MV *ref_mv_stack,
 
   if (ref_mv_stack[ref_idx].weight >= REF_CAT_LEVEL &&
       ref_mv_stack[ref_idx + 1].weight < REF_CAT_LEVEL)
-    return 2;
+    return 1;
 
   if (ref_mv_stack[ref_idx].weight < REF_CAT_LEVEL &&
       ref_mv_stack[ref_idx + 1].weight < REF_CAT_LEVEL)
-    return 3;
+    return 2;
 
   return 0;
 }
 
-#if CONFIG_FRAME_MARKER
 void av1_setup_frame_buf_refs(AV1_COMMON *cm);
-#if CONFIG_FRAME_SIGN_BIAS
 void av1_setup_frame_sign_bias(AV1_COMMON *cm);
-#endif  // CONFIG_FRAME_SIGN_BIAS
-#if CONFIG_MFMV
+void av1_setup_skip_mode_allowed(AV1_COMMON *cm);
 void av1_setup_motion_field(AV1_COMMON *cm);
-#endif  // CONFIG_MFMV
-#endif  // CONFIG_FRAME_MARKER
+void av1_set_frame_refs(AV1_COMMON *const cm, int lst_map_idx, int gld_map_idx);
+
+static INLINE void av1_collect_neighbors_ref_counts(MACROBLOCKD *const xd) {
+  av1_zero(xd->neighbors_ref_counts);
+
+  uint8_t *const ref_counts = xd->neighbors_ref_counts;
+
+  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+  const int above_in_image = xd->up_available;
+  const int left_in_image = xd->left_available;
+
+  // Above neighbor
+  if (above_in_image && is_inter_block(above_mbmi)) {
+    ref_counts[above_mbmi->ref_frame[0]]++;
+    if (has_second_ref(above_mbmi)) {
+      ref_counts[above_mbmi->ref_frame[1]]++;
+    }
+  }
+
+  // Left neighbor
+  if (left_in_image && is_inter_block(left_mbmi)) {
+    ref_counts[left_mbmi->ref_frame[0]]++;
+    if (has_second_ref(left_mbmi)) {
+      ref_counts[left_mbmi->ref_frame[1]]++;
+    }
+  }
+}
 
-void av1_copy_frame_mvs(const AV1_COMMON *const cm, MODE_INFO *mi, int mi_row,
-                        int mi_col, int x_mis, int y_mis);
+void av1_copy_frame_mvs(const AV1_COMMON *const cm, MB_MODE_INFO *mi,
+                        int mi_row, int mi_col, int x_mis, int y_mis);
 
-typedef void (*find_mv_refs_sync)(void *const data, int mi_row);
 void av1_find_mv_refs(const AV1_COMMON *cm, const MACROBLOCKD *xd,
-                      MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
-                      uint8_t *ref_mv_count, CANDIDATE_MV *ref_mv_stack,
-                      int16_t *compound_mode_context, int_mv *mv_ref_list,
-                      int mi_row, int mi_col, find_mv_refs_sync sync,
-                      void *const data, int16_t *mode_context);
+                      MB_MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
+                      uint8_t ref_mv_count[MODE_CTX_REF_FRAMES],
+                      CANDIDATE_MV ref_mv_stack[][MAX_REF_MV_STACK_SIZE],
+                      int_mv mv_ref_list[][MAX_MV_REF_CANDIDATES],
+                      int_mv *global_mvs, int mi_row, int mi_col,
+                      int16_t *mode_context);
 
 // check a list of motion vectors by sad score using a number rows of pixels
 // above and a number cols of pixels in the left to select the one with best
 // score to use as ref motion vector
-#if CONFIG_AMVR
 void av1_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv,
                            int_mv *near_mv, int is_integer);
-#else
-void av1_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv,
-                           int_mv *near_mv);
-#endif
 
-void av1_append_sub8x8_mvs_for_idx(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                   int block, int ref, int mi_row, int mi_col,
-                                   CANDIDATE_MV *ref_mv_stack,
-                                   uint8_t *ref_mv_count, int_mv *mv_list,
-                                   int_mv *nearest_mv, int_mv *near_mv);
-
-// This function keeps a mode count for a given MB/SB
-void av1_update_mv_context(const AV1_COMMON *cm, const MACROBLOCKD *xd,
-                           MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
-                           int_mv *mv_ref_list, int block, int mi_row,
-                           int mi_col, int16_t *mode_context);
-
-#if CONFIG_WARPED_MOTION
-#if WARPED_MOTION_SORT_SAMPLES
-int sortSamples(int *pts_mv, MV *mv, int *pts, int *pts_inref, int len);
-int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
-                int *pts, int *pts_inref, int *pts_mv);
-#else
+int selectSamples(MV *mv, int *pts, int *pts_inref, int len, BLOCK_SIZE bsize);
 int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
                 int *pts, int *pts_inref);
-#endif  // WARPED_MOTION_SORT_SAMPLES
-#endif  // CONFIG_WARPED_MOTION
 
-#if CONFIG_INTRABC
-static INLINE void av1_find_ref_dv(int_mv *ref_dv, int mi_row, int mi_col) {
-  // TODO(aconverse@google.com): Handle tiles and such
+#define INTRABC_DELAY_PIXELS 256  //  Delay of 256 pixels
+#define INTRABC_DELAY_SB64 (INTRABC_DELAY_PIXELS / 64)
+#define USE_WAVE_FRONT 1  // Use only top left area of frame for reference.
+
+static INLINE void av1_find_ref_dv(int_mv *ref_dv, const TileInfo *const tile,
+                                   int mib_size, int mi_row, int mi_col) {
   (void)mi_col;
-  if (mi_row < MAX_MIB_SIZE) {
+  if (mi_row - mib_size < tile->mi_row_start) {
     ref_dv->as_mv.row = 0;
-    ref_dv->as_mv.col = -MI_SIZE * MAX_MIB_SIZE;
+    ref_dv->as_mv.col = -MI_SIZE * mib_size - INTRABC_DELAY_PIXELS;
   } else {
-    ref_dv->as_mv.row = -MI_SIZE * MAX_MIB_SIZE;
+    ref_dv->as_mv.row = -MI_SIZE * mib_size;
     ref_dv->as_mv.col = 0;
   }
+  ref_dv->as_mv.row *= 8;
+  ref_dv->as_mv.col *= 8;
 }
 
-static INLINE int is_dv_valid(const MV dv, const TileInfo *const tile,
-                              int mi_row, int mi_col, BLOCK_SIZE bsize) {
+static INLINE int av1_is_dv_valid(const MV dv, const AV1_COMMON *cm,
+                                  const MACROBLOCKD *xd, int mi_row, int mi_col,
+                                  BLOCK_SIZE bsize, int mib_size_log2) {
   const int bw = block_size_wide[bsize];
   const int bh = block_size_high[bsize];
   const int SCALE_PX_TO_MV = 8;
   // Disallow subpixel for now
   // SUBPEL_MASK is not the correct scale
-  if ((dv.row & (SCALE_PX_TO_MV - 1) || dv.col & (SCALE_PX_TO_MV - 1)))
+  if (((dv.row & (SCALE_PX_TO_MV - 1)) || (dv.col & (SCALE_PX_TO_MV - 1))))
     return 0;
+
+  const TileInfo *const tile = &xd->tile;
   // Is the source top-left inside the current tile?
   const int src_top_edge = mi_row * MI_SIZE * SCALE_PX_TO_MV + dv.row;
   const int tile_top_edge = tile->mi_row_start * MI_SIZE * SCALE_PX_TO_MV;
@@ -479,20 +328,44 @@ static INLINE int is_dv_valid(const MV dv, const TileInfo *const tile,
   const int src_right_edge = (mi_col * MI_SIZE + bw) * SCALE_PX_TO_MV + dv.col;
   const int tile_right_edge = tile->mi_col_end * MI_SIZE * SCALE_PX_TO_MV;
   if (src_right_edge > tile_right_edge) return 0;
-  // Is the bottom right within an already coded SB?
-  const int active_sb_top_edge =
-      (mi_row & ~MAX_MIB_MASK) * MI_SIZE * SCALE_PX_TO_MV;
-  const int active_sb_bottom_edge =
-      ((mi_row & ~MAX_MIB_MASK) + MAX_MIB_SIZE) * MI_SIZE * SCALE_PX_TO_MV;
-  const int active_sb_left_edge =
-      (mi_col & ~MAX_MIB_MASK) * MI_SIZE * SCALE_PX_TO_MV;
-  if (src_bottom_edge > active_sb_bottom_edge) return 0;
-  if (src_bottom_edge > active_sb_top_edge &&
-      src_right_edge > active_sb_left_edge)
+
+  // Special case for sub 8x8 chroma cases, to prevent referring to chroma
+  // pixels outside current tile.
+  for (int plane = 1; plane < av1_num_planes(cm); ++plane) {
+    const struct macroblockd_plane *const pd = &xd->plane[plane];
+    if (is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
+                            pd->subsampling_y)) {
+      if (bw < 8 && pd->subsampling_x)
+        if (src_left_edge < tile_left_edge + 4 * SCALE_PX_TO_MV) return 0;
+      if (bh < 8 && pd->subsampling_y)
+        if (src_top_edge < tile_top_edge + 4 * SCALE_PX_TO_MV) return 0;
+    }
+  }
+
+  // Is the bottom right within an already coded SB? Also consider additional
+  // constraints to facilitate HW decoder.
+  const int max_mib_size = 1 << mib_size_log2;
+  const int active_sb_row = mi_row >> mib_size_log2;
+  const int active_sb64_col = (mi_col * MI_SIZE) >> 6;
+  const int sb_size = max_mib_size * MI_SIZE;
+  const int src_sb_row = ((src_bottom_edge >> 3) - 1) / sb_size;
+  const int src_sb64_col = ((src_right_edge >> 3) - 1) >> 6;
+  const int total_sb64_per_row =
+      ((tile->mi_col_end - tile->mi_col_start - 1) >> 4) + 1;
+  const int active_sb64 = active_sb_row * total_sb64_per_row + active_sb64_col;
+  const int src_sb64 = src_sb_row * total_sb64_per_row + src_sb64_col;
+  if (src_sb64 >= active_sb64 - INTRABC_DELAY_SB64) return 0;
+
+#if USE_WAVE_FRONT
+  const int gradient = 1 + INTRABC_DELAY_SB64 + (sb_size > 64);
+  const int wf_offset = gradient * (active_sb_row - src_sb_row);
+  if (src_sb_row > active_sb_row ||
+      src_sb64_col >= active_sb64_col - INTRABC_DELAY_SB64 + wf_offset)
     return 0;
+#endif
+
   return 1;
 }
-#endif  // CONFIG_INTRABC
 
 #ifdef __cplusplus
 }  // extern "C"